cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

aesp8-ppc.pl (95741B)


      1#! /usr/bin/env perl
      2# SPDX-License-Identifier: GPL-2.0
      3
      4# This code is taken from CRYPTOGAMs[1] and is included here using the option
      5# in the license to distribute the code under the GPL. Therefore this program
      6# is free software; you can redistribute it and/or modify it under the terms of
      7# the GNU General Public License version 2 as published by the Free Software
      8# Foundation.
      9#
     10# [1] https://www.openssl.org/~appro/cryptogams/
     11
     12# Copyright (c) 2006-2017, CRYPTOGAMS by <appro@openssl.org>
     13# All rights reserved.
     14#
     15# Redistribution and use in source and binary forms, with or without
     16# modification, are permitted provided that the following conditions
     17# are met:
     18#
     19#       * Redistributions of source code must retain copyright notices,
     20#         this list of conditions and the following disclaimer.
     21#
     22#       * Redistributions in binary form must reproduce the above
     23#         copyright notice, this list of conditions and the following
     24#         disclaimer in the documentation and/or other materials
     25#         provided with the distribution.
     26#
     27#       * Neither the name of the CRYPTOGAMS nor the names of its
     28#         copyright holder and contributors may be used to endorse or
     29#         promote products derived from this software without specific
     30#         prior written permission.
     31#
     32# ALTERNATIVELY, provided that this notice is retained in full, this
     33# product may be distributed under the terms of the GNU General Public
     34# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
     35# those given above.
     36#
     37# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
     38# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     39# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     40# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     41# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     42# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     43# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     44# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     45# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     46# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     47# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     48
     49# ====================================================================
     50# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
     51# project. The module is, however, dual licensed under OpenSSL and
     52# CRYPTOGAMS licenses depending on where you obtain it. For further
     53# details see https://www.openssl.org/~appro/cryptogams/.
     54# ====================================================================
     55#
      56# This module implements support for AES instructions as per PowerISA
      57# specification version 2.07, first implemented by the POWER8 processor.
      58# The module is endian-agnostic in the sense that it supports both big-
      59# and little-endian cases. Data alignment in parallelizable modes is
      60# handled with VSX loads and stores, which implies the MSR.VSX flag being
      61# set. It should also be noted that the ISA specification doesn't
      62# prohibit alignment exceptions for these instructions on page
      63# boundaries. Initially alignment was handled in a pure AltiVec/VMX way
      64# [data is aligned programmatically, which in turn guarantees exception-
      65# free execution], but that turned out to hamper performance when
      66# vcipher instructions are interleaved. It's reckoned that occasional
      67# misalignment penalties at page boundaries are on average lower than
      68# the additional overhead of the pure AltiVec approach.
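        #
        # For orientation, a sketch of the unaligned-load idiom used
        # throughout (register roles as in Lset_encrypt_key below; this is
        # illustrative, not emitted code):
        #
        #	neg	r9,inp
        #	lvsr	key,0,r9		# permute from low address bits
        #	lvx	in0,0,inp		# aligned load covering 1st byte
        #	addi	inp,inp,15
        #	lvx	in1,0,inp		# aligned load covering last byte
        #	vperm	in0,in0,in1,key		# splice into the unaligned value
        #
        # On little-endian flavours the permute is first xored with a splat
        # of 0x0f to compensate for byte-reversed lanes.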
     69#
     70# May 2016
     71#
      72# Added an XTS subroutine; 9x improvement on little- and 12x on
      73# big-endian systems was measured.
     74#
     75######################################################################
     76# Current large-block performance in cycles per byte processed with
     77# 128-bit key (less is better).
     78#
     79#		CBC en-/decrypt	CTR	XTS
     80# POWER8[le]	3.96/0.72	0.74	1.1
     81# POWER8[be]	3.75/0.65	0.66	1.0
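        # For scale (assuming a nominal 3.5GHz clock; an assumption, not a
        # measured figure): 0.74 cycles/byte CTR on POWER8[le] is roughly
        # 4.7GB/s per core. The 3.96-vs-0.72 CBC split reflects that CBC
        # encryption is serial in the chaining value, while decryption
        # parallelizes across blocks.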
     82
     83$flavour = shift;
     84
     85if ($flavour =~ /64/) {
     86	$SIZE_T	=8;
     87	$LRSAVE	=2*$SIZE_T;
     88	$STU	="stdu";
     89	$POP	="ld";
     90	$PUSH	="std";
     91	$UCMP	="cmpld";
     92	$SHL	="sldi";
     93} elsif ($flavour =~ /32/) {
     94	$SIZE_T	=4;
     95	$LRSAVE	=$SIZE_T;
     96	$STU	="stwu";
     97	$POP	="lwz";
     98	$PUSH	="stw";
     99	$UCMP	="cmplw";
    100	$SHL	="slwi";
    101} else { die "nonsense $flavour"; }
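        # The flavour must contain "64" or "32", and a trailing "le" selects
        # little-endian handling. A typical invocation (illustrative, not
        # canonical) would be:
        #
        #	perl aesp8-ppc.pl linux-ppc64le aesp8-ppc.S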
    102
    103$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
    104
    105$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
    106( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
    107( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
    108die "can't locate ppc-xlate.pl";
    109
     110open STDOUT,"| $^X $xlate $flavour ".shift or die "can't call $xlate: $!";
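        # Everything printed to STDOUT from here on is piped through
        # ppc-xlate.pl on its way to the output file. As I understand the
        # perlasm conventions, that filter also resolves the "le?" and "?"
        # mnemonic prefixes used below: "le?" instructions are emitted only
        # for little-endian flavours, while "?" marks instructions that get
        # endianness-dependent treatment.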
    111
    112$FRAME=8*$SIZE_T;
    113$prefix="aes_p8";
    114
    115$sp="r1";
    116$vrsave="r12";
    117
    118#########################################################################
    119{{{	# Key setup procedures						#
    120my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
    121my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
    122my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));
    123
    124$code.=<<___;
    125.machine	"any"
    126
    127.text
    128
    129.align	7
    130rcon:
    131.long	0x01000000, 0x01000000, 0x01000000, 0x01000000	?rev
    132.long	0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000	?rev
    133.long	0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c	?rev
    134.long	0,0,0,0						?asis
    135Lconsts:
    136	mflr	r0
    137	bcl	20,31,\$+4
     138	mflr	$ptr	 # address of the instruction after the bcl
     139	addi	$ptr,$ptr,-0x48		# 0x48 = distance between . and rcon
    140	mtlr	r0
    141	blr
    142	.long	0
    143	.byte	0,12,0x14,0,0,0,0,0
    144.asciz	"AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
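        # Lconsts returns the address of rcon in a position-independent way:
        # bcl deposits the address of the following instruction in LR, and
        # the addi steps back 0x48 bytes (0x40 bytes of table plus the two
        # instructions ahead of the bcl return point) to land on rcon.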
    145
    146.globl	.${prefix}_set_encrypt_key
    147Lset_encrypt_key:
    148	mflr		r11
    149	$PUSH		r11,$LRSAVE($sp)
    150
    151	li		$ptr,-1
    152	${UCMP}i	$inp,0
    153	beq-		Lenc_key_abort		# if ($inp==0) return -1;
    154	${UCMP}i	$out,0
    155	beq-		Lenc_key_abort		# if ($out==0) return -1;
    156	li		$ptr,-2
    157	cmpwi		$bits,128
    158	blt-		Lenc_key_abort
    159	cmpwi		$bits,256
    160	bgt-		Lenc_key_abort
    161	andi.		r0,$bits,0x3f
    162	bne-		Lenc_key_abort
    163
    164	lis		r0,0xfff0
    165	mfspr		$vrsave,256
    166	mtspr		256,r0
    167
    168	bl		Lconsts
    169	mtlr		r11
    170
    171	neg		r9,$inp
    172	lvx		$in0,0,$inp
     173	addi		$inp,$inp,15		# 15 is not a typo
    174	lvsr		$key,0,r9		# borrow $key
    175	li		r8,0x20
    176	cmpwi		$bits,192
    177	lvx		$in1,0,$inp
    178	le?vspltisb	$mask,0x0f		# borrow $mask
    179	lvx		$rcon,0,$ptr
    180	le?vxor		$key,$key,$mask		# adjust for byte swap
    181	lvx		$mask,r8,$ptr
    182	addi		$ptr,$ptr,0x10
    183	vperm		$in0,$in0,$in1,$key	# align [and byte swap in LE]
    184	li		$cnt,8
    185	vxor		$zero,$zero,$zero
    186	mtctr		$cnt
    187
    188	?lvsr		$outperm,0,$out
    189	vspltisb	$outmask,-1
    190	lvx		$outhead,0,$out
    191	?vperm		$outmask,$zero,$outmask,$outperm
    192
    193	blt		Loop128
    194	addi		$inp,$inp,8
    195	beq		L192
    196	addi		$inp,$inp,8
    197	b		L256
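        # The cmpwi against 192 above steers all three key sizes: blt takes
        # the 128-bit path (10 rounds), beq the 192-bit path (12 rounds),
        # and the fall-through branch handles 256-bit keys (14 rounds).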
    198
    199.align	4
    200Loop128:
    201	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
    202	vsldoi		$tmp,$zero,$in0,12	# >>32
    203	 vperm		$outtail,$in0,$in0,$outperm	# rotate
    204	 vsel		$stage,$outhead,$outtail,$outmask
    205	 vmr		$outhead,$outtail
    206	vcipherlast	$key,$key,$rcon
    207	 stvx		$stage,0,$out
    208	 addi		$out,$out,16
    209
    210	vxor		$in0,$in0,$tmp
    211	vsldoi		$tmp,$zero,$tmp,12	# >>32
    212	vxor		$in0,$in0,$tmp
    213	vsldoi		$tmp,$zero,$tmp,12	# >>32
    214	vxor		$in0,$in0,$tmp
    215	 vadduwm	$rcon,$rcon,$rcon
    216	vxor		$in0,$in0,$key
    217	bdnz		Loop128
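        # Note the key-schedule trick above: the word being expanded is
        # splatted across all four lanes, so the ShiftRows step inside
        # vcipherlast has no effect and the instruction reduces to SubBytes
        # plus an xor with rcon - exactly the SubWord/Rcon step of AES key
        # expansion.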
    218
    219	lvx		$rcon,0,$ptr		# last two round keys
    220
    221	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
    222	vsldoi		$tmp,$zero,$in0,12	# >>32
    223	 vperm		$outtail,$in0,$in0,$outperm	# rotate
    224	 vsel		$stage,$outhead,$outtail,$outmask
    225	 vmr		$outhead,$outtail
    226	vcipherlast	$key,$key,$rcon
    227	 stvx		$stage,0,$out
    228	 addi		$out,$out,16
    229
    230	vxor		$in0,$in0,$tmp
    231	vsldoi		$tmp,$zero,$tmp,12	# >>32
    232	vxor		$in0,$in0,$tmp
    233	vsldoi		$tmp,$zero,$tmp,12	# >>32
    234	vxor		$in0,$in0,$tmp
    235	 vadduwm	$rcon,$rcon,$rcon
    236	vxor		$in0,$in0,$key
    237
    238	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
    239	vsldoi		$tmp,$zero,$in0,12	# >>32
    240	 vperm		$outtail,$in0,$in0,$outperm	# rotate
    241	 vsel		$stage,$outhead,$outtail,$outmask
    242	 vmr		$outhead,$outtail
    243	vcipherlast	$key,$key,$rcon
    244	 stvx		$stage,0,$out
    245	 addi		$out,$out,16
    246
    247	vxor		$in0,$in0,$tmp
    248	vsldoi		$tmp,$zero,$tmp,12	# >>32
    249	vxor		$in0,$in0,$tmp
    250	vsldoi		$tmp,$zero,$tmp,12	# >>32
    251	vxor		$in0,$in0,$tmp
    252	vxor		$in0,$in0,$key
    253	 vperm		$outtail,$in0,$in0,$outperm	# rotate
    254	 vsel		$stage,$outhead,$outtail,$outmask
    255	 vmr		$outhead,$outtail
    256	 stvx		$stage,0,$out
    257
     258	addi		$inp,$out,15		# 15 is not a typo
    259	addi		$out,$out,0x50
    260
    261	li		$rounds,10
    262	b		Ldone
    263
    264.align	4
    265L192:
    266	lvx		$tmp,0,$inp
    267	li		$cnt,4
    268	 vperm		$outtail,$in0,$in0,$outperm	# rotate
    269	 vsel		$stage,$outhead,$outtail,$outmask
    270	 vmr		$outhead,$outtail
    271	 stvx		$stage,0,$out
    272	 addi		$out,$out,16
    273	vperm		$in1,$in1,$tmp,$key	# align [and byte swap in LE]
    274	vspltisb	$key,8			# borrow $key
    275	mtctr		$cnt
    276	vsububm		$mask,$mask,$key	# adjust the mask
    277
    278Loop192:
     279	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
    280	vsldoi		$tmp,$zero,$in0,12	# >>32
    281	vcipherlast	$key,$key,$rcon
    282
    283	vxor		$in0,$in0,$tmp
    284	vsldoi		$tmp,$zero,$tmp,12	# >>32
    285	vxor		$in0,$in0,$tmp
    286	vsldoi		$tmp,$zero,$tmp,12	# >>32
    287	vxor		$in0,$in0,$tmp
    288
    289	 vsldoi		$stage,$zero,$in1,8
    290	vspltw		$tmp,$in0,3
    291	vxor		$tmp,$tmp,$in1
    292	vsldoi		$in1,$zero,$in1,12	# >>32
    293	 vadduwm	$rcon,$rcon,$rcon
    294	vxor		$in1,$in1,$tmp
    295	vxor		$in0,$in0,$key
    296	vxor		$in1,$in1,$key
    297	 vsldoi		$stage,$stage,$in0,8
    298
    299	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
    300	vsldoi		$tmp,$zero,$in0,12	# >>32
    301	 vperm		$outtail,$stage,$stage,$outperm	# rotate
    302	 vsel		$stage,$outhead,$outtail,$outmask
    303	 vmr		$outhead,$outtail
    304	vcipherlast	$key,$key,$rcon
    305	 stvx		$stage,0,$out
    306	 addi		$out,$out,16
    307
    308	 vsldoi		$stage,$in0,$in1,8
    309	vxor		$in0,$in0,$tmp
    310	vsldoi		$tmp,$zero,$tmp,12	# >>32
    311	 vperm		$outtail,$stage,$stage,$outperm	# rotate
    312	 vsel		$stage,$outhead,$outtail,$outmask
    313	 vmr		$outhead,$outtail
    314	vxor		$in0,$in0,$tmp
    315	vsldoi		$tmp,$zero,$tmp,12	# >>32
    316	vxor		$in0,$in0,$tmp
    317	 stvx		$stage,0,$out
    318	 addi		$out,$out,16
    319
    320	vspltw		$tmp,$in0,3
    321	vxor		$tmp,$tmp,$in1
    322	vsldoi		$in1,$zero,$in1,12	# >>32
    323	 vadduwm	$rcon,$rcon,$rcon
    324	vxor		$in1,$in1,$tmp
    325	vxor		$in0,$in0,$key
    326	vxor		$in1,$in1,$key
    327	 vperm		$outtail,$in0,$in0,$outperm	# rotate
    328	 vsel		$stage,$outhead,$outtail,$outmask
    329	 vmr		$outhead,$outtail
    330	 stvx		$stage,0,$out
     331	 addi		$inp,$out,15		# 15 is not a typo
    332	 addi		$out,$out,16
    333	bdnz		Loop192
    334
    335	li		$rounds,12
    336	addi		$out,$out,0x20
    337	b		Ldone
    338
    339.align	4
    340L256:
    341	lvx		$tmp,0,$inp
    342	li		$cnt,7
    343	li		$rounds,14
    344	 vperm		$outtail,$in0,$in0,$outperm	# rotate
    345	 vsel		$stage,$outhead,$outtail,$outmask
    346	 vmr		$outhead,$outtail
    347	 stvx		$stage,0,$out
    348	 addi		$out,$out,16
    349	vperm		$in1,$in1,$tmp,$key	# align [and byte swap in LE]
    350	mtctr		$cnt
    351
    352Loop256:
    353	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
    354	vsldoi		$tmp,$zero,$in0,12	# >>32
    355	 vperm		$outtail,$in1,$in1,$outperm	# rotate
    356	 vsel		$stage,$outhead,$outtail,$outmask
    357	 vmr		$outhead,$outtail
    358	vcipherlast	$key,$key,$rcon
    359	 stvx		$stage,0,$out
    360	 addi		$out,$out,16
    361
    362	vxor		$in0,$in0,$tmp
    363	vsldoi		$tmp,$zero,$tmp,12	# >>32
    364	vxor		$in0,$in0,$tmp
    365	vsldoi		$tmp,$zero,$tmp,12	# >>32
    366	vxor		$in0,$in0,$tmp
    367	 vadduwm	$rcon,$rcon,$rcon
    368	vxor		$in0,$in0,$key
    369	 vperm		$outtail,$in0,$in0,$outperm	# rotate
    370	 vsel		$stage,$outhead,$outtail,$outmask
    371	 vmr		$outhead,$outtail
    372	 stvx		$stage,0,$out
     373	 addi		$inp,$out,15		# 15 is not a typo
    374	 addi		$out,$out,16
    375	bdz		Ldone
    376
    377	vspltw		$key,$in0,3		# just splat
    378	vsldoi		$tmp,$zero,$in1,12	# >>32
    379	vsbox		$key,$key
    380
    381	vxor		$in1,$in1,$tmp
    382	vsldoi		$tmp,$zero,$tmp,12	# >>32
    383	vxor		$in1,$in1,$tmp
    384	vsldoi		$tmp,$zero,$tmp,12	# >>32
    385	vxor		$in1,$in1,$tmp
    386
    387	vxor		$in1,$in1,$key
    388	b		Loop256
    389
    390.align	4
    391Ldone:
    392	lvx		$in1,0,$inp		# redundant in aligned case
    393	vsel		$in1,$outhead,$in1,$outmask
    394	stvx		$in1,0,$inp
    395	li		$ptr,0
    396	mtspr		256,$vrsave
    397	stw		$rounds,0($out)
    398
    399Lenc_key_abort:
    400	mr		r3,$ptr
    401	blr
    402	.long		0
    403	.byte		0,12,0x14,1,0,0,3,0
    404	.long		0
    405.size	.${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key
    406
    407.globl	.${prefix}_set_decrypt_key
    408	$STU		$sp,-$FRAME($sp)
    409	mflr		r10
    410	$PUSH		r10,$FRAME+$LRSAVE($sp)
    411	bl		Lset_encrypt_key
    412	mtlr		r10
    413
    414	cmpwi		r3,0
    415	bne-		Ldec_key_abort
    416
    417	slwi		$cnt,$rounds,4
    418	subi		$inp,$out,240		# first round key
    419	srwi		$rounds,$rounds,1
    420	add		$out,$inp,$cnt		# last round key
    421	mtctr		$rounds
    422
    423Ldeckey:
    424	lwz		r0, 0($inp)
    425	lwz		r6, 4($inp)
    426	lwz		r7, 8($inp)
    427	lwz		r8, 12($inp)
    428	addi		$inp,$inp,16
    429	lwz		r9, 0($out)
    430	lwz		r10,4($out)
    431	lwz		r11,8($out)
    432	lwz		r12,12($out)
    433	stw		r0, 0($out)
    434	stw		r6, 4($out)
    435	stw		r7, 8($out)
    436	stw		r8, 12($out)
    437	subi		$out,$out,16
    438	stw		r9, -16($inp)
    439	stw		r10,-12($inp)
    440	stw		r11,-8($inp)
    441	stw		r12,-4($inp)
    442	bdnz		Ldeckey
    443
    444	xor		r3,r3,r3		# return value
    445Ldec_key_abort:
    446	addi		$sp,$sp,$FRAME
    447	blr
    448	.long		0
    449	.byte		0,12,4,1,0x80,0,3,0
    450	.long		0
    451.size	.${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
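        # set_decrypt_key runs the encryption key schedule and then swaps
        # the round keys end-for-end in the Ldeckey loop above; the
        # vncipher-based decrypt paths consume the same key material, just
        # in reverse order.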
    452___
    453}}}
    454#########################################################################
    455{{{	# Single block en- and decrypt procedures			#
    456sub gen_block () {
    457my $dir = shift;
    458my $n   = $dir eq "de" ? "n" : "";
    459my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));
    460
    461$code.=<<___;
    462.globl	.${prefix}_${dir}crypt
    463	lwz		$rounds,240($key)
    464	lis		r0,0xfc00
    465	mfspr		$vrsave,256
     466	li		$idx,15			# 15 is not a typo
    467	mtspr		256,r0
    468
    469	lvx		v0,0,$inp
    470	neg		r11,$out
    471	lvx		v1,$idx,$inp
    472	lvsl		v2,0,$inp		# inpperm
    473	le?vspltisb	v4,0x0f
    474	?lvsl		v3,0,r11		# outperm
    475	le?vxor		v2,v2,v4
    476	li		$idx,16
    477	vperm		v0,v0,v1,v2		# align [and byte swap in LE]
    478	lvx		v1,0,$key
    479	?lvsl		v5,0,$key		# keyperm
    480	srwi		$rounds,$rounds,1
    481	lvx		v2,$idx,$key
    482	addi		$idx,$idx,16
    483	subi		$rounds,$rounds,1
    484	?vperm		v1,v1,v2,v5		# align round key
    485
    486	vxor		v0,v0,v1
    487	lvx		v1,$idx,$key
    488	addi		$idx,$idx,16
    489	mtctr		$rounds
    490
    491Loop_${dir}c:
    492	?vperm		v2,v2,v1,v5
    493	v${n}cipher	v0,v0,v2
    494	lvx		v2,$idx,$key
    495	addi		$idx,$idx,16
    496	?vperm		v1,v1,v2,v5
    497	v${n}cipher	v0,v0,v1
    498	lvx		v1,$idx,$key
    499	addi		$idx,$idx,16
    500	bdnz		Loop_${dir}c
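        # Each pass through the loop above applies two rounds, which is why
        # the round count was halved and then decremented once before mtctr;
        # the remaining two rounds follow the loop and finish with the
        # cipherlast form using the last round key.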
    501
    502	?vperm		v2,v2,v1,v5
    503	v${n}cipher	v0,v0,v2
    504	lvx		v2,$idx,$key
    505	?vperm		v1,v1,v2,v5
    506	v${n}cipherlast	v0,v0,v1
    507
    508	vspltisb	v2,-1
    509	vxor		v1,v1,v1
     510	li		$idx,15			# 15 is not a typo
    511	?vperm		v2,v1,v2,v3		# outmask
    512	le?vxor		v3,v3,v4
    513	lvx		v1,0,$out		# outhead
    514	vperm		v0,v0,v0,v3		# rotate [and byte swap in LE]
    515	vsel		v1,v1,v0,v2
    516	lvx		v4,$idx,$out
    517	stvx		v1,0,$out
    518	vsel		v0,v0,v4,v2
    519	stvx		v0,$idx,$out
    520
    521	mtspr		256,$vrsave
    522	blr
    523	.long		0
    524	.byte		0,12,0x14,0,0,0,3,0
    525	.long		0
    526.size	.${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
    527___
    528}
    529&gen_block("en");
    530&gen_block("de");
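        # gen_block emits two near-identical routines, aes_p8_encrypt and
        # aes_p8_decrypt, differing only in whether v${n}cipher[last]
        # expands to vcipher[last] or vncipher[last].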
    531}}}
    532#########################################################################
    533{{{	# CBC en- and decrypt procedures				#
    534my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
    535my ($rndkey0,$rndkey1,$inout,$tmp)=		map("v$_",(0..3));
    536my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
    537						map("v$_",(4..10));
    538$code.=<<___;
    539.globl	.${prefix}_cbc_encrypt
    540	${UCMP}i	$len,16
    541	bltlr-
    542
    543	cmpwi		$enc,0			# test direction
    544	lis		r0,0xffe0
    545	mfspr		$vrsave,256
    546	mtspr		256,r0
    547
    548	li		$idx,15
    549	vxor		$rndkey0,$rndkey0,$rndkey0
    550	le?vspltisb	$tmp,0x0f
    551
    552	lvx		$ivec,0,$ivp		# load [unaligned] iv
    553	lvsl		$inpperm,0,$ivp
    554	lvx		$inptail,$idx,$ivp
    555	le?vxor		$inpperm,$inpperm,$tmp
    556	vperm		$ivec,$ivec,$inptail,$inpperm
    557
    558	neg		r11,$inp
    559	?lvsl		$keyperm,0,$key		# prepare for unaligned key
    560	lwz		$rounds,240($key)
    561
    562	lvsr		$inpperm,0,r11		# prepare for unaligned load
    563	lvx		$inptail,0,$inp
     564	addi		$inp,$inp,15		# 15 is not a typo
    565	le?vxor		$inpperm,$inpperm,$tmp
    566
    567	?lvsr		$outperm,0,$out		# prepare for unaligned store
    568	vspltisb	$outmask,-1
    569	lvx		$outhead,0,$out
    570	?vperm		$outmask,$rndkey0,$outmask,$outperm
    571	le?vxor		$outperm,$outperm,$tmp
    572
    573	srwi		$rounds,$rounds,1
    574	li		$idx,16
    575	subi		$rounds,$rounds,1
    576	beq		Lcbc_dec
    577
    578Lcbc_enc:
    579	vmr		$inout,$inptail
    580	lvx		$inptail,0,$inp
    581	addi		$inp,$inp,16
    582	mtctr		$rounds
    583	subi		$len,$len,16		# len-=16
    584
    585	lvx		$rndkey0,0,$key
    586	 vperm		$inout,$inout,$inptail,$inpperm
    587	lvx		$rndkey1,$idx,$key
    588	addi		$idx,$idx,16
    589	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
    590	vxor		$inout,$inout,$rndkey0
    591	lvx		$rndkey0,$idx,$key
    592	addi		$idx,$idx,16
    593	vxor		$inout,$inout,$ivec
    594
    595Loop_cbc_enc:
    596	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
    597	vcipher		$inout,$inout,$rndkey1
    598	lvx		$rndkey1,$idx,$key
    599	addi		$idx,$idx,16
    600	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
    601	vcipher		$inout,$inout,$rndkey0
    602	lvx		$rndkey0,$idx,$key
    603	addi		$idx,$idx,16
    604	bdnz		Loop_cbc_enc
    605
    606	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
    607	vcipher		$inout,$inout,$rndkey1
    608	lvx		$rndkey1,$idx,$key
    609	li		$idx,16
    610	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
    611	vcipherlast	$ivec,$inout,$rndkey0
    612	${UCMP}i	$len,16
    613
    614	vperm		$tmp,$ivec,$ivec,$outperm
    615	vsel		$inout,$outhead,$tmp,$outmask
    616	vmr		$outhead,$tmp
    617	stvx		$inout,0,$out
    618	addi		$out,$out,16
    619	bge		Lcbc_enc
    620
    621	b		Lcbc_done
    622
    623.align	4
    624Lcbc_dec:
    625	${UCMP}i	$len,128
    626	bge		_aesp8_cbc_decrypt8x
    627	vmr		$tmp,$inptail
    628	lvx		$inptail,0,$inp
    629	addi		$inp,$inp,16
    630	mtctr		$rounds
    631	subi		$len,$len,16		# len-=16
    632
    633	lvx		$rndkey0,0,$key
    634	 vperm		$tmp,$tmp,$inptail,$inpperm
    635	lvx		$rndkey1,$idx,$key
    636	addi		$idx,$idx,16
    637	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
    638	vxor		$inout,$tmp,$rndkey0
    639	lvx		$rndkey0,$idx,$key
    640	addi		$idx,$idx,16
    641
    642Loop_cbc_dec:
    643	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
    644	vncipher	$inout,$inout,$rndkey1
    645	lvx		$rndkey1,$idx,$key
    646	addi		$idx,$idx,16
    647	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
    648	vncipher	$inout,$inout,$rndkey0
    649	lvx		$rndkey0,$idx,$key
    650	addi		$idx,$idx,16
    651	bdnz		Loop_cbc_dec
    652
    653	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
    654	vncipher	$inout,$inout,$rndkey1
    655	lvx		$rndkey1,$idx,$key
    656	li		$idx,16
    657	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
    658	vncipherlast	$inout,$inout,$rndkey0
    659	${UCMP}i	$len,16
    660
    661	vxor		$inout,$inout,$ivec
    662	vmr		$ivec,$tmp
    663	vperm		$tmp,$inout,$inout,$outperm
    664	vsel		$inout,$outhead,$tmp,$outmask
    665	vmr		$outhead,$tmp
    666	stvx		$inout,0,$out
    667	addi		$out,$out,16
    668	bge		Lcbc_dec
    669
    670Lcbc_done:
    671	addi		$out,$out,-1
    672	lvx		$inout,0,$out		# redundant in aligned case
    673	vsel		$inout,$outhead,$inout,$outmask
    674	stvx		$inout,0,$out
    675
    676	neg		$enc,$ivp		# write [unaligned] iv
     677	li		$idx,15			# 15 is not a typo
    678	vxor		$rndkey0,$rndkey0,$rndkey0
    679	vspltisb	$outmask,-1
    680	le?vspltisb	$tmp,0x0f
    681	?lvsl		$outperm,0,$enc
    682	?vperm		$outmask,$rndkey0,$outmask,$outperm
    683	le?vxor		$outperm,$outperm,$tmp
    684	lvx		$outhead,0,$ivp
    685	vperm		$ivec,$ivec,$ivec,$outperm
    686	vsel		$inout,$outhead,$ivec,$outmask
    687	lvx		$inptail,$idx,$ivp
    688	stvx		$inout,0,$ivp
    689	vsel		$inout,$ivec,$inptail,$outmask
    690	stvx		$inout,$idx,$ivp
    691
    692	mtspr		256,$vrsave
    693	blr
    694	.long		0
    695	.byte		0,12,0x14,0,0,0,6,0
    696	.long		0
    697___
    698#########################################################################
    699{{	# Optimized CBC decrypt procedure				#
    700my $key_="r11";
    701my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
    702my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
    703my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
     704my $rndkey0="v23";	# v24-v25 rotating buffer for the first round keys
    705			# v26-v31 last 6 round keys
    706my ($tmp,$keyperm)=($in3,$in4);	# aliases with "caller", redundant assignment
    707
    708$code.=<<___;
    709.align	5
    710_aesp8_cbc_decrypt8x:
    711	$STU		$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
    712	li		r10,`$FRAME+8*16+15`
    713	li		r11,`$FRAME+8*16+31`
    714	stvx		v20,r10,$sp		# ABI says so
    715	addi		r10,r10,32
    716	stvx		v21,r11,$sp
    717	addi		r11,r11,32
    718	stvx		v22,r10,$sp
    719	addi		r10,r10,32
    720	stvx		v23,r11,$sp
    721	addi		r11,r11,32
    722	stvx		v24,r10,$sp
    723	addi		r10,r10,32
    724	stvx		v25,r11,$sp
    725	addi		r11,r11,32
    726	stvx		v26,r10,$sp
    727	addi		r10,r10,32
    728	stvx		v27,r11,$sp
    729	addi		r11,r11,32
    730	stvx		v28,r10,$sp
    731	addi		r10,r10,32
    732	stvx		v29,r11,$sp
    733	addi		r11,r11,32
    734	stvx		v30,r10,$sp
    735	stvx		v31,r11,$sp
    736	li		r0,-1
    737	stw		$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
    738	li		$x10,0x10
    739	$PUSH		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
    740	li		$x20,0x20
    741	$PUSH		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
    742	li		$x30,0x30
    743	$PUSH		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
    744	li		$x40,0x40
    745	$PUSH		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
    746	li		$x50,0x50
    747	$PUSH		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
    748	li		$x60,0x60
    749	$PUSH		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
    750	li		$x70,0x70
    751	mtspr		256,r0
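        # v20-v31 are nonvolatile vector registers ("ABI says so" above),
        # hence the spill; writing -1 to vrsave (SPR 256) then claims every
        # vector register for the duration of this routine.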
    752
    753	subi		$rounds,$rounds,3	# -4 in total
    754	subi		$len,$len,128		# bias
    755
    756	lvx		$rndkey0,$x00,$key	# load key schedule
    757	lvx		v30,$x10,$key
    758	addi		$key,$key,0x20
    759	lvx		v31,$x00,$key
    760	?vperm		$rndkey0,$rndkey0,v30,$keyperm
    761	addi		$key_,$sp,$FRAME+15
    762	mtctr		$rounds
    763
    764Load_cbc_dec_key:
    765	?vperm		v24,v30,v31,$keyperm
    766	lvx		v30,$x10,$key
    767	addi		$key,$key,0x20
    768	stvx		v24,$x00,$key_		# off-load round[1]
    769	?vperm		v25,v31,v30,$keyperm
    770	lvx		v31,$x00,$key
    771	stvx		v25,$x10,$key_		# off-load round[2]
    772	addi		$key_,$key_,0x20
    773	bdnz		Load_cbc_dec_key
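        # The loop above runs the caller's (possibly unaligned) key schedule
        # through vperm once and parks the aligned round keys in the stack
        # frame, so the main loop below can reload them with plain lvx and
        # no per-iteration permutes.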
    774
    775	lvx		v26,$x10,$key
    776	?vperm		v24,v30,v31,$keyperm
    777	lvx		v27,$x20,$key
    778	stvx		v24,$x00,$key_		# off-load round[3]
    779	?vperm		v25,v31,v26,$keyperm
    780	lvx		v28,$x30,$key
    781	stvx		v25,$x10,$key_		# off-load round[4]
    782	addi		$key_,$sp,$FRAME+15	# rewind $key_
    783	?vperm		v26,v26,v27,$keyperm
    784	lvx		v29,$x40,$key
    785	?vperm		v27,v27,v28,$keyperm
    786	lvx		v30,$x50,$key
    787	?vperm		v28,v28,v29,$keyperm
    788	lvx		v31,$x60,$key
    789	?vperm		v29,v29,v30,$keyperm
    790	lvx		$out0,$x70,$key		# borrow $out0
    791	?vperm		v30,v30,v31,$keyperm
    792	lvx		v24,$x00,$key_		# pre-load round[1]
    793	?vperm		v31,v31,$out0,$keyperm
    794	lvx		v25,$x10,$key_		# pre-load round[2]
    795
    796	#lvx		$inptail,0,$inp		# "caller" already did this
     797	#addi		$inp,$inp,15		# 15 is not a typo
    798	subi		$inp,$inp,15		# undo "caller"
    799
    800	 le?li		$idx,8
    801	lvx_u		$in0,$x00,$inp		# load first 8 "words"
    802	 le?lvsl	$inpperm,0,$idx
    803	 le?vspltisb	$tmp,0x0f
    804	lvx_u		$in1,$x10,$inp
    805	 le?vxor	$inpperm,$inpperm,$tmp	# transform for lvx_u/stvx_u
    806	lvx_u		$in2,$x20,$inp
    807	 le?vperm	$in0,$in0,$in0,$inpperm
    808	lvx_u		$in3,$x30,$inp
    809	 le?vperm	$in1,$in1,$in1,$inpperm
    810	lvx_u		$in4,$x40,$inp
    811	 le?vperm	$in2,$in2,$in2,$inpperm
    812	vxor		$out0,$in0,$rndkey0
    813	lvx_u		$in5,$x50,$inp
    814	 le?vperm	$in3,$in3,$in3,$inpperm
    815	vxor		$out1,$in1,$rndkey0
    816	lvx_u		$in6,$x60,$inp
    817	 le?vperm	$in4,$in4,$in4,$inpperm
    818	vxor		$out2,$in2,$rndkey0
    819	lvx_u		$in7,$x70,$inp
    820	addi		$inp,$inp,0x80
    821	 le?vperm	$in5,$in5,$in5,$inpperm
    822	vxor		$out3,$in3,$rndkey0
    823	 le?vperm	$in6,$in6,$in6,$inpperm
    824	vxor		$out4,$in4,$rndkey0
    825	 le?vperm	$in7,$in7,$in7,$inpperm
    826	vxor		$out5,$in5,$rndkey0
    827	vxor		$out6,$in6,$rndkey0
    828	vxor		$out7,$in7,$rndkey0
    829
    830	mtctr		$rounds
    831	b		Loop_cbc_dec8x
    832.align	5
    833Loop_cbc_dec8x:
    834	vncipher	$out0,$out0,v24
    835	vncipher	$out1,$out1,v24
    836	vncipher	$out2,$out2,v24
    837	vncipher	$out3,$out3,v24
    838	vncipher	$out4,$out4,v24
    839	vncipher	$out5,$out5,v24
    840	vncipher	$out6,$out6,v24
    841	vncipher	$out7,$out7,v24
    842	lvx		v24,$x20,$key_		# round[3]
    843	addi		$key_,$key_,0x20
    844
    845	vncipher	$out0,$out0,v25
    846	vncipher	$out1,$out1,v25
    847	vncipher	$out2,$out2,v25
    848	vncipher	$out3,$out3,v25
    849	vncipher	$out4,$out4,v25
    850	vncipher	$out5,$out5,v25
    851	vncipher	$out6,$out6,v25
    852	vncipher	$out7,$out7,v25
    853	lvx		v25,$x10,$key_		# round[4]
    854	bdnz		Loop_cbc_dec8x
    855
    856	subic		$len,$len,128		# $len-=128
    857	vncipher	$out0,$out0,v24
    858	vncipher	$out1,$out1,v24
    859	vncipher	$out2,$out2,v24
    860	vncipher	$out3,$out3,v24
    861	vncipher	$out4,$out4,v24
    862	vncipher	$out5,$out5,v24
    863	vncipher	$out6,$out6,v24
    864	vncipher	$out7,$out7,v24
    865
    866	subfe.		r0,r0,r0		# borrow?-1:0
    867	vncipher	$out0,$out0,v25
    868	vncipher	$out1,$out1,v25
    869	vncipher	$out2,$out2,v25
    870	vncipher	$out3,$out3,v25
    871	vncipher	$out4,$out4,v25
    872	vncipher	$out5,$out5,v25
    873	vncipher	$out6,$out6,v25
    874	vncipher	$out7,$out7,v25
    875
    876	and		r0,r0,$len
    877	vncipher	$out0,$out0,v26
    878	vncipher	$out1,$out1,v26
    879	vncipher	$out2,$out2,v26
    880	vncipher	$out3,$out3,v26
    881	vncipher	$out4,$out4,v26
    882	vncipher	$out5,$out5,v26
    883	vncipher	$out6,$out6,v26
    884	vncipher	$out7,$out7,v26
    885
     886	add		$inp,$inp,r0		# $inp is adjusted in such a
     887						# way that at exit from the
     888						# loop inX-in7 are loaded
     889						# with the last "words"
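        # (Branchless tail handling: subic sets CA only if subtracting 128
        # did not borrow, subfe. r0,r0,r0 therefore yields -1 on borrow and
        # 0 otherwise, and anding that with the now-negative remainder gives
        # a rewind amount such that the next 128-byte load window ends
        # exactly at the end of the input.)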
    890	vncipher	$out0,$out0,v27
    891	vncipher	$out1,$out1,v27
    892	vncipher	$out2,$out2,v27
    893	vncipher	$out3,$out3,v27
    894	vncipher	$out4,$out4,v27
    895	vncipher	$out5,$out5,v27
    896	vncipher	$out6,$out6,v27
    897	vncipher	$out7,$out7,v27
    898
    899	addi		$key_,$sp,$FRAME+15	# rewind $key_
    900	vncipher	$out0,$out0,v28
    901	vncipher	$out1,$out1,v28
    902	vncipher	$out2,$out2,v28
    903	vncipher	$out3,$out3,v28
    904	vncipher	$out4,$out4,v28
    905	vncipher	$out5,$out5,v28
    906	vncipher	$out6,$out6,v28
    907	vncipher	$out7,$out7,v28
    908	lvx		v24,$x00,$key_		# re-pre-load round[1]
    909
    910	vncipher	$out0,$out0,v29
    911	vncipher	$out1,$out1,v29
    912	vncipher	$out2,$out2,v29
    913	vncipher	$out3,$out3,v29
    914	vncipher	$out4,$out4,v29
    915	vncipher	$out5,$out5,v29
    916	vncipher	$out6,$out6,v29
    917	vncipher	$out7,$out7,v29
    918	lvx		v25,$x10,$key_		# re-pre-load round[2]
    919
    920	vncipher	$out0,$out0,v30
    921	 vxor		$ivec,$ivec,v31		# xor with last round key
    922	vncipher	$out1,$out1,v30
    923	 vxor		$in0,$in0,v31
    924	vncipher	$out2,$out2,v30
    925	 vxor		$in1,$in1,v31
    926	vncipher	$out3,$out3,v30
    927	 vxor		$in2,$in2,v31
    928	vncipher	$out4,$out4,v30
    929	 vxor		$in3,$in3,v31
    930	vncipher	$out5,$out5,v30
    931	 vxor		$in4,$in4,v31
    932	vncipher	$out6,$out6,v30
    933	 vxor		$in5,$in5,v31
    934	vncipher	$out7,$out7,v30
    935	 vxor		$in6,$in6,v31
    936
    937	vncipherlast	$out0,$out0,$ivec
    938	vncipherlast	$out1,$out1,$in0
    939	 lvx_u		$in0,$x00,$inp		# load next input block
    940	vncipherlast	$out2,$out2,$in1
    941	 lvx_u		$in1,$x10,$inp
    942	vncipherlast	$out3,$out3,$in2
    943	 le?vperm	$in0,$in0,$in0,$inpperm
    944	 lvx_u		$in2,$x20,$inp
    945	vncipherlast	$out4,$out4,$in3
    946	 le?vperm	$in1,$in1,$in1,$inpperm
    947	 lvx_u		$in3,$x30,$inp
    948	vncipherlast	$out5,$out5,$in4
    949	 le?vperm	$in2,$in2,$in2,$inpperm
    950	 lvx_u		$in4,$x40,$inp
    951	vncipherlast	$out6,$out6,$in5
    952	 le?vperm	$in3,$in3,$in3,$inpperm
    953	 lvx_u		$in5,$x50,$inp
    954	vncipherlast	$out7,$out7,$in6
    955	 le?vperm	$in4,$in4,$in4,$inpperm
    956	 lvx_u		$in6,$x60,$inp
    957	vmr		$ivec,$in7
    958	 le?vperm	$in5,$in5,$in5,$inpperm
    959	 lvx_u		$in7,$x70,$inp
    960	 addi		$inp,$inp,0x80
    961
    962	le?vperm	$out0,$out0,$out0,$inpperm
    963	le?vperm	$out1,$out1,$out1,$inpperm
    964	stvx_u		$out0,$x00,$out
    965	 le?vperm	$in6,$in6,$in6,$inpperm
    966	 vxor		$out0,$in0,$rndkey0
    967	le?vperm	$out2,$out2,$out2,$inpperm
    968	stvx_u		$out1,$x10,$out
    969	 le?vperm	$in7,$in7,$in7,$inpperm
    970	 vxor		$out1,$in1,$rndkey0
    971	le?vperm	$out3,$out3,$out3,$inpperm
    972	stvx_u		$out2,$x20,$out
    973	 vxor		$out2,$in2,$rndkey0
    974	le?vperm	$out4,$out4,$out4,$inpperm
    975	stvx_u		$out3,$x30,$out
    976	 vxor		$out3,$in3,$rndkey0
    977	le?vperm	$out5,$out5,$out5,$inpperm
    978	stvx_u		$out4,$x40,$out
    979	 vxor		$out4,$in4,$rndkey0
    980	le?vperm	$out6,$out6,$out6,$inpperm
    981	stvx_u		$out5,$x50,$out
    982	 vxor		$out5,$in5,$rndkey0
    983	le?vperm	$out7,$out7,$out7,$inpperm
    984	stvx_u		$out6,$x60,$out
    985	 vxor		$out6,$in6,$rndkey0
    986	stvx_u		$out7,$x70,$out
    987	addi		$out,$out,0x80
    988	 vxor		$out7,$in7,$rndkey0
    989
    990	mtctr		$rounds
    991	beq		Loop_cbc_dec8x		# did $len-=128 borrow?
    992
    993	addic.		$len,$len,128
    994	beq		Lcbc_dec8x_done
    995	nop
    996	nop
    997
    998Loop_cbc_dec8x_tail:				# up to 7 "words" tail...
    999	vncipher	$out1,$out1,v24
   1000	vncipher	$out2,$out2,v24
   1001	vncipher	$out3,$out3,v24
   1002	vncipher	$out4,$out4,v24
   1003	vncipher	$out5,$out5,v24
   1004	vncipher	$out6,$out6,v24
   1005	vncipher	$out7,$out7,v24
   1006	lvx		v24,$x20,$key_		# round[3]
   1007	addi		$key_,$key_,0x20
   1008
   1009	vncipher	$out1,$out1,v25
   1010	vncipher	$out2,$out2,v25
   1011	vncipher	$out3,$out3,v25
   1012	vncipher	$out4,$out4,v25
   1013	vncipher	$out5,$out5,v25
   1014	vncipher	$out6,$out6,v25
   1015	vncipher	$out7,$out7,v25
   1016	lvx		v25,$x10,$key_		# round[4]
   1017	bdnz		Loop_cbc_dec8x_tail
   1018
   1019	vncipher	$out1,$out1,v24
   1020	vncipher	$out2,$out2,v24
   1021	vncipher	$out3,$out3,v24
   1022	vncipher	$out4,$out4,v24
   1023	vncipher	$out5,$out5,v24
   1024	vncipher	$out6,$out6,v24
   1025	vncipher	$out7,$out7,v24
   1026
   1027	vncipher	$out1,$out1,v25
   1028	vncipher	$out2,$out2,v25
   1029	vncipher	$out3,$out3,v25
   1030	vncipher	$out4,$out4,v25
   1031	vncipher	$out5,$out5,v25
   1032	vncipher	$out6,$out6,v25
   1033	vncipher	$out7,$out7,v25
   1034
   1035	vncipher	$out1,$out1,v26
   1036	vncipher	$out2,$out2,v26
   1037	vncipher	$out3,$out3,v26
   1038	vncipher	$out4,$out4,v26
   1039	vncipher	$out5,$out5,v26
   1040	vncipher	$out6,$out6,v26
   1041	vncipher	$out7,$out7,v26
   1042
   1043	vncipher	$out1,$out1,v27
   1044	vncipher	$out2,$out2,v27
   1045	vncipher	$out3,$out3,v27
   1046	vncipher	$out4,$out4,v27
   1047	vncipher	$out5,$out5,v27
   1048	vncipher	$out6,$out6,v27
   1049	vncipher	$out7,$out7,v27
   1050
   1051	vncipher	$out1,$out1,v28
   1052	vncipher	$out2,$out2,v28
   1053	vncipher	$out3,$out3,v28
   1054	vncipher	$out4,$out4,v28
   1055	vncipher	$out5,$out5,v28
   1056	vncipher	$out6,$out6,v28
   1057	vncipher	$out7,$out7,v28
   1058
   1059	vncipher	$out1,$out1,v29
   1060	vncipher	$out2,$out2,v29
   1061	vncipher	$out3,$out3,v29
   1062	vncipher	$out4,$out4,v29
   1063	vncipher	$out5,$out5,v29
   1064	vncipher	$out6,$out6,v29
   1065	vncipher	$out7,$out7,v29
   1066
   1067	vncipher	$out1,$out1,v30
   1068	 vxor		$ivec,$ivec,v31		# last round key
   1069	vncipher	$out2,$out2,v30
   1070	 vxor		$in1,$in1,v31
   1071	vncipher	$out3,$out3,v30
   1072	 vxor		$in2,$in2,v31
   1073	vncipher	$out4,$out4,v30
   1074	 vxor		$in3,$in3,v31
   1075	vncipher	$out5,$out5,v30
   1076	 vxor		$in4,$in4,v31
   1077	vncipher	$out6,$out6,v30
   1078	 vxor		$in5,$in5,v31
   1079	vncipher	$out7,$out7,v30
   1080	 vxor		$in6,$in6,v31
   1081
   1082	cmplwi		$len,32			# switch($len)
   1083	blt		Lcbc_dec8x_one
   1084	nop
   1085	beq		Lcbc_dec8x_two
   1086	cmplwi		$len,64
   1087	blt		Lcbc_dec8x_three
   1088	nop
   1089	beq		Lcbc_dec8x_four
   1090	cmplwi		$len,96
   1091	blt		Lcbc_dec8x_five
   1092	nop
   1093	beq		Lcbc_dec8x_six
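        # The remainder here is 16..112 bytes, so the compare ladder
        # dispatches on the number of leftover blocks: 16 bytes lands in
        # Lcbc_dec8x_one, 32 in _two, and so on up to 112 in _seven.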
   1094
   1095Lcbc_dec8x_seven:
   1096	vncipherlast	$out1,$out1,$ivec
   1097	vncipherlast	$out2,$out2,$in1
   1098	vncipherlast	$out3,$out3,$in2
   1099	vncipherlast	$out4,$out4,$in3
   1100	vncipherlast	$out5,$out5,$in4
   1101	vncipherlast	$out6,$out6,$in5
   1102	vncipherlast	$out7,$out7,$in6
   1103	vmr		$ivec,$in7
   1104
   1105	le?vperm	$out1,$out1,$out1,$inpperm
   1106	le?vperm	$out2,$out2,$out2,$inpperm
   1107	stvx_u		$out1,$x00,$out
   1108	le?vperm	$out3,$out3,$out3,$inpperm
   1109	stvx_u		$out2,$x10,$out
   1110	le?vperm	$out4,$out4,$out4,$inpperm
   1111	stvx_u		$out3,$x20,$out
   1112	le?vperm	$out5,$out5,$out5,$inpperm
   1113	stvx_u		$out4,$x30,$out
   1114	le?vperm	$out6,$out6,$out6,$inpperm
   1115	stvx_u		$out5,$x40,$out
   1116	le?vperm	$out7,$out7,$out7,$inpperm
   1117	stvx_u		$out6,$x50,$out
   1118	stvx_u		$out7,$x60,$out
   1119	addi		$out,$out,0x70
   1120	b		Lcbc_dec8x_done
   1121
   1122.align	5
   1123Lcbc_dec8x_six:
   1124	vncipherlast	$out2,$out2,$ivec
   1125	vncipherlast	$out3,$out3,$in2
   1126	vncipherlast	$out4,$out4,$in3
   1127	vncipherlast	$out5,$out5,$in4
   1128	vncipherlast	$out6,$out6,$in5
   1129	vncipherlast	$out7,$out7,$in6
   1130	vmr		$ivec,$in7
   1131
   1132	le?vperm	$out2,$out2,$out2,$inpperm
   1133	le?vperm	$out3,$out3,$out3,$inpperm
   1134	stvx_u		$out2,$x00,$out
   1135	le?vperm	$out4,$out4,$out4,$inpperm
   1136	stvx_u		$out3,$x10,$out
   1137	le?vperm	$out5,$out5,$out5,$inpperm
   1138	stvx_u		$out4,$x20,$out
   1139	le?vperm	$out6,$out6,$out6,$inpperm
   1140	stvx_u		$out5,$x30,$out
   1141	le?vperm	$out7,$out7,$out7,$inpperm
   1142	stvx_u		$out6,$x40,$out
   1143	stvx_u		$out7,$x50,$out
   1144	addi		$out,$out,0x60
   1145	b		Lcbc_dec8x_done
   1146
   1147.align	5
   1148Lcbc_dec8x_five:
   1149	vncipherlast	$out3,$out3,$ivec
   1150	vncipherlast	$out4,$out4,$in3
   1151	vncipherlast	$out5,$out5,$in4
   1152	vncipherlast	$out6,$out6,$in5
   1153	vncipherlast	$out7,$out7,$in6
   1154	vmr		$ivec,$in7
   1155
   1156	le?vperm	$out3,$out3,$out3,$inpperm
   1157	le?vperm	$out4,$out4,$out4,$inpperm
   1158	stvx_u		$out3,$x00,$out
   1159	le?vperm	$out5,$out5,$out5,$inpperm
   1160	stvx_u		$out4,$x10,$out
   1161	le?vperm	$out6,$out6,$out6,$inpperm
   1162	stvx_u		$out5,$x20,$out
   1163	le?vperm	$out7,$out7,$out7,$inpperm
   1164	stvx_u		$out6,$x30,$out
   1165	stvx_u		$out7,$x40,$out
   1166	addi		$out,$out,0x50
   1167	b		Lcbc_dec8x_done
   1168
   1169.align	5
   1170Lcbc_dec8x_four:
   1171	vncipherlast	$out4,$out4,$ivec
   1172	vncipherlast	$out5,$out5,$in4
   1173	vncipherlast	$out6,$out6,$in5
   1174	vncipherlast	$out7,$out7,$in6
   1175	vmr		$ivec,$in7
   1176
   1177	le?vperm	$out4,$out4,$out4,$inpperm
   1178	le?vperm	$out5,$out5,$out5,$inpperm
   1179	stvx_u		$out4,$x00,$out
   1180	le?vperm	$out6,$out6,$out6,$inpperm
   1181	stvx_u		$out5,$x10,$out
   1182	le?vperm	$out7,$out7,$out7,$inpperm
   1183	stvx_u		$out6,$x20,$out
   1184	stvx_u		$out7,$x30,$out
   1185	addi		$out,$out,0x40
   1186	b		Lcbc_dec8x_done
   1187
   1188.align	5
   1189Lcbc_dec8x_three:
   1190	vncipherlast	$out5,$out5,$ivec
   1191	vncipherlast	$out6,$out6,$in5
   1192	vncipherlast	$out7,$out7,$in6
   1193	vmr		$ivec,$in7
   1194
   1195	le?vperm	$out5,$out5,$out5,$inpperm
   1196	le?vperm	$out6,$out6,$out6,$inpperm
   1197	stvx_u		$out5,$x00,$out
   1198	le?vperm	$out7,$out7,$out7,$inpperm
   1199	stvx_u		$out6,$x10,$out
   1200	stvx_u		$out7,$x20,$out
   1201	addi		$out,$out,0x30
   1202	b		Lcbc_dec8x_done
   1203
   1204.align	5
   1205Lcbc_dec8x_two:
   1206	vncipherlast	$out6,$out6,$ivec
   1207	vncipherlast	$out7,$out7,$in6
   1208	vmr		$ivec,$in7
   1209
   1210	le?vperm	$out6,$out6,$out6,$inpperm
   1211	le?vperm	$out7,$out7,$out7,$inpperm
   1212	stvx_u		$out6,$x00,$out
   1213	stvx_u		$out7,$x10,$out
   1214	addi		$out,$out,0x20
   1215	b		Lcbc_dec8x_done
   1216
   1217.align	5
   1218Lcbc_dec8x_one:
   1219	vncipherlast	$out7,$out7,$ivec
   1220	vmr		$ivec,$in7
   1221
   1222	le?vperm	$out7,$out7,$out7,$inpperm
   1223	stvx_u		$out7,0,$out
   1224	addi		$out,$out,0x10
   1225
   1226Lcbc_dec8x_done:
   1227	le?vperm	$ivec,$ivec,$ivec,$inpperm
   1228	stvx_u		$ivec,0,$ivp		# write [unaligned] iv
   1229
   1230	li		r10,`$FRAME+15`
   1231	li		r11,`$FRAME+31`
   1232	stvx		$inpperm,r10,$sp	# wipe copies of round keys
   1233	addi		r10,r10,32
   1234	stvx		$inpperm,r11,$sp
   1235	addi		r11,r11,32
   1236	stvx		$inpperm,r10,$sp
   1237	addi		r10,r10,32
   1238	stvx		$inpperm,r11,$sp
   1239	addi		r11,r11,32
   1240	stvx		$inpperm,r10,$sp
   1241	addi		r10,r10,32
   1242	stvx		$inpperm,r11,$sp
   1243	addi		r11,r11,32
   1244	stvx		$inpperm,r10,$sp
   1245	addi		r10,r10,32
   1246	stvx		$inpperm,r11,$sp
   1247	addi		r11,r11,32
   1248
   1249	mtspr		256,$vrsave
   1250	lvx		v20,r10,$sp		# ABI says so
   1251	addi		r10,r10,32
   1252	lvx		v21,r11,$sp
   1253	addi		r11,r11,32
   1254	lvx		v22,r10,$sp
   1255	addi		r10,r10,32
   1256	lvx		v23,r11,$sp
   1257	addi		r11,r11,32
   1258	lvx		v24,r10,$sp
   1259	addi		r10,r10,32
   1260	lvx		v25,r11,$sp
   1261	addi		r11,r11,32
   1262	lvx		v26,r10,$sp
   1263	addi		r10,r10,32
   1264	lvx		v27,r11,$sp
   1265	addi		r11,r11,32
   1266	lvx		v28,r10,$sp
   1267	addi		r10,r10,32
   1268	lvx		v29,r11,$sp
   1269	addi		r11,r11,32
   1270	lvx		v30,r10,$sp
   1271	lvx		v31,r11,$sp
   1272	$POP		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
   1273	$POP		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
   1274	$POP		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
   1275	$POP		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
   1276	$POP		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
   1277	$POP		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
   1278	addi		$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
   1279	blr
   1280	.long		0
   1281	.byte		0,12,0x14,0,0x80,6,6,0
   1282	.long		0
   1283.size	.${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
   1284___
   1285}}	}}}
   1286
   1287#########################################################################
   1288{{{	# CTR procedure[s]						#
   1289
   1290####################### WARNING: Here be dragons! #######################
   1291#
   1292# This code is written as 'ctr32', based on a 32-bit counter used
   1293# upstream. The kernel does *not* use a 32-bit counter. The kernel uses
   1294# a 128-bit counter.
   1295#
   1296# This leads to subtle changes from the upstream code: the counter
    1297# is incremented with vadduqm rather than vadduwm. This occurs in
   1298# both the bulk (8 blocks at a time) path, and in the individual block
   1299# path. Be aware of this when doing updates.
   1300#
   1301# See:
   1302# 1d4aa0b4c181 ("crypto: vmx - Fixing AES-CTR counter bug")
   1303# 009b30ac7444 ("crypto: vmx - CTR: always increment IV as quadword")
   1304# https://github.com/openssl/openssl/pull/8942
   1305#
   1306#########################################################################
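        # A concrete illustration of the difference (example values only):
        # incrementing a counter whose low word is about to wrap,
        #
        #	00000000 00000000 00000000 ffffffff   + 1
        #
        # a word add (vadduwm) wraps the low 32-bit lane to zero with no
        # carry-out, giving ...00000000 00000000, while a quadword add
        # (vadduqm) propagates the carry, giving ...00000001 00000000 -
        # the 128-bit behaviour the kernel needs.
        #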
   1307my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
   1308my ($rndkey0,$rndkey1,$inout,$tmp)=		map("v$_",(0..3));
   1309my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
   1310						map("v$_",(4..11));
   1311my $dat=$tmp;
   1312
   1313$code.=<<___;
   1314.globl	.${prefix}_ctr32_encrypt_blocks
   1315	${UCMP}i	$len,1
   1316	bltlr-
   1317
   1318	lis		r0,0xfff0
   1319	mfspr		$vrsave,256
   1320	mtspr		256,r0
   1321
   1322	li		$idx,15
   1323	vxor		$rndkey0,$rndkey0,$rndkey0
   1324	le?vspltisb	$tmp,0x0f
   1325
   1326	lvx		$ivec,0,$ivp		# load [unaligned] iv
   1327	lvsl		$inpperm,0,$ivp
   1328	lvx		$inptail,$idx,$ivp
   1329	 vspltisb	$one,1
   1330	le?vxor		$inpperm,$inpperm,$tmp
   1331	vperm		$ivec,$ivec,$inptail,$inpperm
   1332	 vsldoi		$one,$rndkey0,$one,1
   1333
   1334	neg		r11,$inp
   1335	?lvsl		$keyperm,0,$key		# prepare for unaligned key
   1336	lwz		$rounds,240($key)
   1337
   1338	lvsr		$inpperm,0,r11		# prepare for unaligned load
   1339	lvx		$inptail,0,$inp
    1340	addi		$inp,$inp,15		# 15 is not a typo
   1341	le?vxor		$inpperm,$inpperm,$tmp
   1342
   1343	srwi		$rounds,$rounds,1
   1344	li		$idx,16
   1345	subi		$rounds,$rounds,1
   1346
   1347	${UCMP}i	$len,8
   1348	bge		_aesp8_ctr32_encrypt8x
   1349
   1350	?lvsr		$outperm,0,$out		# prepare for unaligned store
   1351	vspltisb	$outmask,-1
   1352	lvx		$outhead,0,$out
   1353	?vperm		$outmask,$rndkey0,$outmask,$outperm
   1354	le?vxor		$outperm,$outperm,$tmp
   1355
   1356	lvx		$rndkey0,0,$key
   1357	mtctr		$rounds
   1358	lvx		$rndkey1,$idx,$key
   1359	addi		$idx,$idx,16
   1360	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
   1361	vxor		$inout,$ivec,$rndkey0
   1362	lvx		$rndkey0,$idx,$key
   1363	addi		$idx,$idx,16
   1364	b		Loop_ctr32_enc
   1365
   1366.align	5
   1367Loop_ctr32_enc:
   1368	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
   1369	vcipher		$inout,$inout,$rndkey1
   1370	lvx		$rndkey1,$idx,$key
   1371	addi		$idx,$idx,16
   1372	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
   1373	vcipher		$inout,$inout,$rndkey0
   1374	lvx		$rndkey0,$idx,$key
   1375	addi		$idx,$idx,16
   1376	bdnz		Loop_ctr32_enc
   1377
   1378	vadduqm		$ivec,$ivec,$one	# Kernel change for 128-bit
   1379	 vmr		$dat,$inptail
   1380	 lvx		$inptail,0,$inp
   1381	 addi		$inp,$inp,16
   1382	 subic.		$len,$len,1		# blocks--
   1383
   1384	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
   1385	vcipher		$inout,$inout,$rndkey1
   1386	lvx		$rndkey1,$idx,$key
   1387	 vperm		$dat,$dat,$inptail,$inpperm
   1388	 li		$idx,16
   1389	?vperm		$rndkey1,$rndkey0,$rndkey1,$keyperm
   1390	 lvx		$rndkey0,0,$key
   1391	vxor		$dat,$dat,$rndkey1	# last round key
   1392	vcipherlast	$inout,$inout,$dat
   1393
   1394	 lvx		$rndkey1,$idx,$key
   1395	 addi		$idx,$idx,16
   1396	vperm		$inout,$inout,$inout,$outperm
   1397	vsel		$dat,$outhead,$inout,$outmask
   1398	 mtctr		$rounds
   1399	 ?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
   1400	vmr		$outhead,$inout
   1401	 vxor		$inout,$ivec,$rndkey0
   1402	 lvx		$rndkey0,$idx,$key
   1403	 addi		$idx,$idx,16
   1404	stvx		$dat,0,$out
   1405	addi		$out,$out,16
   1406	bne		Loop_ctr32_enc
   1407
   1408	addi		$out,$out,-1
   1409	lvx		$inout,0,$out		# redundant in aligned case
   1410	vsel		$inout,$outhead,$inout,$outmask
   1411	stvx		$inout,0,$out
   1412
   1413	mtspr		256,$vrsave
   1414	blr
   1415	.long		0
   1416	.byte		0,12,0x14,0,0,0,6,0
   1417	.long		0
   1418___
   1419#########################################################################
   1420{{	# Optimized CTR procedure					#
   1421my $key_="r11";
   1422my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
   1423my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
   1424my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
    1425my $rndkey0="v23";	# v24-v25 rotating buffer for the first round keys
   1426			# v26-v31 last 6 round keys
   1427my ($tmp,$keyperm)=($in3,$in4);	# aliases with "caller", redundant assignment
   1428my ($two,$three,$four)=($outhead,$outperm,$outmask);
   1429
   1430$code.=<<___;
   1431.align	5
   1432_aesp8_ctr32_encrypt8x:
   1433	$STU		$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
   1434	li		r10,`$FRAME+8*16+15`
   1435	li		r11,`$FRAME+8*16+31`
   1436	stvx		v20,r10,$sp		# ABI says so
   1437	addi		r10,r10,32
   1438	stvx		v21,r11,$sp
   1439	addi		r11,r11,32
   1440	stvx		v22,r10,$sp
   1441	addi		r10,r10,32
   1442	stvx		v23,r11,$sp
   1443	addi		r11,r11,32
   1444	stvx		v24,r10,$sp
   1445	addi		r10,r10,32
   1446	stvx		v25,r11,$sp
   1447	addi		r11,r11,32
   1448	stvx		v26,r10,$sp
   1449	addi		r10,r10,32
   1450	stvx		v27,r11,$sp
   1451	addi		r11,r11,32
   1452	stvx		v28,r10,$sp
   1453	addi		r10,r10,32
   1454	stvx		v29,r11,$sp
   1455	addi		r11,r11,32
   1456	stvx		v30,r10,$sp
   1457	stvx		v31,r11,$sp
   1458	li		r0,-1
   1459	stw		$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
   1460	li		$x10,0x10
   1461	$PUSH		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
   1462	li		$x20,0x20
   1463	$PUSH		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
   1464	li		$x30,0x30
   1465	$PUSH		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
   1466	li		$x40,0x40
   1467	$PUSH		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
   1468	li		$x50,0x50
   1469	$PUSH		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
   1470	li		$x60,0x60
   1471	$PUSH		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
   1472	li		$x70,0x70
   1473	mtspr		256,r0
   1474
   1475	subi		$rounds,$rounds,3	# -4 in total
   1476
   1477	lvx		$rndkey0,$x00,$key	# load key schedule
   1478	lvx		v30,$x10,$key
   1479	addi		$key,$key,0x20
   1480	lvx		v31,$x00,$key
   1481	?vperm		$rndkey0,$rndkey0,v30,$keyperm
   1482	addi		$key_,$sp,$FRAME+15
   1483	mtctr		$rounds
   1484
   1485Load_ctr32_enc_key:
   1486	?vperm		v24,v30,v31,$keyperm
   1487	lvx		v30,$x10,$key
   1488	addi		$key,$key,0x20
   1489	stvx		v24,$x00,$key_		# off-load round[1]
   1490	?vperm		v25,v31,v30,$keyperm
   1491	lvx		v31,$x00,$key
   1492	stvx		v25,$x10,$key_		# off-load round[2]
   1493	addi		$key_,$key_,0x20
   1494	bdnz		Load_ctr32_enc_key
   1495
   1496	lvx		v26,$x10,$key
   1497	?vperm		v24,v30,v31,$keyperm
   1498	lvx		v27,$x20,$key
   1499	stvx		v24,$x00,$key_		# off-load round[3]
   1500	?vperm		v25,v31,v26,$keyperm
   1501	lvx		v28,$x30,$key
   1502	stvx		v25,$x10,$key_		# off-load round[4]
   1503	addi		$key_,$sp,$FRAME+15	# rewind $key_
   1504	?vperm		v26,v26,v27,$keyperm
   1505	lvx		v29,$x40,$key
   1506	?vperm		v27,v27,v28,$keyperm
   1507	lvx		v30,$x50,$key
   1508	?vperm		v28,v28,v29,$keyperm
   1509	lvx		v31,$x60,$key
   1510	?vperm		v29,v29,v30,$keyperm
   1511	lvx		$out0,$x70,$key		# borrow $out0
   1512	?vperm		v30,v30,v31,$keyperm
   1513	lvx		v24,$x00,$key_		# pre-load round[1]
   1514	?vperm		v31,v31,$out0,$keyperm
   1515	lvx		v25,$x10,$key_		# pre-load round[2]
   1516
   1517	vadduqm		$two,$one,$one
   1518	subi		$inp,$inp,15		# undo "caller"
   1519	$SHL		$len,$len,4
   1520
   1521	vadduqm		$out1,$ivec,$one	# counter values ...
   1522	vadduqm		$out2,$ivec,$two	# (do all ctr adds as 128-bit)
   1523	vxor		$out0,$ivec,$rndkey0	# ... xored with rndkey[0]
   1524	 le?li		$idx,8
   1525	vadduqm		$out3,$out1,$two
   1526	vxor		$out1,$out1,$rndkey0
   1527	 le?lvsl	$inpperm,0,$idx
   1528	vadduqm		$out4,$out2,$two
   1529	vxor		$out2,$out2,$rndkey0
   1530	 le?vspltisb	$tmp,0x0f
   1531	vadduqm		$out5,$out3,$two
   1532	vxor		$out3,$out3,$rndkey0
   1533	 le?vxor	$inpperm,$inpperm,$tmp	# transform for lvx_u/stvx_u
   1534	vadduqm		$out6,$out4,$two
   1535	vxor		$out4,$out4,$rndkey0
   1536	vadduqm		$out7,$out5,$two
   1537	vxor		$out5,$out5,$rndkey0
   1538	vadduqm		$ivec,$out6,$two	# next counter value
   1539	vxor		$out6,$out6,$rndkey0
   1540	vxor		$out7,$out7,$rndkey0
   1541
   1542	mtctr		$rounds
   1543	b		Loop_ctr32_enc8x
   1544.align	5
   1545Loop_ctr32_enc8x:
   1546	vcipher 	$out0,$out0,v24
   1547	vcipher 	$out1,$out1,v24
   1548	vcipher 	$out2,$out2,v24
   1549	vcipher 	$out3,$out3,v24
   1550	vcipher 	$out4,$out4,v24
   1551	vcipher 	$out5,$out5,v24
   1552	vcipher 	$out6,$out6,v24
   1553	vcipher 	$out7,$out7,v24
   1554Loop_ctr32_enc8x_middle:
   1555	lvx		v24,$x20,$key_		# round[3]
   1556	addi		$key_,$key_,0x20
   1557
   1558	vcipher 	$out0,$out0,v25
   1559	vcipher 	$out1,$out1,v25
   1560	vcipher 	$out2,$out2,v25
   1561	vcipher 	$out3,$out3,v25
   1562	vcipher 	$out4,$out4,v25
   1563	vcipher 	$out5,$out5,v25
   1564	vcipher 	$out6,$out6,v25
   1565	vcipher 	$out7,$out7,v25
   1566	lvx		v25,$x10,$key_		# round[4]
   1567	bdnz		Loop_ctr32_enc8x
   1568
   1569	subic		r11,$len,256		# $len-256, borrow $key_
   1570	vcipher 	$out0,$out0,v24
   1571	vcipher 	$out1,$out1,v24
   1572	vcipher 	$out2,$out2,v24
   1573	vcipher 	$out3,$out3,v24
   1574	vcipher 	$out4,$out4,v24
   1575	vcipher 	$out5,$out5,v24
   1576	vcipher 	$out6,$out6,v24
   1577	vcipher 	$out7,$out7,v24
   1578
   1579	subfe		r0,r0,r0		# borrow?-1:0
   1580	vcipher 	$out0,$out0,v25
   1581	vcipher 	$out1,$out1,v25
   1582	vcipher 	$out2,$out2,v25
   1583	vcipher 	$out3,$out3,v25
   1584	vcipher 	$out4,$out4,v25
   1585	vcipher		$out5,$out5,v25
   1586	vcipher		$out6,$out6,v25
   1587	vcipher		$out7,$out7,v25
   1588
   1589	and		r0,r0,r11
   1590	addi		$key_,$sp,$FRAME+15	# rewind $key_
   1591	vcipher		$out0,$out0,v26
   1592	vcipher		$out1,$out1,v26
   1593	vcipher		$out2,$out2,v26
   1594	vcipher		$out3,$out3,v26
   1595	vcipher		$out4,$out4,v26
   1596	vcipher		$out5,$out5,v26
   1597	vcipher		$out6,$out6,v26
   1598	vcipher		$out7,$out7,v26
   1599	lvx		v24,$x00,$key_		# re-pre-load round[1]
   1600
   1601	subic		$len,$len,129		# $len-=129
   1602	vcipher		$out0,$out0,v27
   1603	addi		$len,$len,1		# $len-=128 really
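        # Subtracting 129 and adding 1 nets the usual "len -= 128" but
        # leaves CA recording whether 128 or fewer bytes remain; the subfe.
        # further down converts that borrow into the exit condition tested
        # by the bne to Lctr32_enc8x_break.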
   1604	vcipher		$out1,$out1,v27
   1605	vcipher		$out2,$out2,v27
   1606	vcipher		$out3,$out3,v27
   1607	vcipher		$out4,$out4,v27
   1608	vcipher		$out5,$out5,v27
   1609	vcipher		$out6,$out6,v27
   1610	vcipher		$out7,$out7,v27
   1611	lvx		v25,$x10,$key_		# re-pre-load round[2]
   1612
   1613	vcipher		$out0,$out0,v28
   1614	 lvx_u		$in0,$x00,$inp		# load input
   1615	vcipher		$out1,$out1,v28
   1616	 lvx_u		$in1,$x10,$inp
   1617	vcipher		$out2,$out2,v28
   1618	 lvx_u		$in2,$x20,$inp
   1619	vcipher		$out3,$out3,v28
   1620	 lvx_u		$in3,$x30,$inp
   1621	vcipher		$out4,$out4,v28
   1622	 lvx_u		$in4,$x40,$inp
   1623	vcipher		$out5,$out5,v28
   1624	 lvx_u		$in5,$x50,$inp
   1625	vcipher		$out6,$out6,v28
   1626	 lvx_u		$in6,$x60,$inp
   1627	vcipher		$out7,$out7,v28
   1628	 lvx_u		$in7,$x70,$inp
   1629	 addi		$inp,$inp,0x80
   1630
   1631	vcipher		$out0,$out0,v29
   1632	 le?vperm	$in0,$in0,$in0,$inpperm
   1633	vcipher		$out1,$out1,v29
   1634	 le?vperm	$in1,$in1,$in1,$inpperm
   1635	vcipher		$out2,$out2,v29
   1636	 le?vperm	$in2,$in2,$in2,$inpperm
   1637	vcipher		$out3,$out3,v29
   1638	 le?vperm	$in3,$in3,$in3,$inpperm
   1639	vcipher		$out4,$out4,v29
   1640	 le?vperm	$in4,$in4,$in4,$inpperm
   1641	vcipher		$out5,$out5,v29
   1642	 le?vperm	$in5,$in5,$in5,$inpperm
   1643	vcipher		$out6,$out6,v29
   1644	 le?vperm	$in6,$in6,$in6,$inpperm
   1645	vcipher		$out7,$out7,v29
   1646	 le?vperm	$in7,$in7,$in7,$inpperm
   1647
    1648	add		$inp,$inp,r0		# $inp is adjusted in such a
    1649						# way that at exit from the
    1650						# loop inX-in7 are loaded
    1651						# with the last "words"
   1652	subfe.		r0,r0,r0		# borrow?-1:0
   1653	vcipher		$out0,$out0,v30
   1654	 vxor		$in0,$in0,v31		# xor with last round key
   1655	vcipher		$out1,$out1,v30
   1656	 vxor		$in1,$in1,v31
   1657	vcipher		$out2,$out2,v30
   1658	 vxor		$in2,$in2,v31
   1659	vcipher		$out3,$out3,v30
   1660	 vxor		$in3,$in3,v31
   1661	vcipher		$out4,$out4,v30
   1662	 vxor		$in4,$in4,v31
   1663	vcipher		$out5,$out5,v30
   1664	 vxor		$in5,$in5,v31
   1665	vcipher		$out6,$out6,v30
   1666	 vxor		$in6,$in6,v31
   1667	vcipher		$out7,$out7,v30
   1668	 vxor		$in7,$in7,v31
   1669
   1670	bne		Lctr32_enc8x_break	# did $len-129 borrow?
   1671
   1672	vcipherlast	$in0,$out0,$in0
   1673	vcipherlast	$in1,$out1,$in1
   1674	 vadduqm	$out1,$ivec,$one	# counter values ...
   1675	vcipherlast	$in2,$out2,$in2
   1676	 vadduqm	$out2,$ivec,$two
   1677	 vxor		$out0,$ivec,$rndkey0	# ... xored with rndkey[0]
   1678	vcipherlast	$in3,$out3,$in3
   1679	 vadduqm	$out3,$out1,$two
   1680	 vxor		$out1,$out1,$rndkey0
   1681	vcipherlast	$in4,$out4,$in4
   1682	 vadduqm	$out4,$out2,$two
   1683	 vxor		$out2,$out2,$rndkey0
   1684	vcipherlast	$in5,$out5,$in5
   1685	 vadduqm	$out5,$out3,$two
   1686	 vxor		$out3,$out3,$rndkey0
   1687	vcipherlast	$in6,$out6,$in6
   1688	 vadduqm	$out6,$out4,$two
   1689	 vxor		$out4,$out4,$rndkey0
   1690	vcipherlast	$in7,$out7,$in7
   1691	 vadduqm	$out7,$out5,$two
   1692	 vxor		$out5,$out5,$rndkey0
   1693	le?vperm	$in0,$in0,$in0,$inpperm
   1694	 vadduqm	$ivec,$out6,$two	# next counter value
   1695	 vxor		$out6,$out6,$rndkey0
   1696	le?vperm	$in1,$in1,$in1,$inpperm
   1697	 vxor		$out7,$out7,$rndkey0
   1698	mtctr		$rounds
   1699
   1700	 vcipher	$out0,$out0,v24
   1701	stvx_u		$in0,$x00,$out
   1702	le?vperm	$in2,$in2,$in2,$inpperm
   1703	 vcipher	$out1,$out1,v24
   1704	stvx_u		$in1,$x10,$out
   1705	le?vperm	$in3,$in3,$in3,$inpperm
   1706	 vcipher	$out2,$out2,v24
   1707	stvx_u		$in2,$x20,$out
   1708	le?vperm	$in4,$in4,$in4,$inpperm
   1709	 vcipher	$out3,$out3,v24
   1710	stvx_u		$in3,$x30,$out
   1711	le?vperm	$in5,$in5,$in5,$inpperm
   1712	 vcipher	$out4,$out4,v24
   1713	stvx_u		$in4,$x40,$out
   1714	le?vperm	$in6,$in6,$in6,$inpperm
   1715	 vcipher	$out5,$out5,v24
   1716	stvx_u		$in5,$x50,$out
   1717	le?vperm	$in7,$in7,$in7,$inpperm
   1718	 vcipher	$out6,$out6,v24
   1719	stvx_u		$in6,$x60,$out
   1720	 vcipher	$out7,$out7,v24
   1721	stvx_u		$in7,$x70,$out
   1722	addi		$out,$out,0x80
   1723
   1724	b		Loop_ctr32_enc8x_middle
   1725
   1726.align	5
   1727Lctr32_enc8x_break:
   1728	cmpwi		$len,-0x60
   1729	blt		Lctr32_enc8x_one
   1730	nop
   1731	beq		Lctr32_enc8x_two
   1732	cmpwi		$len,-0x40
   1733	blt		Lctr32_enc8x_three
   1734	nop
   1735	beq		Lctr32_enc8x_four
   1736	cmpwi		$len,-0x20
   1737	blt		Lctr32_enc8x_five
   1738	nop
   1739	beq		Lctr32_enc8x_six
   1740	cmpwi		$len,0x00
   1741	blt		Lctr32_enc8x_seven
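        					# (classify the 1-8 blocks that
        					# remain: at this point $len
        					# equals 16*blocks-128, hence the
        					# -0x60/-0x40/-0x20/0 thresholds)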
   1742
   1743Lctr32_enc8x_eight:
   1744	vcipherlast	$out0,$out0,$in0
   1745	vcipherlast	$out1,$out1,$in1
   1746	vcipherlast	$out2,$out2,$in2
   1747	vcipherlast	$out3,$out3,$in3
   1748	vcipherlast	$out4,$out4,$in4
   1749	vcipherlast	$out5,$out5,$in5
   1750	vcipherlast	$out6,$out6,$in6
   1751	vcipherlast	$out7,$out7,$in7
   1752
   1753	le?vperm	$out0,$out0,$out0,$inpperm
   1754	le?vperm	$out1,$out1,$out1,$inpperm
   1755	stvx_u		$out0,$x00,$out
   1756	le?vperm	$out2,$out2,$out2,$inpperm
   1757	stvx_u		$out1,$x10,$out
   1758	le?vperm	$out3,$out3,$out3,$inpperm
   1759	stvx_u		$out2,$x20,$out
   1760	le?vperm	$out4,$out4,$out4,$inpperm
   1761	stvx_u		$out3,$x30,$out
   1762	le?vperm	$out5,$out5,$out5,$inpperm
   1763	stvx_u		$out4,$x40,$out
   1764	le?vperm	$out6,$out6,$out6,$inpperm
   1765	stvx_u		$out5,$x50,$out
   1766	le?vperm	$out7,$out7,$out7,$inpperm
   1767	stvx_u		$out6,$x60,$out
   1768	stvx_u		$out7,$x70,$out
   1769	addi		$out,$out,0x80
   1770	b		Lctr32_enc8x_done
   1771
   1772.align	5
   1773Lctr32_enc8x_seven:
   1774	vcipherlast	$out0,$out0,$in1
   1775	vcipherlast	$out1,$out1,$in2
   1776	vcipherlast	$out2,$out2,$in3
   1777	vcipherlast	$out3,$out3,$in4
   1778	vcipherlast	$out4,$out4,$in5
   1779	vcipherlast	$out5,$out5,$in6
   1780	vcipherlast	$out6,$out6,$in7
   1781
   1782	le?vperm	$out0,$out0,$out0,$inpperm
   1783	le?vperm	$out1,$out1,$out1,$inpperm
   1784	stvx_u		$out0,$x00,$out
   1785	le?vperm	$out2,$out2,$out2,$inpperm
   1786	stvx_u		$out1,$x10,$out
   1787	le?vperm	$out3,$out3,$out3,$inpperm
   1788	stvx_u		$out2,$x20,$out
   1789	le?vperm	$out4,$out4,$out4,$inpperm
   1790	stvx_u		$out3,$x30,$out
   1791	le?vperm	$out5,$out5,$out5,$inpperm
   1792	stvx_u		$out4,$x40,$out
   1793	le?vperm	$out6,$out6,$out6,$inpperm
   1794	stvx_u		$out5,$x50,$out
   1795	stvx_u		$out6,$x60,$out
   1796	addi		$out,$out,0x70
   1797	b		Lctr32_enc8x_done
   1798
   1799.align	5
   1800Lctr32_enc8x_six:
   1801	vcipherlast	$out0,$out0,$in2
   1802	vcipherlast	$out1,$out1,$in3
   1803	vcipherlast	$out2,$out2,$in4
   1804	vcipherlast	$out3,$out3,$in5
   1805	vcipherlast	$out4,$out4,$in6
   1806	vcipherlast	$out5,$out5,$in7
   1807
   1808	le?vperm	$out0,$out0,$out0,$inpperm
   1809	le?vperm	$out1,$out1,$out1,$inpperm
   1810	stvx_u		$out0,$x00,$out
   1811	le?vperm	$out2,$out2,$out2,$inpperm
   1812	stvx_u		$out1,$x10,$out
   1813	le?vperm	$out3,$out3,$out3,$inpperm
   1814	stvx_u		$out2,$x20,$out
   1815	le?vperm	$out4,$out4,$out4,$inpperm
   1816	stvx_u		$out3,$x30,$out
   1817	le?vperm	$out5,$out5,$out5,$inpperm
   1818	stvx_u		$out4,$x40,$out
   1819	stvx_u		$out5,$x50,$out
   1820	addi		$out,$out,0x60
   1821	b		Lctr32_enc8x_done
   1822
   1823.align	5
   1824Lctr32_enc8x_five:
   1825	vcipherlast	$out0,$out0,$in3
   1826	vcipherlast	$out1,$out1,$in4
   1827	vcipherlast	$out2,$out2,$in5
   1828	vcipherlast	$out3,$out3,$in6
   1829	vcipherlast	$out4,$out4,$in7
   1830
   1831	le?vperm	$out0,$out0,$out0,$inpperm
   1832	le?vperm	$out1,$out1,$out1,$inpperm
   1833	stvx_u		$out0,$x00,$out
   1834	le?vperm	$out2,$out2,$out2,$inpperm
   1835	stvx_u		$out1,$x10,$out
   1836	le?vperm	$out3,$out3,$out3,$inpperm
   1837	stvx_u		$out2,$x20,$out
   1838	le?vperm	$out4,$out4,$out4,$inpperm
   1839	stvx_u		$out3,$x30,$out
   1840	stvx_u		$out4,$x40,$out
   1841	addi		$out,$out,0x50
   1842	b		Lctr32_enc8x_done
   1843
   1844.align	5
   1845Lctr32_enc8x_four:
   1846	vcipherlast	$out0,$out0,$in4
   1847	vcipherlast	$out1,$out1,$in5
   1848	vcipherlast	$out2,$out2,$in6
   1849	vcipherlast	$out3,$out3,$in7
   1850
   1851	le?vperm	$out0,$out0,$out0,$inpperm
   1852	le?vperm	$out1,$out1,$out1,$inpperm
   1853	stvx_u		$out0,$x00,$out
   1854	le?vperm	$out2,$out2,$out2,$inpperm
   1855	stvx_u		$out1,$x10,$out
   1856	le?vperm	$out3,$out3,$out3,$inpperm
   1857	stvx_u		$out2,$x20,$out
   1858	stvx_u		$out3,$x30,$out
   1859	addi		$out,$out,0x40
   1860	b		Lctr32_enc8x_done
   1861
   1862.align	5
   1863Lctr32_enc8x_three:
   1864	vcipherlast	$out0,$out0,$in5
   1865	vcipherlast	$out1,$out1,$in6
   1866	vcipherlast	$out2,$out2,$in7
   1867
   1868	le?vperm	$out0,$out0,$out0,$inpperm
   1869	le?vperm	$out1,$out1,$out1,$inpperm
   1870	stvx_u		$out0,$x00,$out
   1871	le?vperm	$out2,$out2,$out2,$inpperm
   1872	stvx_u		$out1,$x10,$out
   1873	stvx_u		$out2,$x20,$out
   1874	addi		$out,$out,0x30
   1875	b		Lctr32_enc8x_done
   1876
   1877.align	5
   1878Lctr32_enc8x_two:
   1879	vcipherlast	$out0,$out0,$in6
   1880	vcipherlast	$out1,$out1,$in7
   1881
   1882	le?vperm	$out0,$out0,$out0,$inpperm
   1883	le?vperm	$out1,$out1,$out1,$inpperm
   1884	stvx_u		$out0,$x00,$out
   1885	stvx_u		$out1,$x10,$out
   1886	addi		$out,$out,0x20
   1887	b		Lctr32_enc8x_done
   1888
   1889.align	5
   1890Lctr32_enc8x_one:
   1891	vcipherlast	$out0,$out0,$in7
   1892
   1893	le?vperm	$out0,$out0,$out0,$inpperm
   1894	stvx_u		$out0,0,$out
   1895	addi		$out,$out,0x10
   1896
   1897Lctr32_enc8x_done:
   1898	li		r10,`$FRAME+15`
   1899	li		r11,`$FRAME+31`
   1900	stvx		$inpperm,r10,$sp	# wipe copies of round keys
   1901	addi		r10,r10,32
   1902	stvx		$inpperm,r11,$sp
   1903	addi		r11,r11,32
   1904	stvx		$inpperm,r10,$sp
   1905	addi		r10,r10,32
   1906	stvx		$inpperm,r11,$sp
   1907	addi		r11,r11,32
   1908	stvx		$inpperm,r10,$sp
   1909	addi		r10,r10,32
   1910	stvx		$inpperm,r11,$sp
   1911	addi		r11,r11,32
   1912	stvx		$inpperm,r10,$sp
   1913	addi		r10,r10,32
   1914	stvx		$inpperm,r11,$sp
   1915	addi		r11,r11,32
   1916
   1917	mtspr		256,$vrsave
   1918	lvx		v20,r10,$sp		# ABI says so
   1919	addi		r10,r10,32
   1920	lvx		v21,r11,$sp
   1921	addi		r11,r11,32
   1922	lvx		v22,r10,$sp
   1923	addi		r10,r10,32
   1924	lvx		v23,r11,$sp
   1925	addi		r11,r11,32
   1926	lvx		v24,r10,$sp
   1927	addi		r10,r10,32
   1928	lvx		v25,r11,$sp
   1929	addi		r11,r11,32
   1930	lvx		v26,r10,$sp
   1931	addi		r10,r10,32
   1932	lvx		v27,r11,$sp
   1933	addi		r11,r11,32
   1934	lvx		v28,r10,$sp
   1935	addi		r10,r10,32
   1936	lvx		v29,r11,$sp
   1937	addi		r11,r11,32
   1938	lvx		v30,r10,$sp
   1939	lvx		v31,r11,$sp
   1940	$POP		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
   1941	$POP		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
   1942	$POP		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
   1943	$POP		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
   1944	$POP		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
   1945	$POP		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
   1946	addi		$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
   1947	blr
   1948	.long		0
   1949	.byte		0,12,0x14,0,0x80,6,6,0
   1950	.long		0
   1951.size	.${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
   1952___
   1953}}	}}}
   1954
   1955#########################################################################
   1956{{{	# XTS procedures						#
   1957# int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len,	#
   1958#                             const AES_KEY *key1, const AES_KEY *key2,	#
   1959#                             [const] unsigned char iv[16]);		#
    1960	# If $key2 is NULL, then a "tweak chaining" mode is engaged, in which	#
    1961	# the input tweak value is assumed to be encrypted already, and the	#
    1962	# last tweak value, suitable for a consecutive call on the same chunk	#
    1963	# of data, is written back to the original buffer. In addition, in	#
    1964	# "tweak chaining" mode only complete input blocks are processed.	#
   1965
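        # Illustrative sketch (not part of the generated code): driving the
        # tweak-chaining mode from C, per the prototype above. The 4096-byte
        # chunk size and the buffer/key variable names are hypothetical.
        #
        #	unsigned char iv[16];	/* already-encrypted tweak from the
        #				   previous call on this data */
        #	aes_p8_xts_encrypt(in, out, 4096, &key1, NULL, iv);
        #	/* iv now holds the tweak for a consecutive call on the
        #	   next chunk of the same data */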
   1966my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) =	map("r$_",(3..10));
   1967my ($rndkey0,$rndkey1,$inout) =				map("v$_",(0..2));
   1968my ($output,$inptail,$inpperm,$leperm,$keyperm) =	map("v$_",(3..7));
   1969my ($tweak,$seven,$eighty7,$tmp,$tweak1) =		map("v$_",(8..12));
   1970my $taillen = $key2;
   1971
   1972   ($inp,$idx) = ($idx,$inp);				# reassign
   1973
   1974$code.=<<___;
   1975.globl	.${prefix}_xts_encrypt
   1976	mr		$inp,r3				# reassign
   1977	li		r3,-1
   1978	${UCMP}i	$len,16
   1979	bltlr-
   1980
   1981	lis		r0,0xfff0
   1982	mfspr		r12,256				# save vrsave
   1983	li		r11,0
   1984	mtspr		256,r0
   1985
   1986	vspltisb	$seven,0x07			# 0x070707..07
   1987	le?lvsl		$leperm,r11,r11
   1988	le?vspltisb	$tmp,0x0f
   1989	le?vxor		$leperm,$leperm,$seven
   1990
   1991	li		$idx,15
   1992	lvx		$tweak,0,$ivp			# load [unaligned] iv
   1993	lvsl		$inpperm,0,$ivp
   1994	lvx		$inptail,$idx,$ivp
   1995	le?vxor		$inpperm,$inpperm,$tmp
   1996	vperm		$tweak,$tweak,$inptail,$inpperm
   1997
   1998	neg		r11,$inp
   1999	lvsr		$inpperm,0,r11			# prepare for unaligned load
   2000	lvx		$inout,0,$inp
    2001	addi		$inp,$inp,15			# 15 is not a typo
   2002	le?vxor		$inpperm,$inpperm,$tmp
   2003
   2004	${UCMP}i	$key2,0				# key2==NULL?
   2005	beq		Lxts_enc_no_key2
   2006
   2007	?lvsl		$keyperm,0,$key2		# prepare for unaligned key
   2008	lwz		$rounds,240($key2)
   2009	srwi		$rounds,$rounds,1
   2010	subi		$rounds,$rounds,1
   2011	li		$idx,16
   2012
   2013	lvx		$rndkey0,0,$key2
   2014	lvx		$rndkey1,$idx,$key2
   2015	addi		$idx,$idx,16
   2016	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
   2017	vxor		$tweak,$tweak,$rndkey0
   2018	lvx		$rndkey0,$idx,$key2
   2019	addi		$idx,$idx,16
   2020	mtctr		$rounds
   2021
   2022Ltweak_xts_enc:
   2023	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
   2024	vcipher		$tweak,$tweak,$rndkey1
   2025	lvx		$rndkey1,$idx,$key2
   2026	addi		$idx,$idx,16
   2027	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
   2028	vcipher		$tweak,$tweak,$rndkey0
   2029	lvx		$rndkey0,$idx,$key2
   2030	addi		$idx,$idx,16
   2031	bdnz		Ltweak_xts_enc
   2032
   2033	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
   2034	vcipher		$tweak,$tweak,$rndkey1
   2035	lvx		$rndkey1,$idx,$key2
   2036	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
   2037	vcipherlast	$tweak,$tweak,$rndkey0
   2038
   2039	li		$ivp,0				# don't chain the tweak
   2040	b		Lxts_enc
   2041
   2042Lxts_enc_no_key2:
   2043	li		$idx,-16
   2044	and		$len,$len,$idx			# in "tweak chaining"
   2045							# mode only complete
   2046							# blocks are processed
   2047Lxts_enc:
   2048	lvx		$inptail,0,$inp
   2049	addi		$inp,$inp,16
   2050
   2051	?lvsl		$keyperm,0,$key1		# prepare for unaligned key
   2052	lwz		$rounds,240($key1)
   2053	srwi		$rounds,$rounds,1
   2054	subi		$rounds,$rounds,1
   2055	li		$idx,16
   2056
   2057	vslb		$eighty7,$seven,$seven		# 0x808080..80
   2058	vor		$eighty7,$eighty7,$seven	# 0x878787..87
   2059	vspltisb	$tmp,1				# 0x010101..01
   2060	vsldoi		$eighty7,$eighty7,$tmp,15	# 0x870101..01
   2061
   2062	${UCMP}i	$len,96
   2063	bge		_aesp8_xts_encrypt6x
   2064
   2065	andi.		$taillen,$len,15
   2066	subic		r0,$len,32
   2067	subi		$taillen,$taillen,16
   2068	subfe		r0,r0,r0
   2069	and		r0,r0,$taillen
   2070	add		$inp,$inp,r0
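        					# (borrow-mask idiom: subfe turns
        					# the borrow from $len-32 into an
        					# all-ones mask, so $inp is backed
        					# up to cover the tail bytes only
        					# when under two blocks remain)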
   2071
   2072	lvx		$rndkey0,0,$key1
   2073	lvx		$rndkey1,$idx,$key1
   2074	addi		$idx,$idx,16
   2075	vperm		$inout,$inout,$inptail,$inpperm
   2076	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
   2077	vxor		$inout,$inout,$tweak
   2078	vxor		$inout,$inout,$rndkey0
   2079	lvx		$rndkey0,$idx,$key1
   2080	addi		$idx,$idx,16
   2081	mtctr		$rounds
   2082	b		Loop_xts_enc
   2083
   2084.align	5
   2085Loop_xts_enc:
   2086	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
   2087	vcipher		$inout,$inout,$rndkey1
   2088	lvx		$rndkey1,$idx,$key1
   2089	addi		$idx,$idx,16
   2090	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
   2091	vcipher		$inout,$inout,$rndkey0
   2092	lvx		$rndkey0,$idx,$key1
   2093	addi		$idx,$idx,16
   2094	bdnz		Loop_xts_enc
   2095
   2096	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
   2097	vcipher		$inout,$inout,$rndkey1
   2098	lvx		$rndkey1,$idx,$key1
   2099	li		$idx,16
   2100	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
   2101	vxor		$rndkey0,$rndkey0,$tweak
   2102	vcipherlast	$output,$inout,$rndkey0
   2103
   2104	le?vperm	$tmp,$output,$output,$leperm
   2105	be?nop
   2106	le?stvx_u	$tmp,0,$out
   2107	be?stvx_u	$output,0,$out
   2108	addi		$out,$out,16
   2109
   2110	subic.		$len,$len,16
   2111	beq		Lxts_enc_done
   2112
   2113	vmr		$inout,$inptail
   2114	lvx		$inptail,0,$inp
   2115	addi		$inp,$inp,16
   2116	lvx		$rndkey0,0,$key1
   2117	lvx		$rndkey1,$idx,$key1
   2118	addi		$idx,$idx,16
   2119
   2120	subic		r0,$len,32
   2121	subfe		r0,r0,r0
   2122	and		r0,r0,$taillen
   2123	add		$inp,$inp,r0
   2124
   2125	vsrab		$tmp,$tweak,$seven		# next tweak value
   2126	vaddubm		$tweak,$tweak,$tweak
   2127	vsldoi		$tmp,$tmp,$tmp,15
   2128	vand		$tmp,$tmp,$eighty7
   2129	vxor		$tweak,$tweak,$tmp
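        					# (the five instructions above
        					# multiply the tweak by x in
        					# GF(2^128) with polynomial
        					# x^128+x^7+x^2+x+1: vaddubm
        					# doubles every byte, and the
        					# vsrab/vsldoi/vand sequence turns
        					# each byte's carry-out into +1
        					# for its neighbour, or into the
        					# 0x87 reduction for the carry
        					# out of the whole 128-bit value)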
   2130
   2131	vperm		$inout,$inout,$inptail,$inpperm
   2132	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
   2133	vxor		$inout,$inout,$tweak
   2134	vxor		$output,$output,$rndkey0	# just in case $len<16
   2135	vxor		$inout,$inout,$rndkey0
   2136	lvx		$rndkey0,$idx,$key1
   2137	addi		$idx,$idx,16
   2138
   2139	mtctr		$rounds
   2140	${UCMP}i	$len,16
   2141	bge		Loop_xts_enc
   2142
   2143	vxor		$output,$output,$tweak
   2144	lvsr		$inpperm,0,$len			# $inpperm is no longer needed
   2145	vxor		$inptail,$inptail,$inptail	# $inptail is no longer needed
   2146	vspltisb	$tmp,-1
   2147	vperm		$inptail,$inptail,$tmp,$inpperm
   2148	vsel		$inout,$inout,$output,$inptail
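        					# (lvsr/vperm build a mask of
        					# $len zero bytes followed by
        					# ones; vsel thus keeps the new
        					# partial input and pads it with
        					# the tail of the previous
        					# ciphertext block, i.e.
        					# ciphertext stealing)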
   2149
   2150	subi		r11,$out,17
   2151	subi		$out,$out,16
   2152	mtctr		$len
   2153	li		$len,16
   2154Loop_xts_enc_steal:
   2155	lbzu		r0,1(r11)
   2156	stb		r0,16(r11)
   2157	bdnz		Loop_xts_enc_steal
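        					# (the byte loop copies the tail
        					# bytes of the ciphertext block
        					# just written into the final,
        					# short output block; the merged
        					# block then takes one more pass
        					# through Loop_xts_enc, which
        					# overwrites the full block at $out)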
   2158
   2159	mtctr		$rounds
   2160	b		Loop_xts_enc			# one more time...
   2161
   2162Lxts_enc_done:
   2163	${UCMP}i	$ivp,0
   2164	beq		Lxts_enc_ret
   2165
   2166	vsrab		$tmp,$tweak,$seven		# next tweak value
   2167	vaddubm		$tweak,$tweak,$tweak
   2168	vsldoi		$tmp,$tmp,$tmp,15
   2169	vand		$tmp,$tmp,$eighty7
   2170	vxor		$tweak,$tweak,$tmp
   2171
   2172	le?vperm	$tweak,$tweak,$tweak,$leperm
   2173	stvx_u		$tweak,0,$ivp
   2174
   2175Lxts_enc_ret:
   2176	mtspr		256,r12				# restore vrsave
   2177	li		r3,0
   2178	blr
   2179	.long		0
   2180	.byte		0,12,0x04,0,0x80,6,6,0
   2181	.long		0
   2182.size	.${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt
   2183
   2184.globl	.${prefix}_xts_decrypt
   2185	mr		$inp,r3				# reassign
   2186	li		r3,-1
   2187	${UCMP}i	$len,16
   2188	bltlr-
   2189
   2190	lis		r0,0xfff8
   2191	mfspr		r12,256				# save vrsave
   2192	li		r11,0
   2193	mtspr		256,r0
   2194
   2195	andi.		r0,$len,15
   2196	neg		r0,r0
   2197	andi.		r0,r0,16
   2198	sub		$len,$len,r0
   2199
   2200	vspltisb	$seven,0x07			# 0x070707..07
   2201	le?lvsl		$leperm,r11,r11
   2202	le?vspltisb	$tmp,0x0f
   2203	le?vxor		$leperm,$leperm,$seven
   2204
   2205	li		$idx,15
   2206	lvx		$tweak,0,$ivp			# load [unaligned] iv
   2207	lvsl		$inpperm,0,$ivp
   2208	lvx		$inptail,$idx,$ivp
   2209	le?vxor		$inpperm,$inpperm,$tmp
   2210	vperm		$tweak,$tweak,$inptail,$inpperm
   2211
   2212	neg		r11,$inp
   2213	lvsr		$inpperm,0,r11			# prepare for unaligned load
   2214	lvx		$inout,0,$inp
    2215	addi		$inp,$inp,15			# 15 is not a typo
   2216	le?vxor		$inpperm,$inpperm,$tmp
   2217
   2218	${UCMP}i	$key2,0				# key2==NULL?
   2219	beq		Lxts_dec_no_key2
   2220
   2221	?lvsl		$keyperm,0,$key2		# prepare for unaligned key
   2222	lwz		$rounds,240($key2)
   2223	srwi		$rounds,$rounds,1
   2224	subi		$rounds,$rounds,1
   2225	li		$idx,16
   2226
   2227	lvx		$rndkey0,0,$key2
   2228	lvx		$rndkey1,$idx,$key2
   2229	addi		$idx,$idx,16
   2230	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
   2231	vxor		$tweak,$tweak,$rndkey0
   2232	lvx		$rndkey0,$idx,$key2
   2233	addi		$idx,$idx,16
   2234	mtctr		$rounds
   2235
   2236Ltweak_xts_dec:
   2237	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
   2238	vcipher		$tweak,$tweak,$rndkey1
   2239	lvx		$rndkey1,$idx,$key2
   2240	addi		$idx,$idx,16
   2241	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
   2242	vcipher		$tweak,$tweak,$rndkey0
   2243	lvx		$rndkey0,$idx,$key2
   2244	addi		$idx,$idx,16
   2245	bdnz		Ltweak_xts_dec
   2246
   2247	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
   2248	vcipher		$tweak,$tweak,$rndkey1
   2249	lvx		$rndkey1,$idx,$key2
   2250	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
   2251	vcipherlast	$tweak,$tweak,$rndkey0
   2252
   2253	li		$ivp,0				# don't chain the tweak
   2254	b		Lxts_dec
   2255
   2256Lxts_dec_no_key2:
   2257	neg		$idx,$len
   2258	andi.		$idx,$idx,15
   2259	add		$len,$len,$idx			# in "tweak chaining"
   2260							# mode only complete
   2261							# blocks are processed
   2262Lxts_dec:
   2263	lvx		$inptail,0,$inp
   2264	addi		$inp,$inp,16
   2265
   2266	?lvsl		$keyperm,0,$key1		# prepare for unaligned key
   2267	lwz		$rounds,240($key1)
   2268	srwi		$rounds,$rounds,1
   2269	subi		$rounds,$rounds,1
   2270	li		$idx,16
   2271
   2272	vslb		$eighty7,$seven,$seven		# 0x808080..80
   2273	vor		$eighty7,$eighty7,$seven	# 0x878787..87
   2274	vspltisb	$tmp,1				# 0x010101..01
   2275	vsldoi		$eighty7,$eighty7,$tmp,15	# 0x870101..01
   2276
   2277	${UCMP}i	$len,96
   2278	bge		_aesp8_xts_decrypt6x
   2279
   2280	lvx		$rndkey0,0,$key1
   2281	lvx		$rndkey1,$idx,$key1
   2282	addi		$idx,$idx,16
   2283	vperm		$inout,$inout,$inptail,$inpperm
   2284	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
   2285	vxor		$inout,$inout,$tweak
   2286	vxor		$inout,$inout,$rndkey0
   2287	lvx		$rndkey0,$idx,$key1
   2288	addi		$idx,$idx,16
   2289	mtctr		$rounds
   2290
   2291	${UCMP}i	$len,16
   2292	blt		Ltail_xts_dec
   2293	be?b		Loop_xts_dec
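        					# (with a partial tail the last
        					# full block must be decrypted
        					# with the *next* tweak; that
        					# case is handled at Ltail_xts_dec)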
   2294
   2295.align	5
   2296Loop_xts_dec:
   2297	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
   2298	vncipher	$inout,$inout,$rndkey1
   2299	lvx		$rndkey1,$idx,$key1
   2300	addi		$idx,$idx,16
   2301	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
   2302	vncipher	$inout,$inout,$rndkey0
   2303	lvx		$rndkey0,$idx,$key1
   2304	addi		$idx,$idx,16
   2305	bdnz		Loop_xts_dec
   2306
   2307	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
   2308	vncipher	$inout,$inout,$rndkey1
   2309	lvx		$rndkey1,$idx,$key1
   2310	li		$idx,16
   2311	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
   2312	vxor		$rndkey0,$rndkey0,$tweak
   2313	vncipherlast	$output,$inout,$rndkey0
   2314
   2315	le?vperm	$tmp,$output,$output,$leperm
   2316	be?nop
   2317	le?stvx_u	$tmp,0,$out
   2318	be?stvx_u	$output,0,$out
   2319	addi		$out,$out,16
   2320
   2321	subic.		$len,$len,16
   2322	beq		Lxts_dec_done
   2323
   2324	vmr		$inout,$inptail
   2325	lvx		$inptail,0,$inp
   2326	addi		$inp,$inp,16
   2327	lvx		$rndkey0,0,$key1
   2328	lvx		$rndkey1,$idx,$key1
   2329	addi		$idx,$idx,16
   2330
   2331	vsrab		$tmp,$tweak,$seven		# next tweak value
   2332	vaddubm		$tweak,$tweak,$tweak
   2333	vsldoi		$tmp,$tmp,$tmp,15
   2334	vand		$tmp,$tmp,$eighty7
   2335	vxor		$tweak,$tweak,$tmp
   2336
   2337	vperm		$inout,$inout,$inptail,$inpperm
   2338	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
   2339	vxor		$inout,$inout,$tweak
   2340	vxor		$inout,$inout,$rndkey0
   2341	lvx		$rndkey0,$idx,$key1
   2342	addi		$idx,$idx,16
   2343
   2344	mtctr		$rounds
   2345	${UCMP}i	$len,16
   2346	bge		Loop_xts_dec
   2347
   2348Ltail_xts_dec:
   2349	vsrab		$tmp,$tweak,$seven		# next tweak value
   2350	vaddubm		$tweak1,$tweak,$tweak
   2351	vsldoi		$tmp,$tmp,$tmp,15
   2352	vand		$tmp,$tmp,$eighty7
   2353	vxor		$tweak1,$tweak1,$tmp
   2354
   2355	subi		$inp,$inp,16
   2356	add		$inp,$inp,$len
   2357
    2358	vxor		$inout,$inout,$tweak		# undo tweak applied above :-(
    2359	vxor		$inout,$inout,$tweak1		# apply next tweak instead :-)
   2360
   2361Loop_xts_dec_short:
   2362	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
   2363	vncipher	$inout,$inout,$rndkey1
   2364	lvx		$rndkey1,$idx,$key1
   2365	addi		$idx,$idx,16
   2366	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
   2367	vncipher	$inout,$inout,$rndkey0
   2368	lvx		$rndkey0,$idx,$key1
   2369	addi		$idx,$idx,16
   2370	bdnz		Loop_xts_dec_short
   2371
   2372	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
   2373	vncipher	$inout,$inout,$rndkey1
   2374	lvx		$rndkey1,$idx,$key1
   2375	li		$idx,16
   2376	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
   2377	vxor		$rndkey0,$rndkey0,$tweak1
   2378	vncipherlast	$output,$inout,$rndkey0
   2379
   2380	le?vperm	$tmp,$output,$output,$leperm
   2381	be?nop
   2382	le?stvx_u	$tmp,0,$out
   2383	be?stvx_u	$output,0,$out
   2384
   2385	vmr		$inout,$inptail
   2386	lvx		$inptail,0,$inp
   2387	#addi		$inp,$inp,16
   2388	lvx		$rndkey0,0,$key1
   2389	lvx		$rndkey1,$idx,$key1
   2390	addi		$idx,$idx,16
   2391	vperm		$inout,$inout,$inptail,$inpperm
   2392	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
   2393
   2394	lvsr		$inpperm,0,$len			# $inpperm is no longer needed
   2395	vxor		$inptail,$inptail,$inptail	# $inptail is no longer needed
   2396	vspltisb	$tmp,-1
   2397	vperm		$inptail,$inptail,$tmp,$inpperm
   2398	vsel		$inout,$inout,$output,$inptail
   2399
   2400	vxor		$rndkey0,$rndkey0,$tweak
   2401	vxor		$inout,$inout,$rndkey0
   2402	lvx		$rndkey0,$idx,$key1
   2403	addi		$idx,$idx,16
   2404
   2405	subi		r11,$out,1
   2406	mtctr		$len
   2407	li		$len,16
   2408Loop_xts_dec_steal:
   2409	lbzu		r0,1(r11)
   2410	stb		r0,16(r11)
   2411	bdnz		Loop_xts_dec_steal
   2412
   2413	mtctr		$rounds
   2414	b		Loop_xts_dec			# one more time...
   2415
   2416Lxts_dec_done:
   2417	${UCMP}i	$ivp,0
   2418	beq		Lxts_dec_ret
   2419
   2420	vsrab		$tmp,$tweak,$seven		# next tweak value
   2421	vaddubm		$tweak,$tweak,$tweak
   2422	vsldoi		$tmp,$tmp,$tmp,15
   2423	vand		$tmp,$tmp,$eighty7
   2424	vxor		$tweak,$tweak,$tmp
   2425
   2426	le?vperm	$tweak,$tweak,$tweak,$leperm
   2427	stvx_u		$tweak,0,$ivp
   2428
   2429Lxts_dec_ret:
   2430	mtspr		256,r12				# restore vrsave
   2431	li		r3,0
   2432	blr
   2433	.long		0
   2434	.byte		0,12,0x04,0,0x80,6,6,0
   2435	.long		0
   2436.size	.${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt
   2437___
   2438#########################################################################
   2439{{	# Optimized XTS procedures					#
   2440my $key_=$key2;
   2441my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31));
   2442    $x00=0 if ($flavour =~ /osx/);
   2443my ($in0,  $in1,  $in2,  $in3,  $in4,  $in5 )=map("v$_",(0..5));
   2444my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16));
   2445my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22));
    2446	my $rndkey0="v23";	# v24-v25 rotating buffer for the first round keys
    2447			# v26-v31 last 6 round keys
   2448my ($keyperm)=($out0);	# aliases with "caller", redundant assignment
   2449my $taillen=$x70;
   2450
   2451$code.=<<___;
   2452.align	5
   2453_aesp8_xts_encrypt6x:
   2454	$STU		$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
   2455	mflr		r11
   2456	li		r7,`$FRAME+8*16+15`
   2457	li		r3,`$FRAME+8*16+31`
   2458	$PUSH		r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
   2459	stvx		v20,r7,$sp		# ABI says so
   2460	addi		r7,r7,32
   2461	stvx		v21,r3,$sp
   2462	addi		r3,r3,32
   2463	stvx		v22,r7,$sp
   2464	addi		r7,r7,32
   2465	stvx		v23,r3,$sp
   2466	addi		r3,r3,32
   2467	stvx		v24,r7,$sp
   2468	addi		r7,r7,32
   2469	stvx		v25,r3,$sp
   2470	addi		r3,r3,32
   2471	stvx		v26,r7,$sp
   2472	addi		r7,r7,32
   2473	stvx		v27,r3,$sp
   2474	addi		r3,r3,32
   2475	stvx		v28,r7,$sp
   2476	addi		r7,r7,32
   2477	stvx		v29,r3,$sp
   2478	addi		r3,r3,32
   2479	stvx		v30,r7,$sp
   2480	stvx		v31,r3,$sp
   2481	li		r0,-1
   2482	stw		$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
   2483	li		$x10,0x10
   2484	$PUSH		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
   2485	li		$x20,0x20
   2486	$PUSH		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
   2487	li		$x30,0x30
   2488	$PUSH		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
   2489	li		$x40,0x40
   2490	$PUSH		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
   2491	li		$x50,0x50
   2492	$PUSH		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
   2493	li		$x60,0x60
   2494	$PUSH		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
   2495	li		$x70,0x70
   2496	mtspr		256,r0
   2497
   2498	subi		$rounds,$rounds,3	# -4 in total
   2499
   2500	lvx		$rndkey0,$x00,$key1	# load key schedule
   2501	lvx		v30,$x10,$key1
   2502	addi		$key1,$key1,0x20
   2503	lvx		v31,$x00,$key1
   2504	?vperm		$rndkey0,$rndkey0,v30,$keyperm
   2505	addi		$key_,$sp,$FRAME+15
   2506	mtctr		$rounds
   2507
   2508Load_xts_enc_key:
   2509	?vperm		v24,v30,v31,$keyperm
   2510	lvx		v30,$x10,$key1
   2511	addi		$key1,$key1,0x20
   2512	stvx		v24,$x00,$key_		# off-load round[1]
   2513	?vperm		v25,v31,v30,$keyperm
   2514	lvx		v31,$x00,$key1
   2515	stvx		v25,$x10,$key_		# off-load round[2]
   2516	addi		$key_,$key_,0x20
   2517	bdnz		Load_xts_enc_key
   2518
   2519	lvx		v26,$x10,$key1
   2520	?vperm		v24,v30,v31,$keyperm
   2521	lvx		v27,$x20,$key1
   2522	stvx		v24,$x00,$key_		# off-load round[3]
   2523	?vperm		v25,v31,v26,$keyperm
   2524	lvx		v28,$x30,$key1
   2525	stvx		v25,$x10,$key_		# off-load round[4]
   2526	addi		$key_,$sp,$FRAME+15	# rewind $key_
   2527	?vperm		v26,v26,v27,$keyperm
   2528	lvx		v29,$x40,$key1
   2529	?vperm		v27,v27,v28,$keyperm
   2530	lvx		v30,$x50,$key1
   2531	?vperm		v28,v28,v29,$keyperm
   2532	lvx		v31,$x60,$key1
   2533	?vperm		v29,v29,v30,$keyperm
   2534	lvx		$twk5,$x70,$key1	# borrow $twk5
   2535	?vperm		v30,v30,v31,$keyperm
   2536	lvx		v24,$x00,$key_		# pre-load round[1]
   2537	?vperm		v31,v31,$twk5,$keyperm
   2538	lvx		v25,$x10,$key_		# pre-load round[2]
   2539
   2540	 vperm		$in0,$inout,$inptail,$inpperm
   2541	 subi		$inp,$inp,31		# undo "caller"
   2542	vxor		$twk0,$tweak,$rndkey0
   2543	vsrab		$tmp,$tweak,$seven	# next tweak value
   2544	vaddubm		$tweak,$tweak,$tweak
   2545	vsldoi		$tmp,$tmp,$tmp,15
   2546	vand		$tmp,$tmp,$eighty7
   2547	 vxor		$out0,$in0,$twk0
   2548	vxor		$tweak,$tweak,$tmp
   2549
   2550	 lvx_u		$in1,$x10,$inp
   2551	vxor		$twk1,$tweak,$rndkey0
   2552	vsrab		$tmp,$tweak,$seven	# next tweak value
   2553	vaddubm		$tweak,$tweak,$tweak
   2554	vsldoi		$tmp,$tmp,$tmp,15
   2555	 le?vperm	$in1,$in1,$in1,$leperm
   2556	vand		$tmp,$tmp,$eighty7
   2557	 vxor		$out1,$in1,$twk1
   2558	vxor		$tweak,$tweak,$tmp
   2559
   2560	 lvx_u		$in2,$x20,$inp
   2561	 andi.		$taillen,$len,15
   2562	vxor		$twk2,$tweak,$rndkey0
   2563	vsrab		$tmp,$tweak,$seven	# next tweak value
   2564	vaddubm		$tweak,$tweak,$tweak
   2565	vsldoi		$tmp,$tmp,$tmp,15
   2566	 le?vperm	$in2,$in2,$in2,$leperm
   2567	vand		$tmp,$tmp,$eighty7
   2568	 vxor		$out2,$in2,$twk2
   2569	vxor		$tweak,$tweak,$tmp
   2570
   2571	 lvx_u		$in3,$x30,$inp
   2572	 sub		$len,$len,$taillen
   2573	vxor		$twk3,$tweak,$rndkey0
   2574	vsrab		$tmp,$tweak,$seven	# next tweak value
   2575	vaddubm		$tweak,$tweak,$tweak
   2576	vsldoi		$tmp,$tmp,$tmp,15
   2577	 le?vperm	$in3,$in3,$in3,$leperm
   2578	vand		$tmp,$tmp,$eighty7
   2579	 vxor		$out3,$in3,$twk3
   2580	vxor		$tweak,$tweak,$tmp
   2581
   2582	 lvx_u		$in4,$x40,$inp
   2583	 subi		$len,$len,0x60
   2584	vxor		$twk4,$tweak,$rndkey0
   2585	vsrab		$tmp,$tweak,$seven	# next tweak value
   2586	vaddubm		$tweak,$tweak,$tweak
   2587	vsldoi		$tmp,$tmp,$tmp,15
   2588	 le?vperm	$in4,$in4,$in4,$leperm
   2589	vand		$tmp,$tmp,$eighty7
   2590	 vxor		$out4,$in4,$twk4
   2591	vxor		$tweak,$tweak,$tmp
   2592
   2593	 lvx_u		$in5,$x50,$inp
   2594	 addi		$inp,$inp,0x60
   2595	vxor		$twk5,$tweak,$rndkey0
   2596	vsrab		$tmp,$tweak,$seven	# next tweak value
   2597	vaddubm		$tweak,$tweak,$tweak
   2598	vsldoi		$tmp,$tmp,$tmp,15
   2599	 le?vperm	$in5,$in5,$in5,$leperm
   2600	vand		$tmp,$tmp,$eighty7
   2601	 vxor		$out5,$in5,$twk5
   2602	vxor		$tweak,$tweak,$tmp
   2603
   2604	vxor		v31,v31,$rndkey0
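        					# (folding rndkey0 into v31 means
        					# twkN ^ v31 == tweak ^ last round
        					# key, so vcipherlast can apply
        					# the last round key and the
        					# output tweak in one instruction)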
   2605	mtctr		$rounds
   2606	b		Loop_xts_enc6x
   2607
   2608.align	5
   2609Loop_xts_enc6x:
   2610	vcipher		$out0,$out0,v24
   2611	vcipher		$out1,$out1,v24
   2612	vcipher		$out2,$out2,v24
   2613	vcipher		$out3,$out3,v24
   2614	vcipher		$out4,$out4,v24
   2615	vcipher		$out5,$out5,v24
   2616	lvx		v24,$x20,$key_		# round[3]
   2617	addi		$key_,$key_,0x20
   2618
   2619	vcipher		$out0,$out0,v25
   2620	vcipher		$out1,$out1,v25
   2621	vcipher		$out2,$out2,v25
   2622	vcipher		$out3,$out3,v25
   2623	vcipher		$out4,$out4,v25
   2624	vcipher		$out5,$out5,v25
   2625	lvx		v25,$x10,$key_		# round[4]
   2626	bdnz		Loop_xts_enc6x
   2627
   2628	subic		$len,$len,96		# $len-=96
   2629	 vxor		$in0,$twk0,v31		# xor with last round key
   2630	vcipher		$out0,$out0,v24
   2631	vcipher		$out1,$out1,v24
   2632	 vsrab		$tmp,$tweak,$seven	# next tweak value
   2633	 vxor		$twk0,$tweak,$rndkey0
   2634	 vaddubm	$tweak,$tweak,$tweak
   2635	vcipher		$out2,$out2,v24
   2636	vcipher		$out3,$out3,v24
   2637	 vsldoi		$tmp,$tmp,$tmp,15
   2638	vcipher		$out4,$out4,v24
   2639	vcipher		$out5,$out5,v24
   2640
   2641	subfe.		r0,r0,r0		# borrow?-1:0
   2642	 vand		$tmp,$tmp,$eighty7
   2643	vcipher		$out0,$out0,v25
   2644	vcipher		$out1,$out1,v25
   2645	 vxor		$tweak,$tweak,$tmp
   2646	vcipher		$out2,$out2,v25
   2647	vcipher		$out3,$out3,v25
   2648	 vxor		$in1,$twk1,v31
   2649	 vsrab		$tmp,$tweak,$seven	# next tweak value
   2650	 vxor		$twk1,$tweak,$rndkey0
   2651	vcipher		$out4,$out4,v25
   2652	vcipher		$out5,$out5,v25
   2653
   2654	and		r0,r0,$len
   2655	 vaddubm	$tweak,$tweak,$tweak
   2656	 vsldoi		$tmp,$tmp,$tmp,15
   2657	vcipher		$out0,$out0,v26
   2658	vcipher		$out1,$out1,v26
   2659	 vand		$tmp,$tmp,$eighty7
   2660	vcipher		$out2,$out2,v26
   2661	vcipher		$out3,$out3,v26
   2662	 vxor		$tweak,$tweak,$tmp
   2663	vcipher		$out4,$out4,v26
   2664	vcipher		$out5,$out5,v26
   2665
    2666	add		$inp,$inp,r0		# $inp is adjusted in such a
    2667						# way that at exit from the
    2668						# loop inX-in5 are loaded
    2669						# with the last "words"
   2670	 vxor		$in2,$twk2,v31
   2671	 vsrab		$tmp,$tweak,$seven	# next tweak value
   2672	 vxor		$twk2,$tweak,$rndkey0
   2673	 vaddubm	$tweak,$tweak,$tweak
   2674	vcipher		$out0,$out0,v27
   2675	vcipher		$out1,$out1,v27
   2676	 vsldoi		$tmp,$tmp,$tmp,15
   2677	vcipher		$out2,$out2,v27
   2678	vcipher		$out3,$out3,v27
   2679	 vand		$tmp,$tmp,$eighty7
   2680	vcipher		$out4,$out4,v27
   2681	vcipher		$out5,$out5,v27
   2682
   2683	addi		$key_,$sp,$FRAME+15	# rewind $key_
   2684	 vxor		$tweak,$tweak,$tmp
   2685	vcipher		$out0,$out0,v28
   2686	vcipher		$out1,$out1,v28
   2687	 vxor		$in3,$twk3,v31
   2688	 vsrab		$tmp,$tweak,$seven	# next tweak value
   2689	 vxor		$twk3,$tweak,$rndkey0
   2690	vcipher		$out2,$out2,v28
   2691	vcipher		$out3,$out3,v28
   2692	 vaddubm	$tweak,$tweak,$tweak
   2693	 vsldoi		$tmp,$tmp,$tmp,15
   2694	vcipher		$out4,$out4,v28
   2695	vcipher		$out5,$out5,v28
   2696	lvx		v24,$x00,$key_		# re-pre-load round[1]
   2697	 vand		$tmp,$tmp,$eighty7
   2698
   2699	vcipher		$out0,$out0,v29
   2700	vcipher		$out1,$out1,v29
   2701	 vxor		$tweak,$tweak,$tmp
   2702	vcipher		$out2,$out2,v29
   2703	vcipher		$out3,$out3,v29
   2704	 vxor		$in4,$twk4,v31
   2705	 vsrab		$tmp,$tweak,$seven	# next tweak value
   2706	 vxor		$twk4,$tweak,$rndkey0
   2707	vcipher		$out4,$out4,v29
   2708	vcipher		$out5,$out5,v29
   2709	lvx		v25,$x10,$key_		# re-pre-load round[2]
   2710	 vaddubm	$tweak,$tweak,$tweak
   2711	 vsldoi		$tmp,$tmp,$tmp,15
   2712
   2713	vcipher		$out0,$out0,v30
   2714	vcipher		$out1,$out1,v30
   2715	 vand		$tmp,$tmp,$eighty7
   2716	vcipher		$out2,$out2,v30
   2717	vcipher		$out3,$out3,v30
   2718	 vxor		$tweak,$tweak,$tmp
   2719	vcipher		$out4,$out4,v30
   2720	vcipher		$out5,$out5,v30
   2721	 vxor		$in5,$twk5,v31
   2722	 vsrab		$tmp,$tweak,$seven	# next tweak value
   2723	 vxor		$twk5,$tweak,$rndkey0
   2724
   2725	vcipherlast	$out0,$out0,$in0
   2726	 lvx_u		$in0,$x00,$inp		# load next input block
   2727	 vaddubm	$tweak,$tweak,$tweak
   2728	 vsldoi		$tmp,$tmp,$tmp,15
   2729	vcipherlast	$out1,$out1,$in1
   2730	 lvx_u		$in1,$x10,$inp
   2731	vcipherlast	$out2,$out2,$in2
   2732	 le?vperm	$in0,$in0,$in0,$leperm
   2733	 lvx_u		$in2,$x20,$inp
   2734	 vand		$tmp,$tmp,$eighty7
   2735	vcipherlast	$out3,$out3,$in3
   2736	 le?vperm	$in1,$in1,$in1,$leperm
   2737	 lvx_u		$in3,$x30,$inp
   2738	vcipherlast	$out4,$out4,$in4
   2739	 le?vperm	$in2,$in2,$in2,$leperm
   2740	 lvx_u		$in4,$x40,$inp
   2741	 vxor		$tweak,$tweak,$tmp
   2742	vcipherlast	$tmp,$out5,$in5		# last block might be needed
   2743						# in stealing mode
   2744	 le?vperm	$in3,$in3,$in3,$leperm
   2745	 lvx_u		$in5,$x50,$inp
   2746	 addi		$inp,$inp,0x60
   2747	 le?vperm	$in4,$in4,$in4,$leperm
   2748	 le?vperm	$in5,$in5,$in5,$leperm
   2749
   2750	le?vperm	$out0,$out0,$out0,$leperm
   2751	le?vperm	$out1,$out1,$out1,$leperm
   2752	stvx_u		$out0,$x00,$out		# store output
   2753	 vxor		$out0,$in0,$twk0
   2754	le?vperm	$out2,$out2,$out2,$leperm
   2755	stvx_u		$out1,$x10,$out
   2756	 vxor		$out1,$in1,$twk1
   2757	le?vperm	$out3,$out3,$out3,$leperm
   2758	stvx_u		$out2,$x20,$out
   2759	 vxor		$out2,$in2,$twk2
   2760	le?vperm	$out4,$out4,$out4,$leperm
   2761	stvx_u		$out3,$x30,$out
   2762	 vxor		$out3,$in3,$twk3
   2763	le?vperm	$out5,$tmp,$tmp,$leperm
   2764	stvx_u		$out4,$x40,$out
   2765	 vxor		$out4,$in4,$twk4
   2766	le?stvx_u	$out5,$x50,$out
    2767	be?stvx_u	$tmp,$x50,$out
   2768	 vxor		$out5,$in5,$twk5
   2769	addi		$out,$out,0x60
   2770
   2771	mtctr		$rounds
   2772	beq		Loop_xts_enc6x		# did $len-=96 borrow?
   2773
   2774	addic.		$len,$len,0x60
   2775	beq		Lxts_enc6x_zero
   2776	cmpwi		$len,0x20
   2777	blt		Lxts_enc6x_one
   2778	nop
   2779	beq		Lxts_enc6x_two
   2780	cmpwi		$len,0x40
   2781	blt		Lxts_enc6x_three
   2782	nop
   2783	beq		Lxts_enc6x_four
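        					# ($len, which came up 96 short
        					# on loop exit, is restored above;
        					# the compares dispatch on the
        					# number (0-5) of full blocks
        					# left to encrypt)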
   2784
   2785Lxts_enc6x_five:
   2786	vxor		$out0,$in1,$twk0
   2787	vxor		$out1,$in2,$twk1
   2788	vxor		$out2,$in3,$twk2
   2789	vxor		$out3,$in4,$twk3
   2790	vxor		$out4,$in5,$twk4
   2791
   2792	bl		_aesp8_xts_enc5x
   2793
   2794	le?vperm	$out0,$out0,$out0,$leperm
   2795	vmr		$twk0,$twk5		# unused tweak
   2796	le?vperm	$out1,$out1,$out1,$leperm
   2797	stvx_u		$out0,$x00,$out		# store output
   2798	le?vperm	$out2,$out2,$out2,$leperm
   2799	stvx_u		$out1,$x10,$out
   2800	le?vperm	$out3,$out3,$out3,$leperm
   2801	stvx_u		$out2,$x20,$out
   2802	vxor		$tmp,$out4,$twk5	# last block prep for stealing
   2803	le?vperm	$out4,$out4,$out4,$leperm
   2804	stvx_u		$out3,$x30,$out
   2805	stvx_u		$out4,$x40,$out
   2806	addi		$out,$out,0x50
   2807	bne		Lxts_enc6x_steal
   2808	b		Lxts_enc6x_done
   2809
   2810.align	4
   2811Lxts_enc6x_four:
   2812	vxor		$out0,$in2,$twk0
   2813	vxor		$out1,$in3,$twk1
   2814	vxor		$out2,$in4,$twk2
   2815	vxor		$out3,$in5,$twk3
   2816	vxor		$out4,$out4,$out4
   2817
   2818	bl		_aesp8_xts_enc5x
   2819
   2820	le?vperm	$out0,$out0,$out0,$leperm
   2821	vmr		$twk0,$twk4		# unused tweak
   2822	le?vperm	$out1,$out1,$out1,$leperm
   2823	stvx_u		$out0,$x00,$out		# store output
   2824	le?vperm	$out2,$out2,$out2,$leperm
   2825	stvx_u		$out1,$x10,$out
   2826	vxor		$tmp,$out3,$twk4	# last block prep for stealing
   2827	le?vperm	$out3,$out3,$out3,$leperm
   2828	stvx_u		$out2,$x20,$out
   2829	stvx_u		$out3,$x30,$out
   2830	addi		$out,$out,0x40
   2831	bne		Lxts_enc6x_steal
   2832	b		Lxts_enc6x_done
   2833
   2834.align	4
   2835Lxts_enc6x_three:
   2836	vxor		$out0,$in3,$twk0
   2837	vxor		$out1,$in4,$twk1
   2838	vxor		$out2,$in5,$twk2
   2839	vxor		$out3,$out3,$out3
   2840	vxor		$out4,$out4,$out4
   2841
   2842	bl		_aesp8_xts_enc5x
   2843
   2844	le?vperm	$out0,$out0,$out0,$leperm
   2845	vmr		$twk0,$twk3		# unused tweak
   2846	le?vperm	$out1,$out1,$out1,$leperm
   2847	stvx_u		$out0,$x00,$out		# store output
   2848	vxor		$tmp,$out2,$twk3	# last block prep for stealing
   2849	le?vperm	$out2,$out2,$out2,$leperm
   2850	stvx_u		$out1,$x10,$out
   2851	stvx_u		$out2,$x20,$out
   2852	addi		$out,$out,0x30
   2853	bne		Lxts_enc6x_steal
   2854	b		Lxts_enc6x_done
   2855
   2856.align	4
   2857Lxts_enc6x_two:
   2858	vxor		$out0,$in4,$twk0
   2859	vxor		$out1,$in5,$twk1
   2860	vxor		$out2,$out2,$out2
   2861	vxor		$out3,$out3,$out3
   2862	vxor		$out4,$out4,$out4
   2863
   2864	bl		_aesp8_xts_enc5x
   2865
   2866	le?vperm	$out0,$out0,$out0,$leperm
   2867	vmr		$twk0,$twk2		# unused tweak
   2868	vxor		$tmp,$out1,$twk2	# last block prep for stealing
   2869	le?vperm	$out1,$out1,$out1,$leperm
   2870	stvx_u		$out0,$x00,$out		# store output
   2871	stvx_u		$out1,$x10,$out
   2872	addi		$out,$out,0x20
   2873	bne		Lxts_enc6x_steal
   2874	b		Lxts_enc6x_done
   2875
   2876.align	4
   2877Lxts_enc6x_one:
   2878	vxor		$out0,$in5,$twk0
   2879	nop
   2880Loop_xts_enc1x:
   2881	vcipher		$out0,$out0,v24
   2882	lvx		v24,$x20,$key_		# round[3]
   2883	addi		$key_,$key_,0x20
   2884
   2885	vcipher		$out0,$out0,v25
   2886	lvx		v25,$x10,$key_		# round[4]
   2887	bdnz		Loop_xts_enc1x
   2888
   2889	add		$inp,$inp,$taillen
   2890	cmpwi		$taillen,0
   2891	vcipher		$out0,$out0,v24
   2892
   2893	subi		$inp,$inp,16
   2894	vcipher		$out0,$out0,v25
   2895
   2896	lvsr		$inpperm,0,$taillen
   2897	vcipher		$out0,$out0,v26
   2898
   2899	lvx_u		$in0,0,$inp
   2900	vcipher		$out0,$out0,v27
   2901
   2902	addi		$key_,$sp,$FRAME+15	# rewind $key_
   2903	vcipher		$out0,$out0,v28
   2904	lvx		v24,$x00,$key_		# re-pre-load round[1]
   2905
   2906	vcipher		$out0,$out0,v29
   2907	lvx		v25,$x10,$key_		# re-pre-load round[2]
   2908	 vxor		$twk0,$twk0,v31
   2909
   2910	le?vperm	$in0,$in0,$in0,$leperm
   2911	vcipher		$out0,$out0,v30
   2912
   2913	vperm		$in0,$in0,$in0,$inpperm
   2914	vcipherlast	$out0,$out0,$twk0
   2915
   2916	vmr		$twk0,$twk1		# unused tweak
   2917	vxor		$tmp,$out0,$twk1	# last block prep for stealing
   2918	le?vperm	$out0,$out0,$out0,$leperm
   2919	stvx_u		$out0,$x00,$out		# store output
   2920	addi		$out,$out,0x10
   2921	bne		Lxts_enc6x_steal
   2922	b		Lxts_enc6x_done
   2923
   2924.align	4
   2925Lxts_enc6x_zero:
   2926	cmpwi		$taillen,0
   2927	beq		Lxts_enc6x_done
   2928
   2929	add		$inp,$inp,$taillen
   2930	subi		$inp,$inp,16
   2931	lvx_u		$in0,0,$inp
    2932	lvsr		$inpperm,0,$taillen	# $in5 is no longer needed
   2933	le?vperm	$in0,$in0,$in0,$leperm
   2934	vperm		$in0,$in0,$in0,$inpperm
   2935	vxor		$tmp,$tmp,$twk0
   2936Lxts_enc6x_steal:
   2937	vxor		$in0,$in0,$twk0
   2938	vxor		$out0,$out0,$out0
   2939	vspltisb	$out1,-1
   2940	vperm		$out0,$out0,$out1,$inpperm
   2941	vsel		$out0,$in0,$tmp,$out0	# $tmp is last block, remember?
   2942
   2943	subi		r30,$out,17
   2944	subi		$out,$out,16
   2945	mtctr		$taillen
   2946Loop_xts_enc6x_steal:
   2947	lbzu		r0,1(r30)
   2948	stb		r0,16(r30)
   2949	bdnz		Loop_xts_enc6x_steal
   2950
   2951	li		$taillen,0
   2952	mtctr		$rounds
   2953	b		Loop_xts_enc1x		# one more time...
   2954
   2955.align	4
   2956Lxts_enc6x_done:
   2957	${UCMP}i	$ivp,0
   2958	beq		Lxts_enc6x_ret
   2959
   2960	vxor		$tweak,$twk0,$rndkey0
   2961	le?vperm	$tweak,$tweak,$tweak,$leperm
   2962	stvx_u		$tweak,0,$ivp
   2963
   2964Lxts_enc6x_ret:
   2965	mtlr		r11
   2966	li		r10,`$FRAME+15`
   2967	li		r11,`$FRAME+31`
   2968	stvx		$seven,r10,$sp		# wipe copies of round keys
   2969	addi		r10,r10,32
   2970	stvx		$seven,r11,$sp
   2971	addi		r11,r11,32
   2972	stvx		$seven,r10,$sp
   2973	addi		r10,r10,32
   2974	stvx		$seven,r11,$sp
   2975	addi		r11,r11,32
   2976	stvx		$seven,r10,$sp
   2977	addi		r10,r10,32
   2978	stvx		$seven,r11,$sp
   2979	addi		r11,r11,32
   2980	stvx		$seven,r10,$sp
   2981	addi		r10,r10,32
   2982	stvx		$seven,r11,$sp
   2983	addi		r11,r11,32
   2984
   2985	mtspr		256,$vrsave
   2986	lvx		v20,r10,$sp		# ABI says so
   2987	addi		r10,r10,32
   2988	lvx		v21,r11,$sp
   2989	addi		r11,r11,32
   2990	lvx		v22,r10,$sp
   2991	addi		r10,r10,32
   2992	lvx		v23,r11,$sp
   2993	addi		r11,r11,32
   2994	lvx		v24,r10,$sp
   2995	addi		r10,r10,32
   2996	lvx		v25,r11,$sp
   2997	addi		r11,r11,32
   2998	lvx		v26,r10,$sp
   2999	addi		r10,r10,32
   3000	lvx		v27,r11,$sp
   3001	addi		r11,r11,32
   3002	lvx		v28,r10,$sp
   3003	addi		r10,r10,32
   3004	lvx		v29,r11,$sp
   3005	addi		r11,r11,32
   3006	lvx		v30,r10,$sp
   3007	lvx		v31,r11,$sp
   3008	$POP		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
   3009	$POP		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
   3010	$POP		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
   3011	$POP		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
   3012	$POP		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
   3013	$POP		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
   3014	addi		$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
   3015	blr
   3016	.long		0
   3017	.byte		0,12,0x04,1,0x80,6,6,0
   3018	.long		0
   3019
   3020.align	5
   3021_aesp8_xts_enc5x:
   3022	vcipher		$out0,$out0,v24
   3023	vcipher		$out1,$out1,v24
   3024	vcipher		$out2,$out2,v24
   3025	vcipher		$out3,$out3,v24
   3026	vcipher		$out4,$out4,v24
   3027	lvx		v24,$x20,$key_		# round[3]
   3028	addi		$key_,$key_,0x20
   3029
   3030	vcipher		$out0,$out0,v25
   3031	vcipher		$out1,$out1,v25
   3032	vcipher		$out2,$out2,v25
   3033	vcipher		$out3,$out3,v25
   3034	vcipher		$out4,$out4,v25
   3035	lvx		v25,$x10,$key_		# round[4]
   3036	bdnz		_aesp8_xts_enc5x
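        					# (the subroutine entry doubles as
        					# the loop top: v24/v25 rotate
        					# through the middle round keys)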
   3037
   3038	add		$inp,$inp,$taillen
   3039	cmpwi		$taillen,0
   3040	vcipher		$out0,$out0,v24
   3041	vcipher		$out1,$out1,v24
   3042	vcipher		$out2,$out2,v24
   3043	vcipher		$out3,$out3,v24
   3044	vcipher		$out4,$out4,v24
   3045
   3046	subi		$inp,$inp,16
   3047	vcipher		$out0,$out0,v25
   3048	vcipher		$out1,$out1,v25
   3049	vcipher		$out2,$out2,v25
   3050	vcipher		$out3,$out3,v25
   3051	vcipher		$out4,$out4,v25
   3052	 vxor		$twk0,$twk0,v31
   3053
   3054	vcipher		$out0,$out0,v26
    3055	lvsr		$inpperm,r0,$taillen	# $in5 is no longer needed
   3056	vcipher		$out1,$out1,v26
   3057	vcipher		$out2,$out2,v26
   3058	vcipher		$out3,$out3,v26
   3059	vcipher		$out4,$out4,v26
   3060	 vxor		$in1,$twk1,v31
   3061
   3062	vcipher		$out0,$out0,v27
   3063	lvx_u		$in0,0,$inp
   3064	vcipher		$out1,$out1,v27
   3065	vcipher		$out2,$out2,v27
   3066	vcipher		$out3,$out3,v27
   3067	vcipher		$out4,$out4,v27
   3068	 vxor		$in2,$twk2,v31
   3069
   3070	addi		$key_,$sp,$FRAME+15	# rewind $key_
   3071	vcipher		$out0,$out0,v28
   3072	vcipher		$out1,$out1,v28
   3073	vcipher		$out2,$out2,v28
   3074	vcipher		$out3,$out3,v28
   3075	vcipher		$out4,$out4,v28
   3076	lvx		v24,$x00,$key_		# re-pre-load round[1]
   3077	 vxor		$in3,$twk3,v31
   3078
   3079	vcipher		$out0,$out0,v29
   3080	le?vperm	$in0,$in0,$in0,$leperm
   3081	vcipher		$out1,$out1,v29
   3082	vcipher		$out2,$out2,v29
   3083	vcipher		$out3,$out3,v29
   3084	vcipher		$out4,$out4,v29
   3085	lvx		v25,$x10,$key_		# re-pre-load round[2]
   3086	 vxor		$in4,$twk4,v31
   3087
   3088	vcipher		$out0,$out0,v30
   3089	vperm		$in0,$in0,$in0,$inpperm
   3090	vcipher		$out1,$out1,v30
   3091	vcipher		$out2,$out2,v30
   3092	vcipher		$out3,$out3,v30
   3093	vcipher		$out4,$out4,v30
   3094
   3095	vcipherlast	$out0,$out0,$twk0
   3096	vcipherlast	$out1,$out1,$in1
   3097	vcipherlast	$out2,$out2,$in2
   3098	vcipherlast	$out3,$out3,$in3
   3099	vcipherlast	$out4,$out4,$in4
   3100	blr
    3101	.long		0
    3102	.byte		0,12,0x14,0,0,0,0,0
   3103
   3104.align	5
   3105_aesp8_xts_decrypt6x:
   3106	$STU		$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
   3107	mflr		r11
   3108	li		r7,`$FRAME+8*16+15`
   3109	li		r3,`$FRAME+8*16+31`
   3110	$PUSH		r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
   3111	stvx		v20,r7,$sp		# ABI says so
   3112	addi		r7,r7,32
   3113	stvx		v21,r3,$sp
   3114	addi		r3,r3,32
   3115	stvx		v22,r7,$sp
   3116	addi		r7,r7,32
   3117	stvx		v23,r3,$sp
   3118	addi		r3,r3,32
   3119	stvx		v24,r7,$sp
   3120	addi		r7,r7,32
   3121	stvx		v25,r3,$sp
   3122	addi		r3,r3,32
   3123	stvx		v26,r7,$sp
   3124	addi		r7,r7,32
   3125	stvx		v27,r3,$sp
   3126	addi		r3,r3,32
   3127	stvx		v28,r7,$sp
   3128	addi		r7,r7,32
   3129	stvx		v29,r3,$sp
   3130	addi		r3,r3,32
   3131	stvx		v30,r7,$sp
   3132	stvx		v31,r3,$sp
   3133	li		r0,-1
   3134	stw		$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
   3135	li		$x10,0x10
   3136	$PUSH		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
   3137	li		$x20,0x20
   3138	$PUSH		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
   3139	li		$x30,0x30
   3140	$PUSH		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
   3141	li		$x40,0x40
   3142	$PUSH		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
   3143	li		$x50,0x50
   3144	$PUSH		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
   3145	li		$x60,0x60
   3146	$PUSH		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
   3147	li		$x70,0x70
   3148	mtspr		256,r0
   3149
   3150	subi		$rounds,$rounds,3	# -4 in total
   3151
   3152	lvx		$rndkey0,$x00,$key1	# load key schedule
   3153	lvx		v30,$x10,$key1
   3154	addi		$key1,$key1,0x20
   3155	lvx		v31,$x00,$key1
   3156	?vperm		$rndkey0,$rndkey0,v30,$keyperm
   3157	addi		$key_,$sp,$FRAME+15
   3158	mtctr		$rounds
   3159
   3160Load_xts_dec_key:
   3161	?vperm		v24,v30,v31,$keyperm
   3162	lvx		v30,$x10,$key1
   3163	addi		$key1,$key1,0x20
   3164	stvx		v24,$x00,$key_		# off-load round[1]
   3165	?vperm		v25,v31,v30,$keyperm
   3166	lvx		v31,$x00,$key1
   3167	stvx		v25,$x10,$key_		# off-load round[2]
   3168	addi		$key_,$key_,0x20
   3169	bdnz		Load_xts_dec_key
   3170
   3171	lvx		v26,$x10,$key1
   3172	?vperm		v24,v30,v31,$keyperm
   3173	lvx		v27,$x20,$key1
   3174	stvx		v24,$x00,$key_		# off-load round[3]
   3175	?vperm		v25,v31,v26,$keyperm
   3176	lvx		v28,$x30,$key1
   3177	stvx		v25,$x10,$key_		# off-load round[4]
   3178	addi		$key_,$sp,$FRAME+15	# rewind $key_
   3179	?vperm		v26,v26,v27,$keyperm
   3180	lvx		v29,$x40,$key1
   3181	?vperm		v27,v27,v28,$keyperm
   3182	lvx		v30,$x50,$key1
   3183	?vperm		v28,v28,v29,$keyperm
   3184	lvx		v31,$x60,$key1
   3185	?vperm		v29,v29,v30,$keyperm
   3186	lvx		$twk5,$x70,$key1	# borrow $twk5
   3187	?vperm		v30,v30,v31,$keyperm
   3188	lvx		v24,$x00,$key_		# pre-load round[1]
   3189	?vperm		v31,v31,$twk5,$keyperm
   3190	lvx		v25,$x10,$key_		# pre-load round[2]
   3191
   3192	 vperm		$in0,$inout,$inptail,$inpperm
   3193	 subi		$inp,$inp,31		# undo "caller"
   3194	vxor		$twk0,$tweak,$rndkey0
   3195	vsrab		$tmp,$tweak,$seven	# next tweak value
   3196	vaddubm		$tweak,$tweak,$tweak
   3197	vsldoi		$tmp,$tmp,$tmp,15
   3198	vand		$tmp,$tmp,$eighty7
   3199	 vxor		$out0,$in0,$twk0
   3200	vxor		$tweak,$tweak,$tmp
   3201
   3202	 lvx_u		$in1,$x10,$inp
   3203	vxor		$twk1,$tweak,$rndkey0
   3204	vsrab		$tmp,$tweak,$seven	# next tweak value
   3205	vaddubm		$tweak,$tweak,$tweak
   3206	vsldoi		$tmp,$tmp,$tmp,15
   3207	 le?vperm	$in1,$in1,$in1,$leperm
   3208	vand		$tmp,$tmp,$eighty7
   3209	 vxor		$out1,$in1,$twk1
   3210	vxor		$tweak,$tweak,$tmp
   3211
   3212	 lvx_u		$in2,$x20,$inp
   3213	 andi.		$taillen,$len,15
   3214	vxor		$twk2,$tweak,$rndkey0
   3215	vsrab		$tmp,$tweak,$seven	# next tweak value
   3216	vaddubm		$tweak,$tweak,$tweak
   3217	vsldoi		$tmp,$tmp,$tmp,15
   3218	 le?vperm	$in2,$in2,$in2,$leperm
   3219	vand		$tmp,$tmp,$eighty7
   3220	 vxor		$out2,$in2,$twk2
   3221	vxor		$tweak,$tweak,$tmp
   3222
   3223	 lvx_u		$in3,$x30,$inp
   3224	 sub		$len,$len,$taillen
   3225	vxor		$twk3,$tweak,$rndkey0
   3226	vsrab		$tmp,$tweak,$seven	# next tweak value
   3227	vaddubm		$tweak,$tweak,$tweak
   3228	vsldoi		$tmp,$tmp,$tmp,15
   3229	 le?vperm	$in3,$in3,$in3,$leperm
   3230	vand		$tmp,$tmp,$eighty7
   3231	 vxor		$out3,$in3,$twk3
   3232	vxor		$tweak,$tweak,$tmp
   3233
   3234	 lvx_u		$in4,$x40,$inp
   3235	 subi		$len,$len,0x60
   3236	vxor		$twk4,$tweak,$rndkey0
   3237	vsrab		$tmp,$tweak,$seven	# next tweak value
   3238	vaddubm		$tweak,$tweak,$tweak
   3239	vsldoi		$tmp,$tmp,$tmp,15
   3240	 le?vperm	$in4,$in4,$in4,$leperm
   3241	vand		$tmp,$tmp,$eighty7
   3242	 vxor		$out4,$in4,$twk4
   3243	vxor		$tweak,$tweak,$tmp
   3244
   3245	 lvx_u		$in5,$x50,$inp
   3246	 addi		$inp,$inp,0x60
   3247	vxor		$twk5,$tweak,$rndkey0
   3248	vsrab		$tmp,$tweak,$seven	# next tweak value
   3249	vaddubm		$tweak,$tweak,$tweak
   3250	vsldoi		$tmp,$tmp,$tmp,15
   3251	 le?vperm	$in5,$in5,$in5,$leperm
   3252	vand		$tmp,$tmp,$eighty7
   3253	 vxor		$out5,$in5,$twk5
   3254	vxor		$tweak,$tweak,$tmp
   3255
   3256	vxor		v31,v31,$rndkey0
   3257	mtctr		$rounds
   3258	b		Loop_xts_dec6x
   3259
   3260.align	5
   3261Loop_xts_dec6x:
   3262	vncipher	$out0,$out0,v24
   3263	vncipher	$out1,$out1,v24
   3264	vncipher	$out2,$out2,v24
   3265	vncipher	$out3,$out3,v24
   3266	vncipher	$out4,$out4,v24
   3267	vncipher	$out5,$out5,v24
   3268	lvx		v24,$x20,$key_		# round[3]
   3269	addi		$key_,$key_,0x20
   3270
   3271	vncipher	$out0,$out0,v25
   3272	vncipher	$out1,$out1,v25
   3273	vncipher	$out2,$out2,v25
   3274	vncipher	$out3,$out3,v25
   3275	vncipher	$out4,$out4,v25
   3276	vncipher	$out5,$out5,v25
   3277	lvx		v25,$x10,$key_		# round[4]
   3278	bdnz		Loop_xts_dec6x
   3279
   3280	subic		$len,$len,96		# $len-=96
   3281	 vxor		$in0,$twk0,v31		# xor with last round key
   3282	vncipher	$out0,$out0,v24
   3283	vncipher	$out1,$out1,v24
   3284	 vsrab		$tmp,$tweak,$seven	# next tweak value
   3285	 vxor		$twk0,$tweak,$rndkey0
   3286	 vaddubm	$tweak,$tweak,$tweak
   3287	vncipher	$out2,$out2,v24
   3288	vncipher	$out3,$out3,v24
   3289	 vsldoi		$tmp,$tmp,$tmp,15
   3290	vncipher	$out4,$out4,v24
   3291	vncipher	$out5,$out5,v24
   3292
   3293	subfe.		r0,r0,r0		# borrow?-1:0
   3294	 vand		$tmp,$tmp,$eighty7
   3295	vncipher	$out0,$out0,v25
   3296	vncipher	$out1,$out1,v25
   3297	 vxor		$tweak,$tweak,$tmp
   3298	vncipher	$out2,$out2,v25
   3299	vncipher	$out3,$out3,v25
   3300	 vxor		$in1,$twk1,v31
   3301	 vsrab		$tmp,$tweak,$seven	# next tweak value
   3302	 vxor		$twk1,$tweak,$rndkey0
   3303	vncipher	$out4,$out4,v25
   3304	vncipher	$out5,$out5,v25
   3305
   3306	and		r0,r0,$len
   3307	 vaddubm	$tweak,$tweak,$tweak
   3308	 vsldoi		$tmp,$tmp,$tmp,15
   3309	vncipher	$out0,$out0,v26
   3310	vncipher	$out1,$out1,v26
   3311	 vand		$tmp,$tmp,$eighty7
   3312	vncipher	$out2,$out2,v26
   3313	vncipher	$out3,$out3,v26
   3314	 vxor		$tweak,$tweak,$tmp
   3315	vncipher	$out4,$out4,v26
   3316	vncipher	$out5,$out5,v26
   3317
    3318	add		$inp,$inp,r0		# $inp is adjusted in such a
    3319						# way that at exit from the
    3320						# loop inX-in5 are loaded
    3321						# with the last "words"
   3322	 vxor		$in2,$twk2,v31
   3323	 vsrab		$tmp,$tweak,$seven	# next tweak value
   3324	 vxor		$twk2,$tweak,$rndkey0
   3325	 vaddubm	$tweak,$tweak,$tweak
   3326	vncipher	$out0,$out0,v27
   3327	vncipher	$out1,$out1,v27
   3328	 vsldoi		$tmp,$tmp,$tmp,15
   3329	vncipher	$out2,$out2,v27
   3330	vncipher	$out3,$out3,v27
   3331	 vand		$tmp,$tmp,$eighty7
   3332	vncipher	$out4,$out4,v27
   3333	vncipher	$out5,$out5,v27
   3334
   3335	addi		$key_,$sp,$FRAME+15	# rewind $key_
   3336	 vxor		$tweak,$tweak,$tmp
   3337	vncipher	$out0,$out0,v28
   3338	vncipher	$out1,$out1,v28
   3339	 vxor		$in3,$twk3,v31
   3340	 vsrab		$tmp,$tweak,$seven	# next tweak value
   3341	 vxor		$twk3,$tweak,$rndkey0
   3342	vncipher	$out2,$out2,v28
   3343	vncipher	$out3,$out3,v28
   3344	 vaddubm	$tweak,$tweak,$tweak
   3345	 vsldoi		$tmp,$tmp,$tmp,15
   3346	vncipher	$out4,$out4,v28
   3347	vncipher	$out5,$out5,v28
   3348	lvx		v24,$x00,$key_		# re-pre-load round[1]
   3349	 vand		$tmp,$tmp,$eighty7
   3350
   3351	vncipher	$out0,$out0,v29
   3352	vncipher	$out1,$out1,v29
   3353	 vxor		$tweak,$tweak,$tmp
   3354	vncipher	$out2,$out2,v29
   3355	vncipher	$out3,$out3,v29
   3356	 vxor		$in4,$twk4,v31
   3357	 vsrab		$tmp,$tweak,$seven	# next tweak value
   3358	 vxor		$twk4,$tweak,$rndkey0
   3359	vncipher	$out4,$out4,v29
   3360	vncipher	$out5,$out5,v29
   3361	lvx		v25,$x10,$key_		# re-pre-load round[2]
   3362	 vaddubm	$tweak,$tweak,$tweak
   3363	 vsldoi		$tmp,$tmp,$tmp,15
   3364
   3365	vncipher	$out0,$out0,v30
   3366	vncipher	$out1,$out1,v30
   3367	 vand		$tmp,$tmp,$eighty7
   3368	vncipher	$out2,$out2,v30
   3369	vncipher	$out3,$out3,v30
   3370	 vxor		$tweak,$tweak,$tmp
   3371	vncipher	$out4,$out4,v30
   3372	vncipher	$out5,$out5,v30
   3373	 vxor		$in5,$twk5,v31
   3374	 vsrab		$tmp,$tweak,$seven	# next tweak value
   3375	 vxor		$twk5,$tweak,$rndkey0
   3376
   3377	vncipherlast	$out0,$out0,$in0
   3378	 lvx_u		$in0,$x00,$inp		# load next input block
   3379	 vaddubm	$tweak,$tweak,$tweak
   3380	 vsldoi		$tmp,$tmp,$tmp,15
   3381	vncipherlast	$out1,$out1,$in1
   3382	 lvx_u		$in1,$x10,$inp
   3383	vncipherlast	$out2,$out2,$in2
   3384	 le?vperm	$in0,$in0,$in0,$leperm
   3385	 lvx_u		$in2,$x20,$inp
   3386	 vand		$tmp,$tmp,$eighty7
   3387	vncipherlast	$out3,$out3,$in3
   3388	 le?vperm	$in1,$in1,$in1,$leperm
   3389	 lvx_u		$in3,$x30,$inp
   3390	vncipherlast	$out4,$out4,$in4
   3391	 le?vperm	$in2,$in2,$in2,$leperm
   3392	 lvx_u		$in4,$x40,$inp
   3393	 vxor		$tweak,$tweak,$tmp
   3394	vncipherlast	$out5,$out5,$in5
   3395	 le?vperm	$in3,$in3,$in3,$leperm
   3396	 lvx_u		$in5,$x50,$inp
   3397	 addi		$inp,$inp,0x60
   3398	 le?vperm	$in4,$in4,$in4,$leperm
   3399	 le?vperm	$in5,$in5,$in5,$leperm
   3400
   3401	le?vperm	$out0,$out0,$out0,$leperm
   3402	le?vperm	$out1,$out1,$out1,$leperm
   3403	stvx_u		$out0,$x00,$out		# store output
   3404	 vxor		$out0,$in0,$twk0
   3405	le?vperm	$out2,$out2,$out2,$leperm
   3406	stvx_u		$out1,$x10,$out
   3407	 vxor		$out1,$in1,$twk1
   3408	le?vperm	$out3,$out3,$out3,$leperm
   3409	stvx_u		$out2,$x20,$out
   3410	 vxor		$out2,$in2,$twk2
   3411	le?vperm	$out4,$out4,$out4,$leperm
   3412	stvx_u		$out3,$x30,$out
   3413	 vxor		$out3,$in3,$twk3
   3414	le?vperm	$out5,$out5,$out5,$leperm
   3415	stvx_u		$out4,$x40,$out
   3416	 vxor		$out4,$in4,$twk4
   3417	stvx_u		$out5,$x50,$out
   3418	 vxor		$out5,$in5,$twk5
   3419	addi		$out,$out,0x60
   3420
   3421	mtctr		$rounds
   3422	beq		Loop_xts_dec6x		# did $len-=96 borrow?
   3423
   3424	addic.		$len,$len,0x60
   3425	beq		Lxts_dec6x_zero
   3426	cmpwi		$len,0x20
   3427	blt		Lxts_dec6x_one
   3428	nop
   3429	beq		Lxts_dec6x_two
   3430	cmpwi		$len,0x40
   3431	blt		Lxts_dec6x_three
   3432	nop
   3433	beq		Lxts_dec6x_four
   3434
   3435Lxts_dec6x_five:
   3436	vxor		$out0,$in1,$twk0
   3437	vxor		$out1,$in2,$twk1
   3438	vxor		$out2,$in3,$twk2
   3439	vxor		$out3,$in4,$twk3
   3440	vxor		$out4,$in5,$twk4
   3441
   3442	bl		_aesp8_xts_dec5x
   3443
   3444	le?vperm	$out0,$out0,$out0,$leperm
   3445	vmr		$twk0,$twk5		# unused tweak
   3446	vxor		$twk1,$tweak,$rndkey0
   3447	le?vperm	$out1,$out1,$out1,$leperm
   3448	stvx_u		$out0,$x00,$out		# store output
   3449	vxor		$out0,$in0,$twk1
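        					# ($out0 is whitened with the
        					# later tweak twk1: with a tail,
        					# the last full block is decrypted
        					# with the next tweak, while twk0
        					# is saved for the stolen partial
        					# block)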
   3450	le?vperm	$out2,$out2,$out2,$leperm
   3451	stvx_u		$out1,$x10,$out
   3452	le?vperm	$out3,$out3,$out3,$leperm
   3453	stvx_u		$out2,$x20,$out
   3454	le?vperm	$out4,$out4,$out4,$leperm
   3455	stvx_u		$out3,$x30,$out
   3456	stvx_u		$out4,$x40,$out
   3457	addi		$out,$out,0x50
   3458	bne		Lxts_dec6x_steal
   3459	b		Lxts_dec6x_done
   3460
   3461.align	4
   3462Lxts_dec6x_four:
   3463	vxor		$out0,$in2,$twk0
   3464	vxor		$out1,$in3,$twk1
   3465	vxor		$out2,$in4,$twk2
   3466	vxor		$out3,$in5,$twk3
   3467	vxor		$out4,$out4,$out4
   3468
   3469	bl		_aesp8_xts_dec5x
   3470
   3471	le?vperm	$out0,$out0,$out0,$leperm
   3472	vmr		$twk0,$twk4		# unused tweak
   3473	vmr		$twk1,$twk5
   3474	le?vperm	$out1,$out1,$out1,$leperm
   3475	stvx_u		$out0,$x00,$out		# store output
   3476	vxor		$out0,$in0,$twk5
   3477	le?vperm	$out2,$out2,$out2,$leperm
   3478	stvx_u		$out1,$x10,$out
   3479	le?vperm	$out3,$out3,$out3,$leperm
   3480	stvx_u		$out2,$x20,$out
   3481	stvx_u		$out3,$x30,$out
   3482	addi		$out,$out,0x40
   3483	bne		Lxts_dec6x_steal
   3484	b		Lxts_dec6x_done
   3485
   3486.align	4
   3487Lxts_dec6x_three:
   3488	vxor		$out0,$in3,$twk0
   3489	vxor		$out1,$in4,$twk1
   3490	vxor		$out2,$in5,$twk2
   3491	vxor		$out3,$out3,$out3
   3492	vxor		$out4,$out4,$out4
   3493
   3494	bl		_aesp8_xts_dec5x
   3495
   3496	le?vperm	$out0,$out0,$out0,$leperm
   3497	vmr		$twk0,$twk3		# unused tweak
   3498	vmr		$twk1,$twk4
   3499	le?vperm	$out1,$out1,$out1,$leperm
   3500	stvx_u		$out0,$x00,$out		# store output
   3501	vxor		$out0,$in0,$twk4
   3502	le?vperm	$out2,$out2,$out2,$leperm
   3503	stvx_u		$out1,$x10,$out
   3504	stvx_u		$out2,$x20,$out
   3505	addi		$out,$out,0x30
   3506	bne		Lxts_dec6x_steal
   3507	b		Lxts_dec6x_done
   3508
   3509.align	4
   3510Lxts_dec6x_two:
   3511	vxor		$out0,$in4,$twk0
   3512	vxor		$out1,$in5,$twk1
   3513	vxor		$out2,$out2,$out2
   3514	vxor		$out3,$out3,$out3
   3515	vxor		$out4,$out4,$out4
   3516
   3517	bl		_aesp8_xts_dec5x
   3518
   3519	le?vperm	$out0,$out0,$out0,$leperm
   3520	vmr		$twk0,$twk2		# unused tweak
   3521	vmr		$twk1,$twk3
   3522	le?vperm	$out1,$out1,$out1,$leperm
   3523	stvx_u		$out0,$x00,$out		# store output
   3524	vxor		$out0,$in0,$twk3
   3525	stvx_u		$out1,$x10,$out
   3526	addi		$out,$out,0x20
   3527	bne		Lxts_dec6x_steal
   3528	b		Lxts_dec6x_done
   3529
.align	4
Lxts_dec6x_one:
	vxor		$out0,$in5,$twk0
	nop
Loop_xts_dec1x:
	vncipher	$out0,$out0,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Loop_xts_dec1x

	subi		r0,$taillen,1
	vncipher	$out0,$out0,v24

	andi.		r0,r0,16
	cmpwi		$taillen,0
	vncipher	$out0,$out0,v25

	sub		$inp,$inp,r0
	vncipher	$out0,$out0,v26

	lvx_u		$in0,0,$inp
	vncipher	$out0,$out0,v27

	addi		$key_,$sp,$FRAME+15	# rewind $key_
	vncipher	$out0,$out0,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]

	vncipher	$out0,$out0,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]
	 vxor		$twk0,$twk0,v31

	le?vperm	$in0,$in0,$in0,$leperm
	vncipher	$out0,$out0,v30

	mtctr		$rounds
	vncipherlast	$out0,$out0,$twk0

	vmr		$twk0,$twk1		# unused tweak
	vmr		$twk1,$twk2
	le?vperm	$out0,$out0,$out0,$leperm
	stvx_u		$out0,$x00,$out		# store output
	addi		$out,$out,0x10
	vxor		$out0,$in0,$twk2
	bne		Lxts_dec6x_steal
	b		Lxts_dec6x_done

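	# no whole block left over: done unless a partial tail remains, in
	# which case the last full ciphertext block is decrypted with the
	# next tweak ($twk1) as the first half of ciphertext stealing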
.align	4
Lxts_dec6x_zero:
	cmpwi		$taillen,0
	beq		Lxts_dec6x_done

	lvx_u		$in0,0,$inp
	le?vperm	$in0,$in0,$in0,$leperm
	vxor		$out0,$in0,$twk1
Lxts_dec6x_steal:
	vncipher	$out0,$out0,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Lxts_dec6x_steal

	add		$inp,$inp,$taillen
	vncipher	$out0,$out0,v24

	cmpwi		$taillen,0
	vncipher	$out0,$out0,v25

	lvx_u		$in0,0,$inp
	vncipher	$out0,$out0,v26

	lvsr		$inpperm,0,$taillen	# $in5 is no more
	vncipher	$out0,$out0,v27

	addi		$key_,$sp,$FRAME+15	# rewind $key_
	vncipher	$out0,$out0,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]

	vncipher	$out0,$out0,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]
	 vxor		$twk1,$twk1,v31

	le?vperm	$in0,$in0,$in0,$leperm
	vncipher	$out0,$out0,v30

	vperm		$in0,$in0,$in0,$inpperm
	vncipherlast	$tmp,$out0,$twk1

	le?vperm	$out0,$tmp,$tmp,$leperm
	le?stvx_u	$out0,0,$out
	be?stvx_u	$tmp,0,$out

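	# assemble the block to decrypt last: a vperm/vsel mask keeps the
	# first $taillen bytes from the tail ciphertext ($in0) and takes the
	# remainder from the block just decrypted ($tmp); xoring with $twk0
	# readies it for one more pass through Loop_xts_dec1x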
	vxor		$out0,$out0,$out0
	vspltisb	$out1,-1
	vperm		$out0,$out0,$out1,$inpperm
	vsel		$out0,$in0,$tmp,$out0
	vxor		$out0,$out0,$twk0

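	# shift the first $taillen plaintext bytes of the block just stored
	# at $out up into the final partial-block slot, one byte at a time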
	subi		r30,$out,1
	mtctr		$taillen
Loop_xts_dec6x_steal:
	lbzu		r0,1(r30)
	stb		r0,16(r30)
	bdnz		Loop_xts_dec6x_steal

	li		$taillen,0
	mtctr		$rounds
	b		Loop_xts_dec1x		# one more time...

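	# all data processed: if the caller supplied an IV pointer, strip
	# the pre-folded round-key xor from the next tweak and write it back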
.align	4
Lxts_dec6x_done:
	${UCMP}i	$ivp,0
	beq		Lxts_dec6x_ret

	vxor		$tweak,$twk0,$rndkey0
	le?vperm	$tweak,$tweak,$tweak,$leperm
	stvx_u		$tweak,0,$ivp

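	# common epilogue: overwrite the stack copies of the round keys,
	# then restore the non-volatile vector and general purpose registers
	# and release the frame, as the ABI requires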
Lxts_dec6x_ret:
	mtlr		r11
	li		r10,`$FRAME+15`
	li		r11,`$FRAME+31`
	stvx		$seven,r10,$sp		# wipe copies of round keys
	addi		r10,r10,32
	stvx		$seven,r11,$sp
	addi		r11,r11,32
	stvx		$seven,r10,$sp
	addi		r10,r10,32
	stvx		$seven,r11,$sp
	addi		r11,r11,32
	stvx		$seven,r10,$sp
	addi		r10,r10,32
	stvx		$seven,r11,$sp
	addi		r11,r11,32
	stvx		$seven,r10,$sp
	addi		r10,r10,32
	stvx		$seven,r11,$sp
	addi		r11,r11,32

	mtspr		256,$vrsave
	lvx		v20,r10,$sp		# ABI says so
	addi		r10,r10,32
	lvx		v21,r11,$sp
	addi		r11,r11,32
	lvx		v22,r10,$sp
	addi		r10,r10,32
	lvx		v23,r11,$sp
	addi		r11,r11,32
	lvx		v24,r10,$sp
	addi		r10,r10,32
	lvx		v25,r11,$sp
	addi		r11,r11,32
	lvx		v26,r10,$sp
	addi		r10,r10,32
	lvx		v27,r11,$sp
	addi		r11,r11,32
	lvx		v28,r10,$sp
	addi		r10,r10,32
	lvx		v29,r11,$sp
	addi		r11,r11,32
	lvx		v30,r10,$sp
	lvx		v31,r11,$sp
	$POP		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi		$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long		0
	.byte		0,12,0x04,1,0x80,6,6,0
	.long		0

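	# shared tail subroutine: runs $out0..$out4 through the remaining
	# rounds in parallel, folding each tweak into the last round key
	# (v31) so that vncipherlast applies AddRoundKey and the tweak xor
	# in a single instruction; CR0 is left holding the $taillen==0
	# comparison for the caller's stealing dispatch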
.align	5
_aesp8_xts_dec5x:
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		_aesp8_xts_dec5x

	subi		r0,$taillen,1
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24

	andi.		r0,r0,16
	cmpwi		$taillen,0
	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	 vxor		$twk0,$twk0,v31

	sub		$inp,$inp,r0
	vncipher	$out0,$out0,v26
	vncipher	$out1,$out1,v26
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	vncipher	$out4,$out4,v26
	 vxor		$in1,$twk1,v31

	vncipher	$out0,$out0,v27
	lvx_u		$in0,0,$inp
	vncipher	$out1,$out1,v27
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vncipher	$out4,$out4,v27
	 vxor		$in2,$twk2,v31

	addi		$key_,$sp,$FRAME+15	# rewind $key_
	vncipher	$out0,$out0,v28
	vncipher	$out1,$out1,v28
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vncipher	$out4,$out4,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]
	 vxor		$in3,$twk3,v31

	vncipher	$out0,$out0,v29
	le?vperm	$in0,$in0,$in0,$leperm
	vncipher	$out1,$out1,v29
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vncipher	$out4,$out4,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]
	 vxor		$in4,$twk4,v31

	vncipher	$out0,$out0,v30
	vncipher	$out1,$out1,v30
	vncipher	$out2,$out2,v30
	vncipher	$out3,$out3,v30
	vncipher	$out4,$out4,v30

	vncipherlast	$out0,$out0,$twk0
	vncipherlast	$out1,$out1,$in1
	vncipherlast	$out2,$out2,$in2
	vncipherlast	$out3,$out3,$in3
	vncipherlast	$out4,$out4,$in4
	mtctr		$rounds
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,0,0
___
}}	}}}

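# post-process the accumulated $code: evaluate `...` arithmetic, rewrite the
# constants table into endian-agnostic .byte form, and resolve '?'-prefixed
# endian-specific instructions for the requested flavour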
my $consts=1;
foreach(split("\n",$code)) {
	s/\`([^\`]*)\`/eval($1)/geo;

	# constants table endian-specific conversion
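	# a trailing '?rev' tag requests byte reversal on little-endian
	# targets ('?inv' instead xors each byte with 0xf, flipping vperm
	# lane indices); e.g. ".long 0x01020304 ?rev" is emitted as
	# ".byte 0x04,0x03,0x02,0x01" when $flavour ends in "le"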
	if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
	    my $conv=$3;
	    my @bytes=();

	    # convert to endian-agnostic format
	    if ($1 eq "long") {
	      foreach (split(/,\s*/,$2)) {
		my $l = /^0/?oct:int;
		push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
	      }
	    } else {
		@bytes = map(/^0/?oct:int,split(/,\s*/,$2));
	    }

	    # little-endian conversion
	    if ($flavour =~ /le$/o) {
		SWITCH: for($conv)  {
		    /\?inv/ && do   { @bytes=map($_^0xf,@bytes); last; };
		    /\?rev/ && do   { @bytes=reverse(@bytes);    last; };
		}
	    }

	    # emit
	    print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
	    next;
	}
	$consts=0 if (m/Lconsts:/o);	# end of table

	# instructions prefixed with '?' are endian-specific and need
	# to be adjusted accordingly...
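	# e.g. on little-endian "?lvsr" assembles as lvsl (and vice versa),
	# and "?vperm vD,vA,vB,vC" has its source operands swapped to become
	# "vperm vD,vB,vA,vC"; on big-endian the '?' prefix is simply dropped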
	if ($flavour =~ /le$/o) {	# little-endian
	    s/le\?//o		or
	    s/be\?/#be#/o	or
	    s/\?lvsr/lvsl/o	or
	    s/\?lvsl/lvsr/o	or
	    s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
	    s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
	    s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
	} else {			# big-endian
	    s/le\?/#le#/o	or
	    s/be\?//o		or
	    s/\?([a-z]+)/$1/o;
	}

	print $_,"\n";
}

close STDOUT;