#! /usr/bin/env perl
# SPDX-License-Identifier: GPL-2.0

# This code is taken from CRYPTOGAMs[1] and is included here using the option
# in the license to distribute the code under the GPL. Therefore this program
# is free software; you can redistribute it and/or modify it under the terms of
# the GNU General Public License version 2 as published by the Free Software
# Foundation.
#
# [1] https://www.openssl.org/~appro/cryptogams/

# Copyright (c) 2006-2017, CRYPTOGAMS by <appro@openssl.org>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
#       * Redistributions of source code must retain copyright notices,
#         this list of conditions and the following disclaimer.
#
#       * Redistributions in binary form must reproduce the above
#         copyright notice, this list of conditions and the following
#         disclaimer in the documentation and/or other materials
#         provided with the distribution.
#
#       * Neither the name of the CRYPTOGAMS nor the names of its
#         copyright holder and contributors may be used to endorse or
#         promote products derived from this software without specific
#         prior written permission.
#
# ALTERNATIVELY, provided that this notice is retained in full, this
# product may be distributed under the terms of the GNU General Public
# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
# those given above.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see https://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# This module implements support for AES instructions as per PowerISA
# specification version 2.07, first implemented by the POWER8 processor.
# The module is endian-agnostic in the sense that it supports both big-
# and little-endian cases. Data alignment in parallelizable modes is
# handled with VSX loads and stores, which implies the MSR.VSX flag
# being set. It should also be noted that the ISA specification doesn't
# prohibit alignment exceptions for these instructions on page
# boundaries. Initially alignment was handled in a pure AltiVec/VMX way
# [with data aligned programmatically, which in turn guarantees
# exception-free execution], but that turned out to hamper performance
# when vcipher instructions are interleaved.
# It's reckoned that the
# eventual misalignment penalties at page boundaries are on average
# lower than the additional overhead of the pure AltiVec approach.
#
# May 2016
#
# Added an XTS subroutine; a 9x improvement on little-endian and a 12x
# improvement on big-endian systems was measured.
#
######################################################################
# Current large-block performance in cycles per byte processed with
# 128-bit key (less is better).
#
#		CBC en-/decrypt	CTR	XTS
# POWER8[le]	3.96/0.72	0.74	1.1
# POWER8[be]	3.75/0.65	0.66	1.0

$flavour = shift;

if ($flavour =~ /64/) {
	$SIZE_T	=8;
	$LRSAVE	=2*$SIZE_T;
	$STU	="stdu";
	$POP	="ld";
	$PUSH	="std";
	$UCMP	="cmpld";
	$SHL	="sldi";
} elsif ($flavour =~ /32/) {
	$SIZE_T	=4;
	$LRSAVE	=$SIZE_T;
	$STU	="stwu";
	$POP	="lwz";
	$PUSH	="stw";
	$UCMP	="cmplw";
	$SHL	="slwi";
} else { die "nonsense $flavour"; }

$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;

$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";
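# A typical invocation looks like the following (a sketch, not taken
# from this file; the exact flavour string and output path come from
# the build system):
#
#	perl aesp8-ppc.pl linux-ppc64le aesp8-ppc.S
#
# The first argument selects word size and endianness via the /64/,
# /32/ and /le$/ matches above; the second names the output file that
# ppc-xlate.pl writes the flavour-specific assembly to.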
$FRAME=8*$SIZE_T;
$prefix="aes_p8";

$sp="r1";
$vrsave="r12";

#########################################################################
{{{	# Key setup procedures						#
my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));

$code.=<<___;
.machine	"any"

.text

.align	7
rcon:
.long	0x01000000, 0x01000000, 0x01000000, 0x01000000	?rev
.long	0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000	?rev
.long	0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c	?rev
.long	0,0,0,0						?asis
Lconsts:
	mflr	r0
	bcl	20,31,\$+4
	mflr	$ptr	 #vvvvv "distance between . and rcon
	addi	$ptr,$ptr,-0x48
	mtlr	r0
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
.asciz	"AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"

.globl	.${prefix}_set_encrypt_key
Lset_encrypt_key:
	mflr	r11
	$PUSH	r11,$LRSAVE($sp)

	li	$ptr,-1
	${UCMP}i	$inp,0
	beq-	Lenc_key_abort		# if ($inp==0) return -1;
	${UCMP}i	$out,0
	beq-	Lenc_key_abort		# if ($out==0) return -1;
	li	$ptr,-2
	cmpwi	$bits,128
	blt-	Lenc_key_abort
	cmpwi	$bits,256
	bgt-	Lenc_key_abort
	andi.	r0,$bits,0x3f
	bne-	Lenc_key_abort

	lis	r0,0xfff0
	mfspr	$vrsave,256
	mtspr	256,r0

	bl	Lconsts
	mtlr	r11

	neg	r9,$inp
	lvx	$in0,0,$inp
	addi	$inp,$inp,15		# 15 is not typo
	lvsr	$key,0,r9		# borrow $key
	li	r8,0x20
	cmpwi	$bits,192
	lvx	$in1,0,$inp
	le?vspltisb	$mask,0x0f	# borrow $mask
	lvx	$rcon,0,$ptr
	le?vxor	$key,$key,$mask		# adjust for byte swap
	lvx	$mask,r8,$ptr
	addi	$ptr,$ptr,0x10
	vperm	$in0,$in0,$in1,$key	# align [and byte swap in LE]
	li	$cnt,8
	vxor	$zero,$zero,$zero
	mtctr	$cnt

	?lvsr	$outperm,0,$out
	vspltisb	$outmask,-1
	lvx	$outhead,0,$out
	?vperm	$outmask,$zero,$outmask,$outperm

	blt	Loop128
	addi	$inp,$inp,8
	beq	L192
	addi	$inp,$inp,8
	b	L256
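# Each Loop128 iteration below implements one round of the standard
# AES-128 key schedule (an annotation added for orientation; the
# pseudocode is illustrative, not actual instructions):
#
#	t    = SubWord(RotWord(w[i-1])) xor rcon   # vperm + vcipherlast
#	w[i] = w[i-4] xor t                        # vsldoi/vxor chain
#
# The "rotate-n-splat" vperm replicates the rotated last word into all
# four lanes, and vcipherlast on that splatted vector reduces to
# SubBytes plus the rcon xor, since ShiftRows is a no-op when all
# columns are equal.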
.align	4
Loop128:
	vperm	$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx	$stage,0,$out
	addi	$out,$out,16

	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vadduwm	$rcon,$rcon,$rcon
	vxor	$in0,$in0,$key
	bdnz	Loop128

	lvx	$rcon,0,$ptr		# last two round keys

	vperm	$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx	$stage,0,$out
	addi	$out,$out,16

	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vadduwm	$rcon,$rcon,$rcon
	vxor	$in0,$in0,$key

	vperm	$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx	$stage,0,$out
	addi	$out,$out,16

	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vxor	$in0,$in0,$key
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	stvx	$stage,0,$out

	addi	$inp,$out,15		# 15 is not typo
	addi	$out,$out,0x50

	li	$rounds,10
	b	Ldone

.align	4
L192:
	lvx	$tmp,0,$inp
	li	$cnt,4
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	stvx	$stage,0,$out
	addi	$out,$out,16
	vperm	$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	vspltisb	$key,8		# borrow $key
	mtctr	$cnt
	vsububm	$mask,$mask,$key	# adjust the mask

Loop192:
	vperm	$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vcipherlast	$key,$key,$rcon

	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp

	vsldoi	$stage,$zero,$in1,8
	vspltw	$tmp,$in0,3
	vxor	$tmp,$tmp,$in1
	vsldoi	$in1,$zero,$in1,12	# >>32
	vadduwm	$rcon,$rcon,$rcon
	vxor	$in1,$in1,$tmp
	vxor	$in0,$in0,$key
	vxor	$in1,$in1,$key
	vsldoi	$stage,$stage,$in0,8

	vperm	$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vperm	$outtail,$stage,$stage,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx	$stage,0,$out
	addi	$out,$out,16

	vsldoi	$stage,$in0,$in1,8
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vperm	$outtail,$stage,$stage,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	stvx	$stage,0,$out
	addi	$out,$out,16

	vspltw	$tmp,$in0,3
	vxor	$tmp,$tmp,$in1
	vsldoi	$in1,$zero,$in1,12	# >>32
	vadduwm	$rcon,$rcon,$rcon
	vxor	$in1,$in1,$tmp
	vxor	$in0,$in0,$key
	vxor	$in1,$in1,$key
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	stvx	$stage,0,$out
	addi	$inp,$out,15		# 15 is not typo
	addi	$out,$out,16
	bdnz	Loop192

	li	$rounds,12
	addi	$out,$out,0x20
	b	Ldone

.align	4
L256:
	lvx	$tmp,0,$inp
	li	$cnt,7
	li	$rounds,14
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	stvx	$stage,0,$out
	addi	$out,$out,16
	vperm	$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	mtctr	$cnt

Loop256:
	vperm	$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vperm	$outtail,$in1,$in1,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx	$stage,0,$out
	addi	$out,$out,16

	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vadduwm	$rcon,$rcon,$rcon
	vxor	$in0,$in0,$key
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	stvx	$stage,0,$out
	addi	$inp,$out,15		# 15 is not typo
	addi	$out,$out,16
	bdz	Ldone

	vspltw	$key,$in0,3		# just splat
	vsldoi	$tmp,$zero,$in1,12	# >>32
	vsbox	$key,$key

	vxor	$in1,$in1,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in1,$in1,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in1,$in1,$tmp

	vxor	$in1,$in1,$key
	b	Loop256

.align	4
Ldone:
	lvx	$in1,0,$inp		# redundant in aligned case
	vsel	$in1,$outhead,$in1,$outmask
	stvx	$in1,0,$inp
	li	$ptr,0
	mtspr	256,$vrsave
	stw	$rounds,0($out)

Lenc_key_abort:
	mr	r3,$ptr
	blr
	.long	0
	.byte	0,12,0x14,1,0,0,3,0
	.long	0
.size	.${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key

.globl	.${prefix}_set_decrypt_key
	$STU	$sp,-$FRAME($sp)
	mflr	r10
	$PUSH	r10,$FRAME+$LRSAVE($sp)
	bl	Lset_encrypt_key
	mtlr	r10

	cmpwi	r3,0
	bne-	Ldec_key_abort

	slwi	$cnt,$rounds,4
	subi	$inp,$out,240		# first round key
	srwi	$rounds,$rounds,1
	add	$out,$inp,$cnt		# last round key
	mtctr	$rounds

Ldeckey:
	lwz	r0, 0($inp)
	lwz	r6, 4($inp)
	lwz	r7, 8($inp)
	lwz	r8, 12($inp)
	addi	$inp,$inp,16
	lwz	r9, 0($out)
	lwz	r10,4($out)
	lwz	r11,8($out)
	lwz	r12,12($out)
	stw	r0, 0($out)
	stw	r6, 4($out)
	stw	r7, 8($out)
	stw	r8, 12($out)
	subi	$out,$out,16
	stw	r9, -16($inp)
	stw	r10,-12($inp)
	stw	r11,-8($inp)
	stw	r12,-4($inp)
	bdnz	Ldeckey

	xor	r3,r3,r3		# return value
Ldec_key_abort:
	addi	$sp,$sp,$FRAME
	blr
	.long	0
	.byte	0,12,4,1,0x80,0,3,0
	.long	0
.size	.${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
___
}}}
#########################################################################
{{{	# Single block en- and decrypt procedures			#
sub gen_block () {
my $dir = shift;
my $n   = $dir eq "de" ? "n" : "";
my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));

$code.=<<___;
.globl	.${prefix}_${dir}crypt
	lwz	$rounds,240($key)
	lis	r0,0xfc00
	mfspr	$vrsave,256
	li	$idx,15			# 15 is not typo
	mtspr	256,r0

	lvx	v0,0,$inp
	neg	r11,$out
	lvx	v1,$idx,$inp
	lvsl	v2,0,$inp		# inpperm
	le?vspltisb	v4,0x0f
	?lvsl	v3,0,r11		# outperm
	le?vxor	v2,v2,v4
	li	$idx,16
	vperm	v0,v0,v1,v2		# align [and byte swap in LE]
	lvx	v1,0,$key
	?lvsl	v5,0,$key		# keyperm
	srwi	$rounds,$rounds,1
	lvx	v2,$idx,$key
	addi	$idx,$idx,16
	subi	$rounds,$rounds,1
	?vperm	v1,v1,v2,v5		# align round key

	vxor	v0,v0,v1
	lvx	v1,$idx,$key
	addi	$idx,$idx,16
	mtctr	$rounds

Loop_${dir}c:
	?vperm	v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx	v2,$idx,$key
	addi	$idx,$idx,16
	?vperm	v1,v1,v2,v5
	v${n}cipher	v0,v0,v1
	lvx	v1,$idx,$key
	addi	$idx,$idx,16
	bdnz	Loop_${dir}c

	?vperm	v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx	v2,$idx,$key
	?vperm	v1,v1,v2,v5
	v${n}cipherlast	v0,v0,v1

	vspltisb	v2,-1
	vxor	v1,v1,v1
	li	$idx,15			# 15 is not typo
	?vperm	v2,v1,v2,v3		# outmask
	le?vxor	v3,v3,v4
	lvx	v1,0,$out		# outhead
	vperm	v0,v0,v0,v3		# rotate [and byte swap in LE]
	vsel	v1,v1,v0,v2
	lvx	v4,$idx,$out
	stvx	v1,0,$out
	vsel	v0,v0,v4,v2
	stvx	v0,$idx,$out

	mtspr	256,$vrsave
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,3,0
	.long	0
.size	.${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
___
}
&gen_block("en");
&gen_block("de");
}}}
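# Note: gen_block() above is instantiated twice and emits both
# .aes_p8_encrypt and .aes_p8_decrypt from the one template; the only
# difference is $n, which turns vcipher/vcipherlast into
# vncipher/vncipherlast for the decrypt direction.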
#########################################################################
{{{	# CBC en- and decrypt procedures				#
my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)=		map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
						map("v$_",(4..10));
$code.=<<___;
.globl	.${prefix}_cbc_encrypt
	${UCMP}i	$len,16
	bltlr-

	cmpwi	$enc,0			# test direction
	lis	r0,0xffe0
	mfspr	$vrsave,256
	mtspr	256,r0

	li	$idx,15
	vxor	$rndkey0,$rndkey0,$rndkey0
	le?vspltisb	$tmp,0x0f

	lvx	$ivec,0,$ivp		# load [unaligned] iv
	lvsl	$inpperm,0,$ivp
	lvx	$inptail,$idx,$ivp
	le?vxor	$inpperm,$inpperm,$tmp
	vperm	$ivec,$ivec,$inptail,$inpperm

	neg	r11,$inp
	?lvsl	$keyperm,0,$key		# prepare for unaligned key
	lwz	$rounds,240($key)

	lvsr	$inpperm,0,r11		# prepare for unaligned load
	lvx	$inptail,0,$inp
	addi	$inp,$inp,15		# 15 is not typo
	le?vxor	$inpperm,$inpperm,$tmp

	?lvsr	$outperm,0,$out		# prepare for unaligned store
	vspltisb	$outmask,-1
	lvx	$outhead,0,$out
	?vperm	$outmask,$rndkey0,$outmask,$outperm
	le?vxor	$outperm,$outperm,$tmp

	srwi	$rounds,$rounds,1
	li	$idx,16
	subi	$rounds,$rounds,1
	beq	Lcbc_dec

Lcbc_enc:
	vmr	$inout,$inptail
	lvx	$inptail,0,$inp
	addi	$inp,$inp,16
	mtctr	$rounds
	subi	$len,$len,16		# len-=16

	lvx	$rndkey0,0,$key
	vperm	$inout,$inout,$inptail,$inpperm
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	vxor	$inout,$inout,$ivec

Loop_cbc_enc:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	bdnz	Loop_cbc_enc

	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	li	$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipherlast	$ivec,$inout,$rndkey0
	${UCMP}i	$len,16

	vperm	$tmp,$ivec,$ivec,$outperm
	vsel	$inout,$outhead,$tmp,$outmask
	vmr	$outhead,$tmp
	stvx	$inout,0,$out
	addi	$out,$out,16
	bge	Lcbc_enc

	b	Lcbc_done

.align	4
Lcbc_dec:
	${UCMP}i	$len,128
	bge	_aesp8_cbc_decrypt8x
	vmr	$tmp,$inptail
	lvx	$inptail,0,$inp
	addi	$inp,$inp,16
	mtctr	$rounds
	subi	$len,$len,16		# len-=16

	lvx	$rndkey0,0,$key
	vperm	$tmp,$tmp,$inptail,$inpperm
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$inout,$tmp,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16

Loop_cbc_dec:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipher	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	bdnz	Loop_cbc_dec

	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	li	$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipherlast	$inout,$inout,$rndkey0
	${UCMP}i	$len,16

	vxor	$inout,$inout,$ivec
	vmr	$ivec,$tmp
	vperm	$tmp,$inout,$inout,$outperm
	vsel	$inout,$outhead,$tmp,$outmask
	vmr	$outhead,$tmp
	stvx	$inout,0,$out
	addi	$out,$out,16
	bge	Lcbc_dec

Lcbc_done:
	addi	$out,$out,-1
	lvx	$inout,0,$out		# redundant in aligned case
	vsel	$inout,$outhead,$inout,$outmask
	stvx	$inout,0,$out

	neg	$enc,$ivp		# write [unaligned] iv
	li	$idx,15			# 15 is not typo
	vxor	$rndkey0,$rndkey0,$rndkey0
	vspltisb	$outmask,-1
	le?vspltisb	$tmp,0x0f
	?lvsl	$outperm,0,$enc
	?vperm	$outmask,$rndkey0,$outmask,$outperm
	le?vxor	$outperm,$outperm,$tmp
	lvx	$outhead,0,$ivp
	vperm	$ivec,$ivec,$ivec,$outperm
	vsel	$inout,$outhead,$ivec,$outmask
	lvx	$inptail,$idx,$ivp
	stvx	$inout,0,$ivp
	vsel	$inout,$ivec,$inptail,$outmask
	stvx	$inout,$idx,$ivp

	mtspr	256,$vrsave
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,6,0
	.long	0
___
#########################################################################
{{	# Optimized CBC decrypt procedure				#
my $key_="r11";
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
my $rndkey0="v23";	# v24-v25 rotating buffer for first found keys
			# v26-v31 last 6 round keys
my ($tmp,$keyperm)=($in3,$in4);	# aliases with "caller", redundant assignment
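# A note on the round-key handling in the 8x path below (a reading of
# the design inferred from the off-load/pre-load comments, not upstream
# commentary): the last six round keys stay resident in v26-v31 for the
# whole run, while the earlier ones are copied to the stack once and
# then streamed through the rotating v24/v25 pair, so eight blocks can
# be processed per iteration without re-permuting the key schedule for
# each block.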
$code.=<<___;
.align	5
_aesp8_cbc_decrypt8x:
	$STU	$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
	li	r10,`$FRAME+8*16+15`
	li	r11,`$FRAME+8*16+31`
	stvx	v20,r10,$sp		# ABI says so
	addi	r10,r10,32
	stvx	v21,r11,$sp
	addi	r11,r11,32
	stvx	v22,r10,$sp
	addi	r10,r10,32
	stvx	v23,r11,$sp
	addi	r11,r11,32
	stvx	v24,r10,$sp
	addi	r10,r10,32
	stvx	v25,r11,$sp
	addi	r11,r11,32
	stvx	v26,r10,$sp
	addi	r10,r10,32
	stvx	v27,r11,$sp
	addi	r11,r11,32
	stvx	v28,r10,$sp
	addi	r10,r10,32
	stvx	v29,r11,$sp
	addi	r11,r11,32
	stvx	v30,r10,$sp
	stvx	v31,r11,$sp
	li	r0,-1
	stw	$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
	li	$x10,0x10
	$PUSH	r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	li	$x20,0x20
	$PUSH	r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	li	$x30,0x30
	$PUSH	r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	li	$x40,0x40
	$PUSH	r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	li	$x50,0x50
	$PUSH	r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	li	$x60,0x60
	$PUSH	r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	li	$x70,0x70
	mtspr	256,r0

	subi	$rounds,$rounds,3	# -4 in total
	subi	$len,$len,128		# bias

	lvx	$rndkey0,$x00,$key	# load key schedule
	lvx	v30,$x10,$key
	addi	$key,$key,0x20
	lvx	v31,$x00,$key
	?vperm	$rndkey0,$rndkey0,v30,$keyperm
	addi	$key_,$sp,$FRAME+15
	mtctr	$rounds

Load_cbc_dec_key:
	?vperm	v24,v30,v31,$keyperm
	lvx	v30,$x10,$key
	addi	$key,$key,0x20
	stvx	v24,$x00,$key_		# off-load round[1]
	?vperm	v25,v31,v30,$keyperm
	lvx	v31,$x00,$key
	stvx	v25,$x10,$key_		# off-load round[2]
	addi	$key_,$key_,0x20
	bdnz	Load_cbc_dec_key

	lvx	v26,$x10,$key
	?vperm	v24,v30,v31,$keyperm
	lvx	v27,$x20,$key
	stvx	v24,$x00,$key_		# off-load round[3]
	?vperm	v25,v31,v26,$keyperm
	lvx	v28,$x30,$key
	stvx	v25,$x10,$key_		# off-load round[4]
	addi	$key_,$sp,$FRAME+15	# rewind $key_
	?vperm	v26,v26,v27,$keyperm
	lvx	v29,$x40,$key
	?vperm	v27,v27,v28,$keyperm
	lvx	v30,$x50,$key
	?vperm	v28,v28,v29,$keyperm
	lvx	v31,$x60,$key
	?vperm	v29,v29,v30,$keyperm
	lvx	$out0,$x70,$key		# borrow $out0
	?vperm	v30,v30,v31,$keyperm
	lvx	v24,$x00,$key_		# pre-load round[1]
	?vperm	v31,v31,$out0,$keyperm
	lvx	v25,$x10,$key_		# pre-load round[2]

	#lvx	$inptail,0,$inp		# "caller" already did this
	#addi	$inp,$inp,15		# 15 is not typo
	subi	$inp,$inp,15		# undo "caller"

	le?li	$idx,8
	lvx_u	$in0,$x00,$inp		# load first 8 "words"
	le?lvsl	$inpperm,0,$idx
	le?vspltisb	$tmp,0x0f
	lvx_u	$in1,$x10,$inp
	le?vxor	$inpperm,$inpperm,$tmp	# transform for lvx_u/stvx_u
	lvx_u	$in2,$x20,$inp
	le?vperm	$in0,$in0,$in0,$inpperm
	lvx_u	$in3,$x30,$inp
	le?vperm	$in1,$in1,$in1,$inpperm
	lvx_u	$in4,$x40,$inp
	le?vperm	$in2,$in2,$in2,$inpperm
	vxor	$out0,$in0,$rndkey0
	lvx_u	$in5,$x50,$inp
	le?vperm	$in3,$in3,$in3,$inpperm
	vxor	$out1,$in1,$rndkey0
	lvx_u	$in6,$x60,$inp
	le?vperm	$in4,$in4,$in4,$inpperm
	vxor	$out2,$in2,$rndkey0
	lvx_u	$in7,$x70,$inp
	addi	$inp,$inp,0x80
	le?vperm	$in5,$in5,$in5,$inpperm
	vxor	$out3,$in3,$rndkey0
	le?vperm	$in6,$in6,$in6,$inpperm
	vxor	$out4,$in4,$rndkey0
	le?vperm	$in7,$in7,$in7,$inpperm
	vxor	$out5,$in5,$rndkey0
	vxor	$out6,$in6,$rndkey0
	vxor	$out7,$in7,$rndkey0

	mtctr	$rounds
	b	Loop_cbc_dec8x
.align	5
Loop_cbc_dec8x:
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24
	lvx	v24,$x20,$key_		# round[3]
	addi	$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25
	lvx	v25,$x10,$key_		# round[4]
	bdnz	Loop_cbc_dec8x

	subic	$len,$len,128		# $len-=128
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24

	subfe.	r0,r0,r0		# borrow?-1:0
	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25

	and	r0,r0,$len
	vncipher	$out0,$out0,v26
	vncipher	$out1,$out1,v26
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	vncipher	$out4,$out4,v26
	vncipher	$out5,$out5,v26
	vncipher	$out6,$out6,v26
	vncipher	$out7,$out7,v26

	add	$inp,$inp,r0		# $inp is adjusted in such
					# way that at exit from the
					# loop inX-in7 are loaded
					# with last "words"
	vncipher	$out0,$out0,v27
	vncipher	$out1,$out1,v27
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vncipher	$out4,$out4,v27
	vncipher	$out5,$out5,v27
	vncipher	$out6,$out6,v27
	vncipher	$out7,$out7,v27

	addi	$key_,$sp,$FRAME+15	# rewind $key_
	vncipher	$out0,$out0,v28
	vncipher	$out1,$out1,v28
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vncipher	$out4,$out4,v28
	vncipher	$out5,$out5,v28
	vncipher	$out6,$out6,v28
	vncipher	$out7,$out7,v28
	lvx	v24,$x00,$key_		# re-pre-load round[1]

	vncipher	$out0,$out0,v29
	vncipher	$out1,$out1,v29
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vncipher	$out4,$out4,v29
	vncipher	$out5,$out5,v29
	vncipher	$out6,$out6,v29
	vncipher	$out7,$out7,v29
	lvx	v25,$x10,$key_		# re-pre-load round[2]

	vncipher	$out0,$out0,v30
	vxor	$ivec,$ivec,v31		# xor with last round key
	vncipher	$out1,$out1,v30
	vxor	$in0,$in0,v31
	vncipher	$out2,$out2,v30
	vxor	$in1,$in1,v31
	vncipher	$out3,$out3,v30
	vxor	$in2,$in2,v31
	vncipher	$out4,$out4,v30
	vxor	$in3,$in3,v31
	vncipher	$out5,$out5,v30
	vxor	$in4,$in4,v31
	vncipher	$out6,$out6,v30
	vxor	$in5,$in5,v31
	vncipher	$out7,$out7,v30
	vxor	$in6,$in6,v31

	vncipherlast	$out0,$out0,$ivec
	vncipherlast	$out1,$out1,$in0
	lvx_u	$in0,$x00,$inp		# load next input block
	vncipherlast	$out2,$out2,$in1
	lvx_u	$in1,$x10,$inp
	vncipherlast	$out3,$out3,$in2
	le?vperm	$in0,$in0,$in0,$inpperm
	lvx_u	$in2,$x20,$inp
	vncipherlast	$out4,$out4,$in3
	le?vperm	$in1,$in1,$in1,$inpperm
	lvx_u	$in3,$x30,$inp
	vncipherlast	$out5,$out5,$in4
	le?vperm	$in2,$in2,$in2,$inpperm
	lvx_u	$in4,$x40,$inp
	vncipherlast	$out6,$out6,$in5
	le?vperm	$in3,$in3,$in3,$inpperm
	lvx_u	$in5,$x50,$inp
	vncipherlast	$out7,$out7,$in6
	le?vperm	$in4,$in4,$in4,$inpperm
	lvx_u	$in6,$x60,$inp
	vmr	$ivec,$in7
	le?vperm	$in5,$in5,$in5,$inpperm
	lvx_u	$in7,$x70,$inp
	addi	$inp,$inp,0x80

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$in6,$in6,$in6,$inpperm
	vxor	$out0,$in0,$rndkey0
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$in7,$in7,$in7,$inpperm
	vxor	$out1,$in1,$rndkey0
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	vxor	$out2,$in2,$rndkey0
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x30,$out
	vxor	$out3,$in3,$rndkey0
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x40,$out
	vxor	$out4,$in4,$rndkey0
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x50,$out
	vxor	$out5,$in5,$rndkey0
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x60,$out
	vxor	$out6,$in6,$rndkey0
	stvx_u	$out7,$x70,$out
	addi	$out,$out,0x80
	vxor	$out7,$in7,$rndkey0

	mtctr	$rounds
	beq	Loop_cbc_dec8x		# did $len-=128 borrow?

	addic.	$len,$len,128
	beq	Lcbc_dec8x_done
	nop
	nop

Loop_cbc_dec8x_tail:			# up to 7 "words" tail...
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24
	lvx	v24,$x20,$key_		# round[3]
	addi	$key_,$key_,0x20

	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25
	lvx	v25,$x10,$key_		# round[4]
	bdnz	Loop_cbc_dec8x_tail

	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24

	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25

	vncipher	$out1,$out1,v26
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	vncipher	$out4,$out4,v26
	vncipher	$out5,$out5,v26
	vncipher	$out6,$out6,v26
	vncipher	$out7,$out7,v26

	vncipher	$out1,$out1,v27
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vncipher	$out4,$out4,v27
	vncipher	$out5,$out5,v27
	vncipher	$out6,$out6,v27
	vncipher	$out7,$out7,v27

	vncipher	$out1,$out1,v28
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vncipher	$out4,$out4,v28
	vncipher	$out5,$out5,v28
	vncipher	$out6,$out6,v28
	vncipher	$out7,$out7,v28

	vncipher	$out1,$out1,v29
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vncipher	$out4,$out4,v29
	vncipher	$out5,$out5,v29
	vncipher	$out6,$out6,v29
	vncipher	$out7,$out7,v29

	vncipher	$out1,$out1,v30
	vxor	$ivec,$ivec,v31		# last round key
	vncipher	$out2,$out2,v30
	vxor	$in1,$in1,v31
	vncipher	$out3,$out3,v30
	vxor	$in2,$in2,v31
	vncipher	$out4,$out4,v30
	vxor	$in3,$in3,v31
	vncipher	$out5,$out5,v30
	vxor	$in4,$in4,v31
	vncipher	$out6,$out6,v30
	vxor	$in5,$in5,v31
	vncipher	$out7,$out7,v30
	vxor	$in6,$in6,v31

	cmplwi	$len,32			# switch($len)
	blt	Lcbc_dec8x_one
	nop
	beq	Lcbc_dec8x_two
	cmplwi	$len,64
	blt	Lcbc_dec8x_three
	nop
	beq	Lcbc_dec8x_four
	cmplwi	$len,96
	blt	Lcbc_dec8x_five
	nop
	beq	Lcbc_dec8x_six

Lcbc_dec8x_seven:
	vncipherlast	$out1,$out1,$ivec
	vncipherlast	$out2,$out2,$in1
	vncipherlast	$out3,$out3,$in2
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr	$ivec,$in7

	le?vperm	$out1,$out1,$out1,$inpperm
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x00,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x10,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x20,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x30,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x40,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x50,$out
	stvx_u	$out7,$x60,$out
	addi	$out,$out,0x70
	b	Lcbc_dec8x_done

.align	5
Lcbc_dec8x_six:
	vncipherlast	$out2,$out2,$ivec
	vncipherlast	$out3,$out3,$in2
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr	$ivec,$in7

	le?vperm	$out2,$out2,$out2,$inpperm
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x00,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x10,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x20,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x30,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x40,$out
	stvx_u	$out7,$x50,$out
	addi	$out,$out,0x60
	b	Lcbc_dec8x_done

.align	5
Lcbc_dec8x_five:
	vncipherlast	$out3,$out3,$ivec
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr	$ivec,$in7

	le?vperm	$out3,$out3,$out3,$inpperm
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x00,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x10,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x20,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x30,$out
	stvx_u	$out7,$x40,$out
	addi	$out,$out,0x50
	b	Lcbc_dec8x_done

.align	5
Lcbc_dec8x_four:
	vncipherlast	$out4,$out4,$ivec
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr	$ivec,$in7

	le?vperm	$out4,$out4,$out4,$inpperm
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x00,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x10,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x20,$out
	stvx_u	$out7,$x30,$out
	addi	$out,$out,0x40
	b	Lcbc_dec8x_done

.align	5
Lcbc_dec8x_three:
	vncipherlast	$out5,$out5,$ivec
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr	$ivec,$in7

	le?vperm	$out5,$out5,$out5,$inpperm
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x00,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x10,$out
	stvx_u	$out7,$x20,$out
	addi	$out,$out,0x30
	b	Lcbc_dec8x_done

.align	5
Lcbc_dec8x_two:
	vncipherlast	$out6,$out6,$ivec
	vncipherlast	$out7,$out7,$in6
	vmr	$ivec,$in7

	le?vperm	$out6,$out6,$out6,$inpperm
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x00,$out
	stvx_u	$out7,$x10,$out
	addi	$out,$out,0x20
	b	Lcbc_dec8x_done

.align	5
Lcbc_dec8x_one:
	vncipherlast	$out7,$out7,$ivec
	vmr	$ivec,$in7

	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out7,0,$out
	addi	$out,$out,0x10

Lcbc_dec8x_done:
	le?vperm	$ivec,$ivec,$ivec,$inpperm
	stvx_u	$ivec,0,$ivp		# write [unaligned] iv

	li	r10,`$FRAME+15`
	li	r11,`$FRAME+31`
	stvx	$inpperm,r10,$sp	# wipe copies of round keys
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	stvx	$inpperm,r10,$sp
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	stvx	$inpperm,r10,$sp
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	stvx	$inpperm,r10,$sp
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32

	mtspr	256,$vrsave
	lvx	v20,r10,$sp		# ABI says so
	addi	r10,r10,32
	lvx	v21,r11,$sp
	addi	r11,r11,32
	lvx	v22,r10,$sp
	addi	r10,r10,32
	lvx	v23,r11,$sp
	addi	r11,r11,32
	lvx	v24,r10,$sp
	addi	r10,r10,32
	lvx	v25,r11,$sp
	addi	r11,r11,32
	lvx	v26,r10,$sp
	addi	r10,r10,32
	lvx	v27,r11,$sp
	addi	r11,r11,32
	lvx	v28,r10,$sp
	addi	r10,r10,32
	lvx	v29,r11,$sp
	addi	r11,r11,32
	lvx	v30,r10,$sp
	lvx	v31,r11,$sp
	$POP	r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP	r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP	r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP	r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP	r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP	r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi	$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long	0
	.byte	0,12,0x14,0,0x80,6,6,0
	.long	0
.size	.${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
___
}}	}}}

#########################################################################
{{{	# CTR procedure[s]						#

####################### WARNING: Here be dragons! #######################
#
# This code is written as 'ctr32', based on a 32-bit counter used
# upstream. The kernel does *not* use a 32-bit counter. The kernel uses
# a 128-bit counter.
#
# This leads to subtle changes from the upstream code: the counter
# is incremented with vadduqm rather than vadduwm. This occurs in
# both the bulk (8 blocks at a time) path, and in the individual block
# path. Be aware of this when doing updates.
#
# See:
# 1d4aa0b4c181 ("crypto: vmx - Fixing AES-CTR counter bug")
# 009b30ac7444 ("crypto: vmx - CTR: always increment IV as quadword")
# https://github.com/openssl/openssl/pull/8942
#
#########################################################################
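# For illustration only (an editor's sketch, not generated code), the
# difference amounts to:
#
#	vadduwm	$ivec,$ivec,$one	# upstream ctr32: per-word add,
#					# carries never cross word lanes
#	vadduqm	$ivec,$ivec,$one	# kernel: one 128-bit add, so the
#					# carry ripples through the whole
#					# counter block
#
# vadduwm is Vector Add Unsigned Word Modulo; vadduqm is Vector Add
# Unsigned Quadword Modulo.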
1300# 1301# See: 1302# 1d4aa0b4c181 ("crypto: vmx - Fixing AES-CTR counter bug") 1303# 009b30ac7444 ("crypto: vmx - CTR: always increment IV as quadword") 1304# https://github.com/openssl/openssl/pull/8942 1305# 1306######################################################################### 1307my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10)); 1308my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3)); 1309my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)= 1310 map("v$_",(4..11)); 1311my $dat=$tmp; 1312 1313$code.=<<___; 1314.globl .${prefix}_ctr32_encrypt_blocks 1315 ${UCMP}i $len,1 1316 bltlr- 1317 1318 lis r0,0xfff0 1319 mfspr $vrsave,256 1320 mtspr 256,r0 1321 1322 li $idx,15 1323 vxor $rndkey0,$rndkey0,$rndkey0 1324 le?vspltisb $tmp,0x0f 1325 1326 lvx $ivec,0,$ivp # load [unaligned] iv 1327 lvsl $inpperm,0,$ivp 1328 lvx $inptail,$idx,$ivp 1329 vspltisb $one,1 1330 le?vxor $inpperm,$inpperm,$tmp 1331 vperm $ivec,$ivec,$inptail,$inpperm 1332 vsldoi $one,$rndkey0,$one,1 1333 1334 neg r11,$inp 1335 ?lvsl $keyperm,0,$key # prepare for unaligned key 1336 lwz $rounds,240($key) 1337 1338 lvsr $inpperm,0,r11 # prepare for unaligned load 1339 lvx $inptail,0,$inp 1340 addi $inp,$inp,15 # 15 is not typo 1341 le?vxor $inpperm,$inpperm,$tmp 1342 1343 srwi $rounds,$rounds,1 1344 li $idx,16 1345 subi $rounds,$rounds,1 1346 1347 ${UCMP}i $len,8 1348 bge _aesp8_ctr32_encrypt8x 1349 1350 ?lvsr $outperm,0,$out # prepare for unaligned store 1351 vspltisb $outmask,-1 1352 lvx $outhead,0,$out 1353 ?vperm $outmask,$rndkey0,$outmask,$outperm 1354 le?vxor $outperm,$outperm,$tmp 1355 1356 lvx $rndkey0,0,$key 1357 mtctr $rounds 1358 lvx $rndkey1,$idx,$key 1359 addi $idx,$idx,16 1360 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 1361 vxor $inout,$ivec,$rndkey0 1362 lvx $rndkey0,$idx,$key 1363 addi $idx,$idx,16 1364 b Loop_ctr32_enc 1365 1366.align 5 1367Loop_ctr32_enc: 1368 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 1369 vcipher $inout,$inout,$rndkey1 1370 lvx $rndkey1,$idx,$key 1371 addi $idx,$idx,16 1372 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 1373 vcipher $inout,$inout,$rndkey0 1374 lvx $rndkey0,$idx,$key 1375 addi $idx,$idx,16 1376 bdnz Loop_ctr32_enc 1377 1378 vadduqm $ivec,$ivec,$one # Kernel change for 128-bit 1379 vmr $dat,$inptail 1380 lvx $inptail,0,$inp 1381 addi $inp,$inp,16 1382 subic. 
	vadduqm	$ivec,$ivec,$one	# Kernel change for 128-bit
	vmr	$dat,$inptail
	lvx	$inptail,0,$inp
	addi	$inp,$inp,16
	subic.	$len,$len,1		# blocks--

	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	vperm	$dat,$dat,$inptail,$inpperm
	li	$idx,16
	?vperm	$rndkey1,$rndkey0,$rndkey1,$keyperm
	lvx	$rndkey0,0,$key
	vxor	$dat,$dat,$rndkey1	# last round key
	vcipherlast	$inout,$inout,$dat

	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	vperm	$inout,$inout,$inout,$outperm
	vsel	$dat,$outhead,$inout,$outmask
	mtctr	$rounds
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vmr	$outhead,$inout
	vxor	$inout,$ivec,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	stvx	$dat,0,$out
	addi	$out,$out,16
	bne	Loop_ctr32_enc

	addi	$out,$out,-1
	lvx	$inout,0,$out		# redundant in aligned case
	vsel	$inout,$outhead,$inout,$outmask
	stvx	$inout,0,$out

	mtspr	256,$vrsave
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,6,0
	.long	0
___
#########################################################################
{{	# Optimized CTR procedure					#
my $key_="r11";
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
my $rndkey0="v23";	# v24-v25 rotating buffer for first found keys
			# v26-v31 last 6 round keys
my ($tmp,$keyperm)=($in3,$in4);	# aliases with "caller", redundant assignment
my ($two,$three,$four)=($outhead,$outperm,$outmask);

$code.=<<___;
.align	5
_aesp8_ctr32_encrypt8x:
	$STU	$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
	li	r10,`$FRAME+8*16+15`
	li	r11,`$FRAME+8*16+31`
	stvx	v20,r10,$sp		# ABI says so
	addi	r10,r10,32
	stvx	v21,r11,$sp
	addi	r11,r11,32
	stvx	v22,r10,$sp
	addi	r10,r10,32
	stvx	v23,r11,$sp
	addi	r11,r11,32
	stvx	v24,r10,$sp
	addi	r10,r10,32
	stvx	v25,r11,$sp
	addi	r11,r11,32
	stvx	v26,r10,$sp
	addi	r10,r10,32
	stvx	v27,r11,$sp
	addi	r11,r11,32
	stvx	v28,r10,$sp
	addi	r10,r10,32
	stvx	v29,r11,$sp
	addi	r11,r11,32
	stvx	v30,r10,$sp
	stvx	v31,r11,$sp
	li	r0,-1
	stw	$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
	li	$x10,0x10
	$PUSH	r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	li	$x20,0x20
	$PUSH	r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	li	$x30,0x30
	$PUSH	r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	li	$x40,0x40
	$PUSH	r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	li	$x50,0x50
	$PUSH	r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	li	$x60,0x60
	$PUSH	r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	li	$x70,0x70
	mtspr	256,r0

	subi	$rounds,$rounds,3	# -4 in total

	lvx	$rndkey0,$x00,$key	# load key schedule
	lvx	v30,$x10,$key
	addi	$key,$key,0x20
	lvx	v31,$x00,$key
	?vperm	$rndkey0,$rndkey0,v30,$keyperm
	addi	$key_,$sp,$FRAME+15
	mtctr	$rounds

Load_ctr32_enc_key:
	?vperm	v24,v30,v31,$keyperm
	lvx	v30,$x10,$key
	addi	$key,$key,0x20
	stvx	v24,$x00,$key_		# off-load round[1]
	?vperm	v25,v31,v30,$keyperm
	lvx	v31,$x00,$key
	stvx	v25,$x10,$key_		# off-load round[2]
	addi	$key_,$key_,0x20
	bdnz	Load_ctr32_enc_key

	lvx	v26,$x10,$key
	?vperm	v24,v30,v31,$keyperm
	lvx	v27,$x20,$key
	stvx	v24,$x00,$key_		# off-load round[3]
	?vperm	v25,v31,v26,$keyperm
	lvx	v28,$x30,$key
	stvx	v25,$x10,$key_		# off-load round[4]
	addi	$key_,$sp,$FRAME+15	# rewind $key_
	?vperm	v26,v26,v27,$keyperm
	lvx	v29,$x40,$key
	?vperm	v27,v27,v28,$keyperm
	lvx	v30,$x50,$key
	?vperm	v28,v28,v29,$keyperm
	lvx	v31,$x60,$key
	?vperm	v29,v29,v30,$keyperm
	lvx	$out0,$x70,$key		# borrow $out0
	?vperm	v30,v30,v31,$keyperm
	lvx	v24,$x00,$key_		# pre-load round[1]
	?vperm	v31,v31,$out0,$keyperm
	lvx	v25,$x10,$key_		# pre-load round[2]

	vadduqm	$two,$one,$one
	subi	$inp,$inp,15		# undo "caller"
	$SHL	$len,$len,4

	vadduqm	$out1,$ivec,$one	# counter values ...
	vadduqm	$out2,$ivec,$two	# (do all ctr adds as 128-bit)
	vxor	$out0,$ivec,$rndkey0	# ... xored with rndkey[0]
	le?li	$idx,8
	vadduqm	$out3,$out1,$two
	vxor	$out1,$out1,$rndkey0
	le?lvsl	$inpperm,0,$idx
	vadduqm	$out4,$out2,$two
	vxor	$out2,$out2,$rndkey0
	le?vspltisb	$tmp,0x0f
	vadduqm	$out5,$out3,$two
	vxor	$out3,$out3,$rndkey0
	le?vxor	$inpperm,$inpperm,$tmp	# transform for lvx_u/stvx_u
	vadduqm	$out6,$out4,$two
	vxor	$out4,$out4,$rndkey0
	vadduqm	$out7,$out5,$two
	vxor	$out5,$out5,$rndkey0
	vadduqm	$ivec,$out6,$two	# next counter value
	vxor	$out6,$out6,$rndkey0
	vxor	$out7,$out7,$rndkey0

	mtctr	$rounds
	b	Loop_ctr32_enc8x
.align	5
Loop_ctr32_enc8x:
	vcipher	$out0,$out0,v24
	vcipher	$out1,$out1,v24
	vcipher	$out2,$out2,v24
	vcipher	$out3,$out3,v24
	vcipher	$out4,$out4,v24
	vcipher	$out5,$out5,v24
	vcipher	$out6,$out6,v24
	vcipher	$out7,$out7,v24
Loop_ctr32_enc8x_middle:
	lvx	v24,$x20,$key_		# round[3]
	addi	$key_,$key_,0x20

	vcipher	$out0,$out0,v25
	vcipher	$out1,$out1,v25
	vcipher	$out2,$out2,v25
	vcipher	$out3,$out3,v25
	vcipher	$out4,$out4,v25
	vcipher	$out5,$out5,v25
	vcipher	$out6,$out6,v25
	vcipher	$out7,$out7,v25
	lvx	v25,$x10,$key_		# round[4]
	bdnz	Loop_ctr32_enc8x

	subic	r11,$len,256		# $len-256, borrow $key_
	vcipher	$out0,$out0,v24
	vcipher	$out1,$out1,v24
	vcipher	$out2,$out2,v24
	vcipher	$out3,$out3,v24
	vcipher	$out4,$out4,v24
	vcipher	$out5,$out5,v24
	vcipher	$out6,$out6,v24
	vcipher	$out7,$out7,v24

	subfe	r0,r0,r0		# borrow?-1:0
	vcipher	$out0,$out0,v25
	vcipher	$out1,$out1,v25
	vcipher	$out2,$out2,v25
	vcipher	$out3,$out3,v25
	vcipher	$out4,$out4,v25
	vcipher	$out5,$out5,v25
	vcipher	$out6,$out6,v25
	vcipher	$out7,$out7,v25

	and	r0,r0,r11
	addi	$key_,$sp,$FRAME+15	# rewind $key_
	vcipher	$out0,$out0,v26
	vcipher	$out1,$out1,v26
	vcipher	$out2,$out2,v26
	vcipher	$out3,$out3,v26
	vcipher	$out4,$out4,v26
	vcipher	$out5,$out5,v26
	vcipher	$out6,$out6,v26
	vcipher	$out7,$out7,v26
	lvx	v24,$x00,$key_		# re-pre-load round[1]

	subic	$len,$len,129		# $len-=129
	vcipher	$out0,$out0,v27
	addi	$len,$len,1		# $len-=128 really
	vcipher	$out1,$out1,v27
	vcipher	$out2,$out2,v27
	vcipher	$out3,$out3,v27
	vcipher	$out4,$out4,v27
	vcipher	$out5,$out5,v27
	vcipher	$out6,$out6,v27
	vcipher	$out7,$out7,v27
	lvx	v25,$x10,$key_		# re-pre-load round[2]

	vcipher	$out0,$out0,v28
	lvx_u	$in0,$x00,$inp		# load input
	vcipher	$out1,$out1,v28
	lvx_u	$in1,$x10,$inp
	vcipher	$out2,$out2,v28
	lvx_u	$in2,$x20,$inp
	vcipher	$out3,$out3,v28
	lvx_u	$in3,$x30,$inp
	vcipher	$out4,$out4,v28
	lvx_u	$in4,$x40,$inp
	vcipher	$out5,$out5,v28
	lvx_u	$in5,$x50,$inp
	vcipher	$out6,$out6,v28
	lvx_u	$in6,$x60,$inp
	vcipher	$out7,$out7,v28
	lvx_u	$in7,$x70,$inp
	addi	$inp,$inp,0x80

	vcipher	$out0,$out0,v29
	le?vperm	$in0,$in0,$in0,$inpperm
	vcipher	$out1,$out1,v29
	le?vperm	$in1,$in1,$in1,$inpperm
	vcipher	$out2,$out2,v29
	le?vperm	$in2,$in2,$in2,$inpperm
	vcipher	$out3,$out3,v29
	le?vperm	$in3,$in3,$in3,$inpperm
	vcipher	$out4,$out4,v29
	le?vperm	$in4,$in4,$in4,$inpperm
	vcipher	$out5,$out5,v29
	le?vperm	$in5,$in5,$in5,$inpperm
	vcipher	$out6,$out6,v29
	le?vperm	$in6,$in6,$in6,$inpperm
	vcipher	$out7,$out7,v29
	le?vperm	$in7,$in7,$in7,$inpperm

	add	$inp,$inp,r0		# $inp is adjusted in such
					# way that at exit from the
					# loop inX-in7 are loaded
					# with last "words"
	subfe.	r0,r0,r0		# borrow?-1:0
	vcipher	$out0,$out0,v30
	vxor	$in0,$in0,v31		# xor with last round key
	vcipher	$out1,$out1,v30
	vxor	$in1,$in1,v31
	vcipher	$out2,$out2,v30
	vxor	$in2,$in2,v31
	vcipher	$out3,$out3,v30
	vxor	$in3,$in3,v31
	vcipher	$out4,$out4,v30
	vxor	$in4,$in4,v31
	vcipher	$out5,$out5,v30
	vxor	$in5,$in5,v31
	vcipher	$out6,$out6,v30
	vxor	$in6,$in6,v31
	vcipher	$out7,$out7,v30
	vxor	$in7,$in7,v31

	bne	Lctr32_enc8x_break	# did $len-129 borrow?

	vcipherlast	$in0,$out0,$in0
	vcipherlast	$in1,$out1,$in1
	vadduqm	$out1,$ivec,$one	# counter values ...
	vcipherlast	$in2,$out2,$in2
	vadduqm	$out2,$ivec,$two
	vxor	$out0,$ivec,$rndkey0	# ... xored with rndkey[0]
	vcipherlast	$in3,$out3,$in3
	vadduqm	$out3,$out1,$two
	vxor	$out1,$out1,$rndkey0
	vcipherlast	$in4,$out4,$in4
	vadduqm	$out4,$out2,$two
	vxor	$out2,$out2,$rndkey0
	vcipherlast	$in5,$out5,$in5
	vadduqm	$out5,$out3,$two
	vxor	$out3,$out3,$rndkey0
	vcipherlast	$in6,$out6,$in6
	vadduqm	$out6,$out4,$two
	vxor	$out4,$out4,$rndkey0
	vcipherlast	$in7,$out7,$in7
	vadduqm	$out7,$out5,$two
	vxor	$out5,$out5,$rndkey0
	le?vperm	$in0,$in0,$in0,$inpperm
	vadduqm	$ivec,$out6,$two	# next counter value
	vxor	$out6,$out6,$rndkey0
	le?vperm	$in1,$in1,$in1,$inpperm
	vxor	$out7,$out7,$rndkey0
	mtctr	$rounds

	vcipher	$out0,$out0,v24
	stvx_u	$in0,$x00,$out
	le?vperm	$in2,$in2,$in2,$inpperm
	vcipher	$out1,$out1,v24
	stvx_u	$in1,$x10,$out
	le?vperm	$in3,$in3,$in3,$inpperm
	vcipher	$out2,$out2,v24
	stvx_u	$in2,$x20,$out
	le?vperm	$in4,$in4,$in4,$inpperm
	vcipher	$out3,$out3,v24
	stvx_u	$in3,$x30,$out
	le?vperm	$in5,$in5,$in5,$inpperm
	vcipher	$out4,$out4,v24
	stvx_u	$in4,$x40,$out
	le?vperm	$in6,$in6,$in6,$inpperm
	vcipher	$out5,$out5,v24
	stvx_u	$in5,$x50,$out
	le?vperm	$in7,$in7,$in7,$inpperm
	vcipher	$out6,$out6,v24
	stvx_u	$in6,$x60,$out
	vcipher	$out7,$out7,v24
	stvx_u	$in7,$x70,$out
	addi	$out,$out,0x80

	b	Loop_ctr32_enc8x_middle

.align	5
Lctr32_enc8x_break:
	cmpwi	$len,-0x60
	blt	Lctr32_enc8x_one
	nop
	beq	Lctr32_enc8x_two
	cmpwi	$len,-0x40
	blt	Lctr32_enc8x_three
	nop
	beq	Lctr32_enc8x_four
	cmpwi	$len,-0x20
	blt	Lctr32_enc8x_five
	nop
	beq	Lctr32_enc8x_six
	cmpwi	$len,0x00
	blt	Lctr32_enc8x_seven

Lctr32_enc8x_eight:
	vcipherlast	$out0,$out0,$in0
	vcipherlast	$out1,$out1,$in1
	vcipherlast	$out2,$out2,$in2
	vcipherlast	$out3,$out3,$in3
	vcipherlast	$out4,$out4,$in4
	vcipherlast	$out5,$out5,$in5
	vcipherlast	$out6,$out6,$in6
	vcipherlast	$out7,$out7,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x30,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x40,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x50,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x60,$out
	stvx_u	$out7,$x70,$out
	addi	$out,$out,0x80
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_seven:
	vcipherlast	$out0,$out0,$in1
	vcipherlast	$out1,$out1,$in2
	vcipherlast	$out2,$out2,$in3
	vcipherlast	$out3,$out3,$in4
	vcipherlast	$out4,$out4,$in5
	vcipherlast	$out5,$out5,$in6
	vcipherlast	$out6,$out6,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x30,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x40,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x50,$out
	stvx_u	$out6,$x60,$out
	addi	$out,$out,0x70
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_six:
	vcipherlast	$out0,$out0,$in2
	vcipherlast	$out1,$out1,$in3
	vcipherlast	$out2,$out2,$in4
	vcipherlast	$out3,$out3,$in5
	vcipherlast	$out4,$out4,$in6
	vcipherlast	$out5,$out5,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x30,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x40,$out
	stvx_u	$out5,$x50,$out
	addi	$out,$out,0x60
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_five:
	vcipherlast	$out0,$out0,$in3
	vcipherlast	$out1,$out1,$in4
	vcipherlast	$out2,$out2,$in5
	vcipherlast	$out3,$out3,$in6
	vcipherlast	$out4,$out4,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x30,$out
	stvx_u	$out4,$x40,$out
	addi	$out,$out,0x50
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_four:
	vcipherlast	$out0,$out0,$in4
	vcipherlast	$out1,$out1,$in5
	vcipherlast	$out2,$out2,$in6
	vcipherlast	$out3,$out3,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	stvx_u	$out3,$x30,$out
	addi	$out,$out,0x40
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_three:
	vcipherlast	$out0,$out0,$in5
	vcipherlast	$out1,$out1,$in6
	vcipherlast	$out2,$out2,$in7
	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	stvx_u	$out2,$x20,$out
	addi	$out,$out,0x30
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_two:
	vcipherlast	$out0,$out0,$in6
	vcipherlast	$out1,$out1,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	stvx_u	$out1,$x10,$out
	addi	$out,$out,0x20
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_one:
	vcipherlast	$out0,$out0,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	stvx_u	$out0,0,$out
	addi	$out,$out,0x10

Lctr32_enc8x_done:
	li	r10,`$FRAME+15`
	li	r11,`$FRAME+31`
	stvx	$inpperm,r10,$sp	# wipe copies of round keys
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	stvx	$inpperm,r10,$sp
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	stvx	$inpperm,r10,$sp
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	stvx	$inpperm,r10,$sp
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32

	mtspr	256,$vrsave
	lvx	v20,r10,$sp		# ABI says so
	addi	r10,r10,32
	lvx	v21,r11,$sp
	addi	r11,r11,32
	lvx	v22,r10,$sp
	addi	r10,r10,32
	lvx	v23,r11,$sp
	addi	r11,r11,32
	lvx	v24,r10,$sp
	addi	r10,r10,32
	lvx	v25,r11,$sp
	addi	r11,r11,32
	lvx	v26,r10,$sp
	addi	r10,r10,32
	lvx	v27,r11,$sp
	addi	r11,r11,32
	lvx	v28,r10,$sp
	addi	r10,r10,32
	lvx	v29,r11,$sp
	addi	r11,r11,32
	lvx	v30,r10,$sp
	lvx	v31,r11,$sp
	$POP	r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP	r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP	r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP	r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP	r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP	r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi	$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long	0
	.byte	0,12,0x14,0,0x80,6,6,0
	.long	0
.size	.${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
___
}}	}}}

#########################################################################
{{{	# XTS procedures						#
# int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len,	#
#		const AES_KEY *key1, const AES_KEY *key2,		#
#		[const] unsigned char iv[16]);				#
# If $key2 is NULL, then a "tweak chaining" mode is engaged, in which	#
# input tweak value is assumed to be encrypted already, and last tweak	#
# value, one suitable for consecutive call on same chunk of data, is	#
# written back to original buffer. In addition, in "tweak chaining"	#
# mode only complete input blocks are processed.			#
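# A note on the tweak arithmetic used throughout these routines (an
# editor's annotation, not upstream commentary): the recurring sequence
#
#	vsrab	$tmp,$tweak,$seven	# 0x00/0xff per byte from MSBs
#	vaddubm	$tweak,$tweak,$tweak	# per-byte left shift by one
#	vsldoi	$tmp,$tmp,$tmp,15	# rotate carry masks into place
#	vand	$tmp,$tmp,$eighty7	# 0x87 reduction + 0x01 carries
#	vxor	$tweak,$tweak,$tmp
#
# is the standard XTS GF(2^128) doubling with the x^128+x^7+x^2+x+1
# polynomial, computed bytewise so that no cross-lane shift is needed.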

my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) =	map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout) =				map("v$_",(0..2));
my ($output,$inptail,$inpperm,$leperm,$keyperm) =	map("v$_",(3..7));
my ($tweak,$seven,$eighty7,$tmp,$tweak1) =		map("v$_",(8..12));
my $taillen = $key2;

	($inp,$idx) = ($idx,$inp);	# reassign

$code.=<<___;
.globl	.${prefix}_xts_encrypt
	mr	$inp,r3			# reassign
	li	r3,-1
	${UCMP}i	$len,16
	bltlr-

	lis	r0,0xfff0
	mfspr	r12,256			# save vrsave
	li	r11,0
	mtspr	256,r0

	vspltisb	$seven,0x07	# 0x070707..07
	le?lvsl	$leperm,r11,r11
	le?vspltisb	$tmp,0x0f
	le?vxor	$leperm,$leperm,$seven

	li	$idx,15
	lvx	$tweak,0,$ivp		# load [unaligned] iv
	lvsl	$inpperm,0,$ivp
	lvx	$inptail,$idx,$ivp
	le?vxor	$inpperm,$inpperm,$tmp
	vperm	$tweak,$tweak,$inptail,$inpperm

	neg	r11,$inp
	lvsr	$inpperm,0,r11		# prepare for unaligned load
	lvx	$inout,0,$inp
	addi	$inp,$inp,15		# 15 is not typo
	le?vxor	$inpperm,$inpperm,$tmp

	${UCMP}i	$key2,0		# key2==NULL?
	beq	Lxts_enc_no_key2

	?lvsl	$keyperm,0,$key2	# prepare for unaligned key
	lwz	$rounds,240($key2)
	srwi	$rounds,$rounds,1
	subi	$rounds,$rounds,1
	li	$idx,16

	lvx	$rndkey0,0,$key2
	lvx	$rndkey1,$idx,$key2
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$tweak,$tweak,$rndkey0
	lvx	$rndkey0,$idx,$key2
	addi	$idx,$idx,16
	mtctr	$rounds

Ltweak_xts_enc:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$tweak,$tweak,$rndkey1
	lvx	$rndkey1,$idx,$key2
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher	$tweak,$tweak,$rndkey0
	lvx	$rndkey0,$idx,$key2
	addi	$idx,$idx,16
	bdnz	Ltweak_xts_enc

	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$tweak,$tweak,$rndkey1
	lvx	$rndkey1,$idx,$key2
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipherlast	$tweak,$tweak,$rndkey0

	li	$ivp,0			# don't chain the tweak
	b	Lxts_enc

Lxts_enc_no_key2:
	li	$idx,-16
	and	$len,$len,$idx		# in "tweak chaining"
					# mode only complete
					# blocks are processed
Lxts_enc:
	lvx	$inptail,0,$inp
	addi	$inp,$inp,16

	?lvsl	$keyperm,0,$key1	# prepare for unaligned key
	lwz	$rounds,240($key1)
	srwi	$rounds,$rounds,1
	subi	$rounds,$rounds,1
	li	$idx,16

	vslb	$eighty7,$seven,$seven	# 0x808080..80
	vor	$eighty7,$eighty7,$seven	# 0x878787..87
	vspltisb	$tmp,1		# 0x010101..01
	vsldoi	$eighty7,$eighty7,$tmp,15	# 0x870101..01

	${UCMP}i	$len,96
	bge	_aesp8_xts_encrypt6x

	andi.	$taillen,$len,15
	andi.		$taillen,$len,15
	subic		r0,$len,32
	subi		$taillen,$taillen,16
	subfe		r0,r0,r0
	and		r0,r0,$taillen
	add		$inp,$inp,r0

	lvx		$rndkey0,0,$key1
	lvx		$rndkey1,$idx,$key1
	addi		$idx,$idx,16
	vperm		$inout,$inout,$inptail,$inpperm
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$inout,$inout,$tweak
	vxor		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key1
	addi		$idx,$idx,16
	mtctr		$rounds
	b		Loop_xts_enc

.align	5
Loop_xts_enc:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key1
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key1
	addi		$idx,$idx,16
	bdnz		Loop_xts_enc

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key1
	li		$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$rndkey0,$rndkey0,$tweak
	vcipherlast	$output,$inout,$rndkey0

	le?vperm	$tmp,$output,$output,$leperm
	be?nop
	le?stvx_u	$tmp,0,$out
	be?stvx_u	$output,0,$out
	addi		$out,$out,16

	subic.		$len,$len,16
	beq		Lxts_enc_done

	vmr		$inout,$inptail
	lvx		$inptail,0,$inp
	addi		$inp,$inp,16
	lvx		$rndkey0,0,$key1
	lvx		$rndkey1,$idx,$key1
	addi		$idx,$idx,16

	subic		r0,$len,32
	subfe		r0,r0,r0
	and		r0,r0,$taillen
	add		$inp,$inp,r0

	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	vand		$tmp,$tmp,$eighty7
	vxor		$tweak,$tweak,$tmp

	vperm		$inout,$inout,$inptail,$inpperm
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$inout,$inout,$tweak
	vxor		$output,$output,$rndkey0	# just in case $len<16
	vxor		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key1
	addi		$idx,$idx,16

	mtctr		$rounds
	${UCMP}i	$len,16
	bge		Loop_xts_enc

	vxor		$output,$output,$tweak
	lvsr		$inpperm,0,$len		# $inpperm is no longer needed
	vxor		$inptail,$inptail,$inptail	# $inptail is no longer needed
	vspltisb	$tmp,-1
	vperm		$inptail,$inptail,$tmp,$inpperm
	vsel		$inout,$inout,$output,$inptail

	subi		r11,$out,17
	subi		$out,$out,16
	mtctr		$len
	li		$len,16
Loop_xts_enc_steal:
	lbzu		r0,1(r11)
	stb		r0,16(r11)
	bdnz		Loop_xts_enc_steal

	mtctr		$rounds
	b		Loop_xts_enc		# one more time...

Lxts_enc_done:
	${UCMP}i	$ivp,0
	beq		Lxts_enc_ret

	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	vand		$tmp,$tmp,$eighty7
	vxor		$tweak,$tweak,$tmp

	le?vperm	$tweak,$tweak,$tweak,$leperm
	stvx_u		$tweak,0,$ivp

Lxts_enc_ret:
	mtspr		256,r12			# restore vrsave
	li		r3,0
	blr
	.long		0
	.byte		0,12,0x04,0,0x80,6,6,0
	.long		0
.size	.${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt

.globl	.${prefix}_xts_decrypt
	mr		$inp,r3			# reassign
	li		r3,-1
	${UCMP}i	$len,16
	bltlr-

	lis		r0,0xfff8
	mfspr		r12,256			# save vrsave
	li		r11,0
	mtspr		256,r0

	andi.		r0,$len,15
	neg		r0,r0
	andi.		r0,r0,16
	sub		$len,$len,r0
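	# $len is now rounded down to a multiple of 16 whenever a
	# partial tail exists: decryption has to hold back one complete
	# block in front of the tail for ciphertext stealing.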

	vspltisb	$seven,0x07		# 0x070707..07
	le?lvsl		$leperm,r11,r11
	le?vspltisb	$tmp,0x0f
	le?vxor		$leperm,$leperm,$seven

	li		$idx,15
	lvx		$tweak,0,$ivp		# load [unaligned] iv
	lvsl		$inpperm,0,$ivp
	lvx		$inptail,$idx,$ivp
	le?vxor		$inpperm,$inpperm,$tmp
	vperm		$tweak,$tweak,$inptail,$inpperm

	neg		r11,$inp
	lvsr		$inpperm,0,r11		# prepare for unaligned load
	lvx		$inout,0,$inp
	addi		$inp,$inp,15		# 15 is not a typo
	le?vxor		$inpperm,$inpperm,$tmp

	${UCMP}i	$key2,0			# key2==NULL?
	beq		Lxts_dec_no_key2

	?lvsl		$keyperm,0,$key2	# prepare for unaligned key
	lwz		$rounds,240($key2)
	srwi		$rounds,$rounds,1
	subi		$rounds,$rounds,1
	li		$idx,16

	lvx		$rndkey0,0,$key2
	lvx		$rndkey1,$idx,$key2
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$tweak,$tweak,$rndkey0
	lvx		$rndkey0,$idx,$key2
	addi		$idx,$idx,16
	mtctr		$rounds

Ltweak_xts_dec:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$tweak,$tweak,$rndkey1
	lvx		$rndkey1,$idx,$key2
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher		$tweak,$tweak,$rndkey0
	lvx		$rndkey0,$idx,$key2
	addi		$idx,$idx,16
	bdnz		Ltweak_xts_dec

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$tweak,$tweak,$rndkey1
	lvx		$rndkey1,$idx,$key2
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipherlast	$tweak,$tweak,$rndkey0

	li		$ivp,0			# don't chain the tweak
	b		Lxts_dec

Lxts_dec_no_key2:
	neg		$idx,$len
	andi.		$idx,$idx,15
	add		$len,$len,$idx		# in "tweak chaining"
						# mode only complete
						# blocks are processed
Lxts_dec:
	lvx		$inptail,0,$inp
	addi		$inp,$inp,16

	?lvsl		$keyperm,0,$key1	# prepare for unaligned key
	lwz		$rounds,240($key1)
	srwi		$rounds,$rounds,1
	subi		$rounds,$rounds,1
	li		$idx,16

	vslb		$eighty7,$seven,$seven	# 0x808080..80
	vor		$eighty7,$eighty7,$seven	# 0x878787..87
	vspltisb	$tmp,1			# 0x010101..01
	vsldoi		$eighty7,$eighty7,$tmp,15	# 0x870101..01

	${UCMP}i	$len,96
	bge		_aesp8_xts_decrypt6x

	lvx		$rndkey0,0,$key1
	lvx		$rndkey1,$idx,$key1
	addi		$idx,$idx,16
	vperm		$inout,$inout,$inptail,$inpperm
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$inout,$inout,$tweak
	vxor		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key1
	addi		$idx,$idx,16
	mtctr		$rounds

	${UCMP}i	$len,16
	blt		Ltail_xts_dec
	be?b		Loop_xts_dec

.align	5
Loop_xts_dec:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key1
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipher	$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key1
	addi		$idx,$idx,16
	bdnz		Loop_xts_dec

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key1
	li		$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$rndkey0,$rndkey0,$tweak
	vncipherlast	$output,$inout,$rndkey0

	le?vperm	$tmp,$output,$output,$leperm
	be?nop
	le?stvx_u	$tmp,0,$out
	be?stvx_u	$output,0,$out
	addi		$out,$out,16

	subic.		$len,$len,16
	beq		Lxts_dec_done

	vmr		$inout,$inptail
	lvx		$inptail,0,$inp
	addi		$inp,$inp,16
	lvx		$rndkey0,0,$key1
	lvx		$rndkey1,$idx,$key1
	addi		$idx,$idx,16

	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	vand		$tmp,$tmp,$eighty7
	vxor		$tweak,$tweak,$tmp

	vperm		$inout,$inout,$inptail,$inpperm
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$inout,$inout,$tweak
	vxor		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key1
	addi		$idx,$idx,16

	mtctr		$rounds
	${UCMP}i	$len,16
	bge		Loop_xts_dec

Ltail_xts_dec:
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak1,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	vand		$tmp,$tmp,$eighty7
	vxor		$tweak1,$tweak1,$tmp

	subi		$inp,$inp,16
	add		$inp,$inp,$len

	vxor		$inout,$inout,$tweak	# :-(
	vxor		$inout,$inout,$tweak1	# :-)

Loop_xts_dec_short:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key1
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipher	$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key1
	addi		$idx,$idx,16
	bdnz		Loop_xts_dec_short

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key1
	li		$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$rndkey0,$rndkey0,$tweak1
	vncipherlast	$output,$inout,$rndkey0

	le?vperm	$tmp,$output,$output,$leperm
	be?nop
	le?stvx_u	$tmp,0,$out
	be?stvx_u	$output,0,$out

	vmr		$inout,$inptail
	lvx		$inptail,0,$inp
	#addi		$inp,$inp,16
	lvx		$rndkey0,0,$key1
	lvx		$rndkey1,$idx,$key1
	addi		$idx,$idx,16
	vperm		$inout,$inout,$inptail,$inpperm
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm

	lvsr		$inpperm,0,$len		# $inpperm is no longer needed
	vxor		$inptail,$inptail,$inptail	# $inptail is no longer needed
	vspltisb	$tmp,-1
	vperm		$inptail,$inptail,$tmp,$inpperm
	vsel		$inout,$inout,$output,$inptail

	vxor		$rndkey0,$rndkey0,$tweak
	vxor		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key1
	addi		$idx,$idx,16

	subi		r11,$out,1
	mtctr		$len
	li		$len,16
Loop_xts_dec_steal:
	lbzu		r0,1(r11)
	stb		r0,16(r11)
	bdnz		Loop_xts_dec_steal

	mtctr		$rounds
	b		Loop_xts_dec		# one more time...
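	# The Ltail_xts_dec path above implements ciphertext stealing in
	# reverse tweak order: the last complete block is decrypted under
	# the next tweak value ($tweak1), after which the re-assembled
	# final block goes back through Loop_xts_dec under the current
	# $tweak.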

Lxts_dec_done:
	${UCMP}i	$ivp,0
	beq		Lxts_dec_ret

	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	vand		$tmp,$tmp,$eighty7
	vxor		$tweak,$tweak,$tmp

	le?vperm	$tweak,$tweak,$tweak,$leperm
	stvx_u		$tweak,0,$ivp

Lxts_dec_ret:
	mtspr		256,r12			# restore vrsave
	li		r3,0
	blr
	.long		0
	.byte		0,12,0x04,0,0x80,6,6,0
	.long		0
.size	.${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt
___
#########################################################################
{{	# Optimized XTS procedures					#
my $key_=$key2;
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31));
    $x00=0 if ($flavour =~ /osx/);
my ($in0,  $in1,  $in2,  $in3,  $in4,  $in5 )=map("v$_",(0..5));
my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16));
my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22));
my $rndkey0="v23";	# v24-v25 rotating buffer for first few round keys
			# v26-v31 last 6 round keys
my ($keyperm)=($out0);	# aliases with "caller", redundant assignment
my $taillen=$x70;

$code.=<<___;
.align	5
_aesp8_xts_encrypt6x:
	$STU		$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
	mflr		r11
	li		r7,`$FRAME+8*16+15`
	li		r3,`$FRAME+8*16+31`
	$PUSH		r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
	stvx		v20,r7,$sp		# ABI says so
	addi		r7,r7,32
	stvx		v21,r3,$sp
	addi		r3,r3,32
	stvx		v22,r7,$sp
	addi		r7,r7,32
	stvx		v23,r3,$sp
	addi		r3,r3,32
	stvx		v24,r7,$sp
	addi		r7,r7,32
	stvx		v25,r3,$sp
	addi		r3,r3,32
	stvx		v26,r7,$sp
	addi		r7,r7,32
	stvx		v27,r3,$sp
	addi		r3,r3,32
	stvx		v28,r7,$sp
	addi		r7,r7,32
	stvx		v29,r3,$sp
	addi		r3,r3,32
	stvx		v30,r7,$sp
	stvx		v31,r3,$sp
	li		r0,-1
	stw		$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
	li		$x10,0x10
	$PUSH		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	li		$x20,0x20
	$PUSH		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	li		$x30,0x30
	$PUSH		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	li		$x40,0x40
	$PUSH		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	li		$x50,0x50
	$PUSH		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	li		$x60,0x60
	$PUSH		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	li		$x70,0x70
	mtspr		256,r0

	subi		$rounds,$rounds,3	# -4 in total

	lvx		$rndkey0,$x00,$key1	# load key schedule
	lvx		v30,$x10,$key1
	addi		$key1,$key1,0x20
	lvx		v31,$x00,$key1
	?vperm		$rndkey0,$rndkey0,v30,$keyperm
	addi		$key_,$sp,$FRAME+15
	mtctr		$rounds

Load_xts_enc_key:
	?vperm		v24,v30,v31,$keyperm
	lvx		v30,$x10,$key1
	addi		$key1,$key1,0x20
	stvx		v24,$x00,$key_		# off-load round[1]
	?vperm		v25,v31,v30,$keyperm
	lvx		v31,$x00,$key1
	stvx		v25,$x10,$key_		# off-load round[2]
	addi		$key_,$key_,0x20
	bdnz		Load_xts_enc_key

	lvx		v26,$x10,$key1
	?vperm		v24,v30,v31,$keyperm
	lvx		v27,$x20,$key1
	stvx		v24,$x00,$key_		# off-load round[3]
	?vperm		v25,v31,v26,$keyperm
	lvx		v28,$x30,$key1
	stvx		v25,$x10,$key_		# off-load round[4]
	addi		$key_,$sp,$FRAME+15	# rewind $key_
	?vperm		v26,v26,v27,$keyperm
	lvx		v29,$x40,$key1
	?vperm		v27,v27,v28,$keyperm
	lvx		v30,$x50,$key1
	?vperm		v28,v28,v29,$keyperm
	lvx		v31,$x60,$key1
	?vperm		v29,v29,v30,$keyperm
	lvx		$twk5,$x70,$key1	# borrow $twk5
	?vperm		v30,v30,v31,$keyperm
	lvx		v24,$x00,$key_		# pre-load round[1]
	?vperm		v31,v31,$twk5,$keyperm
	lvx		v25,$x10,$key_		# pre-load round[2]
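	# The entire key schedule is now pre-permuted: the early rounds
	# sit in the stack buffer and cycle through v24/v25, while the
	# last six round keys stay resident in v26-v31, so $key1 is not
	# dereferenced again inside the main loop.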
	vperm		$in0,$inout,$inptail,$inpperm
	subi		$inp,$inp,31		# undo "caller"
	vxor		$twk0,$tweak,$rndkey0
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	vand		$tmp,$tmp,$eighty7
	vxor		$out0,$in0,$twk0
	vxor		$tweak,$tweak,$tmp

	lvx_u		$in1,$x10,$inp
	vxor		$twk1,$tweak,$rndkey0
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	le?vperm	$in1,$in1,$in1,$leperm
	vand		$tmp,$tmp,$eighty7
	vxor		$out1,$in1,$twk1
	vxor		$tweak,$tweak,$tmp

	lvx_u		$in2,$x20,$inp
	andi.		$taillen,$len,15
	vxor		$twk2,$tweak,$rndkey0
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	le?vperm	$in2,$in2,$in2,$leperm
	vand		$tmp,$tmp,$eighty7
	vxor		$out2,$in2,$twk2
	vxor		$tweak,$tweak,$tmp

	lvx_u		$in3,$x30,$inp
	sub		$len,$len,$taillen
	vxor		$twk3,$tweak,$rndkey0
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	le?vperm	$in3,$in3,$in3,$leperm
	vand		$tmp,$tmp,$eighty7
	vxor		$out3,$in3,$twk3
	vxor		$tweak,$tweak,$tmp

	lvx_u		$in4,$x40,$inp
	subi		$len,$len,0x60
	vxor		$twk4,$tweak,$rndkey0
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	le?vperm	$in4,$in4,$in4,$leperm
	vand		$tmp,$tmp,$eighty7
	vxor		$out4,$in4,$twk4
	vxor		$tweak,$tweak,$tmp

	lvx_u		$in5,$x50,$inp
	addi		$inp,$inp,0x60
	vxor		$twk5,$tweak,$rndkey0
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	le?vperm	$in5,$in5,$in5,$leperm
	vand		$tmp,$tmp,$eighty7
	vxor		$out5,$in5,$twk5
	vxor		$tweak,$tweak,$tmp

	vxor		v31,v31,$rndkey0
	mtctr		$rounds
	b		Loop_xts_enc6x

.align	5
Loop_xts_enc6x:
	vcipher		$out0,$out0,v24
	vcipher		$out1,$out1,v24
	vcipher		$out2,$out2,v24
	vcipher		$out3,$out3,v24
	vcipher		$out4,$out4,v24
	vcipher		$out5,$out5,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vcipher		$out0,$out0,v25
	vcipher		$out1,$out1,v25
	vcipher		$out2,$out2,v25
	vcipher		$out3,$out3,v25
	vcipher		$out4,$out4,v25
	vcipher		$out5,$out5,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Loop_xts_enc6x

	subic		$len,$len,96		# $len-=96
	vxor		$in0,$twk0,v31		# xor with last round key
	vcipher		$out0,$out0,v24
	vcipher		$out1,$out1,v24
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vxor		$twk0,$tweak,$rndkey0
	vaddubm		$tweak,$tweak,$tweak
	vcipher		$out2,$out2,v24
	vcipher		$out3,$out3,v24
	vsldoi		$tmp,$tmp,$tmp,15
	vcipher		$out4,$out4,v24
	vcipher		$out5,$out5,v24
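	# subic above set CA only when $len>=96; subfe below turns that
	# into a 0/-1 mask, so the masked add of r0 rewinds $inp on the
	# final pass and the trailing loads stay inside the input buffer.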
	subfe.		r0,r0,r0		# borrow?-1:0
	vand		$tmp,$tmp,$eighty7
	vcipher		$out0,$out0,v25
	vcipher		$out1,$out1,v25
	vxor		$tweak,$tweak,$tmp
	vcipher		$out2,$out2,v25
	vcipher		$out3,$out3,v25
	vxor		$in1,$twk1,v31
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vxor		$twk1,$tweak,$rndkey0
	vcipher		$out4,$out4,v25
	vcipher		$out5,$out5,v25

	and		r0,r0,$len
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	vcipher		$out0,$out0,v26
	vcipher		$out1,$out1,v26
	vand		$tmp,$tmp,$eighty7
	vcipher		$out2,$out2,v26
	vcipher		$out3,$out3,v26
	vxor		$tweak,$tweak,$tmp
	vcipher		$out4,$out4,v26
	vcipher		$out5,$out5,v26

	add		$inp,$inp,r0		# $inp is adjusted in such
						# way that at exit from the
						# loop inX-in5 are loaded
						# with last "words"
	vxor		$in2,$twk2,v31
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vxor		$twk2,$tweak,$rndkey0
	vaddubm		$tweak,$tweak,$tweak
	vcipher		$out0,$out0,v27
	vcipher		$out1,$out1,v27
	vsldoi		$tmp,$tmp,$tmp,15
	vcipher		$out2,$out2,v27
	vcipher		$out3,$out3,v27
	vand		$tmp,$tmp,$eighty7
	vcipher		$out4,$out4,v27
	vcipher		$out5,$out5,v27

	addi		$key_,$sp,$FRAME+15	# rewind $key_
	vxor		$tweak,$tweak,$tmp
	vcipher		$out0,$out0,v28
	vcipher		$out1,$out1,v28
	vxor		$in3,$twk3,v31
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vxor		$twk3,$tweak,$rndkey0
	vcipher		$out2,$out2,v28
	vcipher		$out3,$out3,v28
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	vcipher		$out4,$out4,v28
	vcipher		$out5,$out5,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]
	vand		$tmp,$tmp,$eighty7

	vcipher		$out0,$out0,v29
	vcipher		$out1,$out1,v29
	vxor		$tweak,$tweak,$tmp
	vcipher		$out2,$out2,v29
	vcipher		$out3,$out3,v29
	vxor		$in4,$twk4,v31
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vxor		$twk4,$tweak,$rndkey0
	vcipher		$out4,$out4,v29
	vcipher		$out5,$out5,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15

	vcipher		$out0,$out0,v30
	vcipher		$out1,$out1,v30
	vand		$tmp,$tmp,$eighty7
	vcipher		$out2,$out2,v30
	vcipher		$out3,$out3,v30
	vxor		$tweak,$tweak,$tmp
	vcipher		$out4,$out4,v30
	vcipher		$out5,$out5,v30
	vxor		$in5,$twk5,v31
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vxor		$twk5,$tweak,$rndkey0

	vcipherlast	$out0,$out0,$in0
	lvx_u		$in0,$x00,$inp		# load next input block
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	vcipherlast	$out1,$out1,$in1
	lvx_u		$in1,$x10,$inp
	vcipherlast	$out2,$out2,$in2
	le?vperm	$in0,$in0,$in0,$leperm
	lvx_u		$in2,$x20,$inp
	vand		$tmp,$tmp,$eighty7
	vcipherlast	$out3,$out3,$in3
	le?vperm	$in1,$in1,$in1,$leperm
	lvx_u		$in3,$x30,$inp
	vcipherlast	$out4,$out4,$in4
	le?vperm	$in2,$in2,$in2,$leperm
	lvx_u		$in4,$x40,$inp
	vxor		$tweak,$tweak,$tmp
	vcipherlast	$tmp,$out5,$in5		# last block might be needed
						# in stealing mode
	le?vperm	$in3,$in3,$in3,$leperm
	lvx_u		$in5,$x50,$inp
	addi		$inp,$inp,0x60
	le?vperm	$in4,$in4,$in4,$leperm
	le?vperm	$in5,$in5,$in5,$leperm

	le?vperm	$out0,$out0,$out0,$leperm
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	vxor		$out0,$in0,$twk0
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u		$out1,$x10,$out
	vxor		$out1,$in1,$twk1
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u		$out2,$x20,$out
	vxor		$out2,$in2,$twk2
	le?vperm	$out4,$out4,$out4,$leperm
	stvx_u		$out3,$x30,$out
	vxor		$out3,$in3,$twk3
	le?vperm	$out5,$tmp,$tmp,$leperm
	stvx_u		$out4,$x40,$out
	vxor		$out4,$in4,$twk4
	le?stvx_u	$out5,$x50,$out
	be?stvx_u	$tmp, $x50,$out
	vxor		$out5,$in5,$twk5
	addi		$out,$out,0x60

	mtctr		$rounds
	beq		Loop_xts_enc6x		# did $len-=96 borrow?

	addic.		$len,$len,0x60
	beq		Lxts_enc6x_zero
	cmpwi		$len,0x20
	blt		Lxts_enc6x_one
	nop
	beq		Lxts_enc6x_two
	cmpwi		$len,0x40
	blt		Lxts_enc6x_three
	nop
	beq		Lxts_enc6x_four

Lxts_enc6x_five:
	vxor		$out0,$in1,$twk0
	vxor		$out1,$in2,$twk1
	vxor		$out2,$in3,$twk2
	vxor		$out3,$in4,$twk3
	vxor		$out4,$in5,$twk4

	bl		_aesp8_xts_enc5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr		$twk0,$twk5		# unused tweak
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u		$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u		$out2,$x20,$out
	vxor		$tmp,$out4,$twk5	# last block prep for stealing
	le?vperm	$out4,$out4,$out4,$leperm
	stvx_u		$out3,$x30,$out
	stvx_u		$out4,$x40,$out
	addi		$out,$out,0x50
	bne		Lxts_enc6x_steal
	b		Lxts_enc6x_done

.align	4
Lxts_enc6x_four:
	vxor		$out0,$in2,$twk0
	vxor		$out1,$in3,$twk1
	vxor		$out2,$in4,$twk2
	vxor		$out3,$in5,$twk3
	vxor		$out4,$out4,$out4

	bl		_aesp8_xts_enc5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr		$twk0,$twk4		# unused tweak
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u		$out1,$x10,$out
	vxor		$tmp,$out3,$twk4	# last block prep for stealing
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u		$out2,$x20,$out
	stvx_u		$out3,$x30,$out
	addi		$out,$out,0x40
	bne		Lxts_enc6x_steal
	b		Lxts_enc6x_done

.align	4
Lxts_enc6x_three:
	vxor		$out0,$in3,$twk0
	vxor		$out1,$in4,$twk1
	vxor		$out2,$in5,$twk2
	vxor		$out3,$out3,$out3
	vxor		$out4,$out4,$out4

	bl		_aesp8_xts_enc5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr		$twk0,$twk3		# unused tweak
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	vxor		$tmp,$out2,$twk3	# last block prep for stealing
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u		$out1,$x10,$out
	stvx_u		$out2,$x20,$out
	addi		$out,$out,0x30
	bne		Lxts_enc6x_steal
	b		Lxts_enc6x_done

.align	4
Lxts_enc6x_two:
	vxor		$out0,$in4,$twk0
	vxor		$out1,$in5,$twk1
	vxor		$out2,$out2,$out2
	vxor		$out3,$out3,$out3
	vxor		$out4,$out4,$out4

	bl		_aesp8_xts_enc5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr		$twk0,$twk2		# unused tweak
	vxor		$tmp,$out1,$twk2	# last block prep for stealing
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	stvx_u		$out1,$x10,$out
	addi		$out,$out,0x20
	bne		Lxts_enc6x_steal
	b		Lxts_enc6x_done

.align	4
Lxts_enc6x_one:
	vxor		$out0,$in5,$twk0
	nop
Loop_xts_enc1x:
	vcipher		$out0,$out0,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vcipher		$out0,$out0,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Loop_xts_enc1x

	add		$inp,$inp,$taillen
	cmpwi		$taillen,0
	vcipher		$out0,$out0,v24
	subi		$inp,$inp,16
	vcipher		$out0,$out0,v25

	lvsr		$inpperm,0,$taillen
	vcipher		$out0,$out0,v26

	lvx_u		$in0,0,$inp
	vcipher		$out0,$out0,v27

	addi		$key_,$sp,$FRAME+15	# rewind $key_
	vcipher		$out0,$out0,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]

	vcipher		$out0,$out0,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]
	vxor		$twk0,$twk0,v31

	le?vperm	$in0,$in0,$in0,$leperm
	vcipher		$out0,$out0,v30

	vperm		$in0,$in0,$in0,$inpperm
	vcipherlast	$out0,$out0,$twk0

	vmr		$twk0,$twk1		# unused tweak
	vxor		$tmp,$out0,$twk1	# last block prep for stealing
	le?vperm	$out0,$out0,$out0,$leperm
	stvx_u		$out0,$x00,$out		# store output
	addi		$out,$out,0x10
	bne		Lxts_enc6x_steal
	b		Lxts_enc6x_done

.align	4
Lxts_enc6x_zero:
	cmpwi		$taillen,0
	beq		Lxts_enc6x_done

	add		$inp,$inp,$taillen
	subi		$inp,$inp,16
	lvx_u		$in0,0,$inp
	lvsr		$inpperm,0,$taillen	# $in5 is no more
	le?vperm	$in0,$in0,$in0,$leperm
	vperm		$in0,$in0,$in0,$inpperm
	vxor		$tmp,$tmp,$twk0
Lxts_enc6x_steal:
	vxor		$in0,$in0,$twk0
	vxor		$out0,$out0,$out0
	vspltisb	$out1,-1
	vperm		$out0,$out0,$out1,$inpperm
	vsel		$out0,$in0,$tmp,$out0	# $tmp is last block, remember?

	subi		r30,$out,17
	subi		$out,$out,16
	mtctr		$taillen
Loop_xts_enc6x_steal:
	lbzu		r0,1(r30)
	stb		r0,16(r30)
	bdnz		Loop_xts_enc6x_steal

	li		$taillen,0
	mtctr		$rounds
	b		Loop_xts_enc1x		# one more time...

.align	4
Lxts_enc6x_done:
	${UCMP}i	$ivp,0
	beq		Lxts_enc6x_ret

	vxor		$tweak,$twk0,$rndkey0
	le?vperm	$tweak,$tweak,$tweak,$leperm
	stvx_u		$tweak,0,$ivp

Lxts_enc6x_ret:
	mtlr		r11
	li		r10,`$FRAME+15`
	li		r11,`$FRAME+31`
	stvx		$seven,r10,$sp		# wipe copies of round keys
	addi		r10,r10,32
	stvx		$seven,r11,$sp
	addi		r11,r11,32
	stvx		$seven,r10,$sp
	addi		r10,r10,32
	stvx		$seven,r11,$sp
	addi		r11,r11,32
	stvx		$seven,r10,$sp
	addi		r10,r10,32
	stvx		$seven,r11,$sp
	addi		r11,r11,32
	stvx		$seven,r10,$sp
	addi		r10,r10,32
	stvx		$seven,r11,$sp
	addi		r11,r11,32

	mtspr		256,$vrsave
	lvx		v20,r10,$sp		# ABI says so
	addi		r10,r10,32
	lvx		v21,r11,$sp
	addi		r11,r11,32
	lvx		v22,r10,$sp
	addi		r10,r10,32
	lvx		v23,r11,$sp
	addi		r11,r11,32
	lvx		v24,r10,$sp
	addi		r10,r10,32
	lvx		v25,r11,$sp
	addi		r11,r11,32
	lvx		v26,r10,$sp
	addi		r10,r10,32
	lvx		v27,r11,$sp
	addi		r11,r11,32
	lvx		v28,r10,$sp
	addi		r10,r10,32
	lvx		v29,r11,$sp
	addi		r11,r11,32
	lvx		v30,r10,$sp
	lvx		v31,r11,$sp
	$POP		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi		$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long		0
	.byte		0,12,0x04,1,0x80,6,6,0
	.long		0

.align	5
_aesp8_xts_enc5x:
	vcipher		$out0,$out0,v24
	vcipher		$out1,$out1,v24
	vcipher		$out2,$out2,v24
	vcipher		$out3,$out3,v24
	vcipher		$out4,$out4,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vcipher		$out0,$out0,v25
	vcipher		$out1,$out1,v25
	vcipher		$out2,$out2,v25
	vcipher		$out3,$out3,v25
	vcipher		$out4,$out4,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		_aesp8_xts_enc5x

	add		$inp,$inp,$taillen
	cmpwi		$taillen,0
	vcipher		$out0,$out0,v24
	vcipher		$out1,$out1,v24
	vcipher		$out2,$out2,v24
	vcipher		$out3,$out3,v24
	vcipher		$out4,$out4,v24

	subi		$inp,$inp,16
	vcipher		$out0,$out0,v25
	vcipher		$out1,$out1,v25
	vcipher		$out2,$out2,v25
	vcipher		$out3,$out3,v25
	vcipher		$out4,$out4,v25
	vxor		$twk0,$twk0,v31

	vcipher		$out0,$out0,v26
	lvsr		$inpperm,r0,$taillen	# $in5 is no more
	vcipher		$out1,$out1,v26
	vcipher		$out2,$out2,v26
	vcipher		$out3,$out3,v26
	vcipher		$out4,$out4,v26
	vxor		$in1,$twk1,v31

	vcipher		$out0,$out0,v27
	lvx_u		$in0,0,$inp
	vcipher		$out1,$out1,v27
	vcipher		$out2,$out2,v27
	vcipher		$out3,$out3,v27
	vcipher		$out4,$out4,v27
	vxor		$in2,$twk2,v31

	addi		$key_,$sp,$FRAME+15	# rewind $key_
	vcipher		$out0,$out0,v28
	vcipher		$out1,$out1,v28
	vcipher		$out2,$out2,v28
	vcipher		$out3,$out3,v28
	vcipher		$out4,$out4,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]
	vxor		$in3,$twk3,v31

	vcipher		$out0,$out0,v29
	le?vperm	$in0,$in0,$in0,$leperm
	vcipher		$out1,$out1,v29
	vcipher		$out2,$out2,v29
	vcipher		$out3,$out3,v29
	vcipher		$out4,$out4,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]
	vxor		$in4,$twk4,v31

	vcipher		$out0,$out0,v30
	vperm		$in0,$in0,$in0,$inpperm
	vcipher		$out1,$out1,v30
	vcipher		$out2,$out2,v30
	vcipher		$out3,$out3,v30
	vcipher		$out4,$out4,v30

	vcipherlast	$out0,$out0,$twk0
	vcipherlast	$out1,$out1,$in1
	vcipherlast	$out2,$out2,$in2
	vcipherlast	$out3,$out3,$in3
	vcipherlast	$out4,$out4,$in4
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,0,0

.align	5
_aesp8_xts_decrypt6x:
	$STU		$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
	mflr		r11
	li		r7,`$FRAME+8*16+15`
	li		r3,`$FRAME+8*16+31`
	$PUSH		r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
	stvx		v20,r7,$sp		# ABI says so
	addi		r7,r7,32
	stvx		v21,r3,$sp
	addi		r3,r3,32
	stvx		v22,r7,$sp
	addi		r7,r7,32
	stvx		v23,r3,$sp
	addi		r3,r3,32
	stvx		v24,r7,$sp
	addi		r7,r7,32
	stvx		v25,r3,$sp
	addi		r3,r3,32
	stvx		v26,r7,$sp
	addi		r7,r7,32
	stvx		v27,r3,$sp
	addi		r3,r3,32
	stvx		v28,r7,$sp
	addi		r7,r7,32
	stvx		v29,r3,$sp
	addi		r3,r3,32
	stvx		v30,r7,$sp
	stvx		v31,r3,$sp
	li		r0,-1
	stw		$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
	li		$x10,0x10
	$PUSH		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	li		$x20,0x20
	$PUSH		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	li		$x30,0x30
	$PUSH		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	li		$x40,0x40
	$PUSH		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	li		$x50,0x50
	$PUSH		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	li		$x60,0x60
	$PUSH		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	li		$x70,0x70
	mtspr		256,r0

	subi		$rounds,$rounds,3	# -4 in total

	lvx		$rndkey0,$x00,$key1	# load key schedule
	lvx		v30,$x10,$key1
	addi		$key1,$key1,0x20
	lvx		v31,$x00,$key1
	?vperm		$rndkey0,$rndkey0,v30,$keyperm
	addi		$key_,$sp,$FRAME+15
	mtctr		$rounds

Load_xts_dec_key:
	?vperm		v24,v30,v31,$keyperm
	lvx		v30,$x10,$key1
	addi		$key1,$key1,0x20
	stvx		v24,$x00,$key_		# off-load round[1]
	?vperm		v25,v31,v30,$keyperm
	lvx		v31,$x00,$key1
	stvx		v25,$x10,$key_		# off-load round[2]
	addi		$key_,$key_,0x20
	bdnz		Load_xts_dec_key
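	# Same pre-permuted key-schedule scheme as the encrypt path: the
	# remaining rounds are gathered into v26-v31 below.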
	lvx		v26,$x10,$key1
	?vperm		v24,v30,v31,$keyperm
	lvx		v27,$x20,$key1
	stvx		v24,$x00,$key_		# off-load round[3]
	?vperm		v25,v31,v26,$keyperm
	lvx		v28,$x30,$key1
	stvx		v25,$x10,$key_		# off-load round[4]
	addi		$key_,$sp,$FRAME+15	# rewind $key_
	?vperm		v26,v26,v27,$keyperm
	lvx		v29,$x40,$key1
	?vperm		v27,v27,v28,$keyperm
	lvx		v30,$x50,$key1
	?vperm		v28,v28,v29,$keyperm
	lvx		v31,$x60,$key1
	?vperm		v29,v29,v30,$keyperm
	lvx		$twk5,$x70,$key1	# borrow $twk5
	?vperm		v30,v30,v31,$keyperm
	lvx		v24,$x00,$key_		# pre-load round[1]
	?vperm		v31,v31,$twk5,$keyperm
	lvx		v25,$x10,$key_		# pre-load round[2]

	vperm		$in0,$inout,$inptail,$inpperm
	subi		$inp,$inp,31		# undo "caller"
	vxor		$twk0,$tweak,$rndkey0
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	vand		$tmp,$tmp,$eighty7
	vxor		$out0,$in0,$twk0
	vxor		$tweak,$tweak,$tmp

	lvx_u		$in1,$x10,$inp
	vxor		$twk1,$tweak,$rndkey0
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	le?vperm	$in1,$in1,$in1,$leperm
	vand		$tmp,$tmp,$eighty7
	vxor		$out1,$in1,$twk1
	vxor		$tweak,$tweak,$tmp

	lvx_u		$in2,$x20,$inp
	andi.		$taillen,$len,15
	vxor		$twk2,$tweak,$rndkey0
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	le?vperm	$in2,$in2,$in2,$leperm
	vand		$tmp,$tmp,$eighty7
	vxor		$out2,$in2,$twk2
	vxor		$tweak,$tweak,$tmp

	lvx_u		$in3,$x30,$inp
	sub		$len,$len,$taillen
	vxor		$twk3,$tweak,$rndkey0
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	le?vperm	$in3,$in3,$in3,$leperm
	vand		$tmp,$tmp,$eighty7
	vxor		$out3,$in3,$twk3
	vxor		$tweak,$tweak,$tmp

	lvx_u		$in4,$x40,$inp
	subi		$len,$len,0x60
	vxor		$twk4,$tweak,$rndkey0
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	le?vperm	$in4,$in4,$in4,$leperm
	vand		$tmp,$tmp,$eighty7
	vxor		$out4,$in4,$twk4
	vxor		$tweak,$tweak,$tmp

	lvx_u		$in5,$x50,$inp
	addi		$inp,$inp,0x60
	vxor		$twk5,$tweak,$rndkey0
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	le?vperm	$in5,$in5,$in5,$leperm
	vand		$tmp,$tmp,$eighty7
	vxor		$out5,$in5,$twk5
	vxor		$tweak,$tweak,$tmp

	vxor		v31,v31,$rndkey0
	mtctr		$rounds
	b		Loop_xts_dec6x

.align	5
Loop_xts_dec6x:
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Loop_xts_dec6x

	subic		$len,$len,96		# $len-=96
	vxor		$in0,$twk0,v31		# xor with last round key
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vxor		$twk0,$tweak,$rndkey0
	vaddubm		$tweak,$tweak,$tweak
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vsldoi		$tmp,$tmp,$tmp,15
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24

	subfe.		r0,r0,r0		# borrow?-1:0
	vand		$tmp,$tmp,$eighty7
	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vxor		$tweak,$tweak,$tmp
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vxor		$in1,$twk1,v31
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vxor		$twk1,$tweak,$rndkey0
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25

	and		r0,r0,$len
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	vncipher	$out0,$out0,v26
	vncipher	$out1,$out1,v26
	vand		$tmp,$tmp,$eighty7
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	vxor		$tweak,$tweak,$tmp
	vncipher	$out4,$out4,v26
	vncipher	$out5,$out5,v26

	add		$inp,$inp,r0		# $inp is adjusted in such
						# way that at exit from the
						# loop inX-in5 are loaded
						# with last "words"
	vxor		$in2,$twk2,v31
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vxor		$twk2,$tweak,$rndkey0
	vaddubm		$tweak,$tweak,$tweak
	vncipher	$out0,$out0,v27
	vncipher	$out1,$out1,v27
	vsldoi		$tmp,$tmp,$tmp,15
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vand		$tmp,$tmp,$eighty7
	vncipher	$out4,$out4,v27
	vncipher	$out5,$out5,v27

	addi		$key_,$sp,$FRAME+15	# rewind $key_
	vxor		$tweak,$tweak,$tmp
	vncipher	$out0,$out0,v28
	vncipher	$out1,$out1,v28
	vxor		$in3,$twk3,v31
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vxor		$twk3,$tweak,$rndkey0
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	vncipher	$out4,$out4,v28
	vncipher	$out5,$out5,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]
	vand		$tmp,$tmp,$eighty7

	vncipher	$out0,$out0,v29
	vncipher	$out1,$out1,v29
	vxor		$tweak,$tweak,$tmp
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vxor		$in4,$twk4,v31
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vxor		$twk4,$tweak,$rndkey0
	vncipher	$out4,$out4,v29
	vncipher	$out5,$out5,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15

	vncipher	$out0,$out0,v30
	vncipher	$out1,$out1,v30
	vand		$tmp,$tmp,$eighty7
	vncipher	$out2,$out2,v30
	vncipher	$out3,$out3,v30
	vxor		$tweak,$tweak,$tmp
	vncipher	$out4,$out4,v30
	vncipher	$out5,$out5,v30
	vxor		$in5,$twk5,v31
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vxor		$twk5,$tweak,$rndkey0

	vncipherlast	$out0,$out0,$in0
	lvx_u		$in0,$x00,$inp		# load next input block
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	vncipherlast	$out1,$out1,$in1
	lvx_u		$in1,$x10,$inp
	vncipherlast	$out2,$out2,$in2
	le?vperm	$in0,$in0,$in0,$leperm
	lvx_u		$in2,$x20,$inp
	vand		$tmp,$tmp,$eighty7
	vncipherlast	$out3,$out3,$in3
	le?vperm	$in1,$in1,$in1,$leperm
	lvx_u		$in3,$x30,$inp
	vncipherlast	$out4,$out4,$in4
	le?vperm	$in2,$in2,$in2,$leperm
	lvx_u		$in4,$x40,$inp
	vxor		$tweak,$tweak,$tmp
	vncipherlast	$out5,$out5,$in5
	le?vperm	$in3,$in3,$in3,$leperm
	lvx_u		$in5,$x50,$inp
	addi		$inp,$inp,0x60
	le?vperm	$in4,$in4,$in4,$leperm
	le?vperm	$in5,$in5,$in5,$leperm

	le?vperm	$out0,$out0,$out0,$leperm
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	vxor		$out0,$in0,$twk0
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u		$out1,$x10,$out
	vxor		$out1,$in1,$twk1
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u		$out2,$x20,$out
	vxor		$out2,$in2,$twk2
	le?vperm	$out4,$out4,$out4,$leperm
	stvx_u		$out3,$x30,$out
	vxor		$out3,$in3,$twk3
	le?vperm	$out5,$out5,$out5,$leperm
	stvx_u		$out4,$x40,$out
	vxor		$out4,$in4,$twk4
	stvx_u		$out5,$x50,$out
	vxor		$out5,$in5,$twk5
	addi		$out,$out,0x60

	mtctr		$rounds
	beq		Loop_xts_dec6x		# did $len-=96 borrow?

	addic.		$len,$len,0x60
	beq		Lxts_dec6x_zero
	cmpwi		$len,0x20
	blt		Lxts_dec6x_one
	nop
	beq		Lxts_dec6x_two
	cmpwi		$len,0x40
	blt		Lxts_dec6x_three
	nop
	beq		Lxts_dec6x_four

Lxts_dec6x_five:
	vxor		$out0,$in1,$twk0
	vxor		$out1,$in2,$twk1
	vxor		$out2,$in3,$twk2
	vxor		$out3,$in4,$twk3
	vxor		$out4,$in5,$twk4

	bl		_aesp8_xts_dec5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr		$twk0,$twk5		# unused tweak
	vxor		$twk1,$tweak,$rndkey0
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	vxor		$out0,$in0,$twk1
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u		$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u		$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$leperm
	stvx_u		$out3,$x30,$out
	stvx_u		$out4,$x40,$out
	addi		$out,$out,0x50
	bne		Lxts_dec6x_steal
	b		Lxts_dec6x_done

.align	4
Lxts_dec6x_four:
	vxor		$out0,$in2,$twk0
	vxor		$out1,$in3,$twk1
	vxor		$out2,$in4,$twk2
	vxor		$out3,$in5,$twk3
	vxor		$out4,$out4,$out4

	bl		_aesp8_xts_dec5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr		$twk0,$twk4		# unused tweak
	vmr		$twk1,$twk5
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	vxor		$out0,$in0,$twk5
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u		$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u		$out2,$x20,$out
	stvx_u		$out3,$x30,$out
	addi		$out,$out,0x40
	bne		Lxts_dec6x_steal
	b		Lxts_dec6x_done

.align	4
Lxts_dec6x_three:
	vxor		$out0,$in3,$twk0
	vxor		$out1,$in4,$twk1
	vxor		$out2,$in5,$twk2
	vxor		$out3,$out3,$out3
	vxor		$out4,$out4,$out4

	bl		_aesp8_xts_dec5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr		$twk0,$twk3		# unused tweak
	vmr		$twk1,$twk4
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	vxor		$out0,$in0,$twk4
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u		$out1,$x10,$out
	stvx_u		$out2,$x20,$out
	addi		$out,$out,0x30
	bne		Lxts_dec6x_steal
	b		Lxts_dec6x_done

.align	4
Lxts_dec6x_two:
	vxor		$out0,$in4,$twk0
	vxor		$out1,$in5,$twk1
	vxor		$out2,$out2,$out2
	vxor		$out3,$out3,$out3
	vxor		$out4,$out4,$out4

	bl		_aesp8_xts_dec5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr		$twk0,$twk2		# unused tweak
	vmr		$twk1,$twk3
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	vxor		$out0,$in0,$twk3
	stvx_u		$out1,$x10,$out
	addi		$out,$out,0x20
	bne		Lxts_dec6x_steal
	b		Lxts_dec6x_done

.align	4
Lxts_dec6x_one:
	vxor		$out0,$in5,$twk0
	nop
Loop_xts_dec1x:
	vncipher	$out0,$out0,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Loop_xts_dec1x

	subi		r0,$taillen,1
	vncipher	$out0,$out0,v24

	andi.		r0,r0,16
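	# After the subi/andi. pair above, r0 is 16 exactly when
	# $taillen is zero, so the sub below only steps $inp back a
	# full block when no partial tail follows.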
	cmpwi		$taillen,0
	vncipher	$out0,$out0,v25

	sub		$inp,$inp,r0
	vncipher	$out0,$out0,v26

	lvx_u		$in0,0,$inp
	vncipher	$out0,$out0,v27

	addi		$key_,$sp,$FRAME+15	# rewind $key_
	vncipher	$out0,$out0,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]

	vncipher	$out0,$out0,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]
	vxor		$twk0,$twk0,v31

	le?vperm	$in0,$in0,$in0,$leperm
	vncipher	$out0,$out0,v30

	mtctr		$rounds
	vncipherlast	$out0,$out0,$twk0

	vmr		$twk0,$twk1		# unused tweak
	vmr		$twk1,$twk2
	le?vperm	$out0,$out0,$out0,$leperm
	stvx_u		$out0,$x00,$out		# store output
	addi		$out,$out,0x10
	vxor		$out0,$in0,$twk2
	bne		Lxts_dec6x_steal
	b		Lxts_dec6x_done

.align	4
Lxts_dec6x_zero:
	cmpwi		$taillen,0
	beq		Lxts_dec6x_done

	lvx_u		$in0,0,$inp
	le?vperm	$in0,$in0,$in0,$leperm
	vxor		$out0,$in0,$twk1
Lxts_dec6x_steal:
	vncipher	$out0,$out0,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Lxts_dec6x_steal

	add		$inp,$inp,$taillen
	vncipher	$out0,$out0,v24

	cmpwi		$taillen,0
	vncipher	$out0,$out0,v25

	lvx_u		$in0,0,$inp
	vncipher	$out0,$out0,v26

	lvsr		$inpperm,0,$taillen	# $in5 is no more
	vncipher	$out0,$out0,v27

	addi		$key_,$sp,$FRAME+15	# rewind $key_
	vncipher	$out0,$out0,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]

	vncipher	$out0,$out0,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]
	vxor		$twk1,$twk1,v31

	le?vperm	$in0,$in0,$in0,$leperm
	vncipher	$out0,$out0,v30

	vperm		$in0,$in0,$in0,$inpperm
	vncipherlast	$tmp,$out0,$twk1

	le?vperm	$out0,$tmp,$tmp,$leperm
	le?stvx_u	$out0,0,$out
	be?stvx_u	$tmp,0,$out

	vxor		$out0,$out0,$out0
	vspltisb	$out1,-1
	vperm		$out0,$out0,$out1,$inpperm
	vsel		$out0,$in0,$tmp,$out0
	vxor		$out0,$out0,$twk0

	subi		r30,$out,1
	mtctr		$taillen
Loop_xts_dec6x_steal:
	lbzu		r0,1(r30)
	stb		r0,16(r30)
	bdnz		Loop_xts_dec6x_steal

	li		$taillen,0
	mtctr		$rounds
	b		Loop_xts_dec1x		# one more time...
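	# Below, the final tweak is chained back through the caller's iv
	# buffer unless $ivp was zeroed at entry (the $key2!=NULL path,
	# where the tweak is not meant to be reused).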

.align	4
Lxts_dec6x_done:
	${UCMP}i	$ivp,0
	beq		Lxts_dec6x_ret

	vxor		$tweak,$twk0,$rndkey0
	le?vperm	$tweak,$tweak,$tweak,$leperm
	stvx_u		$tweak,0,$ivp

Lxts_dec6x_ret:
	mtlr		r11
	li		r10,`$FRAME+15`
	li		r11,`$FRAME+31`
	stvx		$seven,r10,$sp		# wipe copies of round keys
	addi		r10,r10,32
	stvx		$seven,r11,$sp
	addi		r11,r11,32
	stvx		$seven,r10,$sp
	addi		r10,r10,32
	stvx		$seven,r11,$sp
	addi		r11,r11,32
	stvx		$seven,r10,$sp
	addi		r10,r10,32
	stvx		$seven,r11,$sp
	addi		r11,r11,32
	stvx		$seven,r10,$sp
	addi		r10,r10,32
	stvx		$seven,r11,$sp
	addi		r11,r11,32

	mtspr		256,$vrsave
	lvx		v20,r10,$sp		# ABI says so
	addi		r10,r10,32
	lvx		v21,r11,$sp
	addi		r11,r11,32
	lvx		v22,r10,$sp
	addi		r10,r10,32
	lvx		v23,r11,$sp
	addi		r11,r11,32
	lvx		v24,r10,$sp
	addi		r10,r10,32
	lvx		v25,r11,$sp
	addi		r11,r11,32
	lvx		v26,r10,$sp
	addi		r10,r10,32
	lvx		v27,r11,$sp
	addi		r11,r11,32
	lvx		v28,r10,$sp
	addi		r10,r10,32
	lvx		v29,r11,$sp
	addi		r11,r11,32
	lvx		v30,r10,$sp
	lvx		v31,r11,$sp
	$POP		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi		$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long		0
	.byte		0,12,0x04,1,0x80,6,6,0
	.long		0

.align	5
_aesp8_xts_dec5x:
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		_aesp8_xts_dec5x

	subi		r0,$taillen,1
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24

	andi.		r0,r0,16
	cmpwi		$taillen,0
	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vxor		$twk0,$twk0,v31

	sub		$inp,$inp,r0
	vncipher	$out0,$out0,v26
	vncipher	$out1,$out1,v26
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	vncipher	$out4,$out4,v26
	vxor		$in1,$twk1,v31

	vncipher	$out0,$out0,v27
	lvx_u		$in0,0,$inp
	vncipher	$out1,$out1,v27
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vncipher	$out4,$out4,v27
	vxor		$in2,$twk2,v31

	addi		$key_,$sp,$FRAME+15	# rewind $key_
	vncipher	$out0,$out0,v28
	vncipher	$out1,$out1,v28
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vncipher	$out4,$out4,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]
	vxor		$in3,$twk3,v31

	vncipher	$out0,$out0,v29
	le?vperm	$in0,$in0,$in0,$leperm
	vncipher	$out1,$out1,v29
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vncipher	$out4,$out4,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]
	vxor		$in4,$twk4,v31

	vncipher	$out0,$out0,v30
	vncipher	$out1,$out1,v30
	vncipher	$out2,$out2,v30
	vncipher	$out3,$out3,v30
	vncipher	$out4,$out4,v30

	vncipherlast	$out0,$out0,$twk0
	vncipherlast	$out1,$out1,$in1
	vncipherlast	$out2,$out2,$in2
	vncipherlast	$out3,$out3,$in3
	vncipherlast	$out4,$out4,$in4
	mtctr		$rounds
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,0,0
___
}}	}}}

my $consts=1;
foreach(split("\n",$code)) {
	s/\`([^\`]*)\`/eval($1)/geo;

	# constants table endian-specific conversion
	if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
		my $conv=$3;
		my @bytes=();

		# convert to endian-agnostic format
		if ($1 eq "long") {
			foreach (split(/,\s*/,$2)) {
				my $l = /^0/?oct:int;
				push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
			}
		} else {
			@bytes = map(/^0/?oct:int,split(/,\s*/,$2));
		}

		# little-endian conversion
		if ($flavour =~ /le$/o) {
			SWITCH: for($conv) {
				/\?inv/ && do { @bytes=map($_^0xf,@bytes); last; };
				/\?rev/ && do { @bytes=reverse(@bytes);    last; };
			}
		}

		# emit
		print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
		next;
	}
	$consts=0 if (m/Lconsts:/o);	# end of table

	# instructions prefixed with '?' are endian-specific and need
	# to be adjusted accordingly...
	if ($flavour =~ /le$/o) {	# little-endian
		s/le\?//o		or
		s/be\?/#be#/o		or
		s/\?lvsr/lvsl/o		or
		s/\?lvsl/lvsr/o		or
		s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
		s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
		s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
	} else {			# big-endian
		s/le\?/#le#/o		or
		s/be\?//o		or
		s/\?([a-z]+)/$1/o;
	}

	print $_,"\n";
}

close STDOUT;
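
# Typical stand-alone invocation, with the flavour argument first and
# the output file name passed through to the ppc-xlate.pl pipe, e.g.:
#
#	perl aesp8-ppc.pl linux-ppc64le aesp8-ppc.s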