ghashp8-ppc.pl - cachepc-linux - Fork of AMDESE/linux with modifications for CachePC side-channel attack

	cachepc-linux Fork of AMDESE/linux with modifications for CachePC side-channel attack
	git clone https://git.sinitax.com/sinitax/cachepc-linux
	Log \| Files \| Refs \| README \| LICENSE \| sfeed.txt
ghashp8-ppc.pl (5768B)
      1#!/usr/bin/env perl
      2# SPDX-License-Identifier: GPL-2.0
      3
      4# This code is taken from the OpenSSL project but the author (Andy Polyakov)
      5# has relicensed it under the GPLv2. Therefore this program is free software;
      6# you can redistribute it and/or modify it under the terms of the GNU General
      7# Public License version 2 as published by the Free Software Foundation.
      8#
      9# The original headers, including the original license headers, are
     10# included below for completeness.
     11
     12# ====================================================================
     13# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
     14# project. The module is, however, dual licensed under OpenSSL and
     15# CRYPTOGAMS licenses depending on where you obtain it. For further
     16# details see https://www.openssl.org/~appro/cryptogams/.
     17# ====================================================================
     18#
     19# GHASH for PowerISA v2.07.
     20#
     21# July 2014
     22#
     23# Accurate performance measurements are problematic, because the
     24# setup is always virtualized, with a possibly throttled processor.
     25# Relative comparison is therefore more informative. This initial
     26# version is ~2.1x slower than hardware-assisted AES-128-CTR, ~12x
     27# faster than "4-bit" integer-only compiler-generated 64-bit code.
     28# "Initial version" means that there is room for further improvement.
     29
     30$flavour=shift;	# 1st argument: target flavour; tested below for 64 or 32, and later for a trailing "le"
     31$output =shift;	# 2nd argument: handed unchanged to ppc-xlate.pl on its command line
     32# Pick word size, link-register save offset and store/load mnemonics for the flavour; none of these five variables is referenced again in this listing.
     33if ($flavour =~ /64/) {
     34	$SIZE_T=8;
     35	$LRSAVE=2*$SIZE_T;
     36	$STU="stdu";
     37	$POP="ld";
     38	$PUSH="std";
     39} elsif ($flavour =~ /32/) {
     40	$SIZE_T=4;
     41	$LRSAVE=$SIZE_T;
     42	$STU="stwu";
     43	$POP="lwz";
     44	$PUSH="stw";
     45} else { die "nonsense $flavour"; }	# flavour contains neither 64 nor 32
     46
     47$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;	# directory part of this script's path, including the trailing separator
     48( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or	# look for the translator beside this script first
     49( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or	# then under ../../perlasm
     50die "can't locate ppc-xlate.pl";
     51# Send everything printed from here on through ppc-xlate.pl. Low-precedence "or" is required: "||" binds to the command string, which is always true, so a failed open was never reported.
     52open STDOUT,"| $^X $xlate $flavour $output" or die "can't call $xlate: $!";
     53
     54my ($Xip,$Htbl,$inp,$len)=map("r$_",(3..6));	# argument block: r3..r6, in this order
     55# Vector register aliases interpolated into the assembly template: v0-v3 first, then v4-v12.
     56my ($Xl,$Xm,$Xh,$IN)=map("v$_",(0..3));
     57my ($zero,$t0,$t1,$t2,$xC2,$H,$Hh,$Hl,$lemask)=map("v$_",(4..12));
     58my $vrsave="r12";	# GPR that receives SPR 256 at routine entry and is written back to SPR 256 before each blr
     59# Assembly template for .gcm_init_p8, .gcm_gmult_p8 and .gcm_ghash_p8. Perl interpolates the $-named register aliases; le?-tagged lines are resolved per flavour by the loop after the template. gcm_init_p8 loads H via r4 and stores four 16-byte vectors at r3+0/0x10/0x20/0x30; gcm_gmult_p8 and gcm_ghash_p8 load those four vectors via $Htbl and rewrite Xi in place at $Xip; gcm_ghash_p8 also reads input at $inp in 16-byte steps while counting $len down.
     60$code=<<___;	# heredoc runs to the ___ terminator; '#' text inside it is emitted as part of the assembly, it is not a Perl comment
     61.machine	"any"
     62
     63.text
     64
     65.globl	.gcm_init_p8
     66	lis		r0,0xfff0
     67	li		r8,0x10
     68	mfspr		$vrsave,256
     69	li		r9,0x20
     70	mtspr		256,r0
     71	li		r10,0x30
     72	lvx_u		$H,0,r4			# load H
     73	le?xor		r7,r7,r7
     74	le?addi		r7,r7,0x8		# need a vperm start with 08
     75	le?lvsr		5,0,r7
     76	le?vspltisb	6,0x0f
     77	le?vxor		5,5,6			# set a b-endian mask
     78	le?vperm	$H,$H,$H,5
     79
     80	vspltisb	$xC2,-16		# 0xf0
     81	vspltisb	$t0,1			# one
     82	vaddubm		$xC2,$xC2,$xC2		# 0xe0
     83	vxor		$zero,$zero,$zero
     84	vor		$xC2,$xC2,$t0		# 0xe1
     85	vsldoi		$xC2,$xC2,$zero,15	# 0xe1...
     86	vsldoi		$t1,$zero,$t0,1		# ...1
     87	vaddubm		$xC2,$xC2,$xC2		# 0xc2...
     88	vspltisb	$t2,7
     89	vor		$xC2,$xC2,$t1		# 0xc2....01
     90	vspltb		$t1,$H,0		# most significant byte
     91	vsl		$H,$H,$t0		# H<<=1
     92	vsrab		$t1,$t1,$t2		# broadcast carry bit
     93	vand		$t1,$t1,$xC2
     94	vxor		$H,$H,$t1		# twisted H
     95
     96	vsldoi		$H,$H,$H,8		# twist even more ...
     97	vsldoi		$xC2,$zero,$xC2,8	# 0xc2.0
     98	vsldoi		$Hl,$zero,$H,8		# ... and split
     99	vsldoi		$Hh,$H,$zero,8
    100
    101	stvx_u		$xC2,0,r3		# save pre-computed table
    102	stvx_u		$Hl,r8,r3
    103	stvx_u		$H, r9,r3
    104	stvx_u		$Hh,r10,r3
    105
    106	mtspr		256,$vrsave
    107	blr
    108	.long		0
    109	.byte		0,12,0x14,0,0,0,2,0
    110	.long		0
    111.size	.gcm_init_p8,.-.gcm_init_p8
    112
    113.globl	.gcm_gmult_p8
    114	lis		r0,0xfff8
    115	li		r8,0x10
    116	mfspr		$vrsave,256
    117	li		r9,0x20
    118	mtspr		256,r0
    119	li		r10,0x30
    120	lvx_u		$IN,0,$Xip		# load Xi
    121
    122	lvx_u		$Hl,r8,$Htbl		# load pre-computed table
    123	 le?lvsl	$lemask,r0,r0
    124	lvx_u		$H, r9,$Htbl
    125	 le?vspltisb	$t0,0x07
    126	lvx_u		$Hh,r10,$Htbl
    127	 le?vxor	$lemask,$lemask,$t0
    128	lvx_u		$xC2,0,$Htbl
    129	 le?vperm	$IN,$IN,$IN,$lemask
    130	vxor		$zero,$zero,$zero
    131
    132	vpmsumd		$Xl,$IN,$Hl		# H.lo·Xi.lo
    133	vpmsumd		$Xm,$IN,$H		# H.hi·Xi.lo+H.lo·Xi.hi
    134	vpmsumd		$Xh,$IN,$Hh		# H.hi·Xi.hi
    135
    136	vpmsumd		$t2,$Xl,$xC2		# 1st phase
    137
    138	vsldoi		$t0,$Xm,$zero,8
    139	vsldoi		$t1,$zero,$Xm,8
    140	vxor		$Xl,$Xl,$t0
    141	vxor		$Xh,$Xh,$t1
    142
    143	vsldoi		$Xl,$Xl,$Xl,8
    144	vxor		$Xl,$Xl,$t2
    145
    146	vsldoi		$t1,$Xl,$Xl,8		# 2nd phase
    147	vpmsumd		$Xl,$Xl,$xC2
    148	vxor		$t1,$t1,$Xh
    149	vxor		$Xl,$Xl,$t1
    150
    151	le?vperm	$Xl,$Xl,$Xl,$lemask
    152	stvx_u		$Xl,0,$Xip		# write out Xi
    153
    154	mtspr		256,$vrsave
    155	blr
    156	.long		0
    157	.byte		0,12,0x14,0,0,0,2,0
    158	.long		0
    159.size	.gcm_gmult_p8,.-.gcm_gmult_p8
    160
    161.globl	.gcm_ghash_p8
    162	lis		r0,0xfff8
    163	li		r8,0x10
    164	mfspr		$vrsave,256
    165	li		r9,0x20
    166	mtspr		256,r0
    167	li		r10,0x30
    168	lvx_u		$Xl,0,$Xip		# load Xi
    169
    170	lvx_u		$Hl,r8,$Htbl		# load pre-computed table
    171	 le?lvsl	$lemask,r0,r0
    172	lvx_u		$H, r9,$Htbl
    173	 le?vspltisb	$t0,0x07
    174	lvx_u		$Hh,r10,$Htbl
    175	 le?vxor	$lemask,$lemask,$t0
    176	lvx_u		$xC2,0,$Htbl
    177	 le?vperm	$Xl,$Xl,$Xl,$lemask
    178	vxor		$zero,$zero,$zero
    179
    180	lvx_u		$IN,0,$inp
    181	addi		$inp,$inp,16
    182	subi		$len,$len,16
    183	 le?vperm	$IN,$IN,$IN,$lemask
    184	vxor		$IN,$IN,$Xl
    185	b		Loop
    186
    187.align	5
    188Loop:
    189	 subic		$len,$len,16
    190	vpmsumd		$Xl,$IN,$Hl		# H.lo·Xi.lo
    191	 subfe.		r0,r0,r0		# borrow?-1:0
    192	vpmsumd		$Xm,$IN,$H		# H.hi·Xi.lo+H.lo·Xi.hi
    193	 and		r0,r0,$len
    194	vpmsumd		$Xh,$IN,$Hh		# H.hi·Xi.hi
    195	 add		$inp,$inp,r0
    196
    197	vpmsumd		$t2,$Xl,$xC2		# 1st phase
    198
    199	vsldoi		$t0,$Xm,$zero,8
    200	vsldoi		$t1,$zero,$Xm,8
    201	vxor		$Xl,$Xl,$t0
    202	vxor		$Xh,$Xh,$t1
    203
    204	vsldoi		$Xl,$Xl,$Xl,8
    205	vxor		$Xl,$Xl,$t2
    206	 lvx_u		$IN,0,$inp
    207	 addi		$inp,$inp,16
    208
    209	vsldoi		$t1,$Xl,$Xl,8		# 2nd phase
    210	vpmsumd		$Xl,$Xl,$xC2
    211	 le?vperm	$IN,$IN,$IN,$lemask
    212	vxor		$t1,$t1,$Xh
    213	vxor		$IN,$IN,$t1
    214	vxor		$IN,$IN,$Xl
    215	beq		Loop			# did $len-=16 borrow?
    216
    217	vxor		$Xl,$Xl,$t1
    218	le?vperm	$Xl,$Xl,$Xl,$lemask
    219	stvx_u		$Xl,0,$Xip		# write out Xi
    220
    221	mtspr		256,$vrsave
    222	blr
    223	.long		0
    224	.byte		0,12,0x14,0,0,0,4,0
    225	.long		0
    226.size	.gcm_ghash_p8,.-.gcm_ghash_p8
    227
    228.asciz  "GHASH for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
    229.align  2
    230___
    231# Print the template one line at a time, resolving the first le?/be? tag on each line for the target endianness.
    232foreach (split("\n",$code)) {
    233	if ($flavour =~ /le$/o) {	# little-endian
    234	    s/le\?//o		or	# drop the tag so the little-endian-only line is emitted as-is
    235	    s/be\?/#be#/o;		# otherwise rewrite a be? tag to #be#
    236	} else {
    237	    s/le\?/#le#/o	or	# other flavours: rewrite a le? tag to #le#
    238	    s/be\?//o;			# otherwise drop a be? tag
    239	}
    240	print $_,"\n";
    241}
    242# STDOUT is the pipe to ppc-xlate.pl; close on a piped open returns false when a write fails or the child exits non-zero, so report that instead of discarding it.
    243close STDOUT or die "error closing STDOUT: $! (child status $?)"; # enforce flush