cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

twofish-i586-asm_32.S (8357B)


/* SPDX-License-Identifier: GPL-2.0-or-later */
/***************************************************************************
*   Copyright (C) 2006 by Joachim Fritschi, <jfritschi@freenet.de>        *
*                                                                         *
***************************************************************************/

.file "twofish-i586-asm.S"
.text

#include <linux/linkage.h>
#include <asm/asm-offsets.h>

/* return address at 0 */

#define in_blk    12  /* input byte array address parameter */
#define out_blk   8   /* output byte array address parameter */
#define ctx       4   /* Twofish context structure */

#define a_offset	0
#define b_offset	4
#define c_offset	8
#define d_offset	12

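/*
 * Note on the offsets above: with the 32-bit asmlinkage/cdecl convention all
 * three arguments are passed on the stack, so on entry the return address is
 * at 0(%esp) and ctx/out_blk/in_blk sit at 4/8/12(%esp). The function bodies
 * below first push %ebp, %ebx, %esi and %edi (16 more bytes), which is why
 * they address the arguments as e.g. "ctx + 16(%esp)".
 */
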
/* Structure of the crypto context struct */

#define s0	0	/* S0 Array 256 Words each */
#define s1	1024	/* S1 Array */
#define s2	2048	/* S2 Array */
#define s3	3072	/* S3 Array */
#define w	4096	/* 8 whitening keys (word) */
#define k	4128	/* key 1-32 ( word ) */

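These byte offsets mirror the generic Twofish context from include/crypto/twofish.h (u32 s[4][256], w[8], k[32], filled in by the common key schedule). A minimal C sketch, not part of this file, that would sanity-check the offsets against that structure at compile time:

#include <linux/build_bug.h>
#include <linux/stddef.h>
#include <crypto/twofish.h>

/* Hypothetical helper: compile-time check that the #defines above match the
 * layout of struct twofish_ctx (4 * 256 * 4 = 4096 bytes of S-box tables,
 * then 8 whitening words, then 32 round-key words). */
static inline void twofish_i586_check_offsets(void)
{
	BUILD_BUG_ON(offsetof(struct twofish_ctx, s[0][0]) != 0);    /* s0 */
	BUILD_BUG_ON(offsetof(struct twofish_ctx, s[1][0]) != 1024); /* s1 */
	BUILD_BUG_ON(offsetof(struct twofish_ctx, s[2][0]) != 2048); /* s2 */
	BUILD_BUG_ON(offsetof(struct twofish_ctx, s[3][0]) != 3072); /* s3 */
	BUILD_BUG_ON(offsetof(struct twofish_ctx, w) != 4096);       /* w  */
	BUILD_BUG_ON(offsetof(struct twofish_ctx, k) != 4128);       /* k  */
}
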
/* define a few register aliases to allow macro substitution */

#define R0D    %eax
#define R0B    %al
#define R0H    %ah

#define R1D    %ebx
#define R1B    %bl
#define R1H    %bh

#define R2D    %ecx
#define R2B    %cl
#define R2H    %ch

#define R3D    %edx
#define R3B    %dl
#define R3H    %dh

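/*
 * The D/B/H suffixes select the full 32-bit register, its low byte and its
 * second byte respectively; the round macros below paste them onto the
 * a/b/c/d parameters with "##", so e.g. "movzx b ## B, %edi" with b = R1
 * expands to "movzx %bl, %edi".
 */
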
/* performs input whitening */
#define input_whitening(src,context,offset)\
	xor	w+offset(context),	src;

/* performs output whitening */
#define output_whitening(src,context,offset)\
	xor	w+16+offset(context),	src;

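Input whitening XORs the four 32-bit words of the block with w[0..3]; output whitening uses w[4..7], hence the extra +16 bytes in the second macro. In C this is roughly the following, assuming the context layout sketched above (the helper names are illustrative only):

#include <linux/types.h>
#include <crypto/twofish.h>

/* Illustrative C equivalents of the two whitening macros. */
static inline void twofish_input_whiten(const struct twofish_ctx *ctx, u32 x[4])
{
	int i;

	for (i = 0; i < 4; i++)
		x[i] ^= ctx->w[i];		/* xor w+offset(ctx), word */
}

static inline void twofish_output_whiten(const struct twofish_ctx *ctx, u32 x[4])
{
	int i;

	for (i = 0; i < 4; i++)
		x[i] ^= ctx->w[4 + i];		/* xor w+16+offset(ctx), word */
}
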
/*
 * a input register containing a (rotated 16)
 * b input register containing b
 * c input register containing c
 * d input register containing d (already rol $1)
 * operations on a and b are interleaved to increase performance
 */
#define encrypt_round(a,b,c,d,round)\
	push	d ## D;\
	movzx	b ## B,		%edi;\
	mov	s1(%ebp,%edi,4),d ## D;\
	movzx	a ## B,		%edi;\
	mov	s2(%ebp,%edi,4),%esi;\
	movzx	b ## H,		%edi;\
	ror	$16,		b ## D;\
	xor	s2(%ebp,%edi,4),d ## D;\
	movzx	a ## H,		%edi;\
	ror	$16,		a ## D;\
	xor	s3(%ebp,%edi,4),%esi;\
	movzx	b ## B,		%edi;\
	xor	s3(%ebp,%edi,4),d ## D;\
	movzx	a ## B,		%edi;\
	xor	(%ebp,%edi,4),	%esi;\
	movzx	b ## H,		%edi;\
	ror	$15,		b ## D;\
	xor	(%ebp,%edi,4),	d ## D;\
	movzx	a ## H,		%edi;\
	xor	s1(%ebp,%edi,4),%esi;\
	pop	%edi;\
	add	d ## D,		%esi;\
	add	%esi,		d ## D;\
	add	k+round(%ebp),	%esi;\
	xor	%esi,		c ## D;\
	rol	$15,		c ## D;\
	add	k+4+round(%ebp),d ## D;\
	xor	%edi,		d ## D;

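Stripped of the interleaving and of the folded 16-bit register rotations (the rol $15/ror $15 pairs combine the one-bit data rotation with the rotate-by-16 convention the registers are kept in between rounds), one encrypt_round computes the standard Twofish round. A rough C sketch, assuming the context layout above; g_lookup() is a hypothetical helper standing in for the four S-box table lookups:

#include <linux/bitops.h>
#include <crypto/twofish.h>

/* Hypothetical helper: the g function, four key-dependent table lookups. */
static u32 g_lookup(const struct twofish_ctx *ctx, u32 x)
{
	return ctx->s[0][x & 0xff] ^
	       ctx->s[1][(x >> 8) & 0xff] ^
	       ctx->s[2][(x >> 16) & 0xff] ^
	       ctx->s[3][x >> 24];
}

/* One forward round, r = 0..15; the macro's "round" argument is 8 * r. */
static void encrypt_round_c(const struct twofish_ctx *ctx, unsigned int r,
			    u32 *a, u32 *b, u32 *c, u32 *d)
{
	u32 t0 = g_lookup(ctx, *a);		/* accumulated in %esi above     */
	u32 t1 = g_lookup(ctx, rol32(*b, 8));	/* accumulated in the d register */

	*c = ror32(*c ^ (t0 + t1 + ctx->k[2 * r]), 1);
	*d = rol32(*d, 1) ^ (t0 + 2 * t1 + ctx->k[2 * r + 1]);
}
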
/*
 * a input register containing a (rotated 16)
 * b input register containing b
 * c input register containing c
 * d input register containing d (already rol $1)
 * operations on a and b are interleaved to increase performance
 * last round has different rotations for the output preparation
 */
#define encrypt_last_round(a,b,c,d,round)\
	push	d ## D;\
	movzx	b ## B,		%edi;\
	mov	s1(%ebp,%edi,4),d ## D;\
	movzx	a ## B,		%edi;\
	mov	s2(%ebp,%edi,4),%esi;\
	movzx	b ## H,		%edi;\
	ror	$16,		b ## D;\
	xor	s2(%ebp,%edi,4),d ## D;\
	movzx	a ## H,		%edi;\
	ror	$16,		a ## D;\
	xor	s3(%ebp,%edi,4),%esi;\
	movzx	b ## B,		%edi;\
	xor	s3(%ebp,%edi,4),d ## D;\
	movzx	a ## B,		%edi;\
	xor	(%ebp,%edi,4),	%esi;\
	movzx	b ## H,		%edi;\
	ror	$16,		b ## D;\
	xor	(%ebp,%edi,4),	d ## D;\
	movzx	a ## H,		%edi;\
	xor	s1(%ebp,%edi,4),%esi;\
	pop	%edi;\
	add	d ## D,		%esi;\
	add	%esi,		d ## D;\
	add	k+round(%ebp),	%esi;\
	xor	%esi,		c ## D;\
	ror	$1,		c ## D;\
	add	k+4+round(%ebp),d ## D;\
	xor	%edi,		d ## D;

/*
 * a input register containing a
 * b input register containing b (rotated 16)
 * c input register containing c (already rol $1)
 * d input register containing d
 * operations on a and b are interleaved to increase performance
 */
#define decrypt_round(a,b,c,d,round)\
	push	c ## D;\
	movzx	a ## B,		%edi;\
	mov	(%ebp,%edi,4),	c ## D;\
	movzx	b ## B,		%edi;\
	mov	s3(%ebp,%edi,4),%esi;\
	movzx	a ## H,		%edi;\
	ror	$16,		a ## D;\
	xor	s1(%ebp,%edi,4),c ## D;\
	movzx	b ## H,		%edi;\
	ror	$16,		b ## D;\
	xor	(%ebp,%edi,4),	%esi;\
	movzx	a ## B,		%edi;\
	xor	s2(%ebp,%edi,4),c ## D;\
	movzx	b ## B,		%edi;\
	xor	s1(%ebp,%edi,4),%esi;\
	movzx	a ## H,		%edi;\
	ror	$15,		a ## D;\
	xor	s3(%ebp,%edi,4),c ## D;\
	movzx	b ## H,		%edi;\
	xor	s2(%ebp,%edi,4),%esi;\
	pop	%edi;\
	add	%esi,		c ## D;\
	add	c ## D,		%esi;\
	add	k+round(%ebp),	c ## D;\
	xor	%edi,		c ## D;\
	add	k+4+round(%ebp),%esi;\
	xor	%esi,		d ## D;\
	rol	$15,		d ## D;

/*
 * a input register containing a
 * b input register containing b (rotated 16)
 * c input register containing c (already rol $1)
 * d input register containing d
 * operations on a and b are interleaved to increase performance
 * last round has different rotations for the output preparation
 */
#define decrypt_last_round(a,b,c,d,round)\
	push	c ## D;\
	movzx	a ## B,		%edi;\
	mov	(%ebp,%edi,4),	c ## D;\
	movzx	b ## B,		%edi;\
	mov	s3(%ebp,%edi,4),%esi;\
	movzx	a ## H,		%edi;\
	ror	$16,		a ## D;\
	xor	s1(%ebp,%edi,4),c ## D;\
	movzx	b ## H,		%edi;\
	ror	$16,		b ## D;\
	xor	(%ebp,%edi,4),	%esi;\
	movzx	a ## B,		%edi;\
	xor	s2(%ebp,%edi,4),c ## D;\
	movzx	b ## B,		%edi;\
	xor	s1(%ebp,%edi,4),%esi;\
	movzx	a ## H,		%edi;\
	ror	$16,		a ## D;\
	xor	s3(%ebp,%edi,4),c ## D;\
	movzx	b ## H,		%edi;\
	xor	s2(%ebp,%edi,4),%esi;\
	pop	%edi;\
	add	%esi,		c ## D;\
	add	c ## D,		%esi;\
	add	k+round(%ebp),	c ## D;\
	xor	%edi,		c ## D;\
	add	k+4+round(%ebp),%esi;\
	xor	%esi,		d ## D;\
	ror	$1,		d ## D;

SYM_FUNC_START(twofish_enc_blk)
	push	%ebp			/* save registers according to calling convention */
	push    %ebx
	push    %esi
	push    %edi

	mov	ctx + 16(%esp),	%ebp	/* abuse the base pointer: set new base
					 * pointer to the ctx address */
	mov     in_blk+16(%esp),%edi	/* input address in edi */

	mov	(%edi),		%eax
	mov	b_offset(%edi),	%ebx
	mov	c_offset(%edi),	%ecx
	mov	d_offset(%edi),	%edx
	input_whitening(%eax,%ebp,a_offset)
	ror	$16,	%eax
	input_whitening(%ebx,%ebp,b_offset)
	input_whitening(%ecx,%ebp,c_offset)
	input_whitening(%edx,%ebp,d_offset)
	rol	$1,	%edx

	encrypt_round(R0,R1,R2,R3,0);
	encrypt_round(R2,R3,R0,R1,8);
	encrypt_round(R0,R1,R2,R3,2*8);
	encrypt_round(R2,R3,R0,R1,3*8);
	encrypt_round(R0,R1,R2,R3,4*8);
	encrypt_round(R2,R3,R0,R1,5*8);
	encrypt_round(R0,R1,R2,R3,6*8);
	encrypt_round(R2,R3,R0,R1,7*8);
	encrypt_round(R0,R1,R2,R3,8*8);
	encrypt_round(R2,R3,R0,R1,9*8);
	encrypt_round(R0,R1,R2,R3,10*8);
	encrypt_round(R2,R3,R0,R1,11*8);
	encrypt_round(R0,R1,R2,R3,12*8);
	encrypt_round(R2,R3,R0,R1,13*8);
	encrypt_round(R0,R1,R2,R3,14*8);
	encrypt_last_round(R2,R3,R0,R1,15*8);

	output_whitening(%eax,%ebp,c_offset)
	output_whitening(%ebx,%ebp,d_offset)
	output_whitening(%ecx,%ebp,a_offset)
	output_whitening(%edx,%ebp,b_offset)
	mov	out_blk+16(%esp),%edi;
	mov	%eax,		c_offset(%edi)
	mov	%ebx,		d_offset(%edi)
	mov	%ecx,		(%edi)
	mov	%edx,		b_offset(%edi)

	pop	%edi
	pop	%esi
	pop	%ebx
	pop	%ebp
	mov	$1,	%eax
	RET
SYM_FUNC_END(twofish_enc_blk)

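From C this entry point is reached through the 32-bit glue code (arch/x86/crypto/twofish_glue.c), which hands it the per-tfm context prepared by the generic twofish_setkey(); the value the asm leaves in %eax is ignored. Roughly:

#include <linux/crypto.h>
#include <crypto/twofish.h>

asmlinkage void twofish_enc_blk(struct twofish_ctx *ctx, u8 *dst, const u8 *src);

/* The glue code wraps the asm entry point like this; crypto_tfm_ctx() returns
 * the struct twofish_ctx that twofish_setkey() filled in. */
static void twofish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
	twofish_enc_blk(crypto_tfm_ctx(tfm), dst, src);
}
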
SYM_FUNC_START(twofish_dec_blk)
	push	%ebp			/* save registers according to calling convention */
	push    %ebx
	push    %esi
	push    %edi


	mov	ctx + 16(%esp),	%ebp	/* abuse the base pointer: set new base
					 * pointer to the ctx address */
	mov     in_blk+16(%esp),%edi	/* input address in edi */

	mov	(%edi),		%eax
	mov	b_offset(%edi),	%ebx
	mov	c_offset(%edi),	%ecx
	mov	d_offset(%edi),	%edx
	output_whitening(%eax,%ebp,a_offset)
	output_whitening(%ebx,%ebp,b_offset)
	ror	$16,	%ebx
	output_whitening(%ecx,%ebp,c_offset)
	output_whitening(%edx,%ebp,d_offset)
	rol	$1,	%ecx

	decrypt_round(R0,R1,R2,R3,15*8);
	decrypt_round(R2,R3,R0,R1,14*8);
	decrypt_round(R0,R1,R2,R3,13*8);
	decrypt_round(R2,R3,R0,R1,12*8);
	decrypt_round(R0,R1,R2,R3,11*8);
	decrypt_round(R2,R3,R0,R1,10*8);
	decrypt_round(R0,R1,R2,R3,9*8);
	decrypt_round(R2,R3,R0,R1,8*8);
	decrypt_round(R0,R1,R2,R3,7*8);
	decrypt_round(R2,R3,R0,R1,6*8);
	decrypt_round(R0,R1,R2,R3,5*8);
	decrypt_round(R2,R3,R0,R1,4*8);
	decrypt_round(R0,R1,R2,R3,3*8);
	decrypt_round(R2,R3,R0,R1,2*8);
	decrypt_round(R0,R1,R2,R3,1*8);
	decrypt_last_round(R2,R3,R0,R1,0);

	input_whitening(%eax,%ebp,c_offset)
	input_whitening(%ebx,%ebp,d_offset)
	input_whitening(%ecx,%ebp,a_offset)
	input_whitening(%edx,%ebp,b_offset)
	mov	out_blk+16(%esp),%edi;
	mov	%eax,		c_offset(%edi)
	mov	%ebx,		d_offset(%edi)
	mov	%ecx,		(%edi)
	mov	%edx,		b_offset(%edi)

	pop	%edi
	pop	%esi
	pop	%ebx
	pop	%ebp
	mov	$1,	%eax
	RET
SYM_FUNC_END(twofish_dec_blk)
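
Both entry points operate on a single 16-byte block with the same context; twofish_dec_blk runs the subkeys, whitening and rounds in reverse order, so decryption inverts encryption. A minimal usage sketch through the kernel's single-block cipher API, which is what ends up calling these functions when the twofish-asm driver is selected (note that recent kernels gate this API behind crypto/internal/cipher.h, and the names here are illustrative):

#include <linux/crypto.h>
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/string.h>

/* Illustrative only: encrypt one block and check that decryption inverts it. */
static int twofish_roundtrip(const u8 key[32], const u8 in[16])
{
	struct crypto_cipher *tfm;
	u8 ct[16], pt[16];
	int ret;

	tfm = crypto_alloc_cipher("twofish", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	ret = crypto_cipher_setkey(tfm, key, 32);
	if (!ret) {
		crypto_cipher_encrypt_one(tfm, ct, in);	/* -> twofish_enc_blk */
		crypto_cipher_decrypt_one(tfm, pt, ct);	/* -> twofish_dec_blk */
		ret = memcmp(pt, in, 16) ? -EINVAL : 0;
	}

	crypto_free_cipher(tfm);
	return ret;
}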