cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

twofish-x86_64-asm_64-3way.S (6589B)


      1/* SPDX-License-Identifier: GPL-2.0-or-later */
      2/*
      3 * Twofish Cipher 3-way parallel algorithm (x86_64)
      4 *
      5 * Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
      6 */
      7
      8#include <linux/linkage.h>
      9
     10.file "twofish-x86_64-asm-3way.S"
       /*
        * NOTE(review): the string above differs from the file name shown in the
        * listing header ("twofish-x86_64-asm_64-3way.S") - confirm this is intended.
        */
     11.text
     12
     13/*
        * structure of crypto context
        *
        * Byte offsets from CTX. s0..s3 are indexed below with a zero-extended
        * byte scaled by 4 and lie 1024 bytes apart, i.e. 256 32-bit entries each.
        * w is accessed as 64-bit quantities at w+4*m (m = 0, 2, 4, 6); k is read
        * as 32-bit words at k+4*(2*n) and k+4*(2*n+1) for round numbers n = 0..15.
        */
     14#define s0	0
     15#define s1	1024
     16#define s2	2048
     17#define s3	3072
     18#define w	4096
     19#define k	4128
     20
     21/**********************************************************************
     22  3-way twofish
     23 **********************************************************************/
       /*
        * CTX is the context pointer (%rdi). RIO holds src on entry; both functions
        * below reload it with dst once the rounds are done.
        */
     24#define CTX %rdi
     25#define RIO %rdx
     26
       /*
        * RABn: 64-bit working register of block n (n = 0, 1, 2), followed by its
        * 32-bit (d), bits 8-15 (bh) and bits 0-7 (bl) views. %rbx is pushed and
        * popped by the functions below; %rcx carries the xor flag on entry to the
        * encrypt function, which therefore saves it on the stack first.
        */
     27#define RAB0 %rax
     28#define RAB1 %rbx
     29#define RAB2 %rcx
     30
     31#define RAB0d %eax
     32#define RAB1d %ebx
     33#define RAB2d %ecx
     34
     35#define RAB0bh %ah
     36#define RAB1bh %bh
     37#define RAB2bh %ch
     38
     39#define RAB0bl %al
     40#define RAB1bl %bl
     41#define RAB2bl %cl
     42
       /* CDn: 8-byte slots at the top of the stack, filled by push_cd(). */
     43#define CD0 0x0(%rsp)
     44#define CD1 0x8(%rsp)
     45#define CD2 0x10(%rsp)
     46
       /* RCDn and RXn below name the same registers (%r8..%r10). */
     47# used only before/after all rounds
     48#define RCD0 %r8
     49#define RCD1 %r9
     50#define RCD2 %r10
     51
     52# used only during rounds
     53#define RX0 %r8
     54#define RX1 %r9
     55#define RX2 %r10
     56
     57#define RX0d %r8d
     58#define RX1d %r9d
     59#define RX2d %r10d
     60
       /*
        * RYn: second per-block temporary of the round macros. %r12 and %r13 are
        * pushed and popped by the functions below.
        */
     61#define RY0 %r11
     62#define RY1 %r12
     63#define RY2 %r13
     64
     65#define RY0d %r11d
     66#define RY1d %r12d
     67#define RY2d %r13d
     68
       /*
        * Scratch registers. RT0 is the same register as RIO and RT1 is the dst
        * argument register, so the functions below keep dst on the stack while
        * the rounds run.
        */
     69#define RT0 %rdx
     70#define RT1 %rsi
     71
     72#define RT0d %edx
     73#define RT1d %esi
     74
     75#define RT1bl %sil
     76
     77#define do16bit_ror(rot, op1, op2, T0, T1, tmp1, tmp2, ab, dst) \
	/* \
	 * Two table lookups indexed by the low two bytes of ab, plus a rotate: \
	 *   tmp2 = bits 0-7 of ab, tmp1 = bits 8-15 of ab (zero-extended) \
	 *   ab is rotated right by rot bits \
	 *   dst is combined (op1) with the 32-bit word at CTX + T0 + 4*tmp2 \
	 *   dst is combined (op2) with the 32-bit word at CTX + T1 + 4*tmp1 \
	 * op1 and op2 are mnemonic stems that get an l suffix; this file passes \
	 * mov (dst = word) or xor (dst ^= word). With op1 = mov, tmp2 may be the \
	 * same register as dst, which is how the first g1g2_3 steps call it. \
	 */ \
     78	movzbl ab ## bl,		tmp2 ## d; \
     79	movzbl ab ## bh,		tmp1 ## d; \
     80	rorq $(rot),			ab; \
     81	op1##l T0(CTX, tmp2, 4),	dst ## d; \
     82	op2##l T1(CTX, tmp1, 4),	dst ## d;
     83
     84#define swap_ab_with_cd(ab, cd, tmp)	\
	/* Exchange ab and cd through tmp: tmp = cd; cd = ab; ab = tmp. */ \
	/* g1g2_3 passes a register as ab, a CDn stack slot as cd and RT0 as tmp. */ \
     85	movq cd, tmp;			\
     86	movq ab, cd;			\
     87	movq tmp, ab;
     88
     89/*
     90 * Combined G1 & G2 function. Reordered with help of rotates to have moves
     91 * at beginning.
        *
        * For each of the three blocks (suffix 0, 1, 2) this computes, from the
        * 64-bit value in ab:
        *   x = Tx0[byte 0] ^ Tx1[byte 1] ^ Tx2[byte 2] ^ Tx3[byte 3]
        *       using the bytes of the low 32 bits of ab
        *   y = Ty1[byte 0] ^ Ty2[byte 1] ^ Ty3[byte 2] ^ Ty0[byte 3]
        *       using the bytes of the high 32 bits of ab
        * The rotate counts of the four steps add up to 32+48+32+16 = 128 bits, so
        * ab is back at its original value before it is exchanged with the cd
        * stack slot. RT0 and RT1 are used as scratch.
     92 */
     93#define g1g2_3(ab, cd, Tx0, Tx1, Tx2, Tx3, Ty0, Ty1, Ty2, Ty3, x, y) \
     94	/* G1,1 && G2,1 */ \
	/* First half: x and y are initialised (mov) and double as index registers. */ \
     95	do16bit_ror(32, mov, xor, Tx0, Tx1, RT0, x ## 0, ab ## 0, x ## 0); \
     96	do16bit_ror(48, mov, xor, Ty1, Ty2, RT0, y ## 0, ab ## 0, y ## 0); \
     97	\
     98	do16bit_ror(32, mov, xor, Tx0, Tx1, RT0, x ## 1, ab ## 1, x ## 1); \
     99	do16bit_ror(48, mov, xor, Ty1, Ty2, RT0, y ## 1, ab ## 1, y ## 1); \
    100	\
    101	do16bit_ror(32, mov, xor, Tx0, Tx1, RT0, x ## 2, ab ## 2, x ## 2); \
    102	do16bit_ror(48, mov, xor, Ty1, Ty2, RT0, y ## 2, ab ## 2, y ## 2); \
    103	\
    104	/* G1,2 && G2,2 */ \
	/* Second half: x and y are accumulated (xor), so RT0/RT1 hold the indices. */ \
    105	do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 0, x ## 0); \
    106	do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 0, y ## 0); \
    107	swap_ab_with_cd(ab ## 0, cd ## 0, RT0); \
    108	\
    109	do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 1, x ## 1); \
    110	do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 1, y ## 1); \
    111	swap_ab_with_cd(ab ## 1, cd ## 1, RT0); \
    112	\
    113	do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 2, x ## 2); \
    114	do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 2, y ## 2); \
    115	swap_ab_with_cd(ab ## 2, cd ## 2, RT0);
    116
    117#define enc_round_end(ab, x, y, n) \
	/* \
	 * Finish encryption round n for one block. x and y are the two 32-bit \
	 * temporaries, ab is the 64-bit value being updated: \
	 *   x += y; y += x; x += k[2n]; y += k[2n+1]      (k[] = 32-bit words) \
	 *   new low 32 bits of ab  = ror32(low32(ab) ^ x, 1) \
	 *   new high 32 bits of ab = rol32(high32(ab), 1) ^ y \
	 * The 32-bit operations leave the upper half of x zero, so the final orq \
	 * only fills in the low half of ab. \
	 */ \
    118	addl y ## d,			x ## d; \
    119	addl x ## d,			y ## d; \
    120	addl k+4*(2*(n))(CTX),		x ## d; \
    121	xorl ab ## d,			x ## d; \
    122	addl k+4*(2*(n)+1)(CTX),	y ## d; \
    123	shrq $32,			ab; \
    124	roll $1,			ab ## d; \
    125	xorl y ## d,			ab ## d; \
    126	shlq $32,			ab; \
    127	rorl $1,			x ## d; \
    128	orq x,				ab;
    129
    130#define dec_round_end(ba, x, y, n) \
	/* \
	 * Finish decryption round n for one block. x and y are the two 32-bit \
	 * temporaries, ba is the 64-bit value being updated: \
	 *   x += y; y += x; x += k[2n]; y += k[2n+1]      (k[] = 32-bit words) \
	 *   new low 32 bits of ba  = ror32(low32(ba) ^ y, 1) \
	 *   new high 32 bits of ba = rol32(high32(ba), 1) ^ x \
	 * The 32-bit operations leave the upper half of y zero, so the final orq \
	 * only fills in the low half of ba. \
	 */ \
    131	addl y ## d,			x ## d; \
    132	addl x ## d,			y ## d; \
    133	addl k+4*(2*(n))(CTX),		x ## d; \
    134	addl k+4*(2*(n)+1)(CTX),	y ## d; \
    135	xorl ba ## d,			y ## d; \
    136	shrq $32,			ba; \
    137	roll $1,			ba ## d; \
    138	xorl x ## d,			ba ## d; \
    139	shlq $32,			ba; \
    140	rorl $1,			y ## d; \
    141	orq y,				ba;
    142
    143#define encrypt_round3(ab, cd, n) \
	/* \
	 * Encryption round n on all three blocks. g1g2_3 fills RXi from the low \
	 * half of ab ## i via s0,s1,s2,s3 and RYi from the high half via \
	 * s1,s2,s3,s0 (byte 0..3), then swaps ab ## i with cd ## i. The \
	 * enc_round_end calls therefore update the value that was in cd ## i \
	 * when the round started. \
	 */ \
    144	g1g2_3(ab, cd, s0, s1, s2, s3, s0, s1, s2, s3, RX, RY); \
    145	\
    146	enc_round_end(ab ## 0, RX0, RY0, n); \
    147	enc_round_end(ab ## 1, RX1, RY1, n); \
    148	enc_round_end(ab ## 2, RX2, RY2, n);
    149
    150#define decrypt_round3(ba, dc, n) \
	/* \
	 * Decryption round n on all three blocks. g1g2_3 is handed RY as its x \
	 * output and RX as its y output: RYi comes from the low half of ba ## i \
	 * via s1,s2,s3,s0 and RXi from the high half via s0,s1,s2,s3 (byte 0..3). \
	 * g1g2_3 then swaps ba ## i with dc ## i, so the dec_round_end calls \
	 * update the value that was in dc ## i when the round started. \
	 */ \
    151	g1g2_3(ba, dc, s1, s2, s3, s0, s3, s0, s1, s2, RY, RX); \
    152	\
    153	dec_round_end(ba ## 0, RX0, RY0, n); \
    154	dec_round_end(ba ## 1, RX1, RY1, n); \
    155	dec_round_end(ba ## 2, RX2, RY2, n);
    156
    157#define encrypt_cycle3(ab, cd, n) \
	/* Cycle n = encryption rounds 2n and 2n+1, in that order. */ \
	/* n is pasted unparenthesised into n*2, so pass a plain number. */ \
    158	encrypt_round3(ab, cd, n*2); \
    159	encrypt_round3(ab, cd, (n*2)+1);
    160
    161#define decrypt_cycle3(ba, dc, n) \
	/* Cycle n = decryption rounds 2n+1 and 2n, in that (descending) order. */ \
	/* n is pasted unparenthesised into n*2, so pass a plain number. */ \
    162	decrypt_round3(ba, dc, (n*2)+1); \
    163	decrypt_round3(ba, dc, (n*2));
    164
    165#define push_cd()	\
	/* Spill RCD0..2 to the stack; RCD2 is pushed first so that RCD0 ends up */ \
	/* at 0(%rsp), RCD1 at 8(%rsp) and RCD2 at 16(%rsp), i.e. at CD0..CD2. */ \
    166	pushq RCD2;	\
    167	pushq RCD1;	\
    168	pushq RCD0;
    169
    170#define pop_cd()	\
	/* Reload RCD0..2 from the CD0..CD2 stack slots, undoing push_cd(). */ \
    171	popq RCD0;	\
    172	popq RCD1;	\
    173	popq RCD2;
    174
    175#define inpack3(in, n, xy, m) \
	/* \
	 * Load one 64-bit half of each of the three blocks and XOR it with the \
	 * 64-bit context value at w+4*m. Block i is read from in + 4*(4*i + n), \
	 * i.e. consecutive blocks are 16 bytes apart, into xy ## i. \
	 * m is pasted unparenthesised into w+4*m, so pass a plain number. \
	 */ \
    176	movq 4*(n)(in),			xy ## 0; \
    177	xorq w+4*m(CTX),		xy ## 0; \
    178	\
    179	movq 4*(4+(n))(in),		xy ## 1; \
    180	xorq w+4*m(CTX),		xy ## 1; \
    181	\
    182	movq 4*(8+(n))(in),		xy ## 2; \
    183	xorq w+4*m(CTX),		xy ## 2;
    184
    185#define outunpack3(op, out, n, xy, m) \
	/* \
	 * XOR xy ## i with the 64-bit context value at w+4*m, then apply op ## q \
	 * from the register to memory at out + 4*(4*i + n) for i = 0, 1, 2. \
	 * This file passes op = mov (store) or op = xor (XOR into the destination). \
	 * m is pasted unparenthesised into w+4*m, so pass a plain number. \
	 */ \
    186	xorq w+4*m(CTX),		xy ## 0; \
    187	op ## q xy ## 0,		4*(n)(out); \
    188	\
    189	xorq w+4*m(CTX),		xy ## 1; \
    190	op ## q xy ## 1,		4*(4+(n))(out); \
    191	\
    192	xorq w+4*m(CTX),		xy ## 2; \
    193	op ## q xy ## 2,		4*(8+(n))(out);
    194
    195#define inpack_enc3() \
	/* Encrypt input: RABi = block bytes 0-7 ^ w[0..1], RCDi = bytes 8-15 ^ w[2..3]. */ \
    196	inpack3(RIO, 0, RAB, 0); \
    197	inpack3(RIO, 2, RCD, 2);
    198
    199#define outunpack_enc3(op) \
	/* Encrypt output: RABi ^ w[6..7] goes to block bytes 8-15 and RCDi ^ w[4..5] */ \
	/* to bytes 0-7, i.e. the halves land in the opposite order to inpack_enc3(). */ \
    200	outunpack3(op, RIO, 2, RAB, 6); \
    201	outunpack3(op, RIO, 0, RCD, 4);
    202
    203#define inpack_dec3() \
	/* \
	 * Decrypt input: RABi = block bytes 0-7 ^ w[4..5], RCDi = bytes 8-15 ^ \
	 * w[6..7]. Each register is then rotated by 32 bits, which exchanges its \
	 * two 32-bit halves. \
	 */ \
    204	inpack3(RIO, 0, RAB, 4); \
    205	rorq $32,			RAB0; \
    206	rorq $32,			RAB1; \
    207	rorq $32,			RAB2; \
    208	inpack3(RIO, 2, RCD, 6); \
    209	rorq $32,			RCD0; \
    210	rorq $32,			RCD1; \
    211	rorq $32,			RCD2;
    212
    213#define outunpack_dec3() \
	/* \
	 * Decrypt output: exchange the 32-bit halves of each register again, then \
	 * store RCDi ^ w[0..1] to block bytes 0-7 and RABi ^ w[2..3] to bytes 8-15. \
	 */ \
    214	rorq $32,			RCD0; \
    215	rorq $32,			RCD1; \
    216	rorq $32,			RCD2; \
    217	outunpack3(mov, RIO, 0, RCD, 0); \
    218	rorq $32,			RAB0; \
    219	rorq $32,			RAB1; \
    220	rorq $32,			RAB2; \
    221	outunpack3(mov, RIO, 2, RAB, 2);
    222
    223SYM_FUNC_START(__twofish_enc_blk_3way)
    224	/* input:
    225	 *	%rdi: ctx, CTX
    226	 *	%rsi: dst
    227	 *	%rdx: src, RIO
    228	 *	%rcx: bool, if true: xor output
    229	 */
	/*
	 * Encrypts three 16-byte blocks read from src (48 bytes) and either stores
	 * the result to dst or XORs it into dst, depending on the bool argument.
	 */
	/* Save the callee-saved registers used as RY2, RY1 and RAB1. */
    230	pushq %r13;
    231	pushq %r12;
    232	pushq %rbx;
    233
	/* %rcx doubles as RAB2 and %rsi as RT1, so keep both arguments on the stack. */
    234	pushq %rcx; /* bool xor */
    235	pushq %rsi; /* dst */
    236
    237	inpack_enc3();
    238
	/* From here until pop_cd() the top of the stack holds CD0..CD2. */
    239	push_cd();
	/* 8 cycles = rounds 0..15. */
    240	encrypt_cycle3(RAB, CD, 0);
    241	encrypt_cycle3(RAB, CD, 1);
    242	encrypt_cycle3(RAB, CD, 2);
    243	encrypt_cycle3(RAB, CD, 3);
    244	encrypt_cycle3(RAB, CD, 4);
    245	encrypt_cycle3(RAB, CD, 5);
    246	encrypt_cycle3(RAB, CD, 6);
    247	encrypt_cycle3(RAB, CD, 7);
    248	pop_cd();
    249
	/* RIO now points at dst; the output macros write through it. */
    250	popq RIO; /* dst */
    251	popq RT1; /* bool xor */
    252
	/* Only the low byte of the flag is tested. */
    253	testb RT1bl, RT1bl;
    254	jnz .L__enc_xor3;
    255
	/* Flag clear: store the result to dst. */
    256	outunpack_enc3(mov);
    257
    258	popq %rbx;
    259	popq %r12;
    260	popq %r13;
    261	RET;
    262
    263.L__enc_xor3:
	/* Flag set: XOR the result into the data already at dst. */
    264	outunpack_enc3(xor);
    265
    266	popq %rbx;
    267	popq %r12;
    268	popq %r13;
    269	RET;
    270SYM_FUNC_END(__twofish_enc_blk_3way)
    271
    272SYM_FUNC_START(twofish_dec_blk_3way)
    273	/* input:
    274	 *	%rdi: ctx, CTX
    275	 *	%rsi: dst
    276	 *	%rdx: src, RIO
    277	 */
	/*
	 * Decrypts three 16-byte blocks read from src (48 bytes) and stores the
	 * result to dst.
	 */
	/* Save the callee-saved registers used as RY2, RY1 and RAB1. */
    278	pushq %r13;
    279	pushq %r12;
    280	pushq %rbx;
    281
	/* %rsi doubles as RT1 during the rounds, so keep dst on the stack. */
    282	pushq %rsi; /* dst */
    283
    284	inpack_dec3();
    285
	/* From here until pop_cd() the top of the stack holds CD0..CD2. */
    286	push_cd();
	/* Cycles 7 down to 0 = rounds 15 down to 0. */
    287	decrypt_cycle3(RAB, CD, 7);
    288	decrypt_cycle3(RAB, CD, 6);
    289	decrypt_cycle3(RAB, CD, 5);
    290	decrypt_cycle3(RAB, CD, 4);
    291	decrypt_cycle3(RAB, CD, 3);
    292	decrypt_cycle3(RAB, CD, 2);
    293	decrypt_cycle3(RAB, CD, 1);
    294	decrypt_cycle3(RAB, CD, 0);
    295	pop_cd();
    296
	/* RIO now points at dst; outunpack_dec3() stores through it. */
    297	popq RIO; /* dst */
    298
    299	outunpack_dec3();
    300
    301	popq %rbx;
    302	popq %r12;
    303	popq %r13;
    304	RET;
    305SYM_FUNC_END(twofish_dec_blk_3way)
    305SYM_FUNC_END(twofish_dec_blk_3way)