cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

camellia-x86_64-asm_64.S (10447B)


      1/* SPDX-License-Identifier: GPL-2.0-or-later */
      2/*
      3 * Camellia Cipher Algorithm (x86_64)
      4 *
      5 * Copyright (C) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
      6 */
      7
      8#include <linux/linkage.h>
      9
     10.file "camellia-x86_64-asm_64.S"
     11.text
     12
     13.extern camellia_sp10011110;
     14.extern camellia_sp22000222;
     15.extern camellia_sp03303033;
     16.extern camellia_sp00444404;
     17.extern camellia_sp02220222;
     18.extern camellia_sp30333033;
     19.extern camellia_sp44044404;
     20.extern camellia_sp11101110;
     21
     22#define sp10011110 camellia_sp10011110
     23#define sp22000222 camellia_sp22000222
     24#define sp03303033 camellia_sp03303033
     25#define sp00444404 camellia_sp00444404
     26#define sp02220222 camellia_sp02220222
     27#define sp30333033 camellia_sp30333033
     28#define sp44044404 camellia_sp44044404
     29#define sp11101110 camellia_sp11101110
     30
     31#define CAMELLIA_TABLE_BYTE_LEN 272
     32
     33/* struct camellia_ctx: */
     34#define key_table 0
     35#define key_length CAMELLIA_TABLE_BYTE_LEN
     36
     37/* register macros */
     38#define CTX %rdi
     39#define RIO %rsi
     40#define RIOd %esi
     41
     42#define RAB0 %rax
     43#define RCD0 %rcx
     44#define RAB1 %rbx
     45#define RCD1 %rdx
     46
     47#define RAB0d %eax
     48#define RCD0d %ecx
     49#define RAB1d %ebx
     50#define RCD1d %edx
     51
     52#define RAB0bl %al
     53#define RCD0bl %cl
     54#define RAB1bl %bl
     55#define RCD1bl %dl
     56
     57#define RAB0bh %ah
     58#define RCD0bh %ch
     59#define RAB1bh %bh
     60#define RCD1bh %dh
     61
     62#define RT0 %rsi
     63#define RT1 %r12
     64#define RT2 %r8
     65
     66#define RT0d %esi
     67#define RT1d %r12d
     68#define RT2d %r8d
     69
     70#define RT2bl %r8b
     71
     72#define RXOR %r9
     73#define RR12 %r10
     74#define RDST %r11
     75
     76#define RXORd %r9d
     77#define RXORbl %r9b
     78
     79#define xor2ror16(T0, T1, tmp1, tmp2, ab, dst) \
     80	movzbl ab ## bl,		tmp2 ## d; \
     81	movzbl ab ## bh,		tmp1 ## d; \
     82	rorq $16,			ab; \
     83	xorq T0(, tmp2, 8),		dst; \
     84	xorq T1(, tmp1, 8),		dst;
     85
     86/**********************************************************************
     87  1-way camellia
     88 **********************************************************************/
     89#define roundsm(ab, subkey, cd) \
     90	movq (key_table + ((subkey) * 2) * 4)(CTX),	RT2; \
     91	\
     92	xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 0, cd ## 0); \
     93	xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 0, RT2); \
     94	xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 0, cd ## 0); \
     95	xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 0, RT2); \
     96	\
     97	xorq RT2,					cd ## 0;
     98
     99#define fls(l, r, kl, kr) \
    100	movl (key_table + ((kl) * 2) * 4)(CTX),		RT0d; \
    101	andl l ## 0d,					RT0d; \
    102	roll $1,					RT0d; \
    103	shlq $32,					RT0; \
    104	xorq RT0,					l ## 0; \
    105	movq (key_table + ((kr) * 2) * 4)(CTX),		RT1; \
    106	orq r ## 0,					RT1; \
    107	shrq $32,					RT1; \
    108	xorq RT1,					r ## 0; \
    109	\
    110	movq (key_table + ((kl) * 2) * 4)(CTX),		RT2; \
    111	orq l ## 0,					RT2; \
    112	shrq $32,					RT2; \
    113	xorq RT2,					l ## 0; \
    114	movl (key_table + ((kr) * 2) * 4)(CTX),		RT0d; \
    115	andl r ## 0d,					RT0d; \
    116	roll $1,					RT0d; \
    117	shlq $32,					RT0; \
    118	xorq RT0,					r ## 0;
    119
    120#define enc_rounds(i) \
    121	roundsm(RAB, i + 2, RCD); \
    122	roundsm(RCD, i + 3, RAB); \
    123	roundsm(RAB, i + 4, RCD); \
    124	roundsm(RCD, i + 5, RAB); \
    125	roundsm(RAB, i + 6, RCD); \
    126	roundsm(RCD, i + 7, RAB);
    127
    128#define enc_fls(i) \
    129	fls(RAB, RCD, i + 0, i + 1);
    130
    131#define enc_inpack() \
    132	movq (RIO),			RAB0; \
    133	bswapq				RAB0; \
    134	rolq $32,			RAB0; \
    135	movq 4*2(RIO),			RCD0; \
    136	bswapq				RCD0; \
    137	rorq $32,			RCD0; \
    138	xorq key_table(CTX),		RAB0;
    139
    140#define enc_outunpack(op, max) \
    141	xorq key_table(CTX, max, 8),	RCD0; \
    142	rorq $32,			RCD0; \
    143	bswapq				RCD0; \
    144	op ## q RCD0,			(RIO); \
    145	rolq $32,			RAB0; \
    146	bswapq				RAB0; \
    147	op ## q RAB0,			4*2(RIO);
    148
    149#define dec_rounds(i) \
    150	roundsm(RAB, i + 7, RCD); \
    151	roundsm(RCD, i + 6, RAB); \
    152	roundsm(RAB, i + 5, RCD); \
    153	roundsm(RCD, i + 4, RAB); \
    154	roundsm(RAB, i + 3, RCD); \
    155	roundsm(RCD, i + 2, RAB);
    156
    157#define dec_fls(i) \
    158	fls(RAB, RCD, i + 1, i + 0);
    159
    160#define dec_inpack(max) \
    161	movq (RIO),			RAB0; \
    162	bswapq				RAB0; \
    163	rolq $32,			RAB0; \
    164	movq 4*2(RIO),			RCD0; \
    165	bswapq				RCD0; \
    166	rorq $32,			RCD0; \
    167	xorq key_table(CTX, max, 8),	RAB0;
    168
    169#define dec_outunpack() \
    170	xorq key_table(CTX),		RCD0; \
    171	rorq $32,			RCD0; \
    172	bswapq				RCD0; \
    173	movq RCD0,			(RIO); \
    174	rolq $32,			RAB0; \
    175	bswapq				RAB0; \
    176	movq RAB0,			4*2(RIO);
    177
    178SYM_FUNC_START(__camellia_enc_blk)
    179	/* input:
    180	 *	%rdi: ctx, CTX
    181	 *	%rsi: dst
    182	 *	%rdx: src
    183	 *	%rcx: bool xor
    184	 */
    185	movq %r12, RR12;
    186
    187	movq %rcx, RXOR;
    188	movq %rsi, RDST;
    189	movq %rdx, RIO;
    190
    191	enc_inpack();
    192
    193	enc_rounds(0);
    194	enc_fls(8);
    195	enc_rounds(8);
    196	enc_fls(16);
    197	enc_rounds(16);
    198	movl $24, RT1d; /* max */
    199
    200	cmpb $16, key_length(CTX);
    201	je .L__enc_done;
    202
    203	enc_fls(24);
    204	enc_rounds(24);
    205	movl $32, RT1d; /* max */
    206
    207.L__enc_done:
    208	testb RXORbl, RXORbl;
    209	movq RDST, RIO;
    210
    211	jnz .L__enc_xor;
    212
    213	enc_outunpack(mov, RT1);
    214
    215	movq RR12, %r12;
    216	RET;
    217
    218.L__enc_xor:
    219	enc_outunpack(xor, RT1);
    220
    221	movq RR12, %r12;
    222	RET;
    223SYM_FUNC_END(__camellia_enc_blk)
    224
    225SYM_FUNC_START(camellia_dec_blk)
    226	/* input:
    227	 *	%rdi: ctx, CTX
    228	 *	%rsi: dst
    229	 *	%rdx: src
    230	 */
    231	cmpl $16, key_length(CTX);
    232	movl $32, RT2d;
    233	movl $24, RXORd;
    234	cmovel RXORd, RT2d; /* max */
    235
    236	movq %r12, RR12;
    237	movq %rsi, RDST;
    238	movq %rdx, RIO;
    239
    240	dec_inpack(RT2);
    241
    242	cmpb $24, RT2bl;
    243	je .L__dec_rounds16;
    244
    245	dec_rounds(24);
    246	dec_fls(24);
    247
    248.L__dec_rounds16:
    249	dec_rounds(16);
    250	dec_fls(16);
    251	dec_rounds(8);
    252	dec_fls(8);
    253	dec_rounds(0);
    254
    255	movq RDST, RIO;
    256
    257	dec_outunpack();
    258
    259	movq RR12, %r12;
    260	RET;
    261SYM_FUNC_END(camellia_dec_blk)
    262
    263/**********************************************************************
    264  2-way camellia
    265 **********************************************************************/
    266#define roundsm2(ab, subkey, cd) \
    267	movq (key_table + ((subkey) * 2) * 4)(CTX),	RT2; \
    268	xorq RT2,					cd ## 1; \
    269	\
    270	xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 0, cd ## 0); \
    271	xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 0, RT2); \
    272	xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 0, cd ## 0); \
    273	xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 0, RT2); \
    274	\
    275		xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 1, cd ## 1); \
    276		xorq RT2,					cd ## 0; \
    277		xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 1, cd ## 1); \
    278		xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 1, cd ## 1); \
    279		xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 1, cd ## 1);
    280
    281#define fls2(l, r, kl, kr) \
    282	movl (key_table + ((kl) * 2) * 4)(CTX),		RT0d; \
    283	andl l ## 0d,					RT0d; \
    284	roll $1,					RT0d; \
    285	shlq $32,					RT0; \
    286	xorq RT0,					l ## 0; \
    287	movq (key_table + ((kr) * 2) * 4)(CTX),		RT1; \
    288	orq r ## 0,					RT1; \
    289	shrq $32,					RT1; \
    290	xorq RT1,					r ## 0; \
    291	\
    292		movl (key_table + ((kl) * 2) * 4)(CTX),		RT2d; \
    293		andl l ## 1d,					RT2d; \
    294		roll $1,					RT2d; \
    295		shlq $32,					RT2; \
    296		xorq RT2,					l ## 1; \
    297		movq (key_table + ((kr) * 2) * 4)(CTX),		RT0; \
    298		orq r ## 1,					RT0; \
    299		shrq $32,					RT0; \
    300		xorq RT0,					r ## 1; \
    301	\
    302	movq (key_table + ((kl) * 2) * 4)(CTX),		RT1; \
    303	orq l ## 0,					RT1; \
    304	shrq $32,					RT1; \
    305	xorq RT1,					l ## 0; \
    306	movl (key_table + ((kr) * 2) * 4)(CTX),		RT2d; \
    307	andl r ## 0d,					RT2d; \
    308	roll $1,					RT2d; \
    309	shlq $32,					RT2; \
    310	xorq RT2,					r ## 0; \
    311	\
    312		movq (key_table + ((kl) * 2) * 4)(CTX),		RT0; \
    313		orq l ## 1,					RT0; \
    314		shrq $32,					RT0; \
    315		xorq RT0,					l ## 1; \
    316		movl (key_table + ((kr) * 2) * 4)(CTX),		RT1d; \
    317		andl r ## 1d,					RT1d; \
    318		roll $1,					RT1d; \
    319		shlq $32,					RT1; \
    320		xorq RT1,					r ## 1;
    321
    322#define enc_rounds2(i) \
    323	roundsm2(RAB, i + 2, RCD); \
    324	roundsm2(RCD, i + 3, RAB); \
    325	roundsm2(RAB, i + 4, RCD); \
    326	roundsm2(RCD, i + 5, RAB); \
    327	roundsm2(RAB, i + 6, RCD); \
    328	roundsm2(RCD, i + 7, RAB);
    329
    330#define enc_fls2(i) \
    331	fls2(RAB, RCD, i + 0, i + 1);
    332
    333#define enc_inpack2() \
    334	movq (RIO),			RAB0; \
    335	bswapq				RAB0; \
    336	rorq $32,			RAB0; \
    337	movq 4*2(RIO),			RCD0; \
    338	bswapq				RCD0; \
    339	rolq $32,			RCD0; \
    340	xorq key_table(CTX),		RAB0; \
    341	\
    342		movq 8*2(RIO),			RAB1; \
    343		bswapq				RAB1; \
    344		rorq $32,			RAB1; \
    345		movq 12*2(RIO),			RCD1; \
    346		bswapq				RCD1; \
    347		rolq $32,			RCD1; \
    348		xorq key_table(CTX),		RAB1;
    349
    350#define enc_outunpack2(op, max) \
    351	xorq key_table(CTX, max, 8),	RCD0; \
    352	rolq $32,			RCD0; \
    353	bswapq				RCD0; \
    354	op ## q RCD0,			(RIO); \
    355	rorq $32,			RAB0; \
    356	bswapq				RAB0; \
    357	op ## q RAB0,			4*2(RIO); \
    358	\
    359		xorq key_table(CTX, max, 8),	RCD1; \
    360		rolq $32,			RCD1; \
    361		bswapq				RCD1; \
    362		op ## q RCD1,			8*2(RIO); \
    363		rorq $32,			RAB1; \
    364		bswapq				RAB1; \
    365		op ## q RAB1,			12*2(RIO);
    366
    367#define dec_rounds2(i) \
    368	roundsm2(RAB, i + 7, RCD); \
    369	roundsm2(RCD, i + 6, RAB); \
    370	roundsm2(RAB, i + 5, RCD); \
    371	roundsm2(RCD, i + 4, RAB); \
    372	roundsm2(RAB, i + 3, RCD); \
    373	roundsm2(RCD, i + 2, RAB);
    374
    375#define dec_fls2(i) \
    376	fls2(RAB, RCD, i + 1, i + 0);
    377
    378#define dec_inpack2(max) \
    379	movq (RIO),			RAB0; \
    380	bswapq				RAB0; \
    381	rorq $32,			RAB0; \
    382	movq 4*2(RIO),			RCD0; \
    383	bswapq				RCD0; \
    384	rolq $32,			RCD0; \
    385	xorq key_table(CTX, max, 8),	RAB0; \
    386	\
    387		movq 8*2(RIO),			RAB1; \
    388		bswapq				RAB1; \
    389		rorq $32,			RAB1; \
    390		movq 12*2(RIO),			RCD1; \
    391		bswapq				RCD1; \
    392		rolq $32,			RCD1; \
    393		xorq key_table(CTX, max, 8),	RAB1;
    394
    395#define dec_outunpack2() \
    396	xorq key_table(CTX),		RCD0; \
    397	rolq $32,			RCD0; \
    398	bswapq				RCD0; \
    399	movq RCD0,			(RIO); \
    400	rorq $32,			RAB0; \
    401	bswapq				RAB0; \
    402	movq RAB0,			4*2(RIO); \
    403	\
    404		xorq key_table(CTX),		RCD1; \
    405		rolq $32,			RCD1; \
    406		bswapq				RCD1; \
    407		movq RCD1,			8*2(RIO); \
    408		rorq $32,			RAB1; \
    409		bswapq				RAB1; \
    410		movq RAB1,			12*2(RIO);
    411
    412SYM_FUNC_START(__camellia_enc_blk_2way)
    413	/* input:
    414	 *	%rdi: ctx, CTX
    415	 *	%rsi: dst
    416	 *	%rdx: src
    417	 *	%rcx: bool xor
    418	 */
    419	pushq %rbx;
    420
    421	movq %r12, RR12;
    422	movq %rcx, RXOR;
    423	movq %rsi, RDST;
    424	movq %rdx, RIO;
    425
    426	enc_inpack2();
    427
    428	enc_rounds2(0);
    429	enc_fls2(8);
    430	enc_rounds2(8);
    431	enc_fls2(16);
    432	enc_rounds2(16);
    433	movl $24, RT2d; /* max */
    434
    435	cmpb $16, key_length(CTX);
    436	je .L__enc2_done;
    437
    438	enc_fls2(24);
    439	enc_rounds2(24);
    440	movl $32, RT2d; /* max */
    441
    442.L__enc2_done:
    443	test RXORbl, RXORbl;
    444	movq RDST, RIO;
    445	jnz .L__enc2_xor;
    446
    447	enc_outunpack2(mov, RT2);
    448
    449	movq RR12, %r12;
    450	popq %rbx;
    451	RET;
    452
    453.L__enc2_xor:
    454	enc_outunpack2(xor, RT2);
    455
    456	movq RR12, %r12;
    457	popq %rbx;
    458	RET;
    459SYM_FUNC_END(__camellia_enc_blk_2way)
    460
    461SYM_FUNC_START(camellia_dec_blk_2way)
    462	/* input:
    463	 *	%rdi: ctx, CTX
    464	 *	%rsi: dst
    465	 *	%rdx: src
    466	 */
    467	cmpl $16, key_length(CTX);
    468	movl $32, RT2d;
    469	movl $24, RXORd;
    470	cmovel RXORd, RT2d; /* max */
    471
    472	movq %rbx, RXOR;
    473	movq %r12, RR12;
    474	movq %rsi, RDST;
    475	movq %rdx, RIO;
    476
    477	dec_inpack2(RT2);
    478
    479	cmpb $24, RT2bl;
    480	je .L__dec2_rounds16;
    481
    482	dec_rounds2(24);
    483	dec_fls2(24);
    484
    485.L__dec2_rounds16:
    486	dec_rounds2(16);
    487	dec_fls2(16);
    488	dec_rounds2(8);
    489	dec_fls2(8);
    490	dec_rounds2(0);
    491
    492	movq RDST, RIO;
    493
    494	dec_outunpack2();
    495
    496	movq RR12, %r12;
    497	movq RXOR, %rbx;
    498	RET;
    499SYM_FUNC_END(camellia_dec_blk_2way)