cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

aegis128-aesni-asm.S (14123B)


      1/* SPDX-License-Identifier: GPL-2.0-only */
      2/*
      3 * AES-NI + SSE2 implementation of AEGIS-128
      4 *
      5 * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
      6 * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
      7 */
      8
      9#include <linux/linkage.h>
     10#include <asm/frame.h>
     11
     12#define STATE0	%xmm0
     13#define STATE1	%xmm1
     14#define STATE2	%xmm2
     15#define STATE3	%xmm3
     16#define STATE4	%xmm4
     17#define KEY	%xmm5
     18#define MSG	%xmm5
     19#define T0	%xmm6
     20#define T1	%xmm7
     21
     22#define STATEP	%rdi
     23#define LEN	%rsi
     24#define SRC	%rdx
     25#define DST	%rcx
     26
     27.section .rodata.cst16.aegis128_const, "aM", @progbits, 32
     28.align 16
     29.Laegis128_const_0:
     30	.byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d
     31	.byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62
     32.Laegis128_const_1:
     33	.byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1
     34	.byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd
     35
     36.section .rodata.cst16.aegis128_counter, "aM", @progbits, 16
     37.align 16
     38.Laegis128_counter:
     39	.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
     40	.byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
     41
     42.text
     43
     44/*
     45 * aegis128_update
     46 * input:
     47 *   STATE[0-4] - input state
     48 * output:
     49 *   STATE[0-4] - output state (shifted positions)
     50 * changed:
     51 *   T0
     52 */
     53.macro aegis128_update
     54	movdqa STATE4, T0
     55	aesenc STATE0, STATE4
     56	aesenc STATE1, STATE0
     57	aesenc STATE2, STATE1
     58	aesenc STATE3, STATE2
     59	aesenc T0,     STATE3
     60.endm
     61
     62/*
     63 * __load_partial: internal ABI
     64 * input:
     65 *   LEN - bytes
     66 *   SRC - src
     67 * output:
     68 *   MSG  - message block
     69 * changed:
     70 *   T0
     71 *   %r8
     72 *   %r9
     73 */
     74SYM_FUNC_START_LOCAL(__load_partial)
     75	xor %r9d, %r9d
     76	pxor MSG, MSG
     77
     78	mov LEN, %r8
     79	and $0x1, %r8
     80	jz .Lld_partial_1
     81
     82	mov LEN, %r8
     83	and $0x1E, %r8
     84	add SRC, %r8
     85	mov (%r8), %r9b
     86
     87.Lld_partial_1:
     88	mov LEN, %r8
     89	and $0x2, %r8
     90	jz .Lld_partial_2
     91
     92	mov LEN, %r8
     93	and $0x1C, %r8
     94	add SRC, %r8
     95	shl $0x10, %r9
     96	mov (%r8), %r9w
     97
     98.Lld_partial_2:
     99	mov LEN, %r8
    100	and $0x4, %r8
    101	jz .Lld_partial_4
    102
    103	mov LEN, %r8
    104	and $0x18, %r8
    105	add SRC, %r8
    106	shl $32, %r9
    107	mov (%r8), %r8d
    108	xor %r8, %r9
    109
    110.Lld_partial_4:
    111	movq %r9, MSG
    112
    113	mov LEN, %r8
    114	and $0x8, %r8
    115	jz .Lld_partial_8
    116
    117	mov LEN, %r8
    118	and $0x10, %r8
    119	add SRC, %r8
    120	pslldq $8, MSG
    121	movq (%r8), T0
    122	pxor T0, MSG
    123
    124.Lld_partial_8:
    125	RET
    126SYM_FUNC_END(__load_partial)
    127
    128/*
    129 * __store_partial: internal ABI
    130 * input:
    131 *   LEN - bytes
    132 *   DST - dst
    133 * output:
    134 *   T0   - message block
    135 * changed:
    136 *   %r8
    137 *   %r9
    138 *   %r10
    139 */
    140SYM_FUNC_START_LOCAL(__store_partial)
    141	mov LEN, %r8
    142	mov DST, %r9
    143
    144	movq T0, %r10
    145
    146	cmp $8, %r8
    147	jl .Lst_partial_8
    148
    149	mov %r10, (%r9)
    150	psrldq $8, T0
    151	movq T0, %r10
    152
    153	sub $8, %r8
    154	add $8, %r9
    155
    156.Lst_partial_8:
    157	cmp $4, %r8
    158	jl .Lst_partial_4
    159
    160	mov %r10d, (%r9)
    161	shr $32, %r10
    162
    163	sub $4, %r8
    164	add $4, %r9
    165
    166.Lst_partial_4:
    167	cmp $2, %r8
    168	jl .Lst_partial_2
    169
    170	mov %r10w, (%r9)
    171	shr $0x10, %r10
    172
    173	sub $2, %r8
    174	add $2, %r9
    175
    176.Lst_partial_2:
    177	cmp $1, %r8
    178	jl .Lst_partial_1
    179
    180	mov %r10b, (%r9)
    181
    182.Lst_partial_1:
    183	RET
    184SYM_FUNC_END(__store_partial)
    185
    186/*
    187 * void crypto_aegis128_aesni_init(void *state, const void *key, const void *iv);
    188 */
    189SYM_FUNC_START(crypto_aegis128_aesni_init)
    190	FRAME_BEGIN
    191
    192	/* load IV: */
    193	movdqu (%rdx), T1
    194
    195	/* load key: */
    196	movdqa (%rsi), KEY
    197	pxor KEY, T1
    198	movdqa T1, STATE0
    199	movdqa KEY, STATE3
    200	movdqa KEY, STATE4
    201
    202	/* load the constants: */
    203	movdqa .Laegis128_const_0, STATE2
    204	movdqa .Laegis128_const_1, STATE1
    205	pxor STATE2, STATE3
    206	pxor STATE1, STATE4
    207
    208	/* update 10 times with KEY / KEY xor IV: */
    209	aegis128_update; pxor KEY, STATE4
    210	aegis128_update; pxor T1,  STATE3
    211	aegis128_update; pxor KEY, STATE2
    212	aegis128_update; pxor T1,  STATE1
    213	aegis128_update; pxor KEY, STATE0
    214	aegis128_update; pxor T1,  STATE4
    215	aegis128_update; pxor KEY, STATE3
    216	aegis128_update; pxor T1,  STATE2
    217	aegis128_update; pxor KEY, STATE1
    218	aegis128_update; pxor T1,  STATE0
    219
    220	/* store the state: */
    221	movdqu STATE0, 0x00(STATEP)
    222	movdqu STATE1, 0x10(STATEP)
    223	movdqu STATE2, 0x20(STATEP)
    224	movdqu STATE3, 0x30(STATEP)
    225	movdqu STATE4, 0x40(STATEP)
    226
    227	FRAME_END
    228	RET
    229SYM_FUNC_END(crypto_aegis128_aesni_init)
    230
    231/*
    232 * void crypto_aegis128_aesni_ad(void *state, unsigned int length,
    233 *                               const void *data);
    234 */
    235SYM_FUNC_START(crypto_aegis128_aesni_ad)
    236	FRAME_BEGIN
    237
    238	cmp $0x10, LEN
    239	jb .Lad_out
    240
    241	/* load the state: */
    242	movdqu 0x00(STATEP), STATE0
    243	movdqu 0x10(STATEP), STATE1
    244	movdqu 0x20(STATEP), STATE2
    245	movdqu 0x30(STATEP), STATE3
    246	movdqu 0x40(STATEP), STATE4
    247
    248	mov SRC, %r8
    249	and $0xF, %r8
    250	jnz .Lad_u_loop
    251
    252.align 8
    253.Lad_a_loop:
    254	movdqa 0x00(SRC), MSG
    255	aegis128_update
    256	pxor MSG, STATE4
    257	sub $0x10, LEN
    258	cmp $0x10, LEN
    259	jl .Lad_out_1
    260
    261	movdqa 0x10(SRC), MSG
    262	aegis128_update
    263	pxor MSG, STATE3
    264	sub $0x10, LEN
    265	cmp $0x10, LEN
    266	jl .Lad_out_2
    267
    268	movdqa 0x20(SRC), MSG
    269	aegis128_update
    270	pxor MSG, STATE2
    271	sub $0x10, LEN
    272	cmp $0x10, LEN
    273	jl .Lad_out_3
    274
    275	movdqa 0x30(SRC), MSG
    276	aegis128_update
    277	pxor MSG, STATE1
    278	sub $0x10, LEN
    279	cmp $0x10, LEN
    280	jl .Lad_out_4
    281
    282	movdqa 0x40(SRC), MSG
    283	aegis128_update
    284	pxor MSG, STATE0
    285	sub $0x10, LEN
    286	cmp $0x10, LEN
    287	jl .Lad_out_0
    288
    289	add $0x50, SRC
    290	jmp .Lad_a_loop
    291
    292.align 8
    293.Lad_u_loop:
    294	movdqu 0x00(SRC), MSG
    295	aegis128_update
    296	pxor MSG, STATE4
    297	sub $0x10, LEN
    298	cmp $0x10, LEN
    299	jl .Lad_out_1
    300
    301	movdqu 0x10(SRC), MSG
    302	aegis128_update
    303	pxor MSG, STATE3
    304	sub $0x10, LEN
    305	cmp $0x10, LEN
    306	jl .Lad_out_2
    307
    308	movdqu 0x20(SRC), MSG
    309	aegis128_update
    310	pxor MSG, STATE2
    311	sub $0x10, LEN
    312	cmp $0x10, LEN
    313	jl .Lad_out_3
    314
    315	movdqu 0x30(SRC), MSG
    316	aegis128_update
    317	pxor MSG, STATE1
    318	sub $0x10, LEN
    319	cmp $0x10, LEN
    320	jl .Lad_out_4
    321
    322	movdqu 0x40(SRC), MSG
    323	aegis128_update
    324	pxor MSG, STATE0
    325	sub $0x10, LEN
    326	cmp $0x10, LEN
    327	jl .Lad_out_0
    328
    329	add $0x50, SRC
    330	jmp .Lad_u_loop
    331
    332	/* store the state: */
    333.Lad_out_0:
    334	movdqu STATE0, 0x00(STATEP)
    335	movdqu STATE1, 0x10(STATEP)
    336	movdqu STATE2, 0x20(STATEP)
    337	movdqu STATE3, 0x30(STATEP)
    338	movdqu STATE4, 0x40(STATEP)
    339	FRAME_END
    340	RET
    341
    342.Lad_out_1:
    343	movdqu STATE4, 0x00(STATEP)
    344	movdqu STATE0, 0x10(STATEP)
    345	movdqu STATE1, 0x20(STATEP)
    346	movdqu STATE2, 0x30(STATEP)
    347	movdqu STATE3, 0x40(STATEP)
    348	FRAME_END
    349	RET
    350
    351.Lad_out_2:
    352	movdqu STATE3, 0x00(STATEP)
    353	movdqu STATE4, 0x10(STATEP)
    354	movdqu STATE0, 0x20(STATEP)
    355	movdqu STATE1, 0x30(STATEP)
    356	movdqu STATE2, 0x40(STATEP)
    357	FRAME_END
    358	RET
    359
    360.Lad_out_3:
    361	movdqu STATE2, 0x00(STATEP)
    362	movdqu STATE3, 0x10(STATEP)
    363	movdqu STATE4, 0x20(STATEP)
    364	movdqu STATE0, 0x30(STATEP)
    365	movdqu STATE1, 0x40(STATEP)
    366	FRAME_END
    367	RET
    368
    369.Lad_out_4:
    370	movdqu STATE1, 0x00(STATEP)
    371	movdqu STATE2, 0x10(STATEP)
    372	movdqu STATE3, 0x20(STATEP)
    373	movdqu STATE4, 0x30(STATEP)
    374	movdqu STATE0, 0x40(STATEP)
    375	FRAME_END
    376	RET
    377
    378.Lad_out:
    379	FRAME_END
    380	RET
    381SYM_FUNC_END(crypto_aegis128_aesni_ad)
    382
    383.macro encrypt_block a s0 s1 s2 s3 s4 i
    384	movdq\a (\i * 0x10)(SRC), MSG
    385	movdqa MSG, T0
    386	pxor \s1, T0
    387	pxor \s4, T0
    388	movdqa \s2, T1
    389	pand \s3, T1
    390	pxor T1, T0
    391	movdq\a T0, (\i * 0x10)(DST)
    392
    393	aegis128_update
    394	pxor MSG, \s4
    395
    396	sub $0x10, LEN
    397	cmp $0x10, LEN
    398	jl .Lenc_out_\i
    399.endm
    400
    401/*
    402 * void crypto_aegis128_aesni_enc(void *state, unsigned int length,
    403 *                                const void *src, void *dst);
    404 */
    405SYM_FUNC_START(crypto_aegis128_aesni_enc)
    406	FRAME_BEGIN
    407
    408	cmp $0x10, LEN
    409	jb .Lenc_out
    410
    411	/* load the state: */
    412	movdqu 0x00(STATEP), STATE0
    413	movdqu 0x10(STATEP), STATE1
    414	movdqu 0x20(STATEP), STATE2
    415	movdqu 0x30(STATEP), STATE3
    416	movdqu 0x40(STATEP), STATE4
    417
    418	mov  SRC,  %r8
    419	or   DST,  %r8
    420	and $0xF, %r8
    421	jnz .Lenc_u_loop
    422
    423.align 8
    424.Lenc_a_loop:
    425	encrypt_block a STATE0 STATE1 STATE2 STATE3 STATE4 0
    426	encrypt_block a STATE4 STATE0 STATE1 STATE2 STATE3 1
    427	encrypt_block a STATE3 STATE4 STATE0 STATE1 STATE2 2
    428	encrypt_block a STATE2 STATE3 STATE4 STATE0 STATE1 3
    429	encrypt_block a STATE1 STATE2 STATE3 STATE4 STATE0 4
    430
    431	add $0x50, SRC
    432	add $0x50, DST
    433	jmp .Lenc_a_loop
    434
    435.align 8
    436.Lenc_u_loop:
    437	encrypt_block u STATE0 STATE1 STATE2 STATE3 STATE4 0
    438	encrypt_block u STATE4 STATE0 STATE1 STATE2 STATE3 1
    439	encrypt_block u STATE3 STATE4 STATE0 STATE1 STATE2 2
    440	encrypt_block u STATE2 STATE3 STATE4 STATE0 STATE1 3
    441	encrypt_block u STATE1 STATE2 STATE3 STATE4 STATE0 4
    442
    443	add $0x50, SRC
    444	add $0x50, DST
    445	jmp .Lenc_u_loop
    446
    447	/* store the state: */
    448.Lenc_out_0:
    449	movdqu STATE4, 0x00(STATEP)
    450	movdqu STATE0, 0x10(STATEP)
    451	movdqu STATE1, 0x20(STATEP)
    452	movdqu STATE2, 0x30(STATEP)
    453	movdqu STATE3, 0x40(STATEP)
    454	FRAME_END
    455	RET
    456
    457.Lenc_out_1:
    458	movdqu STATE3, 0x00(STATEP)
    459	movdqu STATE4, 0x10(STATEP)
    460	movdqu STATE0, 0x20(STATEP)
    461	movdqu STATE1, 0x30(STATEP)
    462	movdqu STATE2, 0x40(STATEP)
    463	FRAME_END
    464	RET
    465
    466.Lenc_out_2:
    467	movdqu STATE2, 0x00(STATEP)
    468	movdqu STATE3, 0x10(STATEP)
    469	movdqu STATE4, 0x20(STATEP)
    470	movdqu STATE0, 0x30(STATEP)
    471	movdqu STATE1, 0x40(STATEP)
    472	FRAME_END
    473	RET
    474
    475.Lenc_out_3:
    476	movdqu STATE1, 0x00(STATEP)
    477	movdqu STATE2, 0x10(STATEP)
    478	movdqu STATE3, 0x20(STATEP)
    479	movdqu STATE4, 0x30(STATEP)
    480	movdqu STATE0, 0x40(STATEP)
    481	FRAME_END
    482	RET
    483
    484.Lenc_out_4:
    485	movdqu STATE0, 0x00(STATEP)
    486	movdqu STATE1, 0x10(STATEP)
    487	movdqu STATE2, 0x20(STATEP)
    488	movdqu STATE3, 0x30(STATEP)
    489	movdqu STATE4, 0x40(STATEP)
    490	FRAME_END
    491	RET
    492
    493.Lenc_out:
    494	FRAME_END
    495	RET
    496SYM_FUNC_END(crypto_aegis128_aesni_enc)
    497
    498/*
    499 * void crypto_aegis128_aesni_enc_tail(void *state, unsigned int length,
    500 *                                     const void *src, void *dst);
    501 */
    502SYM_FUNC_START(crypto_aegis128_aesni_enc_tail)
    503	FRAME_BEGIN
    504
    505	/* load the state: */
    506	movdqu 0x00(STATEP), STATE0
    507	movdqu 0x10(STATEP), STATE1
    508	movdqu 0x20(STATEP), STATE2
    509	movdqu 0x30(STATEP), STATE3
    510	movdqu 0x40(STATEP), STATE4
    511
    512	/* encrypt message: */
    513	call __load_partial
    514
    515	movdqa MSG, T0
    516	pxor STATE1, T0
    517	pxor STATE4, T0
    518	movdqa STATE2, T1
    519	pand STATE3, T1
    520	pxor T1, T0
    521
    522	call __store_partial
    523
    524	aegis128_update
    525	pxor MSG, STATE4
    526
    527	/* store the state: */
    528	movdqu STATE4, 0x00(STATEP)
    529	movdqu STATE0, 0x10(STATEP)
    530	movdqu STATE1, 0x20(STATEP)
    531	movdqu STATE2, 0x30(STATEP)
    532	movdqu STATE3, 0x40(STATEP)
    533
    534	FRAME_END
    535	RET
    536SYM_FUNC_END(crypto_aegis128_aesni_enc_tail)
    537
    538.macro decrypt_block a s0 s1 s2 s3 s4 i
    539	movdq\a (\i * 0x10)(SRC), MSG
    540	pxor \s1, MSG
    541	pxor \s4, MSG
    542	movdqa \s2, T1
    543	pand \s3, T1
    544	pxor T1, MSG
    545	movdq\a MSG, (\i * 0x10)(DST)
    546
    547	aegis128_update
    548	pxor MSG, \s4
    549
    550	sub $0x10, LEN
    551	cmp $0x10, LEN
    552	jl .Ldec_out_\i
    553.endm
    554
    555/*
    556 * void crypto_aegis128_aesni_dec(void *state, unsigned int length,
    557 *                                const void *src, void *dst);
    558 */
    559SYM_FUNC_START(crypto_aegis128_aesni_dec)
    560	FRAME_BEGIN
    561
    562	cmp $0x10, LEN
    563	jb .Ldec_out
    564
    565	/* load the state: */
    566	movdqu 0x00(STATEP), STATE0
    567	movdqu 0x10(STATEP), STATE1
    568	movdqu 0x20(STATEP), STATE2
    569	movdqu 0x30(STATEP), STATE3
    570	movdqu 0x40(STATEP), STATE4
    571
    572	mov  SRC, %r8
    573	or   DST, %r8
    574	and $0xF, %r8
    575	jnz .Ldec_u_loop
    576
    577.align 8
    578.Ldec_a_loop:
    579	decrypt_block a STATE0 STATE1 STATE2 STATE3 STATE4 0
    580	decrypt_block a STATE4 STATE0 STATE1 STATE2 STATE3 1
    581	decrypt_block a STATE3 STATE4 STATE0 STATE1 STATE2 2
    582	decrypt_block a STATE2 STATE3 STATE4 STATE0 STATE1 3
    583	decrypt_block a STATE1 STATE2 STATE3 STATE4 STATE0 4
    584
    585	add $0x50, SRC
    586	add $0x50, DST
    587	jmp .Ldec_a_loop
    588
    589.align 8
    590.Ldec_u_loop:
    591	decrypt_block u STATE0 STATE1 STATE2 STATE3 STATE4 0
    592	decrypt_block u STATE4 STATE0 STATE1 STATE2 STATE3 1
    593	decrypt_block u STATE3 STATE4 STATE0 STATE1 STATE2 2
    594	decrypt_block u STATE2 STATE3 STATE4 STATE0 STATE1 3
    595	decrypt_block u STATE1 STATE2 STATE3 STATE4 STATE0 4
    596
    597	add $0x50, SRC
    598	add $0x50, DST
    599	jmp .Ldec_u_loop
    600
    601	/* store the state: */
    602.Ldec_out_0:
    603	movdqu STATE4, 0x00(STATEP)
    604	movdqu STATE0, 0x10(STATEP)
    605	movdqu STATE1, 0x20(STATEP)
    606	movdqu STATE2, 0x30(STATEP)
    607	movdqu STATE3, 0x40(STATEP)
    608	FRAME_END
    609	RET
    610
    611.Ldec_out_1:
    612	movdqu STATE3, 0x00(STATEP)
    613	movdqu STATE4, 0x10(STATEP)
    614	movdqu STATE0, 0x20(STATEP)
    615	movdqu STATE1, 0x30(STATEP)
    616	movdqu STATE2, 0x40(STATEP)
    617	FRAME_END
    618	RET
    619
    620.Ldec_out_2:
    621	movdqu STATE2, 0x00(STATEP)
    622	movdqu STATE3, 0x10(STATEP)
    623	movdqu STATE4, 0x20(STATEP)
    624	movdqu STATE0, 0x30(STATEP)
    625	movdqu STATE1, 0x40(STATEP)
    626	FRAME_END
    627	RET
    628
    629.Ldec_out_3:
    630	movdqu STATE1, 0x00(STATEP)
    631	movdqu STATE2, 0x10(STATEP)
    632	movdqu STATE3, 0x20(STATEP)
    633	movdqu STATE4, 0x30(STATEP)
    634	movdqu STATE0, 0x40(STATEP)
    635	FRAME_END
    636	RET
    637
    638.Ldec_out_4:
    639	movdqu STATE0, 0x00(STATEP)
    640	movdqu STATE1, 0x10(STATEP)
    641	movdqu STATE2, 0x20(STATEP)
    642	movdqu STATE3, 0x30(STATEP)
    643	movdqu STATE4, 0x40(STATEP)
    644	FRAME_END
    645	RET
    646
    647.Ldec_out:
    648	FRAME_END
    649	RET
    650SYM_FUNC_END(crypto_aegis128_aesni_dec)
    651
    652/*
    653 * void crypto_aegis128_aesni_dec_tail(void *state, unsigned int length,
    654 *                                     const void *src, void *dst);
    655 */
    656SYM_FUNC_START(crypto_aegis128_aesni_dec_tail)
    657	FRAME_BEGIN
    658
    659	/* load the state: */
    660	movdqu 0x00(STATEP), STATE0
    661	movdqu 0x10(STATEP), STATE1
    662	movdqu 0x20(STATEP), STATE2
    663	movdqu 0x30(STATEP), STATE3
    664	movdqu 0x40(STATEP), STATE4
    665
    666	/* decrypt message: */
    667	call __load_partial
    668
    669	pxor STATE1, MSG
    670	pxor STATE4, MSG
    671	movdqa STATE2, T1
    672	pand STATE3, T1
    673	pxor T1, MSG
    674
    675	movdqa MSG, T0
    676	call __store_partial
    677
    678	/* mask with byte count: */
    679	movq LEN, T0
    680	punpcklbw T0, T0
    681	punpcklbw T0, T0
    682	punpcklbw T0, T0
    683	punpcklbw T0, T0
    684	movdqa .Laegis128_counter, T1
    685	pcmpgtb T1, T0
    686	pand T0, MSG
    687
    688	aegis128_update
    689	pxor MSG, STATE4
    690
    691	/* store the state: */
    692	movdqu STATE4, 0x00(STATEP)
    693	movdqu STATE0, 0x10(STATEP)
    694	movdqu STATE1, 0x20(STATEP)
    695	movdqu STATE2, 0x30(STATEP)
    696	movdqu STATE3, 0x40(STATEP)
    697
    698	FRAME_END
    699	RET
    700SYM_FUNC_END(crypto_aegis128_aesni_dec_tail)
    701
    702/*
    703 * void crypto_aegis128_aesni_final(void *state, void *tag_xor,
    704 *                                  u64 assoclen, u64 cryptlen);
    705 */
    706SYM_FUNC_START(crypto_aegis128_aesni_final)
    707	FRAME_BEGIN
    708
    709	/* load the state: */
    710	movdqu 0x00(STATEP), STATE0
    711	movdqu 0x10(STATEP), STATE1
    712	movdqu 0x20(STATEP), STATE2
    713	movdqu 0x30(STATEP), STATE3
    714	movdqu 0x40(STATEP), STATE4
    715
    716	/* prepare length block: */
    717	movq %rdx, MSG
    718	movq %rcx, T0
    719	pslldq $8, T0
    720	pxor T0, MSG
    721	psllq $3, MSG /* multiply by 8 (to get bit count) */
    722
    723	pxor STATE3, MSG
    724
    725	/* update state: */
    726	aegis128_update; pxor MSG, STATE4
    727	aegis128_update; pxor MSG, STATE3
    728	aegis128_update; pxor MSG, STATE2
    729	aegis128_update; pxor MSG, STATE1
    730	aegis128_update; pxor MSG, STATE0
    731	aegis128_update; pxor MSG, STATE4
    732	aegis128_update; pxor MSG, STATE3
    733
    734	/* xor tag: */
    735	movdqu (%rsi), MSG
    736
    737	pxor STATE0, MSG
    738	pxor STATE1, MSG
    739	pxor STATE2, MSG
    740	pxor STATE3, MSG
    741	pxor STATE4, MSG
    742
    743	movdqu MSG, (%rsi)
    744
    745	FRAME_END
    746	RET
    747SYM_FUNC_END(crypto_aegis128_aesni_final)