cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

ghash-clmulni-intel_asm.S (2785B)


      1/* SPDX-License-Identifier: GPL-2.0-only */
      2/*
      3 * Accelerated GHASH implementation with Intel PCLMULQDQ-NI
      4 * instructions. This file contains the accelerated part of the GHASH
      5 * implementation. More information about PCLMULQDQ can be found at:
      6 *
      7 * http://software.intel.com/en-us/articles/carry-less-multiplication-and-its-usage-for-computing-the-gcm-mode/
      8 *
      9 * Copyright (c) 2009 Intel Corp.
     10 *   Author: Huang Ying <ying.huang@intel.com>
     11 *	     Vinodh Gopal
     12 *	     Erdinc Ozturk
     13 *	     Deniz Karakoyunlu
     14 */
     15
     16#include <linux/linkage.h>
     17#include <asm/frame.h>
     18
     19.section	.rodata.cst16.bswap_mask, "aM", @progbits, 16
     20.align 16
     21.Lbswap_mask:
     22	.octa 0x000102030405060708090a0b0c0d0e0f
     23
     24#define DATA	%xmm0
     25#define SHASH	%xmm1
     26#define T1	%xmm2
     27#define T2	%xmm3
     28#define T3	%xmm4
     29#define BSWAP	%xmm5
     30#define IN1	%xmm6
     31
     32.text
     33
     34/*
     35 * __clmul_gf128mul_ble:	internal ABI
        *
        * Carry-less multiplication of two 128-bit operands followed by a
        * two-phase reduction of the 256-bit product back to 128 bits.
        * Operands and result are passed in XMM registers; no instruction in
        * the body has a memory operand.  In the comments below a1/a0 and
        * b1/b0 are the high/low 64-bit halves of operand1 and operand2, and
        * "+" denotes XOR.
        *
     36 * input:
     37 *	DATA:			operand1
     38 *	SHASH:			operand2, hash_key << 1 mod poly
     39 * output:
     40 *	DATA:			operand1 * operand2 mod poly
     41 * changed:
     42 *	T1
     43 *	T2
     44 *	T3
     45 */
     46SYM_FUNC_START_LOCAL(__clmul_gf128mul_ble)
     47	movaps DATA, T1			# T1 = copy of operand1, used for a1 * b1
     48	pshufd $0b01001110, DATA, T2	# T2 = operand1 with 64-bit halves swapped
     49	pshufd $0b01001110, SHASH, T3	# T3 = operand2 with 64-bit halves swapped
     50	pxor DATA, T2			# both halves of T2 = a1 + a0
     51	pxor SHASH, T3			# both halves of T3 = b1 + b0
     52
       	/* Karatsuba: three 64x64 carry-less multiplies instead of four. */
     53	pclmulqdq $0x00, SHASH, DATA	# DATA = a0 * b0
     54	pclmulqdq $0x11, SHASH, T1	# T1 = a1 * b1
     55	pclmulqdq $0x00, T3, T2		# T2 = (a1 + a0) * (b1 + b0)
     56	pxor DATA, T2
     57	pxor T1, T2			# T2 = a0 * b1 + a1 * b0
     58
       	/* Add the middle term, shifted by 64 bits, into the 256-bit product. */
     59	movaps T2, T3
     60	pslldq $8, T3			# T3 = middle term << 64 (low 128 bits)
     61	psrldq $8, T2			# T2 = middle term >> 64
     62	pxor T3, DATA
     63	pxor T2, T1			# <T1:DATA> is result of
     64					# carry-less multiplication
     65
     66	# first phase of the reduction
     67	movaps DATA, T3
     68	psllq $1, T3
     69	pxor DATA, T3
     70	psllq $5, T3
     71	pxor DATA, T3
     72	psllq $57, T3			# per 64-bit lane: DATA<<63 ^ DATA<<62 ^ DATA<<57
     73	movaps T3, T2
     74	pslldq $8, T2			# low lane of that value -> high half
     75	psrldq $8, T3			# high lane of that value -> low half
     76	pxor T2, DATA
     77	pxor T3, T1
     78
     79	# second phase of the reduction
     80	movaps DATA, T2
     81	psrlq $5, T2
     82	pxor DATA, T2
     83	psrlq $1, T2
     84	pxor DATA, T2
     85	psrlq $1, T2			# per 64-bit lane: DATA>>1 ^ DATA>>2 ^ DATA>>7
     86	pxor T2, T1
     87	pxor T1, DATA			# DATA = reduced 128-bit result
     88	RET
     89SYM_FUNC_END(__clmul_gf128mul_ble)
     90
     91/*
        * void clmul_ghash_mul(char *dst, const u128 *shash)
        *
        * Replaces the 16 bytes at dst with the result of multiplying them by
        * the 16 bytes at shash via __clmul_gf128mul_ble.
        *
        * In:	%rdi = dst   (16 bytes, read and written; unaligned access is used)
        *	%rsi = shash (16 bytes, read only; loaded into SHASH unchanged, so
        *		      it must already be in the form the helper expects:
        *		      hash_key << 1 mod poly)
        * Out:	result stored at dst
        * Clobbers: DATA, SHASH, T1, T2, T3, BSWAP (%xmm0-%xmm5)
        *
        * The value at dst is byte-reversed with .Lbswap_mask before the
        * multiplication and byte-reversed again before it is stored.
        */
     92SYM_FUNC_START(clmul_ghash_mul)
     93	FRAME_BEGIN
     94	movups (%rdi), DATA		# 16 bytes at dst
     95	movups (%rsi), SHASH		# key operand
     96	movaps .Lbswap_mask(%rip), BSWAP	# RIP-relative load, no absolute relocation
     97	pshufb BSWAP, DATA		# reverse byte order for the multiply
     98	call __clmul_gf128mul_ble
     99	pshufb BSWAP, DATA		# back to the in-memory byte order
    100	movups DATA, (%rdi)
    101	FRAME_END
    102	RET
    103SYM_FUNC_END(clmul_ghash_mul)
    104
    105/*
    106 * void clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
    107 *			   const u128 *shash);
        *
        * For every complete 16-byte block of src, XORs the block into the
        * running value and multiplies the result by the key operand via
        * __clmul_gf128mul_ble.  The running value is loaded from dst on entry
        * and stored back to dst after the last complete block.  Bytes after
        * the last complete block are not read.  If srclen < 16 nothing is
        * loaded or stored.
        *
        * In:	%rdi = dst    (16 bytes, read and written; unaligned access)
        *	%rsi = src    (input bytes; unaligned access)
        *	%edx = srclen (unsigned 32-bit byte count)
        *	%rcx = shash  (16 bytes, loaded into SHASH unchanged)
        * Clobbers: %xmm0-%xmm6, %rsi, %rdx, flags
        *
        * srclen is a 32-bit argument, so only %edx is inspected: the calling
        * convention does not define the upper half of %rdx.  Both length
        * tests use unsigned conditions (jb/jae) because the count is unsigned.
    108 */
    109SYM_FUNC_START(clmul_ghash_update)
    110	FRAME_BEGIN
    111	cmp $16, %edx
    112	jb .Lupdate_just_ret	# check length
    113	movaps .Lbswap_mask(%rip), BSWAP	# RIP-relative load, no absolute relocation
    114	movups (%rdi), DATA		# running value from dst
    115	movups (%rcx), SHASH		# key operand
    116	pshufb BSWAP, DATA		# reverse byte order for the multiply
    117.align 4
    118.Lupdate_loop:
    119	movups (%rsi), IN1		# next 16-byte block of src
    120	pshufb BSWAP, IN1
    121	pxor IN1, DATA			# fold the block into the running value
    122	call __clmul_gf128mul_ble
    123	sub $16, %edx			# one block consumed
    124	add $16, %rsi
    125	cmp $16, %edx
    126	jae .Lupdate_loop		# unsigned: another complete block remains
    127	pshufb BSWAP, DATA		# back to the in-memory byte order
    128	movups DATA, (%rdi)
    129.Lupdate_just_ret:
    130	FRAME_END
    131	RET
    132SYM_FUNC_END(clmul_ghash_update)