cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

nh-sse2-x86_64.S (2742B)


      1/* SPDX-License-Identifier: GPL-2.0 */
      2/*
      3 * NH - ε-almost-universal hash function, x86_64 SSE2 accelerated
      4 *
      5 * Copyright 2018 Google LLC
      6 *
      7 * Author: Eric Biggers <ebiggers@google.com>
      8 */
      9
     10#include <linux/linkage.h>
     11
       /*
        * Per-pass accumulators.  Each register holds two 64-bit partial sums
        * (they are only ever updated with paddq); nh_sse2 adds the two halves
        * together at the end to form one 64-bit sum per pass.
        */
     12#define		PASS0_SUMS	%xmm0
     13#define		PASS1_SUMS	%xmm1
     14#define		PASS2_SUMS	%xmm2
     15#define		PASS3_SUMS	%xmm3
       /*
        * Four consecutive 16-byte key strides kept in registers.  Which of
        * these plays the k0/k1/k2/k3 role rotates on every _nh_stride call.
        */
     16#define		K0		%xmm4
     17#define		K1		%xmm5
     18#define		K2		%xmm6
     19#define		K3		%xmm7
       /*
        * Scratch registers.  T1-T7 are clobbered by _nh_stride; T0 and T1 are
        * used by the final reduction in nh_sse2.
        */
     20#define		T0		%xmm8
     21#define		T1		%xmm9
     22#define		T2		%xmm10
     23#define		T3		%xmm11
     24#define		T4		%xmm12
     25#define		T5		%xmm13
     26#define		T6		%xmm14
     27#define		T7		%xmm15
       /*
        * The four arguments of nh_sse2(), named after the registers in which
        * the first four integer/pointer arguments arrive under the System V
        * AMD64 calling convention (rdi, rsi, rdx, rcx).
        */
     28#define		KEY		%rdi
     29#define		MESSAGE		%rsi
     30#define		MESSAGE_LEN	%rdx
     31#define		HASH		%rcx
     32
       /*
        * _nh_stride k0, k1, k2, k3, offset
        *
        * Process one 16-byte message stride for all four passes.
        *
        *   k0, k1, k2:	xmm registers already holding the key strides used by
        *		passes 0, 1 and 2 for this message stride.
        *   k3:		xmm register that is (re)loaded here from \offset(KEY)
        *		and used by pass 3.
        *   offset:	byte displacement applied to both MESSAGE and KEY.
        *
        * For each pass n, the four 32-bit message words m0..m3 are added
        * (mod 2^32) to the four key words of \kn, and the two 64-bit products
        * (m0+k0)*(m2+k2) and (m1+k1)*(m3+k3) are added into the low and high
        * quadword of PASSn_SUMS respectively.
        *
        * Clobbers T1-T7 and \k0: \k0 is overwritten with the pass-0 products
        * and no longer holds key material afterwards.  nh_sse2 passes that
        * register as \k3 on the next stride, where it is reloaded.
        * KEY, MESSAGE and MESSAGE_LEN are not modified.
        */
     33.macro _nh_stride	k0, k1, k2, k3, offset
     34
     35	// Load next message stride
     36	movdqu		\offset(MESSAGE), T1
     37
     38	// Load next key stride
       	// (unaligned 16-byte load; nh_sse2 has already advanced KEY by 0x30,
       	// so this is the stride three positions after the one held in \k0)
     39	movdqu		\offset(KEY), \k3
     40
     41	// Add message words to key words
       	// (four independent 32-bit lane additions per pass; results end up in
       	// \k0, T1, T2, T3 for passes 0..3)
     42	movdqa		T1, T2
     43	movdqa		T1, T3
     44	paddd		T1, \k0    // reuse k0 to avoid a move
     45	paddd		\k1, T1
     46	paddd		\k2, T2
     47	paddd		\k3, T3
     48
     49	// Multiply 32x32 => 64 and accumulate
       	// pmuludq multiplies the low dword of each quadword.  pshufd $0x10
       	// places source dwords 0 and 1 in those positions, pshufd $0x32 places
       	// dwords 2 and 3 there, so each pmuludq yields (w0*w2, w1*w3) as two
       	// 64-bit products.
     50	pshufd		$0x10, \k0, T4
     51	pshufd		$0x32, \k0, \k0
     52	pshufd		$0x10, T1, T5
     53	pshufd		$0x32, T1, T1
     54	pshufd		$0x10, T2, T6
     55	pshufd		$0x32, T2, T2
     56	pshufd		$0x10, T3, T7
     57	pshufd		$0x32, T3, T3
     58	pmuludq		T4, \k0
     59	pmuludq		T5, T1
     60	pmuludq		T6, T2
     61	pmuludq		T7, T3
       	// 64-bit lane-wise accumulation (wraps mod 2^64)
     62	paddq		\k0, PASS0_SUMS
     63	paddq		T1, PASS1_SUMS
     64	paddq		T2, PASS2_SUMS
     65	paddq		T3, PASS3_SUMS
     66.endm
     67
     68/*
     69 * void nh_sse2(const u32 *key, const u8 *message, size_t message_len,
     70 *		u8 hash[NH_HASH_BYTES])
     71 *
     72 * It's guaranteed that message_len % 16 == 0.
        *
        * Arguments arrive in KEY (%rdi), MESSAGE (%rsi), MESSAGE_LEN (%rdx) and
        * HASH (%rcx).
        *
        * The message is consumed in 16-byte strides.  Three key strides are
        * loaded up front and one more is loaded per message stride, so the
        * code reads message_len + 0x30 bytes starting at 'key'.  All key,
        * message and hash accesses use movdqu, so no 16-byte alignment is
        * required by this code.
        *
        * On return, 32 bytes have been written to 'hash': the 64-bit sums of
        * passes 0, 1, 2 and 3, in that order.
        *
        * Modifies KEY, MESSAGE, MESSAGE_LEN, %xmm0-%xmm15 (except T0 only in
        * the final reduction) and the flags; uses no stack.
     73 */
     74SYM_FUNC_START(nh_sse2)
     75
       	// Preload the first three key strides, then bias KEY by 0x30 so that
       	// \offset(KEY) inside _nh_stride addresses the fourth stride needed
       	// for the message stride at the same \offset.
     76	movdqu		0x00(KEY), K0
     77	movdqu		0x10(KEY), K1
     78	movdqu		0x20(KEY), K2
     79	add		$0x30, KEY
       	// Zero the four accumulators
     80	pxor		PASS0_SUMS, PASS0_SUMS
     81	pxor		PASS1_SUMS, PASS1_SUMS
     82	pxor		PASS2_SUMS, PASS2_SUMS
     83	pxor		PASS3_SUMS, PASS3_SUMS
     84
       	// Main loop: 0x40 message bytes (four strides) per iteration.
       	// MESSAGE_LEN is kept biased by -0x40 so the loop test is the flags
       	// result of the subtraction.  jl/jge are signed branches, i.e. the
       	// length is treated as a signed 64-bit value here.
     85	sub		$0x40, MESSAGE_LEN
     86	jl		.Lloop4_done
     87.Lloop4:
       	// The key registers rotate by one role per stride; after four strides
       	// K0..K3 are back in their original roles for the next iteration.
     88	_nh_stride	K0, K1, K2, K3, 0x00
     89	_nh_stride	K1, K2, K3, K0, 0x10
     90	_nh_stride	K2, K3, K0, K1, 0x20
     91	_nh_stride	K3, K0, K1, K2, 0x30
     92	add		$0x40, KEY
     93	add		$0x40, MESSAGE
     94	sub		$0x40, MESSAGE_LEN
     95	jge		.Lloop4
     96
     97.Lloop4_done:
       	// MESSAGE_LEN is now (bytes remaining - 0x40); masking with 0x3f
       	// recovers the bytes remaining.  With message_len % 16 == 0 that is
       	// 0x00, 0x10, 0x20 or 0x30, i.e. at most three more strides.
     98	and		$0x3f, MESSAGE_LEN
     99	jz		.Ldone
    100	_nh_stride	K0, K1, K2, K3, 0x00
    101
    102	sub		$0x10, MESSAGE_LEN
    103	jz		.Ldone
    104	_nh_stride	K1, K2, K3, K0, 0x10
    105
    106	sub		$0x10, MESSAGE_LEN
    107	jz		.Ldone
    108	_nh_stride	K2, K3, K0, K1, 0x20
    109
    110.Ldone:
    111	// Sum the accumulators for each pass, then store the sums to 'hash'
       	// (_A = low quadword, _B = high quadword of an accumulator; the
       	// unpacks transpose pairs of accumulators so one paddq adds A+B for
       	// two passes at once)
    112	movdqa		PASS0_SUMS, T0
    113	movdqa		PASS2_SUMS, T1
    114	punpcklqdq	PASS1_SUMS, T0		// => (PASS0_SUM_A PASS1_SUM_A)
    115	punpcklqdq	PASS3_SUMS, T1		// => (PASS2_SUM_A PASS3_SUM_A)
    116	punpckhqdq	PASS1_SUMS, PASS0_SUMS	// => (PASS0_SUM_B PASS1_SUM_B)
    117	punpckhqdq	PASS3_SUMS, PASS2_SUMS	// => (PASS2_SUM_B PASS3_SUM_B)
    118	paddq		PASS0_SUMS, T0
    119	paddq		PASS2_SUMS, T1
    120	movdqu		T0, 0x00(HASH)
    121	movdqu		T1, 0x10(HASH)
       	// NOTE(review): RET is not defined in this file; presumably the
       	// function-return macro provided via <linux/linkage.h> - confirm.
    122	RET
    123SYM_FUNC_END(nh_sse2)