xor_avx.h
/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef _ASM_X86_XOR_AVX_H
#define _ASM_X86_XOR_AVX_H

/*
 * Optimized RAID-5 checksumming functions for AVX
 *
 * Copyright (C) 2012 Intel Corporation
 * Author: Jim Kukunas <james.t.kukunas@linux.intel.com>
 *
 * Based on Ingo Molnar and Zach Brown's respective MMX and SSE routines
 */

#include <linux/compiler.h>
#include <asm/fpu/api.h>

#define BLOCK4(i) \
		BLOCK(32 * i, 0) \
		BLOCK(32 * (i + 1), 1) \
		BLOCK(32 * (i + 2), 2) \
		BLOCK(32 * (i + 3), 3)

#define BLOCK16() \
		BLOCK4(0) \
		BLOCK4(4) \
		BLOCK4(8) \
		BLOCK4(12)

static void xor_avx_2(unsigned long bytes, unsigned long * __restrict p0,
		      const unsigned long * __restrict p1)
{
	unsigned long lines = bytes >> 9;

	kernel_fpu_begin();

	while (lines--) {
#undef BLOCK
#define BLOCK(i, reg) \
do { \
	asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p1[i / sizeof(*p1)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p0[i / sizeof(*p0)])); \
	asm volatile("vmovdqa %%ymm" #reg ", %0" : \
		"=m" (p0[i / sizeof(*p0)])); \
} while (0);

		BLOCK16()

		p0 = (unsigned long *)((uintptr_t)p0 + 512);
		p1 = (unsigned long *)((uintptr_t)p1 + 512);
	}

	kernel_fpu_end();
}

static void xor_avx_3(unsigned long bytes, unsigned long * __restrict p0,
		      const unsigned long * __restrict p1,
		      const unsigned long * __restrict p2)
{
	unsigned long lines = bytes >> 9;

	kernel_fpu_begin();

	while (lines--) {
#undef BLOCK
#define BLOCK(i, reg) \
do { \
	asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p2[i / sizeof(*p2)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p1[i / sizeof(*p1)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p0[i / sizeof(*p0)])); \
	asm volatile("vmovdqa %%ymm" #reg ", %0" : \
		"=m" (p0[i / sizeof(*p0)])); \
} while (0);

		BLOCK16()

		p0 = (unsigned long *)((uintptr_t)p0 + 512);
		p1 = (unsigned long *)((uintptr_t)p1 + 512);
		p2 = (unsigned long *)((uintptr_t)p2 + 512);
	}

	kernel_fpu_end();
}

static void xor_avx_4(unsigned long bytes, unsigned long * __restrict p0,
		      const unsigned long * __restrict p1,
		      const unsigned long * __restrict p2,
		      const unsigned long * __restrict p3)
{
	unsigned long lines = bytes >> 9;

	kernel_fpu_begin();

	while (lines--) {
#undef BLOCK
#define BLOCK(i, reg) \
do { \
	asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p3[i / sizeof(*p3)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p2[i / sizeof(*p2)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p1[i / sizeof(*p1)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p0[i / sizeof(*p0)])); \
	asm volatile("vmovdqa %%ymm" #reg ", %0" : \
		"=m" (p0[i / sizeof(*p0)])); \
} while (0);

		BLOCK16();

		p0 = (unsigned long *)((uintptr_t)p0 + 512);
		p1 = (unsigned long *)((uintptr_t)p1 + 512);
		p2 = (unsigned long *)((uintptr_t)p2 + 512);
		p3 = (unsigned long *)((uintptr_t)p3 + 512);
	}

	kernel_fpu_end();
}

static void xor_avx_5(unsigned long bytes, unsigned long * __restrict p0,
		      const unsigned long * __restrict p1,
		      const unsigned long * __restrict p2,
		      const unsigned long * __restrict p3,
		      const unsigned long * __restrict p4)
{
	unsigned long lines = bytes >> 9;

	kernel_fpu_begin();

	while (lines--) {
#undef BLOCK
#define BLOCK(i, reg) \
do { \
	asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p4[i / sizeof(*p4)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p3[i / sizeof(*p3)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p2[i / sizeof(*p2)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p1[i / sizeof(*p1)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p0[i / sizeof(*p0)])); \
	asm volatile("vmovdqa %%ymm" #reg ", %0" : \
		"=m" (p0[i / sizeof(*p0)])); \
} while (0);

		BLOCK16()

		p0 = (unsigned long *)((uintptr_t)p0 + 512);
		p1 = (unsigned long *)((uintptr_t)p1 + 512);
		p2 = (unsigned long *)((uintptr_t)p2 + 512);
		p3 = (unsigned long *)((uintptr_t)p3 + 512);
		p4 = (unsigned long *)((uintptr_t)p4 + 512);
	}

	kernel_fpu_end();
}

static struct xor_block_template xor_block_avx = {
	.name = "avx",
	.do_2 = xor_avx_2,
	.do_3 = xor_avx_3,
	.do_4 = xor_avx_4,
	.do_5 = xor_avx_5,
};

#define AVX_XOR_SPEED \
do { \
	if (boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_OSXSAVE)) \
		xor_speed(&xor_block_avx); \
} while (0)

#define AVX_SELECT(FASTEST) \
	(boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_OSXSAVE) ? &xor_block_avx : FASTEST)

#endif
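/*
 * Usage sketch (appended for illustration, not part of the header above):
 * a caller in the style of <asm/xor.h> benchmarks the candidate templates
 * with xor_speed() and then picks one, falling back to FASTEST when the CPU
 * lacks AVX or OSXSAVE.  The XOR_TRY_TEMPLATES and XOR_SELECT_TEMPLATE hook
 * names and the xor_block_8regs fallback are assumptions borrowed from that
 * pattern; only AVX_XOR_SPEED, AVX_SELECT and xor_block_avx come from this
 * file.
 */
#if 0	/* illustrative only */
#define XOR_TRY_TEMPLATES			\
do {						\
	AVX_XOR_SPEED;				\
	xor_speed(&xor_block_8regs);		\
} while (0)

/* Prefer the AVX template whenever AVX and OSXSAVE are both advertised. */
#define XOR_SELECT_TEMPLATE(FASTEST)		\
	AVX_SELECT(FASTEST)
#endif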