cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

recov_avx512.c (9583B)


      1// SPDX-License-Identifier: GPL-2.0-only
      2/*
      3 * Copyright (C) 2016 Intel Corporation
      4 *
      5 * Author: Gayatri Kammela <gayatri.kammela@intel.com>
      6 * Author: Megha Dey <megha.dey@linux.intel.com>
      7 */
      8
      9#ifdef CONFIG_AS_AVX512
     10
     11#include <linux/raid/pq.h>
     12#include "x86.h"
     13
     14static int raid6_has_avx512(void)
     15{
     16	return boot_cpu_has(X86_FEATURE_AVX2) &&
     17		boot_cpu_has(X86_FEATURE_AVX) &&
     18		boot_cpu_has(X86_FEATURE_AVX512F) &&
     19		boot_cpu_has(X86_FEATURE_AVX512BW) &&
     20		boot_cpu_has(X86_FEATURE_AVX512VL) &&
     21		boot_cpu_has(X86_FEATURE_AVX512DQ);
     22}
     23
/*
 * Recover two failed data blocks with AVX-512.
 *
 * @disks: total disk count; P is ptrs[disks-2], Q is ptrs[disks-1]
 * @bytes: bytes per block.  Consumed in fixed strides (128 on x86-64,
 *	   64 otherwise), so it must be a multiple of the stride, and
 *	   the block pointers must be 64-byte aligned (vmovdqa64) —
 *	   TODO confirm callers guarantee both.
 * @faila, @failb: indices of the two failed data blocks.  The
 *	   raid6_gfexi[failb-faila] lookup implies failb > faila —
 *	   presumably enforced by the caller; verify.
 * @ptrs:  per-disk block pointer table (data blocks, then P, then Q)
 */
static void raid6_2data_recov_avx512(int disks, size_t bytes, int faila,
				     int failb, void **ptrs)
{
	u8 *p, *q, *dp, *dq;
	const u8 *pbmul;	/* P multiplier table for B data */
	const u8 *qmul;		/* Q multiplier table (for both) */
	const u8 x0f = 0x0f;	/* low-nibble mask for table lookups */

	p = (u8 *)ptrs[disks-2];
	q = (u8 *)ptrs[disks-1];

	/*
	 * Compute syndrome with zero for the missing data pages
	 * Use the dead data pages as temporary storage for
	 * delta p and delta q
	 */

	dp = (u8 *)ptrs[faila];
	ptrs[faila] = (void *)raid6_empty_zero_page;
	ptrs[disks-2] = dp;
	dq = (u8 *)ptrs[failb];
	ptrs[failb] = (void *)raid6_empty_zero_page;
	ptrs[disks-1] = dq;

	/* dp/dq now receive P'/Q' computed over the surviving data */
	raid6_call.gen_syndrome(disks, bytes, ptrs);

	/* Restore pointer table */
	ptrs[faila]   = dp;
	ptrs[failb]   = dq;
	ptrs[disks-2] = p;
	ptrs[disks-1] = q;

	/*
	 * Now, pick the proper data tables.  Each raid6_vgfmul entry is
	 * a pair of 16-byte tables (low nibble, high nibble) realising a
	 * GF(256) constant multiply via vpshufb.
	 */
	pbmul = raid6_vgfmul[raid6_gfexi[failb-faila]];
	qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^
		raid6_gfexp[failb]]];

	kernel_fpu_begin();

	/* zmm7 = 64 copies of the low-nibble mask 0x0f */
	asm volatile("vpbroadcastb %0, %%zmm7" : : "m" (x0f));

	while (bytes) {
#ifdef CONFIG_X86_64
		/*
		 * 64-bit path: process two 64-byte lanes (offsets 0 and
		 * 64) per iteration, 128 bytes total.
		 *
		 * NOTE(review): the loads/stores in this loop name
		 * memory via input-only "m" constraints and rely on
		 * asm volatile ordering, matching upstream lib/raid6
		 * style — confirm against mainline before changing.
		 */
		asm volatile("vmovdqa64 %0, %%zmm1\n\t"
			     "vmovdqa64 %1, %%zmm9\n\t"
			     "vmovdqa64 %2, %%zmm0\n\t"
			     "vmovdqa64 %3, %%zmm8\n\t"
			     "vpxorq %4, %%zmm1, %%zmm1\n\t"
			     "vpxorq %5, %%zmm9, %%zmm9\n\t"
			     "vpxorq %6, %%zmm0, %%zmm0\n\t"
			     "vpxorq %7, %%zmm8, %%zmm8"
			     :
			     : "m" (q[0]), "m" (q[64]), "m" (p[0]),
			       "m" (p[64]), "m" (dq[0]), "m" (dq[64]),
			       "m" (dp[0]), "m" (dp[64]));

		/*
		 * 1 = dq[0]  ^ q[0]
		 * 9 = dq[64] ^ q[64]
		 * 0 = dp[0]  ^ p[0]
		 * 8 = dp[64] ^ p[64]
		 */

		/* broadcast the two 16-byte qmul halves to full zmm */
		asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
			     "vbroadcasti64x2 %1, %%zmm5"
			     :
			     : "m" (qmul[0]), "m" (qmul[16]));

		/*
		 * GF(256) multiply by qmul: split each byte into low and
		 * high nibbles (mask with zmm7, arithmetic shift by 4),
		 * look each nibble up via vpshufb, and xor the halves.
		 */
		asm volatile("vpsraw $4, %%zmm1, %%zmm3\n\t"
			     "vpsraw $4, %%zmm9, %%zmm12\n\t"
			     "vpandq %%zmm7, %%zmm1, %%zmm1\n\t"
			     "vpandq %%zmm7, %%zmm9, %%zmm9\n\t"
			     "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm12, %%zmm12\n\t"
			     "vpshufb %%zmm9, %%zmm4, %%zmm14\n\t"
			     "vpshufb %%zmm1, %%zmm4, %%zmm4\n\t"
			     "vpshufb %%zmm12, %%zmm5, %%zmm15\n\t"
			     "vpshufb %%zmm3, %%zmm5, %%zmm5\n\t"
			     "vpxorq %%zmm14, %%zmm15, %%zmm15\n\t"
			     "vpxorq %%zmm4, %%zmm5, %%zmm5"
			     :
			     : );

		/*
		 * 5 = qx[0]
		 * 15 = qx[64]
		 */

		/* same nibble-lookup multiply, now with pbmul on dp^p */
		asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
			     "vbroadcasti64x2 %1, %%zmm1\n\t"
			     "vpsraw $4, %%zmm0, %%zmm2\n\t"
			     "vpsraw $4, %%zmm8, %%zmm6\n\t"
			     "vpandq %%zmm7, %%zmm0, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm8, %%zmm14\n\t"
			     "vpandq %%zmm7, %%zmm2, %%zmm2\n\t"
			     "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
			     "vpshufb %%zmm14, %%zmm4, %%zmm12\n\t"
			     "vpshufb %%zmm3, %%zmm4, %%zmm4\n\t"
			     "vpshufb %%zmm6, %%zmm1, %%zmm13\n\t"
			     "vpshufb %%zmm2, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm4, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm12, %%zmm13, %%zmm13"
			     :
			     : "m" (pbmul[0]), "m" (pbmul[16]));

		/*
		 * 1  = pbmul[px[0]]
		 * 13 = pbmul[px[64]]
		 */
		asm volatile("vpxorq %%zmm5, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm15, %%zmm13, %%zmm13"
			     :
			     : );

		/*
		 * 1 = db = DQ
		 * 13 = db[64] = DQ[64]
		 */
		asm volatile("vmovdqa64 %%zmm1, %0\n\t"
			     "vmovdqa64 %%zmm13,%1\n\t"
			     "vpxorq %%zmm1, %%zmm0, %%zmm0\n\t"
			     "vpxorq %%zmm13, %%zmm8, %%zmm8"
			     :
			     : "m" (dq[0]), "m" (dq[64]));

		/* store the recovered A blocks (DA = DB ^ (dp ^ p)) */
		asm volatile("vmovdqa64 %%zmm0, %0\n\t"
			     "vmovdqa64 %%zmm8, %1"
			     :
			     : "m" (dp[0]), "m" (dp[64]));

		bytes -= 128;
		p += 128;
		q += 128;
		dp += 128;
		dq += 128;
#else
		/* 32-bit path: one 64-byte lane per iteration */
		asm volatile("vmovdqa64 %0, %%zmm1\n\t"
			     "vmovdqa64 %1, %%zmm0\n\t"
			     "vpxorq %2, %%zmm1, %%zmm1\n\t"
			     "vpxorq %3, %%zmm0, %%zmm0"
			     :
			     : "m" (*q), "m" (*p), "m"(*dq), "m" (*dp));

		/* 1 = dq ^ q;  0 = dp ^ p */

		asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
			     "vbroadcasti64x2 %1, %%zmm5"
			     :
			     : "m" (qmul[0]), "m" (qmul[16]));

		/*
		 * 1 = dq ^ q
		 * 3 = dq ^ p >> 4
		 */
		asm volatile("vpsraw $4, %%zmm1, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm1, %%zmm1\n\t"
			     "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
			     "vpshufb %%zmm1, %%zmm4, %%zmm4\n\t"
			     "vpshufb %%zmm3, %%zmm5, %%zmm5\n\t"
			     "vpxorq %%zmm4, %%zmm5, %%zmm5"
			     :
			     : );

		/* 5 = qx */

		asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
			     "vbroadcasti64x2 %1, %%zmm1"
			     :
			     : "m" (pbmul[0]), "m" (pbmul[16]));

		asm volatile("vpsraw $4, %%zmm0, %%zmm2\n\t"
			     "vpandq %%zmm7, %%zmm0, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm2, %%zmm2\n\t"
			     "vpshufb %%zmm3, %%zmm4, %%zmm4\n\t"
			     "vpshufb %%zmm2, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm4, %%zmm1, %%zmm1"
			     :
			     : );

		/* 1 = pbmul[px] */
		asm volatile("vpxorq %%zmm5, %%zmm1, %%zmm1\n\t"
			     /* 1 = db = DQ */
			     "vmovdqa64 %%zmm1, %0\n\t"
			     :
			     : "m" (dq[0]));

		asm volatile("vpxorq %%zmm1, %%zmm0, %%zmm0\n\t"
			     "vmovdqa64 %%zmm0, %0"
			     :
			     : "m" (dp[0]));

		bytes -= 64;
		p += 64;
		q += 64;
		dp += 64;
		dq += 64;
#endif
	}

	kernel_fpu_end();
}
    226
/*
 * Recover one failed data block plus P with AVX-512.
 *
 * @disks: total disk count; P is ptrs[disks-2], Q is ptrs[disks-1]
 * @bytes: bytes per block.  Consumed in fixed strides (128 on x86-64,
 *	   64 otherwise); must be a multiple of the stride and the
 *	   block pointers 64-byte aligned (vmovdqa64) — TODO confirm
 *	   callers guarantee both.
 * @faila: index of the failed data block
 * @ptrs:  per-disk block pointer table (data blocks, then P, then Q)
 *
 * The data block is rebuilt from Q (D = qmul[Q ^ Q'] where Q' is the
 * syndrome over the surviving data), then P is recomputed as P' ^ D.
 */
static void raid6_datap_recov_avx512(int disks, size_t bytes, int faila,
				     void **ptrs)
{
	u8 *p, *q, *dq;
	const u8 *qmul;		/* Q multiplier table */
	const u8 x0f = 0x0f;	/* low-nibble mask for table lookups */

	p = (u8 *)ptrs[disks-2];
	q = (u8 *)ptrs[disks-1];

	/*
	 * Compute syndrome with zero for the missing data page
	 * Use the dead data page as temporary storage for delta q
	 */

	dq = (u8 *)ptrs[faila];
	ptrs[faila] = (void *)raid6_empty_zero_page;
	ptrs[disks-1] = dq;

	raid6_call.gen_syndrome(disks, bytes, ptrs);

	/* Restore pointer table */
	ptrs[faila]   = dq;
	ptrs[disks-1] = q;

	/*
	 * Now, pick the proper data tables: a pair of 16-byte nibble
	 * lookup tables implementing the GF(256) constant multiply by
	 * the inverse of g^faila.
	 */
	qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];

	kernel_fpu_begin();

	/* zmm7 = 64 copies of the low-nibble mask 0x0f */
	asm volatile("vpbroadcastb %0, %%zmm7" : : "m" (x0f));

	while (bytes) {
#ifdef CONFIG_X86_64
		/*
		 * 64-bit path: two 64-byte lanes (offsets 0 and 64) per
		 * iteration.  As in the 2data routine, stores use
		 * input-only "m" constraints per upstream style.
		 */
		asm volatile("vmovdqa64 %0, %%zmm3\n\t"
			     "vmovdqa64 %1, %%zmm8\n\t"
			     "vpxorq %2, %%zmm3, %%zmm3\n\t"
			     "vpxorq %3, %%zmm8, %%zmm8"
			     :
			     : "m" (dq[0]), "m" (dq[64]), "m" (q[0]),
			       "m" (q[64]));

		/*
		 * 3 = q[0] ^ dq[0]
		 * 8 = q[64] ^ dq[64]
		 */
		/* broadcast qmul halves; keep copies for the second lane */
		asm volatile("vbroadcasti64x2 %0, %%zmm0\n\t"
			     "vmovapd %%zmm0, %%zmm13\n\t"
			     "vbroadcasti64x2 %1, %%zmm1\n\t"
			     "vmovapd %%zmm1, %%zmm14"
			     :
			     : "m" (qmul[0]), "m" (qmul[16]));

		/*
		 * GF(256) multiply by qmul: nibble-split (mask/shift),
		 * vpshufb table lookups, xor of the two halves.
		 */
		asm volatile("vpsraw $4, %%zmm3, %%zmm6\n\t"
			     "vpsraw $4, %%zmm8, %%zmm12\n\t"
			     "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm8, %%zmm8\n\t"
			     "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
			     "vpandq %%zmm7, %%zmm12, %%zmm12\n\t"
			     "vpshufb %%zmm3, %%zmm0, %%zmm0\n\t"
			     "vpshufb %%zmm8, %%zmm13, %%zmm13\n\t"
			     "vpshufb %%zmm6, %%zmm1, %%zmm1\n\t"
			     "vpshufb %%zmm12, %%zmm14, %%zmm14\n\t"
			     "vpxorq %%zmm0, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm13, %%zmm14, %%zmm14"
			     :
			     : );

		/*
		 * 1  = qmul[q[0]  ^ dq[0]]
		 * 14 = qmul[q[64] ^ dq[64]]
		 */
		asm volatile("vmovdqa64 %0, %%zmm2\n\t"
			     "vmovdqa64 %1, %%zmm12\n\t"
			     "vpxorq %%zmm1, %%zmm2, %%zmm2\n\t"
			     "vpxorq %%zmm14, %%zmm12, %%zmm12"
			     :
			     : "m" (p[0]), "m" (p[64]));

		/*
		 * 2  = p[0]  ^ qmul[q[0]  ^ dq[0]]
		 * 12 = p[64] ^ qmul[q[64] ^ dq[64]]
		 */

		/* store recovered data into dq and corrected P into p */
		asm volatile("vmovdqa64 %%zmm1, %0\n\t"
			     "vmovdqa64 %%zmm14, %1\n\t"
			     "vmovdqa64 %%zmm2, %2\n\t"
			     "vmovdqa64 %%zmm12,%3"
			     :
			     : "m" (dq[0]), "m" (dq[64]), "m" (p[0]),
			       "m" (p[64]));

		bytes -= 128;
		p += 128;
		q += 128;
		dq += 128;
#else
		/* 32-bit path: one 64-byte lane per iteration */
		asm volatile("vmovdqa64 %0, %%zmm3\n\t"
			     "vpxorq %1, %%zmm3, %%zmm3"
			     :
			     : "m" (dq[0]), "m" (q[0]));

		/* 3 = q ^ dq */

		asm volatile("vbroadcasti64x2 %0, %%zmm0\n\t"
			     "vbroadcasti64x2 %1, %%zmm1"
			     :
			     : "m" (qmul[0]), "m" (qmul[16]));

		asm volatile("vpsraw $4, %%zmm3, %%zmm6\n\t"
			     "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
			     "vpshufb %%zmm3, %%zmm0, %%zmm0\n\t"
			     "vpshufb %%zmm6, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm0, %%zmm1, %%zmm1"
			     :
			     : );

		/* 1 = qmul[q ^ dq] */

		asm volatile("vmovdqa64 %0, %%zmm2\n\t"
			     "vpxorq %%zmm1, %%zmm2, %%zmm2"
			     :
			     : "m" (p[0]));

		/* 2 = p ^ qmul[q ^ dq] */

		asm volatile("vmovdqa64 %%zmm1, %0\n\t"
			     "vmovdqa64 %%zmm2, %1"
			     :
			     : "m" (dq[0]), "m" (p[0]));

		bytes -= 64;
		p += 64;
		q += 64;
		dq += 64;
#endif
	}

	kernel_fpu_end();
}
    368
/*
 * Descriptor exporting the AVX-512 recovery routines to the raid6
 * core.  .valid gates selection on the feature bits checked by
 * raid6_has_avx512(); the name distinguishes the 2x-unrolled 64-bit
 * path from the single-lane 32-bit path.
 */
const struct raid6_recov_calls raid6_recov_avx512 = {
	.data2 = raid6_2data_recov_avx512,
	.datap = raid6_datap_recov_avx512,
	.valid = raid6_has_avx512,
#ifdef CONFIG_X86_64
	.name = "avx512x2",	/* 128 bytes per loop iteration */
#else
	.name = "avx512x1",	/* 64 bytes per loop iteration */
#endif
	.priority = 3,		/* presumably ranks above SSE/AVX2 recovery — confirm against raid6 core selection logic */
};
    380
    381#else
    382#warning "your version of binutils lacks AVX512 support"
    383#endif