cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

recov_avx2.c (8970B)


// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2012 Intel Corporation
 * Author: Jim Kukunas <james.t.kukunas@linux.intel.com>
 */

#include <linux/raid/pq.h>
#include "x86.h"

static int raid6_has_avx2(void)
{
	return boot_cpu_has(X86_FEATURE_AVX2) &&
		boot_cpu_has(X86_FEATURE_AVX);
}

static void raid6_2data_recov_avx2(int disks, size_t bytes, int faila,
		int failb, void **ptrs)
{
	u8 *p, *q, *dp, *dq;
	const u8 *pbmul;	/* P multiplier table for B data */
	const u8 *qmul;		/* Q multiplier table (for both) */
	const u8 x0f = 0x0f;

	p = (u8 *)ptrs[disks-2];
	q = (u8 *)ptrs[disks-1];

	/* Compute syndrome with zero for the missing data pages
	   Use the dead data pages as temporary storage for
	   delta p and delta q */
	dp = (u8 *)ptrs[faila];
	ptrs[faila] = (void *)raid6_empty_zero_page;
	ptrs[disks-2] = dp;
	dq = (u8 *)ptrs[failb];
	ptrs[failb] = (void *)raid6_empty_zero_page;
	ptrs[disks-1] = dq;

	raid6_call.gen_syndrome(disks, bytes, ptrs);

	/* Restore pointer table */
	ptrs[faila]   = dp;
	ptrs[failb]   = dq;
	ptrs[disks-2] = p;
	ptrs[disks-1] = q;

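	/*
	 * Recovery math (cf. lib/raid6/recov.c): after the syndrome pass
	 * above, dp/dq hold Pxy/Qxy, computed with the two failed disks A
	 * (faila) and B (failb) treated as zero, so in GF(256):
	 *   P ^ Pxy = A ^ B
	 *   Q ^ Qxy = g^faila * A ^ g^failb * B
	 * Solving for B:
	 *   B = (P ^ Pxy) * inv(g^(failb-faila) ^ 1)
	 *     ^ (Q ^ Qxy) * inv(g^faila ^ g^failb)
	 *   A = (P ^ Pxy) ^ B
	 * pbmul and qmul below are the nibble lookup tables for those two
	 * constant multipliers.
	 */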
	/* Now, pick the proper data tables */
	pbmul = raid6_vgfmul[raid6_gfexi[failb-faila]];
	qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^
		raid6_gfexp[failb]]];

	kernel_fpu_begin();

	/* ymm7 = x0f[32] */
	asm volatile("vpbroadcastb %0, %%ymm7" : : "m" (x0f));
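
	/*
	 * Each constant GF(256) multiply below uses a split-nibble lookup:
	 * raid6_vgfmul[c] holds two 16-byte tables (the products of c with
	 * every low nibble and with every high nibble), each broadcast to
	 * both 128-bit lanes with vbroadcasti128.  A byte is multiplied by
	 * isolating its low nibble with the 0x0f mask in ymm7 and its high
	 * nibble with a shift right by 4 plus the same mask, doing one
	 * vpshufb lookup per nibble, and XORing the two results.
	 */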

	while (bytes) {
#ifdef CONFIG_X86_64
		asm volatile("vmovdqa %0, %%ymm1" : : "m" (q[0]));
		asm volatile("vmovdqa %0, %%ymm9" : : "m" (q[32]));
		asm volatile("vmovdqa %0, %%ymm0" : : "m" (p[0]));
		asm volatile("vmovdqa %0, %%ymm8" : : "m" (p[32]));
		asm volatile("vpxor %0, %%ymm1, %%ymm1" : : "m" (dq[0]));
		asm volatile("vpxor %0, %%ymm9, %%ymm9" : : "m" (dq[32]));
		asm volatile("vpxor %0, %%ymm0, %%ymm0" : : "m" (dp[0]));
		asm volatile("vpxor %0, %%ymm8, %%ymm8" : : "m" (dp[32]));

		/*
		 * 1 = dq[0]  ^ q[0]
		 * 9 = dq[32] ^ q[32]
		 * 0 = dp[0]  ^ p[0]
		 * 8 = dp[32] ^ p[32]
		 */

		asm volatile("vbroadcasti128 %0, %%ymm4" : : "m" (qmul[0]));
		asm volatile("vbroadcasti128 %0, %%ymm5" : : "m" (qmul[16]));

		asm volatile("vpsraw $4, %ymm1, %ymm3");
		asm volatile("vpsraw $4, %ymm9, %ymm12");
		asm volatile("vpand %ymm7, %ymm1, %ymm1");
		asm volatile("vpand %ymm7, %ymm9, %ymm9");
		asm volatile("vpand %ymm7, %ymm3, %ymm3");
		asm volatile("vpand %ymm7, %ymm12, %ymm12");
		asm volatile("vpshufb %ymm9, %ymm4, %ymm14");
		asm volatile("vpshufb %ymm1, %ymm4, %ymm4");
		asm volatile("vpshufb %ymm12, %ymm5, %ymm15");
		asm volatile("vpshufb %ymm3, %ymm5, %ymm5");
		asm volatile("vpxor %ymm14, %ymm15, %ymm15");
		asm volatile("vpxor %ymm4, %ymm5, %ymm5");

		/*
		 * 5 = qx[0]
		 * 15 = qx[32]
		 */

		asm volatile("vbroadcasti128 %0, %%ymm4" : : "m" (pbmul[0]));
		asm volatile("vbroadcasti128 %0, %%ymm1" : : "m" (pbmul[16]));
		asm volatile("vpsraw $4, %ymm0, %ymm2");
		asm volatile("vpsraw $4, %ymm8, %ymm6");
		asm volatile("vpand %ymm7, %ymm0, %ymm3");
		asm volatile("vpand %ymm7, %ymm8, %ymm14");
		asm volatile("vpand %ymm7, %ymm2, %ymm2");
		asm volatile("vpand %ymm7, %ymm6, %ymm6");
		asm volatile("vpshufb %ymm14, %ymm4, %ymm12");
		asm volatile("vpshufb %ymm3, %ymm4, %ymm4");
		asm volatile("vpshufb %ymm6, %ymm1, %ymm13");
		asm volatile("vpshufb %ymm2, %ymm1, %ymm1");
		asm volatile("vpxor %ymm4, %ymm1, %ymm1");
		asm volatile("vpxor %ymm12, %ymm13, %ymm13");

		/*
		 * 1  = pbmul[px[0]]
		 * 13 = pbmul[px[32]]
		 */
		asm volatile("vpxor %ymm5, %ymm1, %ymm1");
		asm volatile("vpxor %ymm15, %ymm13, %ymm13");

		/*
		 * 1 = db = DQ
		 * 13 = db[32] = DQ[32]
		 */
		asm volatile("vmovdqa %%ymm1, %0" : "=m" (dq[0]));
		asm volatile("vmovdqa %%ymm13,%0" : "=m" (dq[32]));
		asm volatile("vpxor %ymm1, %ymm0, %ymm0");
		asm volatile("vpxor %ymm13, %ymm8, %ymm8");

		asm volatile("vmovdqa %%ymm0, %0" : "=m" (dp[0]));
		asm volatile("vmovdqa %%ymm8, %0" : "=m" (dp[32]));

		bytes -= 64;
		p += 64;
		q += 64;
		dp += 64;
		dq += 64;
#else
		asm volatile("vmovdqa %0, %%ymm1" : : "m" (*q));
		asm volatile("vmovdqa %0, %%ymm0" : : "m" (*p));
		asm volatile("vpxor %0, %%ymm1, %%ymm1" : : "m" (*dq));
		asm volatile("vpxor %0, %%ymm0, %%ymm0" : : "m" (*dp));

		/* 1 = dq ^ q;  0 = dp ^ p */

		asm volatile("vbroadcasti128 %0, %%ymm4" : : "m" (qmul[0]));
		asm volatile("vbroadcasti128 %0, %%ymm5" : : "m" (qmul[16]));

		/*
		 * 1 = dq ^ q
		 * 3 = (dq ^ q) >> 4
		 */
		asm volatile("vpsraw $4, %ymm1, %ymm3");
		asm volatile("vpand %ymm7, %ymm1, %ymm1");
		asm volatile("vpand %ymm7, %ymm3, %ymm3");
		asm volatile("vpshufb %ymm1, %ymm4, %ymm4");
		asm volatile("vpshufb %ymm3, %ymm5, %ymm5");
		asm volatile("vpxor %ymm4, %ymm5, %ymm5");

		/* 5 = qx */

		asm volatile("vbroadcasti128 %0, %%ymm4" : : "m" (pbmul[0]));
		asm volatile("vbroadcasti128 %0, %%ymm1" : : "m" (pbmul[16]));

		asm volatile("vpsraw $4, %ymm0, %ymm2");
		asm volatile("vpand %ymm7, %ymm0, %ymm3");
		asm volatile("vpand %ymm7, %ymm2, %ymm2");
		asm volatile("vpshufb %ymm3, %ymm4, %ymm4");
		asm volatile("vpshufb %ymm2, %ymm1, %ymm1");
		asm volatile("vpxor %ymm4, %ymm1, %ymm1");

		/* 1 = pbmul[px] */
		asm volatile("vpxor %ymm5, %ymm1, %ymm1");
		/* 1 = db = DQ */
		asm volatile("vmovdqa %%ymm1, %0" : "=m" (dq[0]));

		asm volatile("vpxor %ymm1, %ymm0, %ymm0");
		asm volatile("vmovdqa %%ymm0, %0" : "=m" (dp[0]));

		bytes -= 32;
		p += 32;
		q += 32;
		dp += 32;
		dq += 32;
#endif
	}

	kernel_fpu_end();
}

static void raid6_datap_recov_avx2(int disks, size_t bytes, int faila,
		void **ptrs)
{
	u8 *p, *q, *dq;
	const u8 *qmul;		/* Q multiplier table */
	const u8 x0f = 0x0f;

	p = (u8 *)ptrs[disks-2];
	q = (u8 *)ptrs[disks-1];

	/* Compute syndrome with zero for the missing data page
	   Use the dead data page as temporary storage for delta q */
	dq = (u8 *)ptrs[faila];
	ptrs[faila] = (void *)raid6_empty_zero_page;
	ptrs[disks-1] = dq;

	raid6_call.gen_syndrome(disks, bytes, ptrs);

	/* Restore pointer table */
	ptrs[faila]   = dq;
	ptrs[disks-1] = q;

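	/*
	 * gen_syndrome() above rebuilt P in place (with the failed data
	 * disk treated as zero) and left the partial syndrome Qa in dq.
	 * In GF(256), Q ^ Qa = g^faila * D, so the missing data is
	 * D = (Q ^ Qa) * inv(g^faila); qmul below is the nibble lookup
	 * table for that constant.  The loop also XORs D into P to finish
	 * the P reconstruction (cf. lib/raid6/recov.c).
	 */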
	/* Now, pick the proper data tables */
	qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];

	kernel_fpu_begin();

	asm volatile("vpbroadcastb %0, %%ymm7" : : "m" (x0f));

	while (bytes) {
#ifdef CONFIG_X86_64
		asm volatile("vmovdqa %0, %%ymm3" : : "m" (dq[0]));
		asm volatile("vmovdqa %0, %%ymm8" : : "m" (dq[32]));
		asm volatile("vpxor %0, %%ymm3, %%ymm3" : : "m" (q[0]));
		asm volatile("vpxor %0, %%ymm8, %%ymm8" : : "m" (q[32]));

		/*
		 * 3 = q[0] ^ dq[0]
		 * 8 = q[32] ^ dq[32]
		 */
		asm volatile("vbroadcasti128 %0, %%ymm0" : : "m" (qmul[0]));
		asm volatile("vmovapd %ymm0, %ymm13");
		asm volatile("vbroadcasti128 %0, %%ymm1" : : "m" (qmul[16]));
		asm volatile("vmovapd %ymm1, %ymm14");

		asm volatile("vpsraw $4, %ymm3, %ymm6");
		asm volatile("vpsraw $4, %ymm8, %ymm12");
		asm volatile("vpand %ymm7, %ymm3, %ymm3");
		asm volatile("vpand %ymm7, %ymm8, %ymm8");
		asm volatile("vpand %ymm7, %ymm6, %ymm6");
		asm volatile("vpand %ymm7, %ymm12, %ymm12");
		asm volatile("vpshufb %ymm3, %ymm0, %ymm0");
		asm volatile("vpshufb %ymm8, %ymm13, %ymm13");
		asm volatile("vpshufb %ymm6, %ymm1, %ymm1");
		asm volatile("vpshufb %ymm12, %ymm14, %ymm14");
		asm volatile("vpxor %ymm0, %ymm1, %ymm1");
		asm volatile("vpxor %ymm13, %ymm14, %ymm14");

		/*
		 * 1  = qmul[q[0]  ^ dq[0]]
		 * 14 = qmul[q[32] ^ dq[32]]
		 */
		asm volatile("vmovdqa %0, %%ymm2" : : "m" (p[0]));
		asm volatile("vmovdqa %0, %%ymm12" : : "m" (p[32]));
		asm volatile("vpxor %ymm1, %ymm2, %ymm2");
		asm volatile("vpxor %ymm14, %ymm12, %ymm12");

		/*
		 * 2  = p[0]  ^ qmul[q[0]  ^ dq[0]]
		 * 12 = p[32] ^ qmul[q[32] ^ dq[32]]
		 */

		asm volatile("vmovdqa %%ymm1, %0" : "=m" (dq[0]));
		asm volatile("vmovdqa %%ymm14, %0" : "=m" (dq[32]));
		asm volatile("vmovdqa %%ymm2, %0" : "=m" (p[0]));
		asm volatile("vmovdqa %%ymm12,%0" : "=m" (p[32]));

		bytes -= 64;
		p += 64;
		q += 64;
		dq += 64;
#else
		asm volatile("vmovdqa %0, %%ymm3" : : "m" (dq[0]));
		asm volatile("vpxor %0, %%ymm3, %%ymm3" : : "m" (q[0]));

		/* 3 = q ^ dq */

		asm volatile("vbroadcasti128 %0, %%ymm0" : : "m" (qmul[0]));
		asm volatile("vbroadcasti128 %0, %%ymm1" : : "m" (qmul[16]));

		asm volatile("vpsraw $4, %ymm3, %ymm6");
		asm volatile("vpand %ymm7, %ymm3, %ymm3");
		asm volatile("vpand %ymm7, %ymm6, %ymm6");
		asm volatile("vpshufb %ymm3, %ymm0, %ymm0");
		asm volatile("vpshufb %ymm6, %ymm1, %ymm1");
		asm volatile("vpxor %ymm0, %ymm1, %ymm1");

		/* 1 = qmul[q ^ dq] */

		asm volatile("vmovdqa %0, %%ymm2" : : "m" (p[0]));
		asm volatile("vpxor %ymm1, %ymm2, %ymm2");

		/* 2 = p ^ qmul[q ^ dq] */

		asm volatile("vmovdqa %%ymm1, %0" : "=m" (dq[0]));
		asm volatile("vmovdqa %%ymm2, %0" : "=m" (p[0]));

		bytes -= 32;
		p += 32;
		q += 32;
		dq += 32;
#endif
	}

	kernel_fpu_end();
}

const struct raid6_recov_calls raid6_recov_avx2 = {
	.data2 = raid6_2data_recov_avx2,
	.datap = raid6_datap_recov_avx2,
	.valid = raid6_has_avx2,
#ifdef CONFIG_X86_64
	.name = "avx2x2",
#else
	.name = "avx2x1",
#endif
	.priority = 2,
};
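
For reference, the per-byte computation that both AVX2 loops vectorize can be
sketched in plain C, mirroring the generic fallback in lib/raid6/recov.c. This
is only a sketch (declarations omitted); in the scalar code the pbmul/qmul
tables are full 256-entry multiplication tables rather than the 32-byte
raid6_vgfmul nibble tables used by the SIMD routines:

	/* Two failed data disks: rebuild B, then A = px ^ B. */
	while (bytes--) {
		px    = *p ^ *dp;		/* P ^ Pxy */
		qx    = qmul[*q ^ *dq];		/* (Q ^ Qxy) * inv(g^a ^ g^b) */
		*dq++ = db = pbmul[px] ^ qx;	/* reconstructed B */
		*dp++ = db ^ px;		/* reconstructed A */
		p++; q++;
	}

	/* One failed data disk plus P: rebuild D, then fold it into P. */
	while (bytes--) {
		*p++ ^= *dq = qmul[*q ^ *dq];	/* D = (Q ^ Qa) * inv(g^a) */
		q++; dq++;
	}

The AVX2 routines above perform exactly these XORs and table lookups 32 or 64
bytes at a time, with each constant multiplication carried out via the
split-nibble vpshufb scheme.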