cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack.
git clone https://git.sinitax.com/sinitax/cachepc-linux

sse2.c (16187B)


// SPDX-License-Identifier: GPL-2.0-or-later
/* -*- linux-c -*- ------------------------------------------------------- *
 *
 *   Copyright 2002 H. Peter Anvin - All Rights Reserved
 *
 * ----------------------------------------------------------------------- */

/*
 * raid6/sse2.c
 *
 * SSE-2 implementation of RAID-6 syndrome functions
 *
 */

#include <linux/raid/pq.h>
#include "x86.h"

static const struct raid6_sse_constants {
	u64 x1d[2];
} raid6_sse_constants __attribute__((aligned(16))) = {
	{ 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL },
};
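
/*
 * For reference, a scalar sketch of the GF(2^8) multiply-by-two that the
 * pcmpgtb/paddb/pand/pxor sequences below apply to sixteen bytes at a
 * time: double the byte and, if its top bit was set, reduce by xoring in
 * the low bits of the RAID-6 field polynomial (0x1d).  The helper below is
 * illustrative only and is not used by the SIMD routines.
 */
static inline u8 raid6_gf_mul2_sketch(u8 v)
{
	return (u8)((v << 1) ^ ((v & 0x80) ? 0x1d : 0x00));
}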

static int raid6_have_sse2(void)
{
	/* Not really boot_cpu but "all_cpus" */
	return boot_cpu_has(X86_FEATURE_MMX) &&
		boot_cpu_has(X86_FEATURE_FXSR) &&
		boot_cpu_has(X86_FEATURE_XMM) &&
		boot_cpu_has(X86_FEATURE_XMM2);
}

/*
 * Plain SSE2 implementation
 */
static void raid6_sse21_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
	asm volatile("pxor %xmm5,%xmm5");	/* Zero temp */

	for ( d = 0 ; d < bytes ; d += 16 ) {
		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
		asm volatile("movdqa %0,%%xmm2" : : "m" (dptr[z0][d])); /* P[0] */
		asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d]));
		asm volatile("movdqa %xmm2,%xmm4"); /* Q[0] */
		asm volatile("movdqa %0,%%xmm6" : : "m" (dptr[z0-1][d]));
		for ( z = z0-2 ; z >= 0 ; z-- ) {
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
			asm volatile("pcmpgtb %xmm4,%xmm5");
			asm volatile("paddb %xmm4,%xmm4");
			asm volatile("pand %xmm0,%xmm5");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm5,%xmm5");
			asm volatile("pxor %xmm6,%xmm2");
			asm volatile("pxor %xmm6,%xmm4");
			asm volatile("movdqa %0,%%xmm6" : : "m" (dptr[z][d]));
		}
		asm volatile("pcmpgtb %xmm4,%xmm5");
		asm volatile("paddb %xmm4,%xmm4");
		asm volatile("pand %xmm0,%xmm5");
		asm volatile("pxor %xmm5,%xmm4");
		asm volatile("pxor %xmm5,%xmm5");
		asm volatile("pxor %xmm6,%xmm2");
		asm volatile("pxor %xmm6,%xmm4");

		asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
		asm volatile("pxor %xmm2,%xmm2");
		asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
		asm volatile("pxor %xmm4,%xmm4");
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}
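
/*
 * In scalar terms, the loop above computes P as the plain xor of all data
 * blocks and Q as the Reed-Solomon syndrome via Horner's rule, multiplying
 * the running value by g = 2 in GF(2^8) before folding in each lower data
 * block.  A minimal per-byte sketch (illustrative only, building on the
 * raid6_gf_mul2_sketch() helper above; not part of this file's build):
 */
static inline void raid6_gen_syndrome_sketch(int disks, size_t bytes, u8 **dptr)
{
	u8 *p = dptr[disks-2];	/* XOR parity */
	u8 *q = dptr[disks-1];	/* RS syndrome */
	int z0 = disks - 3;	/* Highest data disk */
	size_t d;
	int z;

	for (d = 0; d < bytes; d++) {
		u8 wp = dptr[z0][d];
		u8 wq = wp;

		for (z = z0-1; z >= 0; z--) {
			wq = raid6_gf_mul2_sketch(wq) ^ dptr[z][d];
			wp ^= dptr[z][d];
		}
		p[d] = wp;
		q[d] = wq;
	}
}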


static void raid6_sse21_xor_syndrome(int disks, int start, int stop,
				     size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks-2];	/* XOR parity */
	q = dptr[disks-1];	/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));

	for ( d = 0 ; d < bytes ; d += 16 ) {
		asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d]));
		asm volatile("movdqa %0,%%xmm2" : : "m" (p[d]));
		asm volatile("pxor %xmm4,%xmm2");
		/* P/Q data pages */
		for ( z = z0-1 ; z >= start ; z-- ) {
			asm volatile("pxor %xmm5,%xmm5");
			asm volatile("pcmpgtb %xmm4,%xmm5");
			asm volatile("paddb %xmm4,%xmm4");
			asm volatile("pand %xmm0,%xmm5");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
			asm volatile("pxor %xmm5,%xmm2");
			asm volatile("pxor %xmm5,%xmm4");
		}
		/* P/Q left side optimization */
		for ( z = start-1 ; z >= 0 ; z-- ) {
			asm volatile("pxor %xmm5,%xmm5");
			asm volatile("pcmpgtb %xmm4,%xmm5");
			asm volatile("paddb %xmm4,%xmm4");
			asm volatile("pand %xmm0,%xmm5");
			asm volatile("pxor %xmm5,%xmm4");
		}
		asm volatile("pxor %0,%%xmm4" : : "m" (q[d]));
		/* Don't use movntdq for r/w memory area < cache line */
		asm volatile("movdqa %%xmm4,%0" : "=m" (q[d]));
		asm volatile("movdqa %%xmm2,%0" : "=m" (p[d]));
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}
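
/*
 * The xor_syndrome variants update existing P/Q for a read-modify-write of
 * the data disks in [start, stop] only.  The delta from those blocks is
 * xored straight into P, while the Q delta is additionally multiplied by
 * g^start in the data-free "left side" loop so that it lands at the right
 * power of g before being xored into Q.  A per-byte sketch under the same
 * illustrative assumptions as the helpers above:
 */
static inline void raid6_xor_syndrome_sketch(int disks, int start, int stop,
					     size_t bytes, u8 **dptr)
{
	u8 *p = dptr[disks-2];	/* XOR parity */
	u8 *q = dptr[disks-1];	/* RS syndrome */
	size_t d;
	int z;

	for (d = 0; d < bytes; d++) {
		u8 wp = dptr[stop][d];
		u8 wq = wp;

		for (z = stop-1; z >= start; z--) {
			wq = raid6_gf_mul2_sketch(wq) ^ dptr[z][d];
			wp ^= dptr[z][d];
		}
		for (z = start-1; z >= 0; z--)
			wq = raid6_gf_mul2_sketch(wq);
		p[d] ^= wp;
		q[d] ^= wq;
	}
}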

const struct raid6_calls raid6_sse2x1 = {
	raid6_sse21_gen_syndrome,
	raid6_sse21_xor_syndrome,
	raid6_have_sse2,
	"sse2x1",
	1			/* Has cache hints */
};
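
/*
 * The positional initializers above fill struct raid6_calls from
 * <linux/raid/pq.h>: the syndrome generator, the partial-update routine, a
 * validity check run at algorithm-selection time, the name reported by the
 * boot-time benchmark, and a final flag noting that the routines use cache
 * hints (non-temporal stores and prefetches).  Assuming the field names
 * gen_syndrome/xor_syndrome/valid/name/prefer, an equivalent
 * designated-initializer sketch would read:
 *
 *	const struct raid6_calls raid6_sse2x1 = {
 *		.gen_syndrome	= raid6_sse21_gen_syndrome,
 *		.xor_syndrome	= raid6_sse21_xor_syndrome,
 *		.valid		= raid6_have_sse2,
 *		.name		= "sse2x1",
 *		.prefer		= 1,
 *	};
 */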

/*
 * Unrolled-by-2 SSE2 implementation
 */
static void raid6_sse22_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
	asm volatile("pxor %xmm5,%xmm5"); /* Zero temp */
	asm volatile("pxor %xmm7,%xmm7"); /* Zero temp */

	/* We uniformly assume a single prefetch covers at least 32 bytes */
	for ( d = 0 ; d < bytes ; d += 32 ) {
		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
		asm volatile("movdqa %0,%%xmm2" : : "m" (dptr[z0][d]));    /* P[0] */
		asm volatile("movdqa %0,%%xmm3" : : "m" (dptr[z0][d+16])); /* P[1] */
		asm volatile("movdqa %xmm2,%xmm4"); /* Q[0] */
		asm volatile("movdqa %xmm3,%xmm6"); /* Q[1] */
		for ( z = z0-1 ; z >= 0 ; z-- ) {
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
			asm volatile("pcmpgtb %xmm4,%xmm5");
			asm volatile("pcmpgtb %xmm6,%xmm7");
			asm volatile("paddb %xmm4,%xmm4");
			asm volatile("paddb %xmm6,%xmm6");
			asm volatile("pand %xmm0,%xmm5");
			asm volatile("pand %xmm0,%xmm7");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm7,%xmm6");
			asm volatile("movdqa %0,%%xmm5" : : "m" (dptr[z][d]));
			asm volatile("movdqa %0,%%xmm7" : : "m" (dptr[z][d+16]));
			asm volatile("pxor %xmm5,%xmm2");
			asm volatile("pxor %xmm7,%xmm3");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm7,%xmm6");
			asm volatile("pxor %xmm5,%xmm5");
			asm volatile("pxor %xmm7,%xmm7");
		}
		asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
		asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16]));
		asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
		asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16]));
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

static void raid6_sse22_xor_syndrome(int disks, int start, int stop,
				     size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks-2];	/* XOR parity */
	q = dptr[disks-1];	/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));

	for ( d = 0 ; d < bytes ; d += 32 ) {
		asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d]));
		asm volatile("movdqa %0,%%xmm6" :: "m" (dptr[z0][d+16]));
		asm volatile("movdqa %0,%%xmm2" : : "m" (p[d]));
		asm volatile("movdqa %0,%%xmm3" : : "m" (p[d+16]));
		asm volatile("pxor %xmm4,%xmm2");
		asm volatile("pxor %xmm6,%xmm3");
		/* P/Q data pages */
		for ( z = z0-1 ; z >= start ; z-- ) {
			asm volatile("pxor %xmm5,%xmm5");
			asm volatile("pxor %xmm7,%xmm7");
			asm volatile("pcmpgtb %xmm4,%xmm5");
			asm volatile("pcmpgtb %xmm6,%xmm7");
			asm volatile("paddb %xmm4,%xmm4");
			asm volatile("paddb %xmm6,%xmm6");
			asm volatile("pand %xmm0,%xmm5");
			asm volatile("pand %xmm0,%xmm7");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm7,%xmm6");
			asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
			asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16]));
			asm volatile("pxor %xmm5,%xmm2");
			asm volatile("pxor %xmm7,%xmm3");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm7,%xmm6");
		}
		/* P/Q left side optimization */
		for ( z = start-1 ; z >= 0 ; z-- ) {
			asm volatile("pxor %xmm5,%xmm5");
			asm volatile("pxor %xmm7,%xmm7");
			asm volatile("pcmpgtb %xmm4,%xmm5");
			asm volatile("pcmpgtb %xmm6,%xmm7");
			asm volatile("paddb %xmm4,%xmm4");
			asm volatile("paddb %xmm6,%xmm6");
			asm volatile("pand %xmm0,%xmm5");
			asm volatile("pand %xmm0,%xmm7");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm7,%xmm6");
		}
		asm volatile("pxor %0,%%xmm4" : : "m" (q[d]));
		asm volatile("pxor %0,%%xmm6" : : "m" (q[d+16]));
		/* Don't use movntdq for r/w memory area < cache line */
		asm volatile("movdqa %%xmm4,%0" : "=m" (q[d]));
		asm volatile("movdqa %%xmm6,%0" : "=m" (q[d+16]));
		asm volatile("movdqa %%xmm2,%0" : "=m" (p[d]));
		asm volatile("movdqa %%xmm3,%0" : "=m" (p[d+16]));
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

const struct raid6_calls raid6_sse2x2 = {
	raid6_sse22_gen_syndrome,
	raid6_sse22_xor_syndrome,
	raid6_have_sse2,
	"sse2x2",
	1			/* Has cache hints */
};

#ifdef CONFIG_X86_64

/*
 * Unrolled-by-4 SSE2 implementation
 */
static void raid6_sse24_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("movdqa %0,%%xmm0" :: "m" (raid6_sse_constants.x1d[0]));
	asm volatile("pxor %xmm2,%xmm2");	/* P[0] */
	asm volatile("pxor %xmm3,%xmm3");	/* P[1] */
	asm volatile("pxor %xmm4,%xmm4");	/* Q[0] */
	asm volatile("pxor %xmm5,%xmm5");	/* Zero temp */
	asm volatile("pxor %xmm6,%xmm6");	/* Q[1] */
	asm volatile("pxor %xmm7,%xmm7");	/* Zero temp */
	asm volatile("pxor %xmm10,%xmm10");	/* P[2] */
	asm volatile("pxor %xmm11,%xmm11");	/* P[3] */
	asm volatile("pxor %xmm12,%xmm12");	/* Q[2] */
	asm volatile("pxor %xmm13,%xmm13");	/* Zero temp */
	asm volatile("pxor %xmm14,%xmm14");	/* Q[3] */
	asm volatile("pxor %xmm15,%xmm15");	/* Zero temp */

	for ( d = 0 ; d < bytes ; d += 64 ) {
		for ( z = z0 ; z >= 0 ; z-- ) {
			/* The second prefetch seems to improve performance... */
			asm volatile("prefetchnta %0" :: "m" (dptr[z][d]));
			asm volatile("prefetchnta %0" :: "m" (dptr[z][d+32]));
			asm volatile("pcmpgtb %xmm4,%xmm5");
			asm volatile("pcmpgtb %xmm6,%xmm7");
			asm volatile("pcmpgtb %xmm12,%xmm13");
			asm volatile("pcmpgtb %xmm14,%xmm15");
			asm volatile("paddb %xmm4,%xmm4");
			asm volatile("paddb %xmm6,%xmm6");
			asm volatile("paddb %xmm12,%xmm12");
			asm volatile("paddb %xmm14,%xmm14");
			asm volatile("pand %xmm0,%xmm5");
			asm volatile("pand %xmm0,%xmm7");
			asm volatile("pand %xmm0,%xmm13");
			asm volatile("pand %xmm0,%xmm15");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm7,%xmm6");
			asm volatile("pxor %xmm13,%xmm12");
			asm volatile("pxor %xmm15,%xmm14");
			asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
			asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16]));
			asm volatile("movdqa %0,%%xmm13" :: "m" (dptr[z][d+32]));
			asm volatile("movdqa %0,%%xmm15" :: "m" (dptr[z][d+48]));
			asm volatile("pxor %xmm5,%xmm2");
			asm volatile("pxor %xmm7,%xmm3");
			asm volatile("pxor %xmm13,%xmm10");
			asm volatile("pxor %xmm15,%xmm11");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm7,%xmm6");
			asm volatile("pxor %xmm13,%xmm12");
			asm volatile("pxor %xmm15,%xmm14");
			asm volatile("pxor %xmm5,%xmm5");
			asm volatile("pxor %xmm7,%xmm7");
			asm volatile("pxor %xmm13,%xmm13");
			asm volatile("pxor %xmm15,%xmm15");
		}
		asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
		asm volatile("pxor %xmm2,%xmm2");
		asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16]));
		asm volatile("pxor %xmm3,%xmm3");
		asm volatile("movntdq %%xmm10,%0" : "=m" (p[d+32]));
		asm volatile("pxor %xmm10,%xmm10");
		asm volatile("movntdq %%xmm11,%0" : "=m" (p[d+48]));
		asm volatile("pxor %xmm11,%xmm11");
		asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
		asm volatile("pxor %xmm4,%xmm4");
		asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16]));
		asm volatile("pxor %xmm6,%xmm6");
		asm volatile("movntdq %%xmm12,%0" : "=m" (q[d+32]));
		asm volatile("pxor %xmm12,%xmm12");
		asm volatile("movntdq %%xmm14,%0" : "=m" (q[d+48]));
		asm volatile("pxor %xmm14,%xmm14");
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

static void raid6_sse24_xor_syndrome(int disks, int start, int stop,
				     size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks-2];	/* XOR parity */
	q = dptr[disks-1];	/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("movdqa %0,%%xmm0" :: "m" (raid6_sse_constants.x1d[0]));

	for ( d = 0 ; d < bytes ; d += 64 ) {
		asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d]));
		asm volatile("movdqa %0,%%xmm6" :: "m" (dptr[z0][d+16]));
		asm volatile("movdqa %0,%%xmm12" :: "m" (dptr[z0][d+32]));
		asm volatile("movdqa %0,%%xmm14" :: "m" (dptr[z0][d+48]));
		asm volatile("movdqa %0,%%xmm2" : : "m" (p[d]));
		asm volatile("movdqa %0,%%xmm3" : : "m" (p[d+16]));
		asm volatile("movdqa %0,%%xmm10" : : "m" (p[d+32]));
		asm volatile("movdqa %0,%%xmm11" : : "m" (p[d+48]));
		asm volatile("pxor %xmm4,%xmm2");
		asm volatile("pxor %xmm6,%xmm3");
		asm volatile("pxor %xmm12,%xmm10");
		asm volatile("pxor %xmm14,%xmm11");
		/* P/Q data pages */
		for ( z = z0-1 ; z >= start ; z-- ) {
			asm volatile("prefetchnta %0" :: "m" (dptr[z][d]));
			asm volatile("prefetchnta %0" :: "m" (dptr[z][d+32]));
			asm volatile("pxor %xmm5,%xmm5");
			asm volatile("pxor %xmm7,%xmm7");
			asm volatile("pxor %xmm13,%xmm13");
			asm volatile("pxor %xmm15,%xmm15");
			asm volatile("pcmpgtb %xmm4,%xmm5");
			asm volatile("pcmpgtb %xmm6,%xmm7");
			asm volatile("pcmpgtb %xmm12,%xmm13");
			asm volatile("pcmpgtb %xmm14,%xmm15");
			asm volatile("paddb %xmm4,%xmm4");
			asm volatile("paddb %xmm6,%xmm6");
			asm volatile("paddb %xmm12,%xmm12");
			asm volatile("paddb %xmm14,%xmm14");
			asm volatile("pand %xmm0,%xmm5");
			asm volatile("pand %xmm0,%xmm7");
			asm volatile("pand %xmm0,%xmm13");
			asm volatile("pand %xmm0,%xmm15");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm7,%xmm6");
			asm volatile("pxor %xmm13,%xmm12");
			asm volatile("pxor %xmm15,%xmm14");
			asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
			asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16]));
			asm volatile("movdqa %0,%%xmm13" :: "m" (dptr[z][d+32]));
			asm volatile("movdqa %0,%%xmm15" :: "m" (dptr[z][d+48]));
			asm volatile("pxor %xmm5,%xmm2");
			asm volatile("pxor %xmm7,%xmm3");
			asm volatile("pxor %xmm13,%xmm10");
			asm volatile("pxor %xmm15,%xmm11");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm7,%xmm6");
			asm volatile("pxor %xmm13,%xmm12");
			asm volatile("pxor %xmm15,%xmm14");
		}
		asm volatile("prefetchnta %0" :: "m" (q[d]));
		asm volatile("prefetchnta %0" :: "m" (q[d+32]));
		/* P/Q left side optimization */
		for ( z = start-1 ; z >= 0 ; z-- ) {
			asm volatile("pxor %xmm5,%xmm5");
			asm volatile("pxor %xmm7,%xmm7");
			asm volatile("pxor %xmm13,%xmm13");
			asm volatile("pxor %xmm15,%xmm15");
			asm volatile("pcmpgtb %xmm4,%xmm5");
			asm volatile("pcmpgtb %xmm6,%xmm7");
			asm volatile("pcmpgtb %xmm12,%xmm13");
			asm volatile("pcmpgtb %xmm14,%xmm15");
			asm volatile("paddb %xmm4,%xmm4");
			asm volatile("paddb %xmm6,%xmm6");
			asm volatile("paddb %xmm12,%xmm12");
			asm volatile("paddb %xmm14,%xmm14");
			asm volatile("pand %xmm0,%xmm5");
			asm volatile("pand %xmm0,%xmm7");
			asm volatile("pand %xmm0,%xmm13");
			asm volatile("pand %xmm0,%xmm15");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm7,%xmm6");
			asm volatile("pxor %xmm13,%xmm12");
			asm volatile("pxor %xmm15,%xmm14");
		}
		asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
		asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16]));
		asm volatile("movntdq %%xmm10,%0" : "=m" (p[d+32]));
		asm volatile("movntdq %%xmm11,%0" : "=m" (p[d+48]));
		asm volatile("pxor %0,%%xmm4" : : "m" (q[d]));
		asm volatile("pxor %0,%%xmm6" : : "m" (q[d+16]));
		asm volatile("pxor %0,%%xmm12" : : "m" (q[d+32]));
		asm volatile("pxor %0,%%xmm14" : : "m" (q[d+48]));
		asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
		asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16]));
		asm volatile("movntdq %%xmm12,%0" : "=m" (q[d+32]));
		asm volatile("movntdq %%xmm14,%0" : "=m" (q[d+48]));
	}
	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}


const struct raid6_calls raid6_sse2x4 = {
	raid6_sse24_gen_syndrome,
	raid6_sse24_xor_syndrome,
	raid6_have_sse2,
	"sse2x4",
	1			/* Has cache hints */
};

#endif /* CONFIG_X86_64 */