cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

sse1.c (4718B)


// SPDX-License-Identifier: GPL-2.0-or-later
/* -*- linux-c -*- ------------------------------------------------------- *
 *
 *   Copyright 2002 H. Peter Anvin - All Rights Reserved
 *
 * ----------------------------------------------------------------------- */

/*
 * raid6/sse1.c
 *
 * SSE-1/MMXEXT implementation of RAID-6 syndrome functions
 *
 * This is really an MMX implementation, but it requires SSE-1 or
 * AMD MMXEXT for prefetch support and a few other features.  The
 * support for nontemporal memory accesses is enough to make this
 * worthwhile as a separate implementation.
 */
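
/*
 * Both syndrome generators below compute the RAID-6 P/Q pair over the
 * data disks: P is the plain XOR parity, and Q is the Reed-Solomon
 * syndrome, the sum of 2^z * D_z over GF(2^8) (polynomial 0x11d),
 * accumulated by Horner's rule from the highest data disk down.
 */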

#ifdef CONFIG_X86_32

#include <linux/raid/pq.h>
#include "x86.h"

/* Defined in raid6/mmx.c */
extern const struct raid6_mmx_constants {
	u64 x1d;
} raid6_mmx_constants;

static int raid6_have_sse1_or_mmxext(void)
{
	/* Not really boot_cpu but "all_cpus" */
	return boot_cpu_has(X86_FEATURE_MMX) &&
		(boot_cpu_has(X86_FEATURE_XMM) ||
		 boot_cpu_has(X86_FEATURE_MMXEXT));
}

/*
 * Plain SSE1 implementation
 */
static void raid6_sse11_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d));
	asm volatile("pxor %mm5,%mm5");	/* Zero temp */

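	/*
	 * Process the buffers 8 bytes (one MMX quadword) at a time: P and Q
	 * start as the highest data disk, then every further disk z is
	 * folded in as P ^= D_z and Q = 2*Q ^ D_z.
	 */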
	for ( d = 0 ; d < bytes ; d += 8 ) {
		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
		asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */
		asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d]));
		asm volatile("movq %mm2,%mm4");	/* Q[0] */
		asm volatile("movq %0,%%mm6" : : "m" (dptr[z0-1][d]));
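		/*
		 * The pcmpgtb/paddb/pand/pxor sequence is 2*Q in GF(2^8):
		 * pcmpgtb builds an all-ones byte mask where Q's top bit is
		 * set, paddb doubles each byte, and the mask selects 0x1d
		 * (mm0) to XOR in for the polynomial reduction.
		 */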
		for ( z = z0-2 ; z >= 0 ; z-- ) {
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
			asm volatile("pcmpgtb %mm4,%mm5");
			asm volatile("paddb %mm4,%mm4");
			asm volatile("pand %mm0,%mm5");
			asm volatile("pxor %mm5,%mm4");
			asm volatile("pxor %mm5,%mm5");
			asm volatile("pxor %mm6,%mm2");
			asm volatile("pxor %mm6,%mm4");
			asm volatile("movq %0,%%mm6" : : "m" (dptr[z][d]));
		}
		asm volatile("pcmpgtb %mm4,%mm5");
		asm volatile("paddb %mm4,%mm4");
		asm volatile("pand %mm0,%mm5");
		asm volatile("pxor %mm5,%mm4");
		asm volatile("pxor %mm5,%mm5");
		asm volatile("pxor %mm6,%mm2");
		asm volatile("pxor %mm6,%mm4");

		asm volatile("movntq %%mm2,%0" : "=m" (p[d]));
		asm volatile("movntq %%mm4,%0" : "=m" (q[d]));
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

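/*
 * Entry for the RAID-6 algorithm table (struct raid6_calls, declared in
 * linux/raid/pq.h). The positional initializers are the syndrome
 * generator, the (unimplemented) XOR-syndrome hook, the CPU-feature
 * check, the algorithm name, and a preference flag.
 */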
const struct raid6_calls raid6_sse1x1 = {
	raid6_sse11_gen_syndrome,
	NULL,			/* XOR not yet implemented */
	raid6_have_sse1_or_mmxext,
	"sse1x1",
	1			/* Has cache hints */
};

/*
 * Unrolled-by-2 SSE1 implementation
 */
static void raid6_sse12_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d));
	asm volatile("pxor %mm5,%mm5");	/* Zero temp */
	asm volatile("pxor %mm7,%mm7"); /* Zero temp */

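	/*
	 * Same recurrence as above, unrolled to two quadwords per pass.
	 * mm5/mm7 serve both as the sign masks for the 2*Q step and as
	 * scratch for the next data load, so each inner iteration ends by
	 * re-zeroing them.
	 */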
	/* We uniformly assume a single prefetch covers at least 16 bytes */
	for ( d = 0 ; d < bytes ; d += 16 ) {
		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
		asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */
		asm volatile("movq %0,%%mm3" : : "m" (dptr[z0][d+8])); /* P[1] */
		asm volatile("movq %mm2,%mm4");	/* Q[0] */
		asm volatile("movq %mm3,%mm6"); /* Q[1] */
		for ( z = z0-1 ; z >= 0 ; z-- ) {
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
			asm volatile("pcmpgtb %mm4,%mm5");
			asm volatile("pcmpgtb %mm6,%mm7");
			asm volatile("paddb %mm4,%mm4");
			asm volatile("paddb %mm6,%mm6");
			asm volatile("pand %mm0,%mm5");
			asm volatile("pand %mm0,%mm7");
			asm volatile("pxor %mm5,%mm4");
			asm volatile("pxor %mm7,%mm6");
			asm volatile("movq %0,%%mm5" : : "m" (dptr[z][d]));
			asm volatile("movq %0,%%mm7" : : "m" (dptr[z][d+8]));
			asm volatile("pxor %mm5,%mm2");
			asm volatile("pxor %mm7,%mm3");
			asm volatile("pxor %mm5,%mm4");
			asm volatile("pxor %mm7,%mm6");
			asm volatile("pxor %mm5,%mm5");
			asm volatile("pxor %mm7,%mm7");
		}
		asm volatile("movntq %%mm2,%0" : "=m" (p[d]));
		asm volatile("movntq %%mm3,%0" : "=m" (p[d+8]));
		asm volatile("movntq %%mm4,%0" : "=m" (q[d]));
		asm volatile("movntq %%mm6,%0" : "=m" (q[d+8]));
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

const struct raid6_calls raid6_sse1x2 = {
	raid6_sse12_gen_syndrome,
	NULL,			/* XOR not yet implemented */
	raid6_have_sse1_or_mmxext,
	"sse1x2",
	1			/* Has cache hints */
};

#endif