cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

xor.h (5768B)


      1/* SPDX-License-Identifier: GPL-2.0-only */
      2/*
      3 *  arch/arm/include/asm/xor.h
      4 *
      5 *  Copyright (C) 2001 Russell King
      6 */
      7#include <linux/hardirq.h>
      8#include <asm-generic/xor.h>
      9#include <asm/hwcap.h>
     10#include <asm/neon.h>
     11
     /*
      * XOR-accumulate a2 into a1 (a1 ^= a2).  The arguments are pasted in
      * unparenthesized and the expansion carries no trailing semicolon.
      */
     12#define __XOR(a1, a2) a1 ^= a2
     13
     /*
      * Load two registers from the address held in dst into a1 and a2 with a
      * single ldmia.  The instruction has no "!" writeback suffix (contrast
      * XOR_BLOCK_2 and PUT_BLOCK_2), so it does not advance dst.
      * Relies on variables named a1 and a2 being in scope where it is expanded.
      */
     14#define GET_BLOCK_2(dst) \
     15	__asm__("ldmia	%0, {%1, %2}" \
     16		: "=r" (dst), "=r" (a1), "=r" (a2) \
     17		: "0" (dst))
     18
     /*
      * Load four registers from the address held in dst into a1..a4 with a
      * single ldmia.  The instruction has no "!" writeback suffix (contrast
      * XOR_BLOCK_4 and PUT_BLOCK_4), so it does not advance dst.
      * Relies on variables named a1..a4 being in scope where it is expanded.
      */
     19#define GET_BLOCK_4(dst) \
     20	__asm__("ldmia	%0, {%1, %2, %3, %4}" \
     21		: "=r" (dst), "=r" (a1), "=r" (a2), "=r" (a3), "=r" (a4) \
     22		: "0" (dst))
     23
     /*
      * Load two registers from the address held in src into b1 and b2,
      * advancing src past them ("!" writeback), then fold them into the
      * accumulators: a1 ^= b1, a2 ^= b2.
      * Relies on a1, a2, b1 and b2 being in scope where it is expanded.
      *
      * NOTE(review): unlike XOR_BLOCK_4, this expansion itself ends in a
      * semicolon, so "XOR_BLOCK_2(p);" yields an extra empty statement.
      */
     24#define XOR_BLOCK_2(src) \
     25	__asm__("ldmia	%0!, {%1, %2}" \
     26		: "=r" (src), "=r" (b1), "=r" (b2) \
     27		: "0" (src)); \
     28	__XOR(a1, b1); __XOR(a2, b2);
     29
     /*
      * Load four registers from the address held in src into b1..b4,
      * advancing src past them ("!" writeback), then fold them into the
      * accumulators: a1 ^= b1 ... a4 ^= b4.
      * Relies on a1..a4 and b1..b4 being in scope where it is expanded.
      * The expansion is several statements and is not wrapped in do/while(0).
      */
     30#define XOR_BLOCK_4(src) \
     31	__asm__("ldmia	%0!, {%1, %2, %3, %4}" \
     32		: "=r" (src), "=r" (b1), "=r" (b2), "=r" (b3), "=r" (b4) \
     33		: "0" (src)); \
     34	__XOR(a1, b1); __XOR(a2, b2); __XOR(a3, b3); __XOR(a4, b4)
     35
     /*
      * Store a1 and a2 to the address held in dst with a single stmia and
      * advance dst past them ("!" writeback).  The asm is marked __volatile__,
      * unlike the loads in GET_BLOCK_2 and XOR_BLOCK_2.
      * Relies on a1 and a2 being in scope where it is expanded.
      */
     36#define PUT_BLOCK_2(dst) \
     37	__asm__ __volatile__("stmia	%0!, {%2, %3}" \
     38		: "=r" (dst) \
     39		: "0" (dst), "r" (a1), "r" (a2))
     40
     /*
      * Store a1..a4 to the address held in dst with a single stmia and
      * advance dst past them ("!" writeback).  The asm is marked __volatile__,
      * unlike the loads in GET_BLOCK_4 and XOR_BLOCK_4.
      * Relies on a1..a4 being in scope where it is expanded.
      */
     41#define PUT_BLOCK_4(dst) \
     42	__asm__ __volatile__("stmia	%0!, {%2, %3, %4, %5}" \
     43		: "=r" (dst) \
     44		: "0" (dst), "r" (a1), "r" (a2), "r" (a3), "r" (a4))
     45
     /*
      * XOR the buffer at p2 into the buffer at p1, four registers per loop
      * iteration.
      *
      * @bytes: length to process.  The iteration count is
      *         bytes / sizeof(unsigned long) / 4, so any remainder is ignored.
      * @p1:    destination; read and overwritten in place.
      * @p2:    source; only read.
      *
      * a1..a4 (accumulators) and b1..b4 (scratch) are pinned to r4-r9, ip and
      * lr; the GET/XOR/PUT macros refer to them by name.
      *
      * NOTE(review): the do/while runs the body before testing --lines, so if
      * bytes is smaller than one block, lines starts at 0 and wraps around.
      * Presumably callers always pass a non-zero multiple of the block size --
      * confirm.
      */
     46static void
     47xor_arm4regs_2(unsigned long bytes, unsigned long * __restrict p1,
     48	       const unsigned long * __restrict p2)
     49{
     50	unsigned int lines = bytes / sizeof(unsigned long) / 4;
     51	register unsigned int a1 __asm__("r4");
     52	register unsigned int a2 __asm__("r5");
     53	register unsigned int a3 __asm__("r6");
     54	register unsigned int a4 __asm__("r7");
     55	register unsigned int b1 __asm__("r8");
     56	register unsigned int b2 __asm__("r9");
     57	register unsigned int b3 __asm__("ip");
     58	register unsigned int b4 __asm__("lr");
     59
     60	do {
     61		GET_BLOCK_4(p1);	/* a1..a4 = block at p1; p1 not advanced */
     62		XOR_BLOCK_4(p2);	/* a1..a4 ^= block at p2; p2 advances */
     63		PUT_BLOCK_4(p1);	/* store a1..a4 back at p1; p1 advances */
     64	} while (--lines);
     65}
     66
     /*
      * XOR the buffers at p2 and p3 into the buffer at p1, four registers per
      * loop iteration.
      *
      * @bytes: length to process.  The iteration count is
      *         bytes / sizeof(unsigned long) / 4, so any remainder is ignored.
      * @p1:    destination; read and overwritten in place.
      * @p2:    source; only read.
      * @p3:    source; only read.
      *
      * a1..a4 (accumulators) and b1..b4 (scratch) are pinned to r4-r9, ip and
      * lr; the GET/XOR/PUT macros refer to them by name.
      *
      * NOTE(review): the do/while runs the body before testing --lines, so if
      * bytes is smaller than one block, lines starts at 0 and wraps around.
      * Presumably callers always pass a non-zero multiple of the block size --
      * confirm.
      */
     67static void
     68xor_arm4regs_3(unsigned long bytes, unsigned long * __restrict p1,
     69	       const unsigned long * __restrict p2,
     70	       const unsigned long * __restrict p3)
     71{
     72	unsigned int lines = bytes / sizeof(unsigned long) / 4;
     73	register unsigned int a1 __asm__("r4");
     74	register unsigned int a2 __asm__("r5");
     75	register unsigned int a3 __asm__("r6");
     76	register unsigned int a4 __asm__("r7");
     77	register unsigned int b1 __asm__("r8");
     78	register unsigned int b2 __asm__("r9");
     79	register unsigned int b3 __asm__("ip");
     80	register unsigned int b4 __asm__("lr");
     81
     82	do {
     83		GET_BLOCK_4(p1);	/* a1..a4 = block at p1; p1 not advanced */
     84		XOR_BLOCK_4(p2);	/* a1..a4 ^= block at p2; p2 advances */
     85		XOR_BLOCK_4(p3);	/* a1..a4 ^= block at p3; p3 advances */
     86		PUT_BLOCK_4(p1);	/* store a1..a4 back at p1; p1 advances */
     87	} while (--lines);
     88}
     89
     /*
      * XOR the buffers at p2, p3 and p4 into the buffer at p1, two registers
      * per loop iteration (the 2- and 3-buffer variants handle four).
      * Presumably the smaller block is because the extra pointer arguments
      * leave fewer free registers -- this file does not say.
      *
      * @bytes: length to process.  The iteration count is
      *         bytes / sizeof(unsigned long) / 2, so any remainder is ignored.
      * @p1:    destination; read and overwritten in place.
      * @p2:    source; only read.
      * @p3:    source; only read.
      * @p4:    source; only read.
      *
      * a1/a2 (accumulators) are pinned to r8/r9 and b1/b2 (scratch) to ip/lr;
      * the GET/XOR/PUT macros refer to them by name.
      *
      * NOTE(review): the do/while runs the body before testing --lines, so if
      * bytes is smaller than one block, lines starts at 0 and wraps around.
      * Presumably callers always pass a non-zero multiple of the block size --
      * confirm.
      */
     90static void
     91xor_arm4regs_4(unsigned long bytes, unsigned long * __restrict p1,
     92	       const unsigned long * __restrict p2,
     93	       const unsigned long * __restrict p3,
     94	       const unsigned long * __restrict p4)
     95{
     96	unsigned int lines = bytes / sizeof(unsigned long) / 2;
     97	register unsigned int a1 __asm__("r8");
     98	register unsigned int a2 __asm__("r9");
     99	register unsigned int b1 __asm__("ip");
    100	register unsigned int b2 __asm__("lr");
    101
    102	do {
    103		GET_BLOCK_2(p1);	/* a1,a2 = block at p1; p1 not advanced */
    104		XOR_BLOCK_2(p2);	/* a1,a2 ^= block at p2; p2 advances */
    105		XOR_BLOCK_2(p3);	/* a1,a2 ^= block at p3; p3 advances */
    106		XOR_BLOCK_2(p4);	/* a1,a2 ^= block at p4; p4 advances */
    107		PUT_BLOCK_2(p1);	/* store a1,a2 back at p1; p1 advances */
    108	} while (--lines);
    109}
    110
    /*
     * XOR the buffers at p2, p3, p4 and p5 into the buffer at p1, two
     * registers per loop iteration.
     *
     * @bytes: length to process.  The iteration count is
     *         bytes / sizeof(unsigned long) / 2, so any remainder is ignored.
     * @p1:    destination; read and overwritten in place.
     * @p2:    source; only read.
     * @p3:    source; only read.
     * @p4:    source; only read.
     * @p5:    source; only read.
     *
     * a1/a2 (accumulators) are pinned to r8/r9 and b1/b2 (scratch) to ip/lr;
     * the GET/XOR/PUT macros refer to them by name.
     *
     * NOTE(review): the do/while runs the body before testing --lines, so if
     * bytes is smaller than one block, lines starts at 0 and wraps around.
     * Presumably callers always pass a non-zero multiple of the block size --
     * confirm.
     */
    111static void
    112xor_arm4regs_5(unsigned long bytes, unsigned long * __restrict p1,
    113	       const unsigned long * __restrict p2,
    114	       const unsigned long * __restrict p3,
    115	       const unsigned long * __restrict p4,
    116	       const unsigned long * __restrict p5)
    117{
    118	unsigned int lines = bytes / sizeof(unsigned long) / 2;
    119	register unsigned int a1 __asm__("r8");
    120	register unsigned int a2 __asm__("r9");
    121	register unsigned int b1 __asm__("ip");
    122	register unsigned int b2 __asm__("lr");
    123
    124	do {
    125		GET_BLOCK_2(p1);	/* a1,a2 = block at p1; p1 not advanced */
    126		XOR_BLOCK_2(p2);	/* a1,a2 ^= block at p2; p2 advances */
    127		XOR_BLOCK_2(p3);	/* a1,a2 ^= block at p3; p3 advances */
    128		XOR_BLOCK_2(p4);	/* a1,a2 ^= block at p4; p4 advances */
    129		XOR_BLOCK_2(p5);	/* a1,a2 ^= block at p5; p5 advances */
    130		PUT_BLOCK_2(p1);	/* store a1,a2 back at p1; p1 advances */
    131	} while (--lines);
    132}
    133
    /*
     * Template named "arm4regs" that bundles the ldmia/stmia based routines
     * above for 2 to 5 buffers.  It is handed to xor_speed() by
     * XOR_TRY_TEMPLATES.  struct xor_block_template is declared outside this
     * file.
     */
    134static struct xor_block_template xor_block_arm4regs = {
    135	.name	= "arm4regs",
    136	.do_2	= xor_arm4regs_2,
    137	.do_3	= xor_arm4regs_3,
    138	.do_4	= xor_arm4regs_4,
    139	.do_5	= xor_arm4regs_5,
    140};
    141
    /*
     * Replace any earlier XOR_TRY_TEMPLATES definition with one that passes
     * the arm4regs, 8regs and 32regs templates to xor_speed() and then expands
     * NEON_TEMPLATES.  xor_block_8regs and xor_block_32regs are not defined in
     * this file.  NEON_TEMPLATES is defined further down; that is fine because
     * it is only expanded where XOR_TRY_TEMPLATES is used.
     */
    142#undef XOR_TRY_TEMPLATES
    143#define XOR_TRY_TEMPLATES			\
    144	do {					\
    145		xor_speed(&xor_block_arm4regs);	\
    146		xor_speed(&xor_block_8regs);	\
    147		xor_speed(&xor_block_32regs);	\
    148		NEON_TEMPLATES;			\
    149	} while (0)
    150
    151#ifdef CONFIG_KERNEL_MODE_NEON
    152
    /*
     * Template defined in another translation unit; the xor_neon_*() wrappers
     * below call its do_2..do_5 members between kernel_neon_begin() and
     * kernel_neon_end().
     */
    153extern struct xor_block_template const xor_block_neon_inner;
    154
    /*
     * 2-buffer XOR wrapper: when in_interrupt() is true, use the arm4regs
     * routine; otherwise call xor_block_neon_inner.do_2 bracketed by
     * kernel_neon_begin() and kernel_neon_end().  Presumably kernel-mode NEON
     * must not be used in interrupt context -- confirm against asm/neon.h.
     *
     * @bytes, @p1, @p2 are passed through unchanged to whichever
     * implementation is chosen.
     */
    155static void
    156xor_neon_2(unsigned long bytes, unsigned long * __restrict p1,
    157	   const unsigned long * __restrict p2)
    158{
    159	if (in_interrupt()) {
    160		xor_arm4regs_2(bytes, p1, p2);
    161	} else {
    162		kernel_neon_begin();
    163		xor_block_neon_inner.do_2(bytes, p1, p2);
    164		kernel_neon_end();
    165	}
    166}
    167
    /*
     * 3-buffer XOR wrapper: when in_interrupt() is true, use the arm4regs
     * routine; otherwise call xor_block_neon_inner.do_3 bracketed by
     * kernel_neon_begin() and kernel_neon_end().  Presumably kernel-mode NEON
     * must not be used in interrupt context -- confirm against asm/neon.h.
     *
     * @bytes, @p1..@p3 are passed through unchanged to whichever
     * implementation is chosen.
     */
    168static void
    169xor_neon_3(unsigned long bytes, unsigned long * __restrict p1,
    170	   const unsigned long * __restrict p2,
    171	   const unsigned long * __restrict p3)
    172{
    173	if (in_interrupt()) {
    174		xor_arm4regs_3(bytes, p1, p2, p3);
    175	} else {
    176		kernel_neon_begin();
    177		xor_block_neon_inner.do_3(bytes, p1, p2, p3);
    178		kernel_neon_end();
    179	}
    180}
    181
    /*
     * 4-buffer XOR wrapper: when in_interrupt() is true, use the arm4regs
     * routine; otherwise call xor_block_neon_inner.do_4 bracketed by
     * kernel_neon_begin() and kernel_neon_end().  Presumably kernel-mode NEON
     * must not be used in interrupt context -- confirm against asm/neon.h.
     *
     * @bytes, @p1..@p4 are passed through unchanged to whichever
     * implementation is chosen.
     */
    182static void
    183xor_neon_4(unsigned long bytes, unsigned long * __restrict p1,
    184	   const unsigned long * __restrict p2,
    185	   const unsigned long * __restrict p3,
    186	   const unsigned long * __restrict p4)
    187{
    188	if (in_interrupt()) {
    189		xor_arm4regs_4(bytes, p1, p2, p3, p4);
    190	} else {
    191		kernel_neon_begin();
    192		xor_block_neon_inner.do_4(bytes, p1, p2, p3, p4);
    193		kernel_neon_end();
    194	}
    195}
    196
    /*
     * 5-buffer XOR wrapper: when in_interrupt() is true, use the arm4regs
     * routine; otherwise call xor_block_neon_inner.do_5 bracketed by
     * kernel_neon_begin() and kernel_neon_end().  Presumably kernel-mode NEON
     * must not be used in interrupt context -- confirm against asm/neon.h.
     *
     * @bytes, @p1..@p5 are passed through unchanged to whichever
     * implementation is chosen.
     */
    197static void
    198xor_neon_5(unsigned long bytes, unsigned long * __restrict p1,
    199	   const unsigned long * __restrict p2,
    200	   const unsigned long * __restrict p3,
    201	   const unsigned long * __restrict p4,
    202	   const unsigned long * __restrict p5)
    203{
    204	if (in_interrupt()) {
    205		xor_arm4regs_5(bytes, p1, p2, p3, p4, p5);
    206	} else {
    207		kernel_neon_begin();
    208		xor_block_neon_inner.do_5(bytes, p1, p2, p3, p4, p5);
    209		kernel_neon_end();
    210	}
    211}
    212
    /*
     * Template named "neon" that bundles the xor_neon_*() wrappers above.  It
     * is only built when CONFIG_KERNEL_MODE_NEON is defined, and is handed to
     * xor_speed() by NEON_TEMPLATES.
     */
    213static struct xor_block_template xor_block_neon = {
    214	.name	= "neon",
    215	.do_2	= xor_neon_2,
    216	.do_3	= xor_neon_3,
    217	.do_4	= xor_neon_4,
    218	.do_5	= xor_neon_5
    219};
    220
    /*
     * CONFIG_KERNEL_MODE_NEON build: pass the "neon" template to xor_speed(),
     * but only when cpu_has_neon() reports true at run time.
     */
    221#define NEON_TEMPLATES	\
    222	do { if (cpu_has_neon()) xor_speed(&xor_block_neon); } while (0)
    223#else
    /*
     * Without CONFIG_KERNEL_MODE_NEON this expands to nothing, leaving a bare
     * ";" inside XOR_TRY_TEMPLATES.
     */
    224#define NEON_TEMPLATES
    225#endif