cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

xor.h (14672B)


      1/* SPDX-License-Identifier: GPL-2.0-or-later */
      2/*
      3 * include/asm-generic/xor.h
      4 *
      5 * Generic optimized RAID-5 checksumming functions.
      6 */
      7
      8#include <linux/prefetch.h>
      9
     10static void
     11xor_8regs_2(unsigned long bytes, unsigned long * __restrict p1,
     12	    const unsigned long * __restrict p2)
     13{
     14	long lines = bytes / (sizeof (long)) / 8;
     15
     16	do {
     17		p1[0] ^= p2[0];
     18		p1[1] ^= p2[1];
     19		p1[2] ^= p2[2];
     20		p1[3] ^= p2[3];
     21		p1[4] ^= p2[4];
     22		p1[5] ^= p2[5];
     23		p1[6] ^= p2[6];
     24		p1[7] ^= p2[7];
     25		p1 += 8;
     26		p2 += 8;
     27	} while (--lines > 0);
     28}
     29
     30static void
     31xor_8regs_3(unsigned long bytes, unsigned long * __restrict p1,
     32	    const unsigned long * __restrict p2,
     33	    const unsigned long * __restrict p3)
     34{
     35	long lines = bytes / (sizeof (long)) / 8;
     36
     37	do {
     38		p1[0] ^= p2[0] ^ p3[0];
     39		p1[1] ^= p2[1] ^ p3[1];
     40		p1[2] ^= p2[2] ^ p3[2];
     41		p1[3] ^= p2[3] ^ p3[3];
     42		p1[4] ^= p2[4] ^ p3[4];
     43		p1[5] ^= p2[5] ^ p3[5];
     44		p1[6] ^= p2[6] ^ p3[6];
     45		p1[7] ^= p2[7] ^ p3[7];
     46		p1 += 8;
     47		p2 += 8;
     48		p3 += 8;
     49	} while (--lines > 0);
     50}
     51
     52static void
     53xor_8regs_4(unsigned long bytes, unsigned long * __restrict p1,
     54	    const unsigned long * __restrict p2,
     55	    const unsigned long * __restrict p3,
     56	    const unsigned long * __restrict p4)
     57{
     58	long lines = bytes / (sizeof (long)) / 8;
     59
     60	do {
     61		p1[0] ^= p2[0] ^ p3[0] ^ p4[0];
     62		p1[1] ^= p2[1] ^ p3[1] ^ p4[1];
     63		p1[2] ^= p2[2] ^ p3[2] ^ p4[2];
     64		p1[3] ^= p2[3] ^ p3[3] ^ p4[3];
     65		p1[4] ^= p2[4] ^ p3[4] ^ p4[4];
     66		p1[5] ^= p2[5] ^ p3[5] ^ p4[5];
     67		p1[6] ^= p2[6] ^ p3[6] ^ p4[6];
     68		p1[7] ^= p2[7] ^ p3[7] ^ p4[7];
     69		p1 += 8;
     70		p2 += 8;
     71		p3 += 8;
     72		p4 += 8;
     73	} while (--lines > 0);
     74}
     75
     76static void
     77xor_8regs_5(unsigned long bytes, unsigned long * __restrict p1,
     78	    const unsigned long * __restrict p2,
     79	    const unsigned long * __restrict p3,
     80	    const unsigned long * __restrict p4,
     81	    const unsigned long * __restrict p5)
     82{
     83	long lines = bytes / (sizeof (long)) / 8;
     84
     85	do {
     86		p1[0] ^= p2[0] ^ p3[0] ^ p4[0] ^ p5[0];
     87		p1[1] ^= p2[1] ^ p3[1] ^ p4[1] ^ p5[1];
     88		p1[2] ^= p2[2] ^ p3[2] ^ p4[2] ^ p5[2];
     89		p1[3] ^= p2[3] ^ p3[3] ^ p4[3] ^ p5[3];
     90		p1[4] ^= p2[4] ^ p3[4] ^ p4[4] ^ p5[4];
     91		p1[5] ^= p2[5] ^ p3[5] ^ p4[5] ^ p5[5];
     92		p1[6] ^= p2[6] ^ p3[6] ^ p4[6] ^ p5[6];
     93		p1[7] ^= p2[7] ^ p3[7] ^ p4[7] ^ p5[7];
     94		p1 += 8;
     95		p2 += 8;
     96		p3 += 8;
     97		p4 += 8;
     98		p5 += 8;
     99	} while (--lines > 0);
    100}
    101
    102static void
    103xor_32regs_2(unsigned long bytes, unsigned long * __restrict p1,
    104	     const unsigned long * __restrict p2)
    105{
    106	long lines = bytes / (sizeof (long)) / 8;
    107
    108	do {
    109		register long d0, d1, d2, d3, d4, d5, d6, d7;
    110		d0 = p1[0];	/* Pull the stuff into registers	*/
    111		d1 = p1[1];	/*  ... in bursts, if possible.		*/
    112		d2 = p1[2];
    113		d3 = p1[3];
    114		d4 = p1[4];
    115		d5 = p1[5];
    116		d6 = p1[6];
    117		d7 = p1[7];
    118		d0 ^= p2[0];
    119		d1 ^= p2[1];
    120		d2 ^= p2[2];
    121		d3 ^= p2[3];
    122		d4 ^= p2[4];
    123		d5 ^= p2[5];
    124		d6 ^= p2[6];
    125		d7 ^= p2[7];
    126		p1[0] = d0;	/* Store the result (in bursts)		*/
    127		p1[1] = d1;
    128		p1[2] = d2;
    129		p1[3] = d3;
    130		p1[4] = d4;
    131		p1[5] = d5;
    132		p1[6] = d6;
    133		p1[7] = d7;
    134		p1 += 8;
    135		p2 += 8;
    136	} while (--lines > 0);
    137}
    138
    139static void
    140xor_32regs_3(unsigned long bytes, unsigned long * __restrict p1,
    141	     const unsigned long * __restrict p2,
    142	     const unsigned long * __restrict p3)
    143{
    144	long lines = bytes / (sizeof (long)) / 8;
    145
    146	do {
    147		register long d0, d1, d2, d3, d4, d5, d6, d7;
    148		d0 = p1[0];	/* Pull the stuff into registers	*/
    149		d1 = p1[1];	/*  ... in bursts, if possible.		*/
    150		d2 = p1[2];
    151		d3 = p1[3];
    152		d4 = p1[4];
    153		d5 = p1[5];
    154		d6 = p1[6];
    155		d7 = p1[7];
    156		d0 ^= p2[0];
    157		d1 ^= p2[1];
    158		d2 ^= p2[2];
    159		d3 ^= p2[3];
    160		d4 ^= p2[4];
    161		d5 ^= p2[5];
    162		d6 ^= p2[6];
    163		d7 ^= p2[7];
    164		d0 ^= p3[0];
    165		d1 ^= p3[1];
    166		d2 ^= p3[2];
    167		d3 ^= p3[3];
    168		d4 ^= p3[4];
    169		d5 ^= p3[5];
    170		d6 ^= p3[6];
    171		d7 ^= p3[7];
    172		p1[0] = d0;	/* Store the result (in bursts)		*/
    173		p1[1] = d1;
    174		p1[2] = d2;
    175		p1[3] = d3;
    176		p1[4] = d4;
    177		p1[5] = d5;
    178		p1[6] = d6;
    179		p1[7] = d7;
    180		p1 += 8;
    181		p2 += 8;
    182		p3 += 8;
    183	} while (--lines > 0);
    184}
    185
    186static void
    187xor_32regs_4(unsigned long bytes, unsigned long * __restrict p1,
    188	     const unsigned long * __restrict p2,
    189	     const unsigned long * __restrict p3,
    190	     const unsigned long * __restrict p4)
    191{
    192	long lines = bytes / (sizeof (long)) / 8;
    193
    194	do {
    195		register long d0, d1, d2, d3, d4, d5, d6, d7;
    196		d0 = p1[0];	/* Pull the stuff into registers	*/
    197		d1 = p1[1];	/*  ... in bursts, if possible.		*/
    198		d2 = p1[2];
    199		d3 = p1[3];
    200		d4 = p1[4];
    201		d5 = p1[5];
    202		d6 = p1[6];
    203		d7 = p1[7];
    204		d0 ^= p2[0];
    205		d1 ^= p2[1];
    206		d2 ^= p2[2];
    207		d3 ^= p2[3];
    208		d4 ^= p2[4];
    209		d5 ^= p2[5];
    210		d6 ^= p2[6];
    211		d7 ^= p2[7];
    212		d0 ^= p3[0];
    213		d1 ^= p3[1];
    214		d2 ^= p3[2];
    215		d3 ^= p3[3];
    216		d4 ^= p3[4];
    217		d5 ^= p3[5];
    218		d6 ^= p3[6];
    219		d7 ^= p3[7];
    220		d0 ^= p4[0];
    221		d1 ^= p4[1];
    222		d2 ^= p4[2];
    223		d3 ^= p4[3];
    224		d4 ^= p4[4];
    225		d5 ^= p4[5];
    226		d6 ^= p4[6];
    227		d7 ^= p4[7];
    228		p1[0] = d0;	/* Store the result (in bursts)		*/
    229		p1[1] = d1;
    230		p1[2] = d2;
    231		p1[3] = d3;
    232		p1[4] = d4;
    233		p1[5] = d5;
    234		p1[6] = d6;
    235		p1[7] = d7;
    236		p1 += 8;
    237		p2 += 8;
    238		p3 += 8;
    239		p4 += 8;
    240	} while (--lines > 0);
    241}
    242
    243static void
    244xor_32regs_5(unsigned long bytes, unsigned long * __restrict p1,
    245	     const unsigned long * __restrict p2,
    246	     const unsigned long * __restrict p3,
    247	     const unsigned long * __restrict p4,
    248	     const unsigned long * __restrict p5)
    249{
    250	long lines = bytes / (sizeof (long)) / 8;
    251
    252	do {
    253		register long d0, d1, d2, d3, d4, d5, d6, d7;
    254		d0 = p1[0];	/* Pull the stuff into registers	*/
    255		d1 = p1[1];	/*  ... in bursts, if possible.		*/
    256		d2 = p1[2];
    257		d3 = p1[3];
    258		d4 = p1[4];
    259		d5 = p1[5];
    260		d6 = p1[6];
    261		d7 = p1[7];
    262		d0 ^= p2[0];
    263		d1 ^= p2[1];
    264		d2 ^= p2[2];
    265		d3 ^= p2[3];
    266		d4 ^= p2[4];
    267		d5 ^= p2[5];
    268		d6 ^= p2[6];
    269		d7 ^= p2[7];
    270		d0 ^= p3[0];
    271		d1 ^= p3[1];
    272		d2 ^= p3[2];
    273		d3 ^= p3[3];
    274		d4 ^= p3[4];
    275		d5 ^= p3[5];
    276		d6 ^= p3[6];
    277		d7 ^= p3[7];
    278		d0 ^= p4[0];
    279		d1 ^= p4[1];
    280		d2 ^= p4[2];
    281		d3 ^= p4[3];
    282		d4 ^= p4[4];
    283		d5 ^= p4[5];
    284		d6 ^= p4[6];
    285		d7 ^= p4[7];
    286		d0 ^= p5[0];
    287		d1 ^= p5[1];
    288		d2 ^= p5[2];
    289		d3 ^= p5[3];
    290		d4 ^= p5[4];
    291		d5 ^= p5[5];
    292		d6 ^= p5[6];
    293		d7 ^= p5[7];
    294		p1[0] = d0;	/* Store the result (in bursts)		*/
    295		p1[1] = d1;
    296		p1[2] = d2;
    297		p1[3] = d3;
    298		p1[4] = d4;
    299		p1[5] = d5;
    300		p1[6] = d6;
    301		p1[7] = d7;
    302		p1 += 8;
    303		p2 += 8;
    304		p3 += 8;
    305		p4 += 8;
    306		p5 += 8;
    307	} while (--lines > 0);
    308}
    309
    310static void
    311xor_8regs_p_2(unsigned long bytes, unsigned long * __restrict p1,
    312	      const unsigned long * __restrict p2)
    313{
    314	long lines = bytes / (sizeof (long)) / 8 - 1;
    315	prefetchw(p1);
    316	prefetch(p2);
    317
    318	do {
    319		prefetchw(p1+8);
    320		prefetch(p2+8);
    321 once_more:
    322		p1[0] ^= p2[0];
    323		p1[1] ^= p2[1];
    324		p1[2] ^= p2[2];
    325		p1[3] ^= p2[3];
    326		p1[4] ^= p2[4];
    327		p1[5] ^= p2[5];
    328		p1[6] ^= p2[6];
    329		p1[7] ^= p2[7];
    330		p1 += 8;
    331		p2 += 8;
    332	} while (--lines > 0);
    333	if (lines == 0)
    334		goto once_more;
    335}
    336
    337static void
    338xor_8regs_p_3(unsigned long bytes, unsigned long * __restrict p1,
    339	      const unsigned long * __restrict p2,
    340	      const unsigned long * __restrict p3)
    341{
    342	long lines = bytes / (sizeof (long)) / 8 - 1;
    343	prefetchw(p1);
    344	prefetch(p2);
    345	prefetch(p3);
    346
    347	do {
    348		prefetchw(p1+8);
    349		prefetch(p2+8);
    350		prefetch(p3+8);
    351 once_more:
    352		p1[0] ^= p2[0] ^ p3[0];
    353		p1[1] ^= p2[1] ^ p3[1];
    354		p1[2] ^= p2[2] ^ p3[2];
    355		p1[3] ^= p2[3] ^ p3[3];
    356		p1[4] ^= p2[4] ^ p3[4];
    357		p1[5] ^= p2[5] ^ p3[5];
    358		p1[6] ^= p2[6] ^ p3[6];
    359		p1[7] ^= p2[7] ^ p3[7];
    360		p1 += 8;
    361		p2 += 8;
    362		p3 += 8;
    363	} while (--lines > 0);
    364	if (lines == 0)
    365		goto once_more;
    366}
    367
    368static void
    369xor_8regs_p_4(unsigned long bytes, unsigned long * __restrict p1,
    370	      const unsigned long * __restrict p2,
    371	      const unsigned long * __restrict p3,
    372	      const unsigned long * __restrict p4)
    373{
    374	long lines = bytes / (sizeof (long)) / 8 - 1;
    375
    376	prefetchw(p1);
    377	prefetch(p2);
    378	prefetch(p3);
    379	prefetch(p4);
    380
    381	do {
    382		prefetchw(p1+8);
    383		prefetch(p2+8);
    384		prefetch(p3+8);
    385		prefetch(p4+8);
    386 once_more:
    387		p1[0] ^= p2[0] ^ p3[0] ^ p4[0];
    388		p1[1] ^= p2[1] ^ p3[1] ^ p4[1];
    389		p1[2] ^= p2[2] ^ p3[2] ^ p4[2];
    390		p1[3] ^= p2[3] ^ p3[3] ^ p4[3];
    391		p1[4] ^= p2[4] ^ p3[4] ^ p4[4];
    392		p1[5] ^= p2[5] ^ p3[5] ^ p4[5];
    393		p1[6] ^= p2[6] ^ p3[6] ^ p4[6];
    394		p1[7] ^= p2[7] ^ p3[7] ^ p4[7];
    395		p1 += 8;
    396		p2 += 8;
    397		p3 += 8;
    398		p4 += 8;
    399	} while (--lines > 0);
    400	if (lines == 0)
    401		goto once_more;
    402}
    403
    404static void
    405xor_8regs_p_5(unsigned long bytes, unsigned long * __restrict p1,
    406	      const unsigned long * __restrict p2,
    407	      const unsigned long * __restrict p3,
    408	      const unsigned long * __restrict p4,
    409	      const unsigned long * __restrict p5)
    410{
    411	long lines = bytes / (sizeof (long)) / 8 - 1;
    412
    413	prefetchw(p1);
    414	prefetch(p2);
    415	prefetch(p3);
    416	prefetch(p4);
    417	prefetch(p5);
    418
    419	do {
    420		prefetchw(p1+8);
    421		prefetch(p2+8);
    422		prefetch(p3+8);
    423		prefetch(p4+8);
    424		prefetch(p5+8);
    425 once_more:
    426		p1[0] ^= p2[0] ^ p3[0] ^ p4[0] ^ p5[0];
    427		p1[1] ^= p2[1] ^ p3[1] ^ p4[1] ^ p5[1];
    428		p1[2] ^= p2[2] ^ p3[2] ^ p4[2] ^ p5[2];
    429		p1[3] ^= p2[3] ^ p3[3] ^ p4[3] ^ p5[3];
    430		p1[4] ^= p2[4] ^ p3[4] ^ p4[4] ^ p5[4];
    431		p1[5] ^= p2[5] ^ p3[5] ^ p4[5] ^ p5[5];
    432		p1[6] ^= p2[6] ^ p3[6] ^ p4[6] ^ p5[6];
    433		p1[7] ^= p2[7] ^ p3[7] ^ p4[7] ^ p5[7];
    434		p1 += 8;
    435		p2 += 8;
    436		p3 += 8;
    437		p4 += 8;
    438		p5 += 8;
    439	} while (--lines > 0);
    440	if (lines == 0)
    441		goto once_more;
    442}
    443
    444static void
    445xor_32regs_p_2(unsigned long bytes, unsigned long * __restrict p1,
    446	       const unsigned long * __restrict p2)
    447{
    448	long lines = bytes / (sizeof (long)) / 8 - 1;
    449
    450	prefetchw(p1);
    451	prefetch(p2);
    452
    453	do {
    454		register long d0, d1, d2, d3, d4, d5, d6, d7;
    455
    456		prefetchw(p1+8);
    457		prefetch(p2+8);
    458 once_more:
    459		d0 = p1[0];	/* Pull the stuff into registers	*/
    460		d1 = p1[1];	/*  ... in bursts, if possible.		*/
    461		d2 = p1[2];
    462		d3 = p1[3];
    463		d4 = p1[4];
    464		d5 = p1[5];
    465		d6 = p1[6];
    466		d7 = p1[7];
    467		d0 ^= p2[0];
    468		d1 ^= p2[1];
    469		d2 ^= p2[2];
    470		d3 ^= p2[3];
    471		d4 ^= p2[4];
    472		d5 ^= p2[5];
    473		d6 ^= p2[6];
    474		d7 ^= p2[7];
    475		p1[0] = d0;	/* Store the result (in bursts)		*/
    476		p1[1] = d1;
    477		p1[2] = d2;
    478		p1[3] = d3;
    479		p1[4] = d4;
    480		p1[5] = d5;
    481		p1[6] = d6;
    482		p1[7] = d7;
    483		p1 += 8;
    484		p2 += 8;
    485	} while (--lines > 0);
    486	if (lines == 0)
    487		goto once_more;
    488}
    489
    490static void
    491xor_32regs_p_3(unsigned long bytes, unsigned long * __restrict p1,
    492	       const unsigned long * __restrict p2,
    493	       const unsigned long * __restrict p3)
    494{
    495	long lines = bytes / (sizeof (long)) / 8 - 1;
    496
    497	prefetchw(p1);
    498	prefetch(p2);
    499	prefetch(p3);
    500
    501	do {
    502		register long d0, d1, d2, d3, d4, d5, d6, d7;
    503
    504		prefetchw(p1+8);
    505		prefetch(p2+8);
    506		prefetch(p3+8);
    507 once_more:
    508		d0 = p1[0];	/* Pull the stuff into registers	*/
    509		d1 = p1[1];	/*  ... in bursts, if possible.		*/
    510		d2 = p1[2];
    511		d3 = p1[3];
    512		d4 = p1[4];
    513		d5 = p1[5];
    514		d6 = p1[6];
    515		d7 = p1[7];
    516		d0 ^= p2[0];
    517		d1 ^= p2[1];
    518		d2 ^= p2[2];
    519		d3 ^= p2[3];
    520		d4 ^= p2[4];
    521		d5 ^= p2[5];
    522		d6 ^= p2[6];
    523		d7 ^= p2[7];
    524		d0 ^= p3[0];
    525		d1 ^= p3[1];
    526		d2 ^= p3[2];
    527		d3 ^= p3[3];
    528		d4 ^= p3[4];
    529		d5 ^= p3[5];
    530		d6 ^= p3[6];
    531		d7 ^= p3[7];
    532		p1[0] = d0;	/* Store the result (in bursts)		*/
    533		p1[1] = d1;
    534		p1[2] = d2;
    535		p1[3] = d3;
    536		p1[4] = d4;
    537		p1[5] = d5;
    538		p1[6] = d6;
    539		p1[7] = d7;
    540		p1 += 8;
    541		p2 += 8;
    542		p3 += 8;
    543	} while (--lines > 0);
    544	if (lines == 0)
    545		goto once_more;
    546}
    547
    548static void
    549xor_32regs_p_4(unsigned long bytes, unsigned long * __restrict p1,
    550	       const unsigned long * __restrict p2,
    551	       const unsigned long * __restrict p3,
    552	       const unsigned long * __restrict p4)
    553{
    554	long lines = bytes / (sizeof (long)) / 8 - 1;
    555
    556	prefetchw(p1);
    557	prefetch(p2);
    558	prefetch(p3);
    559	prefetch(p4);
    560
    561	do {
    562		register long d0, d1, d2, d3, d4, d5, d6, d7;
    563
    564		prefetchw(p1+8);
    565		prefetch(p2+8);
    566		prefetch(p3+8);
    567		prefetch(p4+8);
    568 once_more:
    569		d0 = p1[0];	/* Pull the stuff into registers	*/
    570		d1 = p1[1];	/*  ... in bursts, if possible.		*/
    571		d2 = p1[2];
    572		d3 = p1[3];
    573		d4 = p1[4];
    574		d5 = p1[5];
    575		d6 = p1[6];
    576		d7 = p1[7];
    577		d0 ^= p2[0];
    578		d1 ^= p2[1];
    579		d2 ^= p2[2];
    580		d3 ^= p2[3];
    581		d4 ^= p2[4];
    582		d5 ^= p2[5];
    583		d6 ^= p2[6];
    584		d7 ^= p2[7];
    585		d0 ^= p3[0];
    586		d1 ^= p3[1];
    587		d2 ^= p3[2];
    588		d3 ^= p3[3];
    589		d4 ^= p3[4];
    590		d5 ^= p3[5];
    591		d6 ^= p3[6];
    592		d7 ^= p3[7];
    593		d0 ^= p4[0];
    594		d1 ^= p4[1];
    595		d2 ^= p4[2];
    596		d3 ^= p4[3];
    597		d4 ^= p4[4];
    598		d5 ^= p4[5];
    599		d6 ^= p4[6];
    600		d7 ^= p4[7];
    601		p1[0] = d0;	/* Store the result (in bursts)		*/
    602		p1[1] = d1;
    603		p1[2] = d2;
    604		p1[3] = d3;
    605		p1[4] = d4;
    606		p1[5] = d5;
    607		p1[6] = d6;
    608		p1[7] = d7;
    609		p1 += 8;
    610		p2 += 8;
    611		p3 += 8;
    612		p4 += 8;
    613	} while (--lines > 0);
    614	if (lines == 0)
    615		goto once_more;
    616}
    617
    618static void
    619xor_32regs_p_5(unsigned long bytes, unsigned long * __restrict p1,
    620	       const unsigned long * __restrict p2,
    621	       const unsigned long * __restrict p3,
    622	       const unsigned long * __restrict p4,
    623	       const unsigned long * __restrict p5)
    624{
    625	long lines = bytes / (sizeof (long)) / 8 - 1;
    626
    627	prefetchw(p1);
    628	prefetch(p2);
    629	prefetch(p3);
    630	prefetch(p4);
    631	prefetch(p5);
    632
    633	do {
    634		register long d0, d1, d2, d3, d4, d5, d6, d7;
    635
    636		prefetchw(p1+8);
    637		prefetch(p2+8);
    638		prefetch(p3+8);
    639		prefetch(p4+8);
    640		prefetch(p5+8);
    641 once_more:
    642		d0 = p1[0];	/* Pull the stuff into registers	*/
    643		d1 = p1[1];	/*  ... in bursts, if possible.		*/
    644		d2 = p1[2];
    645		d3 = p1[3];
    646		d4 = p1[4];
    647		d5 = p1[5];
    648		d6 = p1[6];
    649		d7 = p1[7];
    650		d0 ^= p2[0];
    651		d1 ^= p2[1];
    652		d2 ^= p2[2];
    653		d3 ^= p2[3];
    654		d4 ^= p2[4];
    655		d5 ^= p2[5];
    656		d6 ^= p2[6];
    657		d7 ^= p2[7];
    658		d0 ^= p3[0];
    659		d1 ^= p3[1];
    660		d2 ^= p3[2];
    661		d3 ^= p3[3];
    662		d4 ^= p3[4];
    663		d5 ^= p3[5];
    664		d6 ^= p3[6];
    665		d7 ^= p3[7];
    666		d0 ^= p4[0];
    667		d1 ^= p4[1];
    668		d2 ^= p4[2];
    669		d3 ^= p4[3];
    670		d4 ^= p4[4];
    671		d5 ^= p4[5];
    672		d6 ^= p4[6];
    673		d7 ^= p4[7];
    674		d0 ^= p5[0];
    675		d1 ^= p5[1];
    676		d2 ^= p5[2];
    677		d3 ^= p5[3];
    678		d4 ^= p5[4];
    679		d5 ^= p5[5];
    680		d6 ^= p5[6];
    681		d7 ^= p5[7];
    682		p1[0] = d0;	/* Store the result (in bursts)		*/
    683		p1[1] = d1;
    684		p1[2] = d2;
    685		p1[3] = d3;
    686		p1[4] = d4;
    687		p1[5] = d5;
    688		p1[6] = d6;
    689		p1[7] = d7;
    690		p1 += 8;
    691		p2 += 8;
    692		p3 += 8;
    693		p4 += 8;
    694		p5 += 8;
    695	} while (--lines > 0);
    696	if (lines == 0)
    697		goto once_more;
    698}
    699
    700static struct xor_block_template xor_block_8regs = {
    701	.name = "8regs",
    702	.do_2 = xor_8regs_2,
    703	.do_3 = xor_8regs_3,
    704	.do_4 = xor_8regs_4,
    705	.do_5 = xor_8regs_5,
    706};
    707
    708static struct xor_block_template xor_block_32regs = {
    709	.name = "32regs",
    710	.do_2 = xor_32regs_2,
    711	.do_3 = xor_32regs_3,
    712	.do_4 = xor_32regs_4,
    713	.do_5 = xor_32regs_5,
    714};
    715
    716static struct xor_block_template xor_block_8regs_p __maybe_unused = {
    717	.name = "8regs_prefetch",
    718	.do_2 = xor_8regs_p_2,
    719	.do_3 = xor_8regs_p_3,
    720	.do_4 = xor_8regs_p_4,
    721	.do_5 = xor_8regs_p_5,
    722};
    723
    724static struct xor_block_template xor_block_32regs_p __maybe_unused = {
    725	.name = "32regs_prefetch",
    726	.do_2 = xor_32regs_p_2,
    727	.do_3 = xor_32regs_p_3,
    728	.do_4 = xor_32regs_p_4,
    729	.do_5 = xor_32regs_p_5,
    730};
    731
    732#define XOR_TRY_TEMPLATES			\
    733	do {					\
    734		xor_speed(&xor_block_8regs);	\
    735		xor_speed(&xor_block_8regs_p);	\
    736		xor_speed(&xor_block_32regs);	\
    737		xor_speed(&xor_block_32regs_p);	\
    738	} while (0)