cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

xor.h (22453B)


/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * include/asm-alpha/xor.h
 *
 * Optimized RAID-5 checksumming functions for alpha EV5 and EV6
 */
      7
      8extern void
      9xor_alpha_2(unsigned long bytes, unsigned long * __restrict p1,
     10	    const unsigned long * __restrict p2);
     11extern void
     12xor_alpha_3(unsigned long bytes, unsigned long * __restrict p1,
     13	    const unsigned long * __restrict p2,
     14	    const unsigned long * __restrict p3);
     15extern void
     16xor_alpha_4(unsigned long bytes, unsigned long * __restrict p1,
     17	    const unsigned long * __restrict p2,
     18	    const unsigned long * __restrict p3,
     19	    const unsigned long * __restrict p4);
     20extern void
     21xor_alpha_5(unsigned long bytes, unsigned long * __restrict p1,
     22	    const unsigned long * __restrict p2,
     23	    const unsigned long * __restrict p3,
     24	    const unsigned long * __restrict p4,
     25	    const unsigned long * __restrict p5);
     26
     27extern void
     28xor_alpha_prefetch_2(unsigned long bytes, unsigned long * __restrict p1,
     29		     const unsigned long * __restrict p2);
     30extern void
     31xor_alpha_prefetch_3(unsigned long bytes, unsigned long * __restrict p1,
     32		     const unsigned long * __restrict p2,
     33		     const unsigned long * __restrict p3);
     34extern void
     35xor_alpha_prefetch_4(unsigned long bytes, unsigned long * __restrict p1,
     36		     const unsigned long * __restrict p2,
     37		     const unsigned long * __restrict p3,
     38		     const unsigned long * __restrict p4);
     39extern void
     40xor_alpha_prefetch_5(unsigned long bytes, unsigned long * __restrict p1,
     41		     const unsigned long * __restrict p2,
     42		     const unsigned long * __restrict p3,
     43		     const unsigned long * __restrict p4,
     44		     const unsigned long * __restrict p5);
     45
     46asm("								\n\
     47	.text							\n\
     48	.align 3						\n\
     49	.ent xor_alpha_2					\n\
     50xor_alpha_2:							\n\
     51	.prologue 0						\n\
     52	srl $16, 6, $16						\n\
     53	.align 4						\n\
     542:								\n\
     55	ldq $0,0($17)						\n\
     56	ldq $1,0($18)						\n\
     57	ldq $2,8($17)						\n\
     58	ldq $3,8($18)						\n\
     59								\n\
     60	ldq $4,16($17)						\n\
     61	ldq $5,16($18)						\n\
     62	ldq $6,24($17)						\n\
     63	ldq $7,24($18)						\n\
     64								\n\
     65	ldq $19,32($17)						\n\
     66	ldq $20,32($18)						\n\
     67	ldq $21,40($17)						\n\
     68	ldq $22,40($18)						\n\
     69								\n\
     70	ldq $23,48($17)						\n\
     71	ldq $24,48($18)						\n\
     72	ldq $25,56($17)						\n\
     73	xor $0,$1,$0		# 7 cycles from $1 load		\n\
     74								\n\
     75	ldq $27,56($18)						\n\
     76	xor $2,$3,$2						\n\
     77	stq $0,0($17)						\n\
     78	xor $4,$5,$4						\n\
     79								\n\
     80	stq $2,8($17)						\n\
     81	xor $6,$7,$6						\n\
     82	stq $4,16($17)						\n\
     83	xor $19,$20,$19						\n\
     84								\n\
     85	stq $6,24($17)						\n\
     86	xor $21,$22,$21						\n\
     87	stq $19,32($17)						\n\
     88	xor $23,$24,$23						\n\
     89								\n\
     90	stq $21,40($17)						\n\
     91	xor $25,$27,$25						\n\
     92	stq $23,48($17)						\n\
     93	subq $16,1,$16						\n\
     94								\n\
     95	stq $25,56($17)						\n\
     96	addq $17,64,$17						\n\
     97	addq $18,64,$18						\n\
     98	bgt $16,2b						\n\
     99								\n\
    100	ret							\n\
    101	.end xor_alpha_2					\n\
    102								\n\
    103	.align 3						\n\
    104	.ent xor_alpha_3					\n\
    105xor_alpha_3:							\n\
    106	.prologue 0						\n\
    107	srl $16, 6, $16						\n\
    108	.align 4						\n\
    1093:								\n\
    110	ldq $0,0($17)						\n\
    111	ldq $1,0($18)						\n\
    112	ldq $2,0($19)						\n\
    113	ldq $3,8($17)						\n\
    114								\n\
    115	ldq $4,8($18)						\n\
    116	ldq $6,16($17)						\n\
    117	ldq $7,16($18)						\n\
    118	ldq $21,24($17)						\n\
    119								\n\
    120	ldq $22,24($18)						\n\
    121	ldq $24,32($17)						\n\
    122	ldq $25,32($18)						\n\
    123	ldq $5,8($19)						\n\
    124								\n\
    125	ldq $20,16($19)						\n\
    126	ldq $23,24($19)						\n\
    127	ldq $27,32($19)						\n\
    128	nop							\n\
    129								\n\
    130	xor $0,$1,$1		# 8 cycles from $0 load		\n\
    131	xor $3,$4,$4		# 6 cycles from $4 load		\n\
    132	xor $6,$7,$7		# 6 cycles from $7 load		\n\
    133	xor $21,$22,$22		# 5 cycles from $22 load	\n\
    134								\n\
    135	xor $1,$2,$2		# 9 cycles from $2 load		\n\
    136	xor $24,$25,$25		# 5 cycles from $25 load	\n\
    137	stq $2,0($17)						\n\
    138	xor $4,$5,$5		# 6 cycles from $5 load		\n\
    139								\n\
    140	stq $5,8($17)						\n\
    141	xor $7,$20,$20		# 7 cycles from $20 load	\n\
    142	stq $20,16($17)						\n\
    143	xor $22,$23,$23		# 7 cycles from $23 load	\n\
    144								\n\
    145	stq $23,24($17)						\n\
    146	xor $25,$27,$27		# 7 cycles from $27 load	\n\
    147	stq $27,32($17)						\n\
    148	nop							\n\
    149								\n\
    150	ldq $0,40($17)						\n\
    151	ldq $1,40($18)						\n\
    152	ldq $3,48($17)						\n\
    153	ldq $4,48($18)						\n\
    154								\n\
    155	ldq $6,56($17)						\n\
    156	ldq $7,56($18)						\n\
    157	ldq $2,40($19)						\n\
    158	ldq $5,48($19)						\n\
    159								\n\
    160	ldq $20,56($19)						\n\
    161	xor $0,$1,$1		# 4 cycles from $1 load		\n\
    162	xor $3,$4,$4		# 5 cycles from $4 load		\n\
    163	xor $6,$7,$7		# 5 cycles from $7 load		\n\
    164								\n\
    165	xor $1,$2,$2		# 4 cycles from $2 load		\n\
    166	xor $4,$5,$5		# 5 cycles from $5 load		\n\
    167	stq $2,40($17)						\n\
    168	xor $7,$20,$20		# 4 cycles from $20 load	\n\
    169								\n\
    170	stq $5,48($17)						\n\
    171	subq $16,1,$16						\n\
    172	stq $20,56($17)						\n\
    173	addq $19,64,$19						\n\
    174								\n\
    175	addq $18,64,$18						\n\
    176	addq $17,64,$17						\n\
    177	bgt $16,3b						\n\
    178	ret							\n\
    179	.end xor_alpha_3					\n\
    180								\n\
    181	.align 3						\n\
    182	.ent xor_alpha_4					\n\
    183xor_alpha_4:							\n\
    184	.prologue 0						\n\
    185	srl $16, 6, $16						\n\
    186	.align 4						\n\
    1874:								\n\
    188	ldq $0,0($17)						\n\
    189	ldq $1,0($18)						\n\
    190	ldq $2,0($19)						\n\
    191	ldq $3,0($20)						\n\
    192								\n\
    193	ldq $4,8($17)						\n\
    194	ldq $5,8($18)						\n\
    195	ldq $6,8($19)						\n\
    196	ldq $7,8($20)						\n\
    197								\n\
    198	ldq $21,16($17)						\n\
    199	ldq $22,16($18)						\n\
    200	ldq $23,16($19)						\n\
    201	ldq $24,16($20)						\n\
    202								\n\
    203	ldq $25,24($17)						\n\
    204	xor $0,$1,$1		# 6 cycles from $1 load		\n\
    205	ldq $27,24($18)						\n\
    206	xor $2,$3,$3		# 6 cycles from $3 load		\n\
    207								\n\
    208	ldq $0,24($19)						\n\
    209	xor $1,$3,$3						\n\
    210	ldq $1,24($20)						\n\
    211	xor $4,$5,$5		# 7 cycles from $5 load		\n\
    212								\n\
    213	stq $3,0($17)						\n\
    214	xor $6,$7,$7						\n\
    215	xor $21,$22,$22		# 7 cycles from $22 load	\n\
    216	xor $5,$7,$7						\n\
    217								\n\
    218	stq $7,8($17)						\n\
    219	xor $23,$24,$24		# 7 cycles from $24 load	\n\
    220	ldq $2,32($17)						\n\
    221	xor $22,$24,$24						\n\
    222								\n\
    223	ldq $3,32($18)						\n\
    224	ldq $4,32($19)						\n\
    225	ldq $5,32($20)						\n\
    226	xor $25,$27,$27		# 8 cycles from $27 load	\n\
    227								\n\
    228	ldq $6,40($17)						\n\
    229	ldq $7,40($18)						\n\
    230	ldq $21,40($19)						\n\
    231	ldq $22,40($20)						\n\
    232								\n\
    233	stq $24,16($17)						\n\
    234	xor $0,$1,$1		# 9 cycles from $1 load		\n\
    235	xor $2,$3,$3		# 5 cycles from $3 load		\n\
    236	xor $27,$1,$1						\n\
    237								\n\
    238	stq $1,24($17)						\n\
    239	xor $4,$5,$5		# 5 cycles from $5 load		\n\
    240	ldq $23,48($17)						\n\
    241	ldq $24,48($18)						\n\
    242								\n\
    243	ldq $25,48($19)						\n\
    244	xor $3,$5,$5						\n\
    245	ldq $27,48($20)						\n\
    246	ldq $0,56($17)						\n\
    247								\n\
    248	ldq $1,56($18)						\n\
    249	ldq $2,56($19)						\n\
    250	xor $6,$7,$7		# 8 cycles from $6 load		\n\
    251	ldq $3,56($20)						\n\
    252								\n\
    253	stq $5,32($17)						\n\
    254	xor $21,$22,$22		# 8 cycles from $22 load	\n\
    255	xor $7,$22,$22						\n\
    256	xor $23,$24,$24		# 5 cycles from $24 load	\n\
    257								\n\
    258	stq $22,40($17)						\n\
    259	xor $25,$27,$27		# 5 cycles from $27 load	\n\
    260	xor $24,$27,$27						\n\
    261	xor $0,$1,$1		# 5 cycles from $1 load		\n\
    262								\n\
    263	stq $27,48($17)						\n\
    264	xor $2,$3,$3		# 4 cycles from $3 load		\n\
    265	xor $1,$3,$3						\n\
    266	subq $16,1,$16						\n\
    267								\n\
    268	stq $3,56($17)						\n\
    269	addq $20,64,$20						\n\
    270	addq $19,64,$19						\n\
    271	addq $18,64,$18						\n\
    272								\n\
    273	addq $17,64,$17						\n\
    274	bgt $16,4b						\n\
    275	ret							\n\
    276	.end xor_alpha_4					\n\
    277								\n\
    278	.align 3						\n\
    279	.ent xor_alpha_5					\n\
    280xor_alpha_5:							\n\
    281	.prologue 0						\n\
    282	srl $16, 6, $16						\n\
    283	.align 4						\n\
    2845:								\n\
    285	ldq $0,0($17)						\n\
    286	ldq $1,0($18)						\n\
    287	ldq $2,0($19)						\n\
    288	ldq $3,0($20)						\n\
    289								\n\
    290	ldq $4,0($21)						\n\
    291	ldq $5,8($17)						\n\
    292	ldq $6,8($18)						\n\
    293	ldq $7,8($19)						\n\
    294								\n\
    295	ldq $22,8($20)						\n\
    296	ldq $23,8($21)						\n\
    297	ldq $24,16($17)						\n\
    298	ldq $25,16($18)						\n\
    299								\n\
    300	ldq $27,16($19)						\n\
    301	xor $0,$1,$1		# 6 cycles from $1 load		\n\
    302	ldq $28,16($20)						\n\
    303	xor $2,$3,$3		# 6 cycles from $3 load		\n\
    304								\n\
    305	ldq $0,16($21)						\n\
    306	xor $1,$3,$3						\n\
    307	ldq $1,24($17)						\n\
    308	xor $3,$4,$4		# 7 cycles from $4 load		\n\
    309								\n\
    310	stq $4,0($17)						\n\
    311	xor $5,$6,$6		# 7 cycles from $6 load		\n\
    312	xor $7,$22,$22		# 7 cycles from $22 load	\n\
    313	xor $6,$23,$23		# 7 cycles from $23 load	\n\
    314								\n\
    315	ldq $2,24($18)						\n\
    316	xor $22,$23,$23						\n\
    317	ldq $3,24($19)						\n\
    318	xor $24,$25,$25		# 8 cycles from $25 load	\n\
    319								\n\
    320	stq $23,8($17)						\n\
    321	xor $25,$27,$27		# 8 cycles from $27 load	\n\
    322	ldq $4,24($20)						\n\
    323	xor $28,$0,$0		# 7 cycles from $0 load		\n\
    324								\n\
    325	ldq $5,24($21)						\n\
    326	xor $27,$0,$0						\n\
    327	ldq $6,32($17)						\n\
    328	ldq $7,32($18)						\n\
    329								\n\
    330	stq $0,16($17)						\n\
    331	xor $1,$2,$2		# 6 cycles from $2 load		\n\
    332	ldq $22,32($19)						\n\
    333	xor $3,$4,$4		# 4 cycles from $4 load		\n\
    334								\n\
    335	ldq $23,32($20)						\n\
    336	xor $2,$4,$4						\n\
    337	ldq $24,32($21)						\n\
    338	ldq $25,40($17)						\n\
    339								\n\
    340	ldq $27,40($18)						\n\
    341	ldq $28,40($19)						\n\
    342	ldq $0,40($20)						\n\
    343	xor $4,$5,$5		# 7 cycles from $5 load		\n\
    344								\n\
    345	stq $5,24($17)						\n\
    346	xor $6,$7,$7		# 7 cycles from $7 load		\n\
    347	ldq $1,40($21)						\n\
    348	ldq $2,48($17)						\n\
    349								\n\
    350	ldq $3,48($18)						\n\
    351	xor $7,$22,$22		# 7 cycles from $22 load	\n\
    352	ldq $4,48($19)						\n\
    353	xor $23,$24,$24		# 6 cycles from $24 load	\n\
    354								\n\
    355	ldq $5,48($20)						\n\
    356	xor $22,$24,$24						\n\
    357	ldq $6,48($21)						\n\
    358	xor $25,$27,$27		# 7 cycles from $27 load	\n\
    359								\n\
    360	stq $24,32($17)						\n\
    361	xor $27,$28,$28		# 8 cycles from $28 load	\n\
    362	ldq $7,56($17)						\n\
    363	xor $0,$1,$1		# 6 cycles from $1 load		\n\
    364								\n\
    365	ldq $22,56($18)						\n\
    366	ldq $23,56($19)						\n\
    367	ldq $24,56($20)						\n\
    368	ldq $25,56($21)						\n\
    369								\n\
    370	xor $28,$1,$1						\n\
    371	xor $2,$3,$3		# 9 cycles from $3 load		\n\
    372	xor $3,$4,$4		# 9 cycles from $4 load		\n\
    373	xor $5,$6,$6		# 8 cycles from $6 load		\n\
    374								\n\
    375	stq $1,40($17)						\n\
    376	xor $4,$6,$6						\n\
    377	xor $7,$22,$22		# 7 cycles from $22 load	\n\
    378	xor $23,$24,$24		# 6 cycles from $24 load	\n\
    379								\n\
    380	stq $6,48($17)						\n\
    381	xor $22,$24,$24						\n\
    382	subq $16,1,$16						\n\
    383	xor $24,$25,$25		# 8 cycles from $25 load	\n\
    384								\n\
    385	stq $25,56($17)						\n\
    386	addq $21,64,$21						\n\
    387	addq $20,64,$20						\n\
    388	addq $19,64,$19						\n\
    389								\n\
    390	addq $18,64,$18						\n\
    391	addq $17,64,$17						\n\
    392	bgt $16,5b						\n\
    393	ret							\n\
    394	.end xor_alpha_5					\n\
    395								\n\
    396	.align 3						\n\
    397	.ent xor_alpha_prefetch_2				\n\
    398xor_alpha_prefetch_2:						\n\
    399	.prologue 0						\n\
    400	srl $16, 6, $16						\n\
    401								\n\
    402	ldq $31, 0($17)						\n\
    403	ldq $31, 0($18)						\n\
    404								\n\
    405	ldq $31, 64($17)					\n\
    406	ldq $31, 64($18)					\n\
    407								\n\
    408	ldq $31, 128($17)					\n\
    409	ldq $31, 128($18)					\n\
    410								\n\
    411	ldq $31, 192($17)					\n\
    412	ldq $31, 192($18)					\n\
    413	.align 4						\n\
    4142:								\n\
    415	ldq $0,0($17)						\n\
    416	ldq $1,0($18)						\n\
    417	ldq $2,8($17)						\n\
    418	ldq $3,8($18)						\n\
    419								\n\
    420	ldq $4,16($17)						\n\
    421	ldq $5,16($18)						\n\
    422	ldq $6,24($17)						\n\
    423	ldq $7,24($18)						\n\
    424								\n\
    425	ldq $19,32($17)						\n\
    426	ldq $20,32($18)						\n\
    427	ldq $21,40($17)						\n\
    428	ldq $22,40($18)						\n\
    429								\n\
    430	ldq $23,48($17)						\n\
    431	ldq $24,48($18)						\n\
    432	ldq $25,56($17)						\n\
    433	ldq $27,56($18)						\n\
    434								\n\
    435	ldq $31,256($17)					\n\
    436	xor $0,$1,$0		# 8 cycles from $1 load		\n\
    437	ldq $31,256($18)					\n\
    438	xor $2,$3,$2						\n\
    439								\n\
    440	stq $0,0($17)						\n\
    441	xor $4,$5,$4						\n\
    442	stq $2,8($17)						\n\
    443	xor $6,$7,$6						\n\
    444								\n\
    445	stq $4,16($17)						\n\
    446	xor $19,$20,$19						\n\
    447	stq $6,24($17)						\n\
    448	xor $21,$22,$21						\n\
    449								\n\
    450	stq $19,32($17)						\n\
    451	xor $23,$24,$23						\n\
    452	stq $21,40($17)						\n\
    453	xor $25,$27,$25						\n\
    454								\n\
    455	stq $23,48($17)						\n\
    456	subq $16,1,$16						\n\
    457	stq $25,56($17)						\n\
    458	addq $17,64,$17						\n\
    459								\n\
    460	addq $18,64,$18						\n\
    461	bgt $16,2b						\n\
    462	ret							\n\
    463	.end xor_alpha_prefetch_2				\n\
    464								\n\
    465	.align 3						\n\
    466	.ent xor_alpha_prefetch_3				\n\
    467xor_alpha_prefetch_3:						\n\
    468	.prologue 0						\n\
    469	srl $16, 6, $16						\n\
    470								\n\
    471	ldq $31, 0($17)						\n\
    472	ldq $31, 0($18)						\n\
    473	ldq $31, 0($19)						\n\
    474								\n\
    475	ldq $31, 64($17)					\n\
    476	ldq $31, 64($18)					\n\
    477	ldq $31, 64($19)					\n\
    478								\n\
    479	ldq $31, 128($17)					\n\
    480	ldq $31, 128($18)					\n\
    481	ldq $31, 128($19)					\n\
    482								\n\
    483	ldq $31, 192($17)					\n\
    484	ldq $31, 192($18)					\n\
    485	ldq $31, 192($19)					\n\
    486	.align 4						\n\
    4873:								\n\
    488	ldq $0,0($17)						\n\
    489	ldq $1,0($18)						\n\
    490	ldq $2,0($19)						\n\
    491	ldq $3,8($17)						\n\
    492								\n\
    493	ldq $4,8($18)						\n\
    494	ldq $6,16($17)						\n\
    495	ldq $7,16($18)						\n\
    496	ldq $21,24($17)						\n\
    497								\n\
    498	ldq $22,24($18)						\n\
    499	ldq $24,32($17)						\n\
    500	ldq $25,32($18)						\n\
    501	ldq $5,8($19)						\n\
    502								\n\
    503	ldq $20,16($19)						\n\
    504	ldq $23,24($19)						\n\
    505	ldq $27,32($19)						\n\
    506	nop							\n\
    507								\n\
    508	xor $0,$1,$1		# 8 cycles from $0 load		\n\
    509	xor $3,$4,$4		# 7 cycles from $4 load		\n\
    510	xor $6,$7,$7		# 6 cycles from $7 load		\n\
    511	xor $21,$22,$22		# 5 cycles from $22 load	\n\
    512								\n\
    513	xor $1,$2,$2		# 9 cycles from $2 load		\n\
    514	xor $24,$25,$25		# 5 cycles from $25 load	\n\
    515	stq $2,0($17)						\n\
    516	xor $4,$5,$5		# 6 cycles from $5 load		\n\
    517								\n\
    518	stq $5,8($17)						\n\
    519	xor $7,$20,$20		# 7 cycles from $20 load	\n\
    520	stq $20,16($17)						\n\
    521	xor $22,$23,$23		# 7 cycles from $23 load	\n\
    522								\n\
    523	stq $23,24($17)						\n\
    524	xor $25,$27,$27		# 7 cycles from $27 load	\n\
    525	stq $27,32($17)						\n\
    526	nop							\n\
    527								\n\
    528	ldq $0,40($17)						\n\
    529	ldq $1,40($18)						\n\
    530	ldq $3,48($17)						\n\
    531	ldq $4,48($18)						\n\
    532								\n\
    533	ldq $6,56($17)						\n\
    534	ldq $7,56($18)						\n\
    535	ldq $2,40($19)						\n\
    536	ldq $5,48($19)						\n\
    537								\n\
    538	ldq $20,56($19)						\n\
    539	ldq $31,256($17)					\n\
    540	ldq $31,256($18)					\n\
    541	ldq $31,256($19)					\n\
    542								\n\
    543	xor $0,$1,$1		# 6 cycles from $1 load		\n\
    544	xor $3,$4,$4		# 5 cycles from $4 load		\n\
    545	xor $6,$7,$7		# 5 cycles from $7 load		\n\
    546	xor $1,$2,$2		# 4 cycles from $2 load		\n\
    547								\n\
    548	xor $4,$5,$5		# 5 cycles from $5 load		\n\
    549	xor $7,$20,$20		# 4 cycles from $20 load	\n\
    550	stq $2,40($17)						\n\
    551	subq $16,1,$16						\n\
    552								\n\
    553	stq $5,48($17)						\n\
    554	addq $19,64,$19						\n\
    555	stq $20,56($17)						\n\
    556	addq $18,64,$18						\n\
    557								\n\
    558	addq $17,64,$17						\n\
    559	bgt $16,3b						\n\
    560	ret							\n\
    561	.end xor_alpha_prefetch_3				\n\
    562								\n\
    563	.align 3						\n\
    564	.ent xor_alpha_prefetch_4				\n\
    565xor_alpha_prefetch_4:						\n\
    566	.prologue 0						\n\
    567	srl $16, 6, $16						\n\
    568								\n\
    569	ldq $31, 0($17)						\n\
    570	ldq $31, 0($18)						\n\
    571	ldq $31, 0($19)						\n\
    572	ldq $31, 0($20)						\n\
    573								\n\
    574	ldq $31, 64($17)					\n\
    575	ldq $31, 64($18)					\n\
    576	ldq $31, 64($19)					\n\
    577	ldq $31, 64($20)					\n\
    578								\n\
    579	ldq $31, 128($17)					\n\
    580	ldq $31, 128($18)					\n\
    581	ldq $31, 128($19)					\n\
    582	ldq $31, 128($20)					\n\
    583								\n\
    584	ldq $31, 192($17)					\n\
    585	ldq $31, 192($18)					\n\
    586	ldq $31, 192($19)					\n\
    587	ldq $31, 192($20)					\n\
    588	.align 4						\n\
    5894:								\n\
    590	ldq $0,0($17)						\n\
    591	ldq $1,0($18)						\n\
    592	ldq $2,0($19)						\n\
    593	ldq $3,0($20)						\n\
    594								\n\
    595	ldq $4,8($17)						\n\
    596	ldq $5,8($18)						\n\
    597	ldq $6,8($19)						\n\
    598	ldq $7,8($20)						\n\
    599								\n\
    600	ldq $21,16($17)						\n\
    601	ldq $22,16($18)						\n\
    602	ldq $23,16($19)						\n\
    603	ldq $24,16($20)						\n\
    604								\n\
    605	ldq $25,24($17)						\n\
    606	xor $0,$1,$1		# 6 cycles from $1 load		\n\
    607	ldq $27,24($18)						\n\
    608	xor $2,$3,$3		# 6 cycles from $3 load		\n\
    609								\n\
    610	ldq $0,24($19)						\n\
    611	xor $1,$3,$3						\n\
    612	ldq $1,24($20)						\n\
    613	xor $4,$5,$5		# 7 cycles from $5 load		\n\
    614								\n\
    615	stq $3,0($17)						\n\
    616	xor $6,$7,$7						\n\
    617	xor $21,$22,$22		# 7 cycles from $22 load	\n\
    618	xor $5,$7,$7						\n\
    619								\n\
    620	stq $7,8($17)						\n\
    621	xor $23,$24,$24		# 7 cycles from $24 load	\n\
    622	ldq $2,32($17)						\n\
    623	xor $22,$24,$24						\n\
    624								\n\
    625	ldq $3,32($18)						\n\
    626	ldq $4,32($19)						\n\
    627	ldq $5,32($20)						\n\
    628	xor $25,$27,$27		# 8 cycles from $27 load	\n\
    629								\n\
    630	ldq $6,40($17)						\n\
    631	ldq $7,40($18)						\n\
    632	ldq $21,40($19)						\n\
    633	ldq $22,40($20)						\n\
    634								\n\
    635	stq $24,16($17)						\n\
    636	xor $0,$1,$1		# 9 cycles from $1 load		\n\
    637	xor $2,$3,$3		# 5 cycles from $3 load		\n\
    638	xor $27,$1,$1						\n\
    639								\n\
    640	stq $1,24($17)						\n\
    641	xor $4,$5,$5		# 5 cycles from $5 load		\n\
    642	ldq $23,48($17)						\n\
    643	xor $3,$5,$5						\n\
    644								\n\
    645	ldq $24,48($18)						\n\
    646	ldq $25,48($19)						\n\
    647	ldq $27,48($20)						\n\
    648	ldq $0,56($17)						\n\
    649								\n\
    650	ldq $1,56($18)						\n\
    651	ldq $2,56($19)						\n\
    652	ldq $3,56($20)						\n\
    653	xor $6,$7,$7		# 8 cycles from $6 load		\n\
    654								\n\
    655	ldq $31,256($17)					\n\
    656	xor $21,$22,$22		# 8 cycles from $22 load	\n\
    657	ldq $31,256($18)					\n\
    658	xor $7,$22,$22						\n\
    659								\n\
    660	ldq $31,256($19)					\n\
    661	xor $23,$24,$24		# 6 cycles from $24 load	\n\
    662	ldq $31,256($20)					\n\
    663	xor $25,$27,$27		# 6 cycles from $27 load	\n\
    664								\n\
    665	stq $5,32($17)						\n\
    666	xor $24,$27,$27						\n\
    667	xor $0,$1,$1		# 7 cycles from $1 load		\n\
    668	xor $2,$3,$3		# 6 cycles from $3 load		\n\
    669								\n\
    670	stq $22,40($17)						\n\
    671	xor $1,$3,$3						\n\
    672	stq $27,48($17)						\n\
    673	subq $16,1,$16						\n\
    674								\n\
    675	stq $3,56($17)						\n\
    676	addq $20,64,$20						\n\
    677	addq $19,64,$19						\n\
    678	addq $18,64,$18						\n\
    679								\n\
    680	addq $17,64,$17						\n\
    681	bgt $16,4b						\n\
    682	ret							\n\
    683	.end xor_alpha_prefetch_4				\n\
    684								\n\
    685	.align 3						\n\
    686	.ent xor_alpha_prefetch_5				\n\
    687xor_alpha_prefetch_5:						\n\
    688	.prologue 0						\n\
    689	srl $16, 6, $16						\n\
    690								\n\
    691	ldq $31, 0($17)						\n\
    692	ldq $31, 0($18)						\n\
    693	ldq $31, 0($19)						\n\
    694	ldq $31, 0($20)						\n\
    695	ldq $31, 0($21)						\n\
    696								\n\
    697	ldq $31, 64($17)					\n\
    698	ldq $31, 64($18)					\n\
    699	ldq $31, 64($19)					\n\
    700	ldq $31, 64($20)					\n\
    701	ldq $31, 64($21)					\n\
    702								\n\
    703	ldq $31, 128($17)					\n\
    704	ldq $31, 128($18)					\n\
    705	ldq $31, 128($19)					\n\
    706	ldq $31, 128($20)					\n\
    707	ldq $31, 128($21)					\n\
    708								\n\
    709	ldq $31, 192($17)					\n\
    710	ldq $31, 192($18)					\n\
    711	ldq $31, 192($19)					\n\
    712	ldq $31, 192($20)					\n\
    713	ldq $31, 192($21)					\n\
    714	.align 4						\n\
    7155:								\n\
    716	ldq $0,0($17)						\n\
    717	ldq $1,0($18)						\n\
    718	ldq $2,0($19)						\n\
    719	ldq $3,0($20)						\n\
    720								\n\
    721	ldq $4,0($21)						\n\
    722	ldq $5,8($17)						\n\
    723	ldq $6,8($18)						\n\
    724	ldq $7,8($19)						\n\
    725								\n\
    726	ldq $22,8($20)						\n\
    727	ldq $23,8($21)						\n\
    728	ldq $24,16($17)						\n\
    729	ldq $25,16($18)						\n\
    730								\n\
    731	ldq $27,16($19)						\n\
    732	xor $0,$1,$1		# 6 cycles from $1 load		\n\
    733	ldq $28,16($20)						\n\
    734	xor $2,$3,$3		# 6 cycles from $3 load		\n\
    735								\n\
    736	ldq $0,16($21)						\n\
    737	xor $1,$3,$3						\n\
    738	ldq $1,24($17)						\n\
    739	xor $3,$4,$4		# 7 cycles from $4 load		\n\
    740								\n\
    741	stq $4,0($17)						\n\
    742	xor $5,$6,$6		# 7 cycles from $6 load		\n\
    743	xor $7,$22,$22		# 7 cycles from $22 load	\n\
    744	xor $6,$23,$23		# 7 cycles from $23 load	\n\
    745								\n\
    746	ldq $2,24($18)						\n\
    747	xor $22,$23,$23						\n\
    748	ldq $3,24($19)						\n\
    749	xor $24,$25,$25		# 8 cycles from $25 load	\n\
    750								\n\
    751	stq $23,8($17)						\n\
    752	xor $25,$27,$27		# 8 cycles from $27 load	\n\
    753	ldq $4,24($20)						\n\
    754	xor $28,$0,$0		# 7 cycles from $0 load		\n\
    755								\n\
    756	ldq $5,24($21)						\n\
    757	xor $27,$0,$0						\n\
    758	ldq $6,32($17)						\n\
    759	ldq $7,32($18)						\n\
    760								\n\
    761	stq $0,16($17)						\n\
    762	xor $1,$2,$2		# 6 cycles from $2 load		\n\
    763	ldq $22,32($19)						\n\
    764	xor $3,$4,$4		# 4 cycles from $4 load		\n\
    765								\n\
    766	ldq $23,32($20)						\n\
    767	xor $2,$4,$4						\n\
    768	ldq $24,32($21)						\n\
    769	ldq $25,40($17)						\n\
    770								\n\
    771	ldq $27,40($18)						\n\
    772	ldq $28,40($19)						\n\
    773	ldq $0,40($20)						\n\
    774	xor $4,$5,$5		# 7 cycles from $5 load		\n\
    775								\n\
    776	stq $5,24($17)						\n\
    777	xor $6,$7,$7		# 7 cycles from $7 load		\n\
    778	ldq $1,40($21)						\n\
    779	ldq $2,48($17)						\n\
    780								\n\
    781	ldq $3,48($18)						\n\
    782	xor $7,$22,$22		# 7 cycles from $22 load	\n\
    783	ldq $4,48($19)						\n\
    784	xor $23,$24,$24		# 6 cycles from $24 load	\n\
    785								\n\
    786	ldq $5,48($20)						\n\
    787	xor $22,$24,$24						\n\
    788	ldq $6,48($21)						\n\
    789	xor $25,$27,$27		# 7 cycles from $27 load	\n\
    790								\n\
    791	stq $24,32($17)						\n\
    792	xor $27,$28,$28		# 8 cycles from $28 load	\n\
    793	ldq $7,56($17)						\n\
    794	xor $0,$1,$1		# 6 cycles from $1 load		\n\
    795								\n\
    796	ldq $22,56($18)						\n\
    797	ldq $23,56($19)						\n\
    798	ldq $24,56($20)						\n\
    799	ldq $25,56($21)						\n\
    800								\n\
    801	ldq $31,256($17)					\n\
    802	xor $28,$1,$1						\n\
    803	ldq $31,256($18)					\n\
    804	xor $2,$3,$3		# 9 cycles from $3 load		\n\
    805								\n\
    806	ldq $31,256($19)					\n\
    807	xor $3,$4,$4		# 9 cycles from $4 load		\n\
    808	ldq $31,256($20)					\n\
    809	xor $5,$6,$6		# 8 cycles from $6 load		\n\
    810								\n\
    811	stq $1,40($17)						\n\
    812	xor $4,$6,$6						\n\
    813	xor $7,$22,$22		# 7 cycles from $22 load	\n\
    814	xor $23,$24,$24		# 6 cycles from $24 load	\n\
    815								\n\
    816	stq $6,48($17)						\n\
    817	xor $22,$24,$24						\n\
    818	ldq $31,256($21)					\n\
    819	xor $24,$25,$25		# 8 cycles from $25 load	\n\
    820								\n\
    821	stq $25,56($17)						\n\
    822	subq $16,1,$16						\n\
    823	addq $21,64,$21						\n\
    824	addq $20,64,$20						\n\
    825								\n\
    826	addq $19,64,$19						\n\
    827	addq $18,64,$18						\n\
    828	addq $17,64,$17						\n\
    829	bgt $16,5b						\n\
    830								\n\
    831	ret							\n\
    832	.end xor_alpha_prefetch_5				\n\
    833");
    834
    835static struct xor_block_template xor_block_alpha = {
    836	.name	= "alpha",
    837	.do_2	= xor_alpha_2,
    838	.do_3	= xor_alpha_3,
    839	.do_4	= xor_alpha_4,
    840	.do_5	= xor_alpha_5,
    841};
    842
    843static struct xor_block_template xor_block_alpha_prefetch = {
    844	.name	= "alpha prefetch",
    845	.do_2	= xor_alpha_prefetch_2,
    846	.do_3	= xor_alpha_prefetch_3,
    847	.do_4	= xor_alpha_prefetch_4,
    848	.do_5	= xor_alpha_prefetch_5,
    849};
    850
    851/* For grins, also test the generic routines.  */
    852#include <asm-generic/xor.h>
    853
    854#undef XOR_TRY_TEMPLATES
    855#define XOR_TRY_TEMPLATES				\
    856	do {						\
    857		xor_speed(&xor_block_8regs);		\
    858		xor_speed(&xor_block_32regs);		\
    859		xor_speed(&xor_block_alpha);		\
    860		xor_speed(&xor_block_alpha_prefetch);	\
    861	} while (0)
    862
    863/* Force the use of alpha_prefetch if EV6, as it is significantly
    864   faster in the cold cache case.  */
    865#define XOR_SELECT_TEMPLATE(FASTEST) \
    866	(implver() == IMPLVER_EV6 ? &xor_block_alpha_prefetch : FASTEST)