cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

xor.S (3226B)


      1/* SPDX-License-Identifier: GPL-2.0-or-later */
      2/*
      3 * arch/ia64/lib/xor.S
      4 *
      5 * Optimized RAID-5 checksumming functions for IA-64.
      6 */
      7
      8#include <asm/asmmacro.h>
      9#include <asm/export.h>
     10
     11GLOBAL_ENTRY(xor_ia64_2)
/*
 * xor_ia64_2: XOR the words read through in2 into the buffer at in1.
 *
 *   in0 - length; divided by 8 below, so presumably a byte count
 *         (NOTE(review): a value below 8 would leave ar.lc at -1; confirm
 *         that callers never pass one)
 *   in1 - first source and also the destination (copied to r16 and to r8)
 *   in2 - second source (copied to r17)
 *
 * The loop is software-pipelined on rotating registers: each 8-byte word is
 * loaded in stage 0, XORed in stage 6 and stored in stage 7.  ar.pfs, ar.lc
 * and pr are kept in r31, r30 and r29; ar.lc and pr are put back on exit.
 */
     12	.prologue
     13	.fframe 0
     14	.save ar.pfs, r31
     15	alloc r31 = ar.pfs, 3, 0, 13, 16	/* 3 in, 0 local, 13 out, 16 rotating */
     16	.save ar.lc, r30
     17	mov r30 = ar.lc
     18	.save pr, r29
     19	mov r29 = pr
     20	;;
     21	.body
/*
 * The 16 rotating registers cover the whole 3+0+13 register frame, inputs
 * included, so the pointers are copied to static registers before the loop.
 */
     22	mov r8 = in1	/* r8 = store pointer */
     23	mov ar.ec = 6 + 2	/* epilogue count = number of pipeline stages */
     24	shr in0 = in0, 3	/* length / 8 = count of 8-byte words */
     25	;;
     26	adds in0 = -1, in0	/* ar.lc takes the iteration count minus one */
     27	mov r16 = in1	/* r16 = load pointer, source 1 */
     28	mov r17 = in2	/* r17 = load pointer, source 2 */
     29	;;
     30	mov ar.lc = in0
     31	mov pr.rot = 1 << 16	/* only p16, i.e. p[0], set: stage 0 runs alone at first */
     32	;;
     33	.rotr s1[6+1], s2[6+1], d[2]	/* 7 + 7 + 2 = 16 rotating names */
     34	.rotp p[6+2]	/* one predicate per pipeline stage */
     350:
     36(p[0])	ld8.nta s1[0] = [r16], 8	/* stage 0: load source 1, pointer += 8 */
     37(p[0])	ld8.nta s2[0] = [r17], 8	/* stage 0: load source 2, pointer += 8 */
     38(p[6])	xor d[0] = s1[6], s2[6]	/* stage 6: words loaded six rotations ago */
     39(p[6+1])st8.nta [r8] = d[1], 8	/* stage 7: d[1] is the d[0] of the previous pass */
     40	nop.f 0	/* presumably bundle padding; TODO confirm */
     41	br.ctop.dptk.few 0b	/* rotate registers, loop until ar.lc and ar.ec run out */
     42	;;
     43	mov ar.lc = r30	/* restore the caller's loop count */
     44	mov pr = r29, -1	/* restore all predicates (mask -1) */
     45	br.ret.sptk.few rp
     46END(xor_ia64_2)
     47EXPORT_SYMBOL(xor_ia64_2)
     48
     49GLOBAL_ENTRY(xor_ia64_3)
/*
 * xor_ia64_3: XOR the words read through in2 and in3 into the buffer at in1.
 *
 *   in0 - length; divided by 8 below, so presumably a byte count
 *         (NOTE(review): a value below 8 would leave ar.lc at -1; confirm
 *         that callers never pass one)
 *   in1 - first source and also the destination (copied to r16 and to r8)
 *   in2 - second source (copied to r17)
 *   in3 - third source (copied to r18)
 *
 * The loop is software-pipelined on rotating registers: each 8-byte word is
 * loaded in stage 0, XORed in stage 6 and stored in stage 7.  ar.pfs, ar.lc
 * and pr are kept in r31, r30 and r29; ar.lc and pr are put back on exit.
 */
     50	.prologue
     51	.fframe 0
     52	.save ar.pfs, r31
     53	alloc r31 = ar.pfs, 4, 0, 20, 24	/* 4 in, 0 local, 20 out, 24 rotating */
     54	.save ar.lc, r30
     55	mov r30 = ar.lc
     56	.save pr, r29
     57	mov r29 = pr
     58	;;
     59	.body
/*
 * The 24 rotating registers cover the whole 4+0+20 register frame, inputs
 * included, so the pointers are copied to static registers before the loop.
 */
     60	mov r8 = in1	/* r8 = store pointer */
     61	mov ar.ec = 6 + 2	/* epilogue count = number of pipeline stages */
     62	shr in0 = in0, 3	/* length / 8 = count of 8-byte words */
     63	;;
     64	adds in0 = -1, in0	/* ar.lc takes the iteration count minus one */
     65	mov r16 = in1	/* r16 = load pointer, source 1 */
     66	mov r17 = in2	/* r17 = load pointer, source 2 */
     67	;;
     68	mov r18 = in3	/* r18 = load pointer, source 3 */
     69	mov ar.lc = in0
     70	mov pr.rot = 1 << 16	/* only p16, i.e. p[0], set: stage 0 runs alone at first */
     71	;;
     72	.rotr s1[6+1], s2[6+1], s3[6+1], d[2]	/* 3 * 7 + 2 = 23 rotating names */
     73	.rotp p[6+2]	/* one predicate per pipeline stage */
     740:
     75(p[0])	ld8.nta s1[0] = [r16], 8	/* stage 0: load source 1, pointer += 8 */
     76(p[0])	ld8.nta s2[0] = [r17], 8	/* stage 0: load source 2, pointer += 8 */
     77(p[6])	xor d[0] = s1[6], s2[6]	/* stage 6: first partial result */
     78	;;
/* New instruction group: the xor below reads the d[0] written just above. */
     79(p[0])	ld8.nta s3[0] = [r18], 8	/* stage 0: load source 3, pointer += 8 */
     80(p[6+1])st8.nta [r8] = d[1], 8	/* stage 7: d[1] is the d[0] of the previous pass */
     81(p[6])	xor d[0] = d[0], s3[6]	/* stage 6: fold in source 3 */
     82	br.ctop.dptk.few 0b	/* rotate registers, loop until ar.lc and ar.ec run out */
     83	;;
     84	mov ar.lc = r30	/* restore the caller's loop count */
     85	mov pr = r29, -1	/* restore all predicates (mask -1) */
     86	br.ret.sptk.few rp
     87END(xor_ia64_3)
     88EXPORT_SYMBOL(xor_ia64_3)
     89
     90GLOBAL_ENTRY(xor_ia64_4)
/*
 * xor_ia64_4: XOR the words read through in2, in3 and in4 into the buffer
 * at in1.
 *
 *   in0 - length; divided by 8 below, so presumably a byte count
 *         (NOTE(review): a value below 8 would leave ar.lc at -1; confirm
 *         that callers never pass one)
 *   in1 - first source and also the destination (copied to r16 and to r8)
 *   in2 - second source (copied to r17)
 *   in3 - third source (copied to r18)
 *   in4 - fourth source (copied to r19)
 *
 * The loop is software-pipelined on rotating registers: each 8-byte word is
 * loaded in stage 0, XORed in stage 6 and stored in stage 7.  r20 is a
 * non-rotating temporary.  ar.pfs, ar.lc and pr are kept in r31, r30 and
 * r29; ar.lc and pr are put back on exit.
 */
     91	.prologue
     92	.fframe 0
     93	.save ar.pfs, r31
     94	alloc r31 = ar.pfs, 5, 0, 27, 32	/* 5 in, 0 local, 27 out, 32 rotating */
     95	.save ar.lc, r30
     96	mov r30 = ar.lc
     97	.save pr, r29
     98	mov r29 = pr
     99	;;
    100	.body
/*
 * The 32 rotating registers cover the whole 5+0+27 register frame, inputs
 * included, so the pointers are copied to static registers before the loop.
 */
    101	mov r8 = in1	/* r8 = store pointer */
    102	mov ar.ec = 6 + 2	/* epilogue count = number of pipeline stages */
    103	shr in0 = in0, 3	/* length / 8 = count of 8-byte words */
    104	;;
    105	adds in0 = -1, in0	/* ar.lc takes the iteration count minus one */
    106	mov r16 = in1	/* r16 = load pointer, source 1 */
    107	mov r17 = in2	/* r17 = load pointer, source 2 */
    108	;;
    109	mov r18 = in3	/* r18 = load pointer, source 3 */
    110	mov ar.lc = in0
    111	mov pr.rot = 1 << 16	/* only p16, i.e. p[0], set: stage 0 runs alone at first */
    112	mov r19 = in4	/* r19 = load pointer, source 4 */
    113	;;
    114	.rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], d[2]	/* 4 * 7 + 2 = 30 rotating names */
    115	.rotp p[6+2]	/* one predicate per pipeline stage */
    1160:
    117(p[0])	ld8.nta s1[0] = [r16], 8	/* stage 0: load source 1, pointer += 8 */
    118(p[0])	ld8.nta s2[0] = [r17], 8	/* stage 0: load source 2, pointer += 8 */
    119(p[6])	xor d[0] = s1[6], s2[6]	/* stage 6: sources 1 and 2 */
    120(p[0])	ld8.nta s3[0] = [r18], 8	/* stage 0: load source 3, pointer += 8 */
    121(p[0])	ld8.nta s4[0] = [r19], 8	/* stage 0: load source 4, pointer += 8 */
    122(p[6])	xor r20 = s3[6], s4[6]	/* stage 6: sources 3 and 4, into the temporary */
    123	;;
/* New instruction group: the xor below reads d[0] and r20 written above. */
    124(p[6+1])st8.nta [r8] = d[1], 8	/* stage 7: d[1] is the d[0] of the previous pass */
    125(p[6])	xor d[0] = d[0], r20	/* stage 6: combine the two partial results */
    126	br.ctop.dptk.few 0b	/* rotate registers, loop until ar.lc and ar.ec run out */
    127	;;
    128	mov ar.lc = r30	/* restore the caller's loop count */
    129	mov pr = r29, -1	/* restore all predicates (mask -1) */
    130	br.ret.sptk.few rp
    131END(xor_ia64_4)
    132EXPORT_SYMBOL(xor_ia64_4)
    133
    134GLOBAL_ENTRY(xor_ia64_5)
/*
 * xor_ia64_5: XOR the words read through in2, in3, in4 and in5 into the
 * buffer at in1.
 *
 *   in0 - length; divided by 8 below, so presumably a byte count
 *         (NOTE(review): a value below 8 would leave ar.lc at -1; confirm
 *         that callers never pass one)
 *   in1 - first source and also the destination (copied to r16 and to r8)
 *   in2 - second source (copied to r17)
 *   in3 - third source (copied to r18)
 *   in4 - fourth source (copied to r19)
 *   in5 - fifth source (copied to r20)
 *
 * The loop is software-pipelined on rotating registers: each 8-byte word is
 * loaded in stage 0, XORed in stage 6 and stored in stage 7.  r21 is a
 * non-rotating temporary.  ar.pfs, ar.lc and pr are kept in r31, r30 and
 * r29; ar.lc and pr are put back on exit.
 */
    135	.prologue
    136	.fframe 0
    137	.save ar.pfs, r31
    138	alloc r31 = ar.pfs, 6, 0, 34, 40	/* 6 in, 0 local, 34 out, 40 rotating */
    139	.save ar.lc, r30
    140	mov r30 = ar.lc
    141	.save pr, r29
    142	mov r29 = pr
    143	;;
    144	.body
/*
 * The 40 rotating registers cover the whole 6+0+34 register frame, inputs
 * included, so the pointers are copied to static registers before the loop.
 */
    145	mov r8 = in1	/* r8 = store pointer */
    146	mov ar.ec = 6 + 2	/* epilogue count = number of pipeline stages */
    147	shr in0 = in0, 3	/* length / 8 = count of 8-byte words */
    148	;;
    149	adds in0 = -1, in0	/* ar.lc takes the iteration count minus one */
    150	mov r16 = in1	/* r16 = load pointer, source 1 */
    151	mov r17 = in2	/* r17 = load pointer, source 2 */
    152	;;
    153	mov r18 = in3	/* r18 = load pointer, source 3 */
    154	mov ar.lc = in0
    155	mov pr.rot = 1 << 16	/* only p16, i.e. p[0], set: stage 0 runs alone at first */
    156	mov r19 = in4	/* r19 = load pointer, source 4 */
    157	mov r20 = in5	/* r20 = load pointer, source 5 */
    158	;;
    159	.rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], s5[6+1], d[2]	/* 5 * 7 + 2 = 37 rotating names */
    160	.rotp p[6+2]	/* one predicate per pipeline stage */
    1610:
    162(p[0])	ld8.nta s1[0] = [r16], 8	/* stage 0: load source 1, pointer += 8 */
    163(p[0])	ld8.nta s2[0] = [r17], 8	/* stage 0: load source 2, pointer += 8 */
    164(p[6])	xor d[0] = s1[6], s2[6]	/* stage 6: sources 1 and 2 */
    165(p[0])	ld8.nta s3[0] = [r18], 8	/* stage 0: load source 3, pointer += 8 */
    166(p[0])	ld8.nta s4[0] = [r19], 8	/* stage 0: load source 4, pointer += 8 */
    167(p[6])	xor r21 = s3[6], s4[6]	/* stage 6: sources 3 and 4, into the temporary */
    168	;;
/* New instruction group: the xor below reads d[0] and r21 written above. */
    169(p[0])	ld8.nta s5[0] = [r20], 8	/* stage 0: load source 5, pointer += 8 */
    170(p[6+1])st8.nta [r8] = d[1], 8	/* stage 7: d[1] is the d[0] of the previous pass */
    171(p[6])	xor d[0] = d[0], r21	/* stage 6: combine the two partial results */
    172	;;
/* New instruction group again: d[0] was just rewritten. */
    173(p[6])	  xor d[0] = d[0], s5[6]	/* stage 6: fold in source 5 */
    174	nop.f 0	/* presumably bundle padding; TODO confirm */
    175	br.ctop.dptk.few 0b	/* rotate registers, loop until ar.lc and ar.ec run out */
    176	;;
    177	mov ar.lc = r30	/* restore the caller's loop count */
    178	mov pr = r29, -1	/* restore all predicates (mask -1) */
    179	br.ret.sptk.few rp
    180END(xor_ia64_5)
    181EXPORT_SYMBOL(xor_ia64_5)