xor.S (3226B)
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * arch/ia64/lib/xor.S
 *
 * Optimized RAID-5 checksumming functions for IA-64.
 */

#include <asm/asmmacro.h>
#include <asm/export.h>

/*
 * xor_ia64_2: XOR the block at in2 into the block at in1, 64 bits at a
 * time.  in0 is the length in bytes (a multiple of 8), in1 is the
 * destination and first source, in2 the second source.  The loop is
 * software-pipelined with rotating registers and predicates: loads
 * issue in stage 0, the xor runs in stage 6 (hiding load latency), the
 * store in stage 7, and ar.ec = 6 + 2 drains those stages after the
 * last iteration.  The .nta completers mark the data as non-temporal.
 */
GLOBAL_ENTRY(xor_ia64_2)
	.prologue
	.fframe 0
	.save ar.pfs, r31
	alloc r31 = ar.pfs, 3, 0, 13, 16
	.save ar.lc, r30
	mov r30 = ar.lc
	.save pr, r29
	mov r29 = pr
	;;
	.body
	mov r8 = in1		// store pointer: results go back into the first block
	mov ar.ec = 6 + 2	// epilog count: drain the load/xor/store stages
	shr in0 = in0, 3	// byte count -> count of 8-byte words
	;;
	adds in0 = -1, in0	// ar.lc counts down from N-1
	mov r16 = in1
	mov r17 = in2
	;;
	mov ar.lc = in0
	mov pr.rot = 1 << 16	// prime the pipeline: only p16 set
	;;
	.rotr s1[6+1], s2[6+1], d[2]
	.rotp p[6+2]
0:
(p[0])	ld8.nta s1[0] = [r16], 8
(p[0])	ld8.nta s2[0] = [r17], 8
(p[6])	xor d[0] = s1[6], s2[6]
(p[6+1])st8.nta [r8] = d[1], 8
	nop.f 0
	br.ctop.dptk.few 0b
	;;
	mov ar.lc = r30
	mov pr = r29, -1
	br.ret.sptk.few rp
END(xor_ia64_2)
EXPORT_SYMBOL(xor_ia64_2)

/*
 * xor_ia64_3: same scheme, XORing the blocks at in2 and in3 into in1.
 */
GLOBAL_ENTRY(xor_ia64_3)
	.prologue
	.fframe 0
	.save ar.pfs, r31
	alloc r31 = ar.pfs, 4, 0, 20, 24
	.save ar.lc, r30
	mov r30 = ar.lc
	.save pr, r29
	mov r29 = pr
	;;
	.body
	mov r8 = in1
	mov ar.ec = 6 + 2
	shr in0 = in0, 3
	;;
	adds in0 = -1, in0
	mov r16 = in1
	mov r17 = in2
	;;
	mov r18 = in3
	mov ar.lc = in0
	mov pr.rot = 1 << 16
	;;
	.rotr s1[6+1], s2[6+1], s3[6+1], d[2]
	.rotp p[6+2]
0:
(p[0])	ld8.nta s1[0] = [r16], 8
(p[0])	ld8.nta s2[0] = [r17], 8
(p[6])	xor d[0] = s1[6], s2[6]
	;;
(p[0])	ld8.nta s3[0] = [r18], 8
(p[6+1])st8.nta [r8] = d[1], 8
(p[6])	xor d[0] = d[0], s3[6]
	br.ctop.dptk.few 0b
	;;
	mov ar.lc = r30
	mov pr = r29, -1
	br.ret.sptk.few rp
END(xor_ia64_3)
EXPORT_SYMBOL(xor_ia64_3)

/*
 * xor_ia64_4: three source blocks; s3 ^ s4 is staged in r20 before
 * being folded into the running result.
 */
GLOBAL_ENTRY(xor_ia64_4)
	.prologue
	.fframe 0
	.save ar.pfs, r31
	alloc r31 = ar.pfs, 5, 0, 27, 32
	.save ar.lc, r30
	mov r30 = ar.lc
	.save pr, r29
	mov r29 = pr
	;;
	.body
	mov r8 = in1
	mov ar.ec = 6 + 2
	shr in0 = in0, 3
	;;
	adds in0 = -1, in0
	mov r16 = in1
	mov r17 = in2
	;;
	mov r18 = in3
	mov ar.lc = in0
	mov pr.rot = 1 << 16
	mov r19 = in4
	;;
	.rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], d[2]
	.rotp p[6+2]
0:
(p[0])	ld8.nta s1[0] = [r16], 8
(p[0])	ld8.nta s2[0] = [r17], 8
(p[6])	xor d[0] = s1[6], s2[6]
(p[0])	ld8.nta s3[0] = [r18], 8
(p[0])	ld8.nta s4[0] = [r19], 8
(p[6])	xor r20 = s3[6], s4[6]
	;;
(p[6+1])st8.nta [r8] = d[1], 8
(p[6])	xor d[0] = d[0], r20
	br.ctop.dptk.few 0b
	;;
	mov ar.lc = r30
	mov pr = r29, -1
	br.ret.sptk.few rp
END(xor_ia64_4)
EXPORT_SYMBOL(xor_ia64_4)

/*
 * xor_ia64_5: four source blocks; s3 ^ s4 is staged in r21, s5 is
 * folded in with a final xor.
 */
GLOBAL_ENTRY(xor_ia64_5)
	.prologue
	.fframe 0
	.save ar.pfs, r31
	alloc r31 = ar.pfs, 6, 0, 34, 40
	.save ar.lc, r30
	mov r30 = ar.lc
	.save pr, r29
	mov r29 = pr
	;;
	.body
	mov r8 = in1
	mov ar.ec = 6 + 2
	shr in0 = in0, 3
	;;
	adds in0 = -1, in0
	mov r16 = in1
	mov r17 = in2
	;;
	mov r18 = in3
	mov ar.lc = in0
	mov pr.rot = 1 << 16
	mov r19 = in4
	mov r20 = in5
	;;
	.rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], s5[6+1], d[2]
	.rotp p[6+2]
0:
(p[0])	ld8.nta s1[0] = [r16], 8
(p[0])	ld8.nta s2[0] = [r17], 8
(p[6])	xor d[0] = s1[6], s2[6]
(p[0])	ld8.nta s3[0] = [r18], 8
(p[0])	ld8.nta s4[0] = [r19], 8
(p[6])	xor r21 = s3[6], s4[6]
	;;
(p[0])	ld8.nta s5[0] = [r20], 8
(p[6+1])st8.nta [r8] = d[1], 8
(p[6])	xor d[0] = d[0], r21
	;;
(p[6])	xor d[0] = d[0], s5[6]
	nop.f 0
	br.ctop.dptk.few 0b
	;;
	mov ar.lc = r30
	mov pr = r29, -1
	br.ret.sptk.few rp
END(xor_ia64_5)
EXPORT_SYMBOL(xor_ia64_5)
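
/*
 * For reference: each routine above is the software-pipelined equivalent
 * of a plain word-by-word XOR loop.  A minimal C sketch for xor_ia64_2 is
 * shown below; the function and parameter names are illustrative, not
 * taken from this file.  xor_ia64_3/4/5 fold in one further source block
 * per additional pointer argument.
 *
 *	static void xor_2_sketch(unsigned long bytes, unsigned long *p1,
 *				 const unsigned long *p2)
 *	{
 *		unsigned long words = bytes >> 3;	// as in "shr in0 = in0, 3"
 *		unsigned long i;
 *
 *		for (i = 0; i < words; i++)
 *			p1[i] ^= p2[i];			// result lands in the first block
 *	}
 */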