vector.S
/* SPDX-License-Identifier: GPL-2.0 */
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/reg.h>
#include <asm/asm-offsets.h>
#include <asm/cputable.h>
#include <asm/thread_info.h>
#include <asm/page.h>
#include <asm/ptrace.h>
#include <asm/export.h>
#include <asm/asm-compat.h>

/*
 * Load state from memory into VMX registers including VSCR.
 * Assumes the caller has enabled VMX in the MSR.
 */
_GLOBAL(load_vr_state)
	li	r4,VRSTATE_VSCR
	lvx	v0,r4,r3
	mtvscr	v0
	REST_32VRS(0,r4,r3)
	blr
EXPORT_SYMBOL(load_vr_state)
_ASM_NOKPROBE_SYMBOL(load_vr_state); /* used by restore_math */

/*
 * Store VMX state into memory, including VSCR.
 * Assumes the caller has enabled VMX in the MSR.
 */
_GLOBAL(store_vr_state)
	SAVE_32VRS(0, r4, r3)
	mfvscr	v0
	li	r4, VRSTATE_VSCR
	stvx	v0, r4, r3
	blr
EXPORT_SYMBOL(store_vr_state)

/*
 * Disable VMX for the task which had it previously,
 * and save its vector registers in its thread_struct.
 * Enables the VMX for use in the kernel on return.
 * On SMP we know the VMX is free, since we give it up every
 * switch (ie, no lazy save of the vector registers).
 *
 * Note that on 32-bit this can only use registers that will be
 * restored by fast_exception_return, i.e. r3 - r6, r10 and r11.
 */
_GLOBAL(load_up_altivec)
	mfmsr	r5			/* grab the current MSR */
#ifdef CONFIG_PPC_BOOK3S_64
	/* interrupt doesn't set MSR[RI] and HPT can fault on current access */
	ori	r5,r5,MSR_RI
#endif
	oris	r5,r5,MSR_VEC@h
	MTMSRD(r5)			/* enable use of AltiVec now */
	isync

	/*
	 * While userspace in general ignores VRSAVE, glibc uses it as a boolean
	 * to optimise userspace context save/restore. Whenever we take an
	 * altivec unavailable exception we must set VRSAVE to something non
	 * zero. Set it to all 1s. See also the programming note in the ISA.
	 */
	mfspr	r4,SPRN_VRSAVE
	cmpwi	0,r4,0
	bne+	1f
	li	r4,-1
	mtspr	SPRN_VRSAVE,r4
1:
	/* enable use of VMX after return */
#ifdef CONFIG_PPC32
	addi	r5,r2,THREAD
	oris	r9,r9,MSR_VEC@h
#else
	ld	r4,PACACURRENT(r13)
	addi	r5,r4,THREAD		/* Get THREAD */
	oris	r12,r12,MSR_VEC@h
	std	r12,_MSR(r1)
#ifdef CONFIG_PPC_BOOK3S_64
	li	r4,0
	stb	r4,PACASRR_VALID(r13)
#endif
#endif
	li	r4,1
	stb	r4,THREAD_LOAD_VEC(r5)
	addi	r6,r5,THREAD_VRSTATE
	li	r10,VRSTATE_VSCR
	stw	r4,THREAD_USED_VR(r5)
	lvx	v0,r10,r6
	mtvscr	v0
	REST_32VRS(0,r4,r6)
	/* restore registers and return */
	blr
_ASM_NOKPROBE_SYMBOL(load_up_altivec)

/*
 * save_altivec(tsk)
 * Save the vector registers to its thread_struct
 */
_GLOBAL(save_altivec)
	addi	r3,r3,THREAD		/* want THREAD of task */
	PPC_LL	r7,THREAD_VRSAVEAREA(r3)
	PPC_LL	r5,PT_REGS(r3)
	PPC_LCMPI	0,r7,0
	bne	2f
	addi	r7,r3,THREAD_VRSTATE
2:	SAVE_32VRS(0,r4,r7)
	mfvscr	v0
	li	r4,VRSTATE_VSCR
	stvx	v0,r4,r7
	blr
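
/*
 * For orientation, a rough C-level sketch of what save_altivec() above
 * does, assuming the usual thread_struct fields (vr_save_area, vr_state);
 * read_vr() and read_vscr() are purely illustrative helpers standing in
 * for SAVE_32VRS and the mfvscr/stvx pair, not real kernel APIs:
 *
 *	void save_altivec_sketch(struct task_struct *tsk)
 *	{
 *		struct thread_struct *t = &tsk->thread;
 *		struct thread_vr_state *dst;
 *		int i;
 *
 *		dst = t->vr_save_area ? t->vr_save_area : &t->vr_state;
 *		for (i = 0; i < 32; i++)
 *			dst->vr[i] = read_vr(i);
 *		dst->vscr = read_vscr();
 *	}
 */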

#ifdef CONFIG_VSX

#ifdef CONFIG_PPC32
#error This asm code isn't ready for 32-bit kernels
#endif

/*
 * load_up_vsx(unused, unused, tsk)
 * Disable VSX for the task which had it previously,
 * and save its vector registers in its thread_struct.
 * Reuse the fp and vsx saves, but first check to see if they have
 * been saved already.
 */
_GLOBAL(load_up_vsx)
/* Load FP and VSX registers if they haven't been done yet */
	andi.	r5,r12,MSR_FP
	beql+	load_up_fpu		/* skip if already loaded */
	andis.	r5,r12,MSR_VEC@h
	beql+	load_up_altivec		/* skip if already loaded */

#ifdef CONFIG_PPC_BOOK3S_64
	/* interrupt doesn't set MSR[RI] and HPT can fault on current access */
	li	r5,MSR_RI
	mtmsrd	r5,1
#endif

	ld	r4,PACACURRENT(r13)
	addi	r4,r4,THREAD		/* Get THREAD */
	li	r6,1
	stw	r6,THREAD_USED_VSR(r4)	/* ... also set thread used vsr */
	/* enable use of VSX after return */
	oris	r12,r12,MSR_VSX@h
	std	r12,_MSR(r1)
	li	r4,0
	stb	r4,PACASRR_VALID(r13)
	b	fast_interrupt_return_srr

#endif /* CONFIG_VSX */


/*
 * The routines below are in assembler so we can closely control the
 * usage of floating-point registers. These routines must be called
 * with preempt disabled.
 */
#ifdef CONFIG_PPC32
	.data
fpzero:
	.long	0
fpone:
	.long	0x3f800000	/* 1.0 in single-precision FP */
fphalf:
	.long	0x3f000000	/* 0.5 in single-precision FP */

#define LDCONST(fr, name)	\
	lis	r11,name@ha;	\
	lfs	fr,name@l(r11)
#else

	.section ".toc","aw"
fpzero:
	.tc	FD_0_0[TC],0
fpone:
	.tc	FD_3ff00000_0[TC],0x3ff0000000000000	/* 1.0 */
fphalf:
	.tc	FD_3fe00000_0[TC],0x3fe0000000000000	/* 0.5 */

#define LDCONST(fr, name)	\
	lfd	fr,name@toc(r2)
#endif

	.text
/*
 * Internal routine to enable floating point and set FPSCR to 0.
 * Don't call it from C; it doesn't use the normal calling convention.
 */
fpenable:
#ifdef CONFIG_PPC32
	stwu	r1,-64(r1)
#else
	stdu	r1,-64(r1)
#endif
	mfmsr	r10
	ori	r11,r10,MSR_FP
	mtmsr	r11
	isync
	stfd	fr0,24(r1)
	stfd	fr1,16(r1)
	stfd	fr31,8(r1)
	LDCONST(fr1, fpzero)
	mffs	fr31
	MTFSF_L(fr1)
	blr

fpdisable:
	mtlr	r12
	MTFSF_L(fr31)
	lfd	fr31,8(r1)
	lfd	fr1,16(r1)
	lfd	fr0,24(r1)
	mtmsr	r10
	isync
	addi	r1,r1,64
	blr

/*
 * Vector add, floating point.
 */
_GLOBAL(vaddfp)
	mflr	r12
	bl	fpenable
	li	r0,4
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fadds	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector subtract, floating point.
 */
_GLOBAL(vsubfp)
	mflr	r12
	bl	fpenable
	li	r0,4
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fsubs	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector multiply and add, floating point.
 */
_GLOBAL(vmaddfp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	li	r0,4
	mtctr	r0
	li	r7,0
1:	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fmadds	fr0,fr0,fr2,fr1
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable

/*
 * Vector negative multiply and subtract, floating point.
 */
_GLOBAL(vnmsubfp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	li	r0,4
	mtctr	r0
	li	r7,0
1:	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fnmsubs	fr0,fr0,fr2,fr1
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable

/*
 * Vector reciprocal estimate. We just compute 1.0/x.
 * r3 -> destination, r4 -> source.
 */
_GLOBAL(vrefp)
	mflr	r12
	bl	fpenable
	li	r0,4
	LDCONST(fr1, fpone)
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	fdivs	fr0,fr1,fr0
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable
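
/*
 * The emulation helpers above all follow the same pattern: treat the
 * operands as arrays of four single-precision floats and apply the scalar
 * FP instruction to each element, with r3 pointing at the destination and
 * r4 (and r5/r6 where used) at the sources. As a purely illustrative C
 * sketch of what vaddfp() computes:
 *
 *	void vaddfp_sketch(float dst[4], const float a[4], const float b[4])
 *	{
 *		int i;
 *
 *		for (i = 0; i < 4; i++)
 *			dst[i] = a[i] + b[i];
 *	}
 */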

/*
 * Vector reciprocal square-root estimate, floating point.
 * We use the frsqrte instruction for the initial estimate followed
 * by 2 iterations of Newton-Raphson to get sufficient accuracy.
 * r3 -> destination, r4 -> source.
 */
_GLOBAL(vrsqrtefp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	stfd	fr3,40(r1)
	stfd	fr4,48(r1)
	stfd	fr5,56(r1)
	li	r0,4
	LDCONST(fr4, fpone)
	LDCONST(fr5, fphalf)
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	frsqrte	fr1,fr0		/* r = frsqrte(s) */
	fmuls	fr3,fr1,fr0	/* r * s */
	fmuls	fr2,fr1,fr5	/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4	/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1	/* r = r + 0.5 * r * (1 - s * r * r) */
	fmuls	fr3,fr1,fr0	/* r * s */
	fmuls	fr2,fr1,fr5	/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4	/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1	/* r = r + 0.5 * r * (1 - s * r * r) */
	stfsx	fr1,r3,r6
	addi	r6,r6,4
	bdnz	1b
	lfd	fr5,56(r1)
	lfd	fr4,48(r1)
	lfd	fr3,40(r1)
	lfd	fr2,32(r1)
	b	fpdisable
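
/*
 * For reference, the refinement step in vrsqrtefp() above falls out of
 * Newton-Raphson applied to f(r) = 1/r^2 - s, whose positive root is
 * r = 1/sqrt(s):
 *
 *	r' = r - f(r)/f'(r) = r + 0.5 * r * (1 - s * r * r)
 *
 * which is exactly the fmuls/fnmsubs/fmadds sequence in the loop body,
 * applied twice to refine the initial frsqrte estimate.
 */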