/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * FP/SIMD state saving and restoring macros
 *
 * Copyright (C) 2012 ARM Ltd.
 * Author: Catalin Marinas <catalin.marinas@arm.com>
 */

#include <asm/assembler.h>

.macro fpsimd_save state, tmpnr
	stp	q0, q1, [\state, #16 * 0]
	stp	q2, q3, [\state, #16 * 2]
	stp	q4, q5, [\state, #16 * 4]
	stp	q6, q7, [\state, #16 * 6]
	stp	q8, q9, [\state, #16 * 8]
	stp	q10, q11, [\state, #16 * 10]
	stp	q12, q13, [\state, #16 * 12]
	stp	q14, q15, [\state, #16 * 14]
	stp	q16, q17, [\state, #16 * 16]
	stp	q18, q19, [\state, #16 * 18]
	stp	q20, q21, [\state, #16 * 20]
	stp	q22, q23, [\state, #16 * 22]
	stp	q24, q25, [\state, #16 * 24]
	stp	q26, q27, [\state, #16 * 26]
	stp	q28, q29, [\state, #16 * 28]
	stp	q30, q31, [\state, #16 * 30]!	// writeback: \state now points at q30
	mrs	x\tmpnr, fpsr
	str	w\tmpnr, [\state, #16 * 2]	// fpsr/fpcr live just after q31
	mrs	x\tmpnr, fpcr
	str	w\tmpnr, [\state, #16 * 2 + 4]
.endm

.macro fpsimd_restore_fpcr state, tmp
	/*
	 * Writes to fpcr may be self-synchronising, so avoid restoring
	 * the register if it hasn't changed.
	 */
	mrs	\tmp, fpcr
	cmp	\tmp, \state
	b.eq	9999f
	msr	fpcr, \state
9999:
.endm

/* Clobbers \state */
.macro fpsimd_restore state, tmpnr
	ldp	q0, q1, [\state, #16 * 0]
	ldp	q2, q3, [\state, #16 * 2]
	ldp	q4, q5, [\state, #16 * 4]
	ldp	q6, q7, [\state, #16 * 6]
	ldp	q8, q9, [\state, #16 * 8]
	ldp	q10, q11, [\state, #16 * 10]
	ldp	q12, q13, [\state, #16 * 12]
	ldp	q14, q15, [\state, #16 * 14]
	ldp	q16, q17, [\state, #16 * 16]
	ldp	q18, q19, [\state, #16 * 18]
	ldp	q20, q21, [\state, #16 * 20]
	ldp	q22, q23, [\state, #16 * 22]
	ldp	q24, q25, [\state, #16 * 24]
	ldp	q26, q27, [\state, #16 * 26]
	ldp	q28, q29, [\state, #16 * 28]
	ldp	q30, q31, [\state, #16 * 30]!
	ldr	w\tmpnr, [\state, #16 * 2]
	msr	fpsr, x\tmpnr
	ldr	w\tmpnr, [\state, #16 * 2 + 4]
	fpsimd_restore_fpcr x\tmpnr, \state
.endm

/* Sanity-check macros to help avoid encoding garbage instructions */

.macro _check_general_reg nr
	.if (\nr) < 0 || (\nr) > 30
		.error "Bad register number \nr."
	.endif
.endm

.macro _sve_check_zreg znr
	.if (\znr) < 0 || (\znr) > 31
		.error "Bad Scalable Vector Extension vector register number \znr."
	.endif
.endm

.macro _sve_check_preg pnr
	.if (\pnr) < 0 || (\pnr) > 15
		.error "Bad Scalable Vector Extension predicate register number \pnr."
	.endif
.endm

.macro _check_num n, min, max
	.if (\n) < (\min) || (\n) > (\max)
		.error "Number \n out of range [\min,\max]"
	.endif
.endm

.macro _sme_check_wv v
	.if (\v) < 12 || (\v) > 15
		.error "Bad vector select register \v."
	.endif
.endm
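/*
 * Usage sketch (illustrative only; my_fpsimd_area and the register
 * choices are arbitrary examples, not kernel code). \tmpnr is a plain
 * register number (9 -> x9/w9), and \state is clobbered by the
 * writeback in both macros, so it must be reloaded before restoring:
 *
 *	adr	x8, my_fpsimd_area	// 32 * 16 + 8 bytes, 16-byte aligned
 *	fpsimd_save x8, 9		// clobbers x8 and x9
 *	...
 *	adr	x8, my_fpsimd_area
 *	fpsimd_restore x8, 9		// clobbers x8 and x9
 */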
/* SVE instruction encodings for non-SVE-capable assemblers */
/* (pre binutils 2.28; all clang versions able to build the kernel support SVE) */

/* STR (vector): STR Z\nz, [X\nxbase, #\offset, MUL VL] */
.macro _sve_str_v nz, nxbase, offset=0
	_sve_check_zreg \nz
	_check_general_reg \nxbase
	_check_num (\offset), -0x100, 0xff
	.inst	0xe5804000			\
		| (\nz)				\
		| ((\nxbase) << 5)		\
		| (((\offset) & 7) << 10)	\
		| (((\offset) & 0x1f8) << 13)
.endm

/* LDR (vector): LDR Z\nz, [X\nxbase, #\offset, MUL VL] */
.macro _sve_ldr_v nz, nxbase, offset=0
	_sve_check_zreg \nz
	_check_general_reg \nxbase
	_check_num (\offset), -0x100, 0xff
	.inst	0x85804000			\
		| (\nz)				\
		| ((\nxbase) << 5)		\
		| (((\offset) & 7) << 10)	\
		| (((\offset) & 0x1f8) << 13)
.endm

/* STR (predicate): STR P\np, [X\nxbase, #\offset, MUL VL] */
.macro _sve_str_p np, nxbase, offset=0
	_sve_check_preg \np
	_check_general_reg \nxbase
	_check_num (\offset), -0x100, 0xff
	.inst	0xe5800000			\
		| (\np)				\
		| ((\nxbase) << 5)		\
		| (((\offset) & 7) << 10)	\
		| (((\offset) & 0x1f8) << 13)
.endm

/* LDR (predicate): LDR P\np, [X\nxbase, #\offset, MUL VL] */
.macro _sve_ldr_p np, nxbase, offset=0
	_sve_check_preg \np
	_check_general_reg \nxbase
	_check_num (\offset), -0x100, 0xff
	.inst	0x85800000			\
		| (\np)				\
		| ((\nxbase) << 5)		\
		| (((\offset) & 7) << 10)	\
		| (((\offset) & 0x1f8) << 13)
.endm

/* RDVL X\nx, #\imm */
.macro _sve_rdvl nx, imm
	_check_general_reg \nx
	_check_num (\imm), -0x20, 0x1f
	.inst	0x04bf5000			\
		| (\nx)				\
		| (((\imm) & 0x3f) << 5)
.endm

/* RDFFR (unpredicated): RDFFR P\np.B */
.macro _sve_rdffr np
	_sve_check_preg \np
	.inst	0x2519f000			\
		| (\np)
.endm

/* WRFFR P\np.B */
.macro _sve_wrffr np
	_sve_check_preg \np
	.inst	0x25289000			\
		| ((\np) << 5)
.endm

/* PFALSE P\np.B */
.macro _sve_pfalse np
	_sve_check_preg \np
	.inst	0x2518e400			\
		| (\np)
.endm

/* SME instruction encodings for non-SME-capable assemblers */
/* (pre binutils 2.38/LLVM 13) */

/* RDSVL X\nx, #\imm */
.macro _sme_rdsvl nx, imm
	_check_general_reg \nx
	_check_num (\imm), -0x20, 0x1f
	.inst	0x04bf5800			\
		| (\nx)				\
		| (((\imm) & 0x3f) << 5)
.endm

/*
 * STR (vector from ZA array):
 *	STR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL]
 */
.macro _sme_str_zav nw, nxbase, offset=0
	_sme_check_wv \nw
	_check_general_reg \nxbase
	_check_num (\offset), -0x100, 0xff
	.inst	0xe1200000			\
		| (((\nw) & 3) << 13)		\
		| ((\nxbase) << 5)		\
		| ((\offset) & 7)
.endm

/*
 * LDR (vector to ZA array):
 *	LDR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL]
 */
.macro _sme_ldr_zav nw, nxbase, offset=0
	_sme_check_wv \nw
	_check_general_reg \nxbase
	_check_num (\offset), -0x100, 0xff
	.inst	0xe1000000			\
		| (((\nw) & 3) << 13)		\
		| ((\nxbase) << 5)		\
		| ((\offset) & 7)
.endm

/*
 * Zero the entire ZA array
 *	ZERO ZA
 */
.macro zero_za
	.inst	0xc00800ff
.endm

.macro __for from:req, to:req
	.if (\from) == (\to)
		_for__body	%\from
	.else
		__for	%\from, %((\from) + ((\to) - (\from)) / 2)
		__for	%((\from) + ((\to) - (\from)) / 2 + 1), %\to
	.endif
.endm

.macro _for var:req, from:req, to:req, insn:vararg
	.macro _for__body \var:req
		.noaltmacro
		\insn
		.altmacro
	.endm

	.altmacro
	__for	\from, \to
	.noaltmacro

	.purgem	_for__body
.endm
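/*
 * Expansion sketch (illustrative only): _for emulates a counted loop,
 * which .rept cannot, since .rept offers no counter to substitute into
 * \insn. __for recurses by halving the range (keeping the recursion
 * shallow), and the altmacro-mode % operator evaluates each bound to a
 * literal number. So, for example,
 *
 *	_for n, 0, 2, _sve_ldr_v \n, \nxbase, \n - 34
 *
 * expands to
 *
 *	_sve_ldr_v 0, \nxbase, 0 - 34
 *	_sve_ldr_v 1, \nxbase, 1 - 34
 *	_sve_ldr_v 2, \nxbase, 2 - 34
 */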
/* Update ZCR_EL1.LEN with the new VQ */
.macro sve_load_vq xvqminus1, xtmp, xtmp2
	mrs_s	\xtmp, SYS_ZCR_EL1
	bic	\xtmp2, \xtmp, ZCR_ELx_LEN_MASK
	orr	\xtmp2, \xtmp2, \xvqminus1
	cmp	\xtmp2, \xtmp
	b.eq	921f
	msr_s	SYS_ZCR_EL1, \xtmp2	// self-synchronising
921:
.endm

/* Update SMCR_EL1.LEN with the new VQ */
.macro sme_load_vq xvqminus1, xtmp, xtmp2
	mrs_s	\xtmp, SYS_SMCR_EL1
	bic	\xtmp2, \xtmp, SMCR_ELx_LEN_MASK
	orr	\xtmp2, \xtmp2, \xvqminus1
	cmp	\xtmp2, \xtmp
	b.eq	921f
	msr_s	SYS_SMCR_EL1, \xtmp2	// self-synchronising
921:
.endm

/* Preserve the first 128 bits of Z\nz and zero the rest. */
.macro _sve_flush_z nz
	_sve_check_zreg \nz
	mov	v\nz\().16b, v\nz\().16b	// AdvSIMD write zeroes bits [MAX:128]
.endm

.macro sve_flush_z
	_for	n, 0, 31, _sve_flush_z \n
.endm
.macro sve_flush_p
	_for	n, 0, 15, _sve_pfalse \n
.endm
.macro sve_flush_ffr
	_sve_wrffr 0			// P0 must already be zeroed
.endm

.macro sve_save nxbase, xpfpsr, save_ffr, nxtmp
	_for	n, 0, 31, _sve_str_v \n, \nxbase, \n - 34
	_for	n, 0, 15, _sve_str_p \n, \nxbase, \n - 16
	cbz	\save_ffr, 921f
	_sve_rdffr	0			// read FFR via P0...
	_sve_str_p	0, \nxbase		// ...store it in the FFR slot...
	_sve_ldr_p	0, \nxbase, -16		// ...then restore P0's saved value
	b	922f
921:
	str	xzr, [x\nxbase]			// Zero out FFR
922:
	mrs	x\nxtmp, fpsr
	str	w\nxtmp, [\xpfpsr]
	mrs	x\nxtmp, fpcr
	str	w\nxtmp, [\xpfpsr, #4]
.endm

.macro sve_load nxbase, xpfpsr, restore_ffr, nxtmp
	_for	n, 0, 31, _sve_ldr_v \n, \nxbase, \n - 34
	cbz	\restore_ffr, 921f
	_sve_ldr_p	0, \nxbase		// load the saved FFR value via P0...
	_sve_wrffr	0			// ...and write it back to the FFR
921:
	_for	n, 0, 15, _sve_ldr_p \n, \nxbase, \n - 16

	ldr	w\nxtmp, [\xpfpsr]
	msr	fpsr, x\nxtmp
	ldr	w\nxtmp, [\xpfpsr, #4]
	msr	fpcr, x\nxtmp
.endm

.macro sme_save_za nxbase, xvl, nw
	mov	w\nw, #0			// \xvl iterations: ZA has VL rows

423:
	_sme_str_zav \nw, \nxbase		// store ZA horizontal slice \nw
	add	x\nxbase, x\nxbase, \xvl	// advance by one row of VL bytes
	add	x\nw, x\nw, #1
	cmp	\xvl, x\nw
	bne	423b
.endm

.macro sme_load_za nxbase, xvl, nw
	mov	w\nw, #0			// \xvl iterations: ZA has VL rows

423:
	_sme_ldr_zav \nw, \nxbase		// load ZA horizontal slice \nw
	add	x\nxbase, x\nxbase, \xvl	// advance by one row of VL bytes
	add	x\nw, x\nw, #1
	cmp	\xvl, x\nw
	bne	423b
.endm
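/*
 * Usage sketch (illustrative only; my_sve_area, my_pfpsr and the
 * register choices are arbitrary examples, not kernel code). \nxbase
 * and \nxtmp are register *numbers*, and the Z/P slots sit at negative
 * MUL VL offsets below the base, so the base must point 34 vector
 * lengths into the buffer (32 * VL for Z0-Z31 plus 2 * VL for
 * P0-P15), i.e. at the FFR slot:
 *
 *	adr	x8, my_sve_area		// at least 34 * VL + VL/8 bytes
 *	_sve_rdvl 9, 1			// x9 = VL in bytes, via the local macro
 *	add	x8, x8, x9, lsl #5	// skip 32 * VL bytes of Z state
 *	add	x8, x8, x9, lsl #1	// skip 2 * VL bytes of P state
 *	adr	x11, my_pfpsr		// 8 bytes for the fpsr/fpcr pair
 *	mov	x10, #1			// non-zero: save the FFR too
 *	sve_save 8, x11, x10, 12	// clobbers x12
 */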