/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Author: Lu Zeng <zenglu@loongson.cn>
 *         Pei Huang <huangpei@loongson.cn>
 *         Huacai Chen <chenhuacai@loongson.cn>
 *
 * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
 */
#include <asm/asm.h>
#include <asm/asmmacro.h>
#include <asm/asm-offsets.h>
#include <asm/errno.h>
#include <asm/export.h>
#include <asm/fpregdef.h>
#include <asm/loongarch.h>
#include <asm/regdef.h>

/* Byte stride between consecutive register slots in a save area. */
#define FPU_REG_WIDTH		8
#define LSX_REG_WIDTH		16
#define LASX_REG_WIDTH		32

/*
 * EX insn, reg, src, offs
 *
 * Emit "\insn \reg, \src, \offs" under a unique local label and add an
 * entry to the __ex_table section that pairs that label with `fault`.
 * Presumably the exception fixup code uses the entry to resume at
 * `fault` when the access traps; that consumer is not part of this file.
 */
	.macro	EX insn, reg, src, offs
.ex\@:	\insn	\reg, \src, \offs
	.section __ex_table,"a"
	PTR	.ex\@, fault
	.previous
	.endm

/*
 * sc_save_fp base
 *
 * Store $f0..$f31 as doublewords to \base, register N going to byte
 * offset N * FPU_REG_WIDTH. Every store is wrapped in EX.
 */
	.macro	sc_save_fp base
	.irp	num, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
	EX	fst.d	$f\num, \base, (\num * FPU_REG_WIDTH)
	.endr
	.endm

/*
 * sc_restore_fp base
 *
 * Load $f0..$f31 as doublewords from \base, register N coming from byte
 * offset N * FPU_REG_WIDTH. Every load is wrapped in EX.
 */
	.macro	sc_restore_fp base
	.irp	num, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
	EX	fld.d	$f\num, \base, (\num * FPU_REG_WIDTH)
	.endr
	.endm

/*
 * sc_save_fcc base, tmp0, tmp1
 *
 * Pack the eight condition flags into one doubleword, $fccN occupying
 * bits [8N+7:8N], and store it at \base + 0 (store wrapped in EX).
 * Clobbers \tmp0 and \tmp1.
 */
	.macro	sc_save_fcc base, tmp0, tmp1
	/* $fcc0 seeds the accumulator; the others are inserted byte by byte. */
	movcf2gr	\tmp0, $fcc0
	move		\tmp1, \tmp0
	movcf2gr	\tmp0, $fcc1
	bstrins.d	\tmp1, \tmp0, 15, 8
	movcf2gr	\tmp0, $fcc2
	bstrins.d	\tmp1, \tmp0, 23, 16
	movcf2gr	\tmp0, $fcc3
	bstrins.d	\tmp1, \tmp0, 31, 24
	movcf2gr	\tmp0, $fcc4
	bstrins.d	\tmp1, \tmp0, 39, 32
	movcf2gr	\tmp0, $fcc5
	bstrins.d	\tmp1, \tmp0, 47, 40
	movcf2gr	\tmp0, $fcc6
	bstrins.d	\tmp1, \tmp0, 55, 48
	movcf2gr	\tmp0, $fcc7
	bstrins.d	\tmp1, \tmp0, 63, 56
	EX	st.d	\tmp1, \base, 0
	.endm

/*
 * sc_restore_fcc base, tmp0, tmp1
 *
 * Load one doubleword from \base + 0 (load wrapped in EX) and move the
 * byte at bits [8N+7:8N] into $fccN for N = 0..7.
 * Clobbers \tmp0 and \tmp1.
 */
	.macro	sc_restore_fcc base, tmp0, tmp1
	EX	ld.d	\tmp0, \base, 0
	bstrpick.d	\tmp1, \tmp0, 7, 0
	movgr2cf	$fcc0, \tmp1
	bstrpick.d	\tmp1, \tmp0, 15, 8
	movgr2cf	$fcc1, \tmp1
	bstrpick.d	\tmp1, \tmp0, 23, 16
	movgr2cf	$fcc2, \tmp1
	bstrpick.d	\tmp1, \tmp0, 31, 24
	movgr2cf	$fcc3, \tmp1
	bstrpick.d	\tmp1, \tmp0, 39, 32
	movgr2cf	$fcc4, \tmp1
	bstrpick.d	\tmp1, \tmp0, 47, 40
	movgr2cf	$fcc5, \tmp1
	bstrpick.d	\tmp1, \tmp0, 55, 48
	movgr2cf	$fcc6, \tmp1
	bstrpick.d	\tmp1, \tmp0, 63, 56
	movgr2cf	$fcc7, \tmp1
	.endm

/*
 * sc_save_fcsr base, tmp0
 *
 * Read fcsr0 into \tmp0 and store it as a word at \base + 0 (store
 * wrapped in EX). Clobbers \tmp0.
 */
	.macro	sc_save_fcsr base, tmp0
	movfcsr2gr	\tmp0, fcsr0
	EX	st.w	\tmp0, \base, 0
	.endm

/*
 * sc_restore_fcsr base, tmp0
 *
 * Load a word from \base + 0 (load wrapped in EX) and write it to fcsr0.
 * Clobbers \tmp0.
 */
	.macro	sc_restore_fcsr base, tmp0
	EX	ld.w	\tmp0, \base, 0
	movgr2fcsr	fcsr0, \tmp0
	.endm

/*
 * Save a thread's fp context.
 *
 * a0 is handed to the fpu_save_* helper macros as their first operand;
 * those macros are not defined in this file. t1 and t2 are clobbered.
 */
SYM_FUNC_START(_save_fp)
	fpu_save_csr	a0 t1
	fpu_save_double	a0 t1			# clobbers t1
	fpu_save_cc	a0 t1 t2		# clobbers t1, t2
	jr		ra
SYM_FUNC_END(_save_fp)
EXPORT_SYMBOL(_save_fp)

/*
 * Restore a thread's fp context.
 *
 * a0 is handed to the fpu_restore_* helper macros as their first operand;
 * those macros are not defined in this file. t1 and t2 are clobbered.
 */
SYM_FUNC_START(_restore_fp)
	fpu_restore_double	a0 t1		# clobbers t1
	fpu_restore_csr		a0 t1
	fpu_restore_cc		a0 t1 t2	# clobbers t1, t2
	jr			ra
SYM_FUNC_END(_restore_fp)

/*
 * Enable the FPU, initialise fcsr0 and fill every FP register with an
 * all-ones bit pattern.
 *
 * The pattern was chosen so that it reads as a signaling NaN whether it
 * is interpreted as single or as double precision.
 * NOTE(review): with the IEEE 754-2008 NaN encoding an all-ones pattern
 * has the quiet bit set; confirm that "signaling" is still accurate.
 *
 * a0: the value to initialise fcsr0 to.
 * Clobbers t1.
 */
SYM_FUNC_START(_init_fpu)
	/* Same register is value and mask: only the FPEN bit is set in EUEN. */
	li.w		t1, CSR_EUEN_FPEN
	csrxchg		t1, t1, LOONGARCH_CSR_EUEN

	movgr2fcsr	fcsr0, a0

	li.w		t1, -1			# all ones, see header comment

	.irp	num, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
	movgr2fr.d	$f\num, t1
	.endr

	jr		ra
SYM_FUNC_END(_init_fpu)

/*
 * Copy the live FP state out to three separate buffers.
 *
 * a0: fpregs
 * a1: fcc
 * a2: fcsr
 *
 * Returns 0 in a0 when the end of the routine is reached. The accesses
 * are wrapped in EX, so a trapping access is expected to end up in
 * `fault`, which returns -EFAULT instead. Clobbers t1 and t2.
 */
SYM_FUNC_START(_save_fp_context)
	sc_save_fcc	a1 t1 t2
	sc_save_fcsr	a2 t1
	sc_save_fp	a0
	li.w		a0, 0			# success
	jr		ra
SYM_FUNC_END(_save_fp_context)

/*
 * Load the FP state back in from three separate buffers.
 *
 * a0: fpregs
 * a1: fcc
 * a2: fcsr
 *
 * Returns 0 in a0 when the end of the routine is reached. The accesses
 * are wrapped in EX, so a trapping access is expected to end up in
 * `fault`, which returns -EFAULT instead. Clobbers t1 and t2.
 */
SYM_FUNC_START(_restore_fp_context)
	sc_restore_fp	a0
	sc_restore_fcc	a1 t1 t2
	sc_restore_fcsr	a2 t1
	li.w		a0, 0			# success
	jr		ra
SYM_FUNC_END(_restore_fp_context)

/*
 * Target named by every __ex_table entry that EX emits: put -EFAULT in
 * a0 and return through ra.
 */
SYM_FUNC_START(fault)
	li.w		a0, -EFAULT		# failure
	jr		ra
SYM_FUNC_END(fault)