vecemu.c (8781B)
1// SPDX-License-Identifier: GPL-2.0 2/* 3 * Routines to emulate some Altivec/VMX instructions, specifically 4 * those that can trap when given denormalized operands in Java mode. 5 */ 6#include <linux/kernel.h> 7#include <linux/errno.h> 8#include <linux/sched.h> 9#include <asm/ptrace.h> 10#include <asm/processor.h> 11#include <asm/switch_to.h> 12#include <linux/uaccess.h> 13#include <asm/inst.h> 14 15/* Functions in vector.S */ 16extern void vaddfp(vector128 *dst, vector128 *a, vector128 *b); 17extern void vsubfp(vector128 *dst, vector128 *a, vector128 *b); 18extern void vmaddfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c); 19extern void vnmsubfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c); 20extern void vrefp(vector128 *dst, vector128 *src); 21extern void vrsqrtefp(vector128 *dst, vector128 *src); 22extern void vexptep(vector128 *dst, vector128 *src); 23 24static unsigned int exp2s[8] = { 25 0x800000, 26 0x8b95c2, 27 0x9837f0, 28 0xa5fed7, 29 0xb504f3, 30 0xc5672a, 31 0xd744fd, 32 0xeac0c7 33}; 34 35/* 36 * Computes an estimate of 2^x. The `s' argument is the 32-bit 37 * single-precision floating-point representation of x. 38 */ 39static unsigned int eexp2(unsigned int s) 40{ 41 int exp, pwr; 42 unsigned int mant, frac; 43 44 /* extract exponent field from input */ 45 exp = ((s >> 23) & 0xff) - 127; 46 if (exp > 7) { 47 /* check for NaN input */ 48 if (exp == 128 && (s & 0x7fffff) != 0) 49 return s | 0x400000; /* return QNaN */ 50 /* 2^-big = 0, 2^+big = +Inf */ 51 return (s & 0x80000000)? 0: 0x7f800000; /* 0 or +Inf */ 52 } 53 if (exp < -23) 54 return 0x3f800000; /* 1.0 */ 55 56 /* convert to fixed point integer in 9.23 representation */ 57 pwr = (s & 0x7fffff) | 0x800000; 58 if (exp > 0) 59 pwr <<= exp; 60 else 61 pwr >>= -exp; 62 if (s & 0x80000000) 63 pwr = -pwr; 64 65 /* extract integer part, which becomes exponent part of result */ 66 exp = (pwr >> 23) + 126; 67 if (exp >= 254) 68 return 0x7f800000; 69 if (exp < -23) 70 return 0; 71 72 /* table lookup on top 3 bits of fraction to get mantissa */ 73 mant = exp2s[(pwr >> 20) & 7]; 74 75 /* linear interpolation using remaining 20 bits of fraction */ 76 asm("mulhwu %0,%1,%2" : "=r" (frac) 77 : "r" (pwr << 12), "r" (0x172b83ff)); 78 asm("mulhwu %0,%1,%2" : "=r" (frac) : "r" (frac), "r" (mant)); 79 mant += frac; 80 81 if (exp >= 0) 82 return mant + (exp << 23); 83 84 /* denormalized result */ 85 exp = -exp; 86 mant += 1 << (exp - 1); 87 return mant >> exp; 88} 89 90/* 91 * Computes an estimate of log_2(x). The `s' argument is the 32-bit 92 * single-precision floating-point representation of x. 93 */ 94static unsigned int elog2(unsigned int s) 95{ 96 int exp, mant, lz, frac; 97 98 exp = s & 0x7f800000; 99 mant = s & 0x7fffff; 100 if (exp == 0x7f800000) { /* Inf or NaN */ 101 if (mant != 0) 102 s |= 0x400000; /* turn NaN into QNaN */ 103 return s; 104 } 105 if ((exp | mant) == 0) /* +0 or -0 */ 106 return 0xff800000; /* return -Inf */ 107 108 if (exp == 0) { 109 /* denormalized */ 110 asm("cntlzw %0,%1" : "=r" (lz) : "r" (mant)); 111 mant <<= lz - 8; 112 exp = (-118 - lz) << 23; 113 } else { 114 mant |= 0x800000; 115 exp -= 127 << 23; 116 } 117 118 if (mant >= 0xb504f3) { /* 2^0.5 * 2^23 */ 119 exp |= 0x400000; /* 0.5 * 2^23 */ 120 asm("mulhwu %0,%1,%2" : "=r" (mant) 121 : "r" (mant), "r" (0xb504f334)); /* 2^-0.5 * 2^32 */ 122 } 123 if (mant >= 0x9837f0) { /* 2^0.25 * 2^23 */ 124 exp |= 0x200000; /* 0.25 * 2^23 */ 125 asm("mulhwu %0,%1,%2" : "=r" (mant) 126 : "r" (mant), "r" (0xd744fccb)); /* 2^-0.25 * 2^32 */ 127 } 128 if (mant >= 0x8b95c2) { /* 2^0.125 * 2^23 */ 129 exp |= 0x100000; /* 0.125 * 2^23 */ 130 asm("mulhwu %0,%1,%2" : "=r" (mant) 131 : "r" (mant), "r" (0xeac0c6e8)); /* 2^-0.125 * 2^32 */ 132 } 133 if (mant > 0x800000) { /* 1.0 * 2^23 */ 134 /* calculate (mant - 1) * 1.381097463 */ 135 /* 1.381097463 == 0.125 / (2^0.125 - 1) */ 136 asm("mulhwu %0,%1,%2" : "=r" (frac) 137 : "r" ((mant - 0x800000) << 1), "r" (0xb0c7cd3a)); 138 exp += frac; 139 } 140 s = exp & 0x80000000; 141 if (exp != 0) { 142 if (s) 143 exp = -exp; 144 asm("cntlzw %0,%1" : "=r" (lz) : "r" (exp)); 145 lz = 8 - lz; 146 if (lz > 0) 147 exp >>= lz; 148 else if (lz < 0) 149 exp <<= -lz; 150 s += ((lz + 126) << 23) + exp; 151 } 152 return s; 153} 154 155#define VSCR_SAT 1 156 157static int ctsxs(unsigned int x, int scale, unsigned int *vscrp) 158{ 159 int exp, mant; 160 161 exp = (x >> 23) & 0xff; 162 mant = x & 0x7fffff; 163 if (exp == 255 && mant != 0) 164 return 0; /* NaN -> 0 */ 165 exp = exp - 127 + scale; 166 if (exp < 0) 167 return 0; /* round towards zero */ 168 if (exp >= 31) { 169 /* saturate, unless the result would be -2^31 */ 170 if (x + (scale << 23) != 0xcf000000) 171 *vscrp |= VSCR_SAT; 172 return (x & 0x80000000)? 0x80000000: 0x7fffffff; 173 } 174 mant |= 0x800000; 175 mant = (mant << 7) >> (30 - exp); 176 return (x & 0x80000000)? -mant: mant; 177} 178 179static unsigned int ctuxs(unsigned int x, int scale, unsigned int *vscrp) 180{ 181 int exp; 182 unsigned int mant; 183 184 exp = (x >> 23) & 0xff; 185 mant = x & 0x7fffff; 186 if (exp == 255 && mant != 0) 187 return 0; /* NaN -> 0 */ 188 exp = exp - 127 + scale; 189 if (exp < 0) 190 return 0; /* round towards zero */ 191 if (x & 0x80000000) { 192 /* negative => saturate to 0 */ 193 *vscrp |= VSCR_SAT; 194 return 0; 195 } 196 if (exp >= 32) { 197 /* saturate */ 198 *vscrp |= VSCR_SAT; 199 return 0xffffffff; 200 } 201 mant |= 0x800000; 202 mant = (mant << 8) >> (31 - exp); 203 return mant; 204} 205 206/* Round to floating integer, towards 0 */ 207static unsigned int rfiz(unsigned int x) 208{ 209 int exp; 210 211 exp = ((x >> 23) & 0xff) - 127; 212 if (exp == 128 && (x & 0x7fffff) != 0) 213 return x | 0x400000; /* NaN -> make it a QNaN */ 214 if (exp >= 23) 215 return x; /* it's an integer already (or Inf) */ 216 if (exp < 0) 217 return x & 0x80000000; /* |x| < 1.0 rounds to 0 */ 218 return x & ~(0x7fffff >> exp); 219} 220 221/* Round to floating integer, towards +/- Inf */ 222static unsigned int rfii(unsigned int x) 223{ 224 int exp, mask; 225 226 exp = ((x >> 23) & 0xff) - 127; 227 if (exp == 128 && (x & 0x7fffff) != 0) 228 return x | 0x400000; /* NaN -> make it a QNaN */ 229 if (exp >= 23) 230 return x; /* it's an integer already (or Inf) */ 231 if ((x & 0x7fffffff) == 0) 232 return x; /* +/-0 -> +/-0 */ 233 if (exp < 0) 234 /* 0 < |x| < 1.0 rounds to +/- 1.0 */ 235 return (x & 0x80000000) | 0x3f800000; 236 mask = 0x7fffff >> exp; 237 /* mantissa overflows into exponent - that's OK, 238 it can't overflow into the sign bit */ 239 return (x + mask) & ~mask; 240} 241 242/* Round to floating integer, to nearest */ 243static unsigned int rfin(unsigned int x) 244{ 245 int exp, half; 246 247 exp = ((x >> 23) & 0xff) - 127; 248 if (exp == 128 && (x & 0x7fffff) != 0) 249 return x | 0x400000; /* NaN -> make it a QNaN */ 250 if (exp >= 23) 251 return x; /* it's an integer already (or Inf) */ 252 if (exp < -1) 253 return x & 0x80000000; /* |x| < 0.5 -> +/-0 */ 254 if (exp == -1) 255 /* 0.5 <= |x| < 1.0 rounds to +/- 1.0 */ 256 return (x & 0x80000000) | 0x3f800000; 257 half = 0x400000 >> exp; 258 /* add 0.5 to the magnitude and chop off the fraction bits */ 259 return (x + half) & ~(0x7fffff >> exp); 260} 261 262int emulate_altivec(struct pt_regs *regs) 263{ 264 ppc_inst_t instr; 265 unsigned int i, word; 266 unsigned int va, vb, vc, vd; 267 vector128 *vrs; 268 269 if (get_user_instr(instr, (void __user *)regs->nip)) 270 return -EFAULT; 271 272 word = ppc_inst_val(instr); 273 if (ppc_inst_primary_opcode(instr) != 4) 274 return -EINVAL; /* not an altivec instruction */ 275 vd = (word >> 21) & 0x1f; 276 va = (word >> 16) & 0x1f; 277 vb = (word >> 11) & 0x1f; 278 vc = (word >> 6) & 0x1f; 279 280 vrs = current->thread.vr_state.vr; 281 switch (word & 0x3f) { 282 case 10: 283 switch (vc) { 284 case 0: /* vaddfp */ 285 vaddfp(&vrs[vd], &vrs[va], &vrs[vb]); 286 break; 287 case 1: /* vsubfp */ 288 vsubfp(&vrs[vd], &vrs[va], &vrs[vb]); 289 break; 290 case 4: /* vrefp */ 291 vrefp(&vrs[vd], &vrs[vb]); 292 break; 293 case 5: /* vrsqrtefp */ 294 vrsqrtefp(&vrs[vd], &vrs[vb]); 295 break; 296 case 6: /* vexptefp */ 297 for (i = 0; i < 4; ++i) 298 vrs[vd].u[i] = eexp2(vrs[vb].u[i]); 299 break; 300 case 7: /* vlogefp */ 301 for (i = 0; i < 4; ++i) 302 vrs[vd].u[i] = elog2(vrs[vb].u[i]); 303 break; 304 case 8: /* vrfin */ 305 for (i = 0; i < 4; ++i) 306 vrs[vd].u[i] = rfin(vrs[vb].u[i]); 307 break; 308 case 9: /* vrfiz */ 309 for (i = 0; i < 4; ++i) 310 vrs[vd].u[i] = rfiz(vrs[vb].u[i]); 311 break; 312 case 10: /* vrfip */ 313 for (i = 0; i < 4; ++i) { 314 u32 x = vrs[vb].u[i]; 315 x = (x & 0x80000000)? rfiz(x): rfii(x); 316 vrs[vd].u[i] = x; 317 } 318 break; 319 case 11: /* vrfim */ 320 for (i = 0; i < 4; ++i) { 321 u32 x = vrs[vb].u[i]; 322 x = (x & 0x80000000)? rfii(x): rfiz(x); 323 vrs[vd].u[i] = x; 324 } 325 break; 326 case 14: /* vctuxs */ 327 for (i = 0; i < 4; ++i) 328 vrs[vd].u[i] = ctuxs(vrs[vb].u[i], va, 329 ¤t->thread.vr_state.vscr.u[3]); 330 break; 331 case 15: /* vctsxs */ 332 for (i = 0; i < 4; ++i) 333 vrs[vd].u[i] = ctsxs(vrs[vb].u[i], va, 334 ¤t->thread.vr_state.vscr.u[3]); 335 break; 336 default: 337 return -EINVAL; 338 } 339 break; 340 case 46: /* vmaddfp */ 341 vmaddfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]); 342 break; 343 case 47: /* vnmsubfp */ 344 vnmsubfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]); 345 break; 346 default: 347 return -EINVAL; 348 } 349 350 return 0; 351}