/*
 *  linux/arch/arm/vfp/vfpdouble.c
 *
 * This code is derived in part from John R. Hauser's SoftFloat library, which
 * carries the following notice:
 *
 * ===========================================================================
 * This C source file is part of the SoftFloat IEC/IEEE Floating-point
 * Arithmetic Package, Release 2.
 *
 * Written by John R. Hauser.  This work was made possible in part by the
 * International Computer Science Institute, located at Suite 600, 1947 Center
 * Street, Berkeley, California 94704.  Funding was partially provided by the
 * National Science Foundation under grant MIP-9311980.  The original version
 * of this code was written as part of a project to build a fixed-point vector
 * processor in collaboration with the University of California at Berkeley,
 * overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
 * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
 * arithmetic/softfloat.html'.
 *
 * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
 * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
 * TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
 * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
 * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
 *
 * Derivative works are acceptable, even for commercial purposes, so long as
 * (1) they include prominent notice that the work is derivative, and (2) they
 * include prominent notice akin to these three paragraphs for those parts of
 * this code that are retained.
 * ===========================================================================
 */
#include <linux/kernel.h>
#include <linux/bitops.h>

#include <asm/div64.h>
#include <asm/vfp.h>

#include "vfpinstr.h"
#include "vfp.h"

static struct vfp_double vfp_double_default_qnan = {
	.exponent	= 2047,
	.sign		= 0,
	.significand	= VFP_DOUBLE_SIGNIFICAND_QNAN,
};

static void vfp_double_dump(const char *str, struct vfp_double *d)
{
	pr_debug("VFP: %s: sign=%d exponent=%d significand=%016llx\n",
		 str, d->sign != 0, d->exponent, d->significand);
}

static void vfp_double_normalise_denormal(struct vfp_double *vd)
{
	int bits = 31 - fls(vd->significand >> 32);
	if (bits == 31)
		bits = 63 - fls(vd->significand);

	vfp_double_dump("normalise_denormal: in", vd);

	if (bits) {
		vd->exponent -= bits - 1;
		vd->significand <<= bits;
	}

	vfp_double_dump("normalise_denormal: out", vd);
}

u32 vfp_double_normaliseround(int dd, struct vfp_double *vd, u32 fpscr, u32 exceptions, const char *func)
{
	u64 significand, incr;
	int exponent, shift, underflow;
	u32 rmode;

	vfp_double_dump("pack: in", vd);

	/*
	 * Infinities and NaNs are a special case.
	 */
	if (vd->exponent == 2047 && (vd->significand == 0 || exceptions))
		goto pack;

	/*
	 * Special-case zero.
	 */
	if (vd->significand == 0) {
		vd->exponent = 0;
		goto pack;
	}

	exponent = vd->exponent;
	significand = vd->significand;

	shift = 32 - fls(significand >> 32);
	if (shift == 32)
		shift = 64 - fls(significand);
	if (shift) {
		exponent -= shift;
		significand <<= shift;
	}

#ifdef DEBUG
	vd->exponent = exponent;
	vd->significand = significand;
	vfp_double_dump("pack: normalised", vd);
#endif
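	/*
	 * The working significand now has its leading bit at bit 63:
	 * the 53 bits of the eventual result occupy the top of the
	 * word, and the low VFP_DOUBLE_LOW_BITS + 1 bits act as the
	 * round and sticky bits consumed by the rounding step below.
	 */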
	/*
	 * Tiny number?
	 */
	underflow = exponent < 0;
	if (underflow) {
		significand = vfp_shiftright64jamming(significand, -exponent);
		exponent = 0;
#ifdef DEBUG
		vd->exponent = exponent;
		vd->significand = significand;
		vfp_double_dump("pack: tiny number", vd);
#endif
		if (!(significand & ((1ULL << (VFP_DOUBLE_LOW_BITS + 1)) - 1)))
			underflow = 0;
	}

	/*
	 * Select rounding increment.
	 */
	incr = 0;
	rmode = fpscr & FPSCR_RMODE_MASK;

	if (rmode == FPSCR_ROUND_NEAREST) {
		incr = 1ULL << VFP_DOUBLE_LOW_BITS;
		if ((significand & (1ULL << (VFP_DOUBLE_LOW_BITS + 1))) == 0)
			incr -= 1;
	} else if (rmode == FPSCR_ROUND_TOZERO) {
		incr = 0;
	} else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vd->sign != 0))
		incr = (1ULL << (VFP_DOUBLE_LOW_BITS + 1)) - 1;

	pr_debug("VFP: rounding increment = 0x%08llx\n", incr);

	/*
	 * Is our rounding going to overflow?
	 */
	if ((significand + incr) < significand) {
		exponent += 1;
		significand = (significand >> 1) | (significand & 1);
		incr >>= 1;
#ifdef DEBUG
		vd->exponent = exponent;
		vd->significand = significand;
		vfp_double_dump("pack: overflow", vd);
#endif
	}

	/*
	 * If any of the low bits (which will be shifted out of the
	 * number) are non-zero, the result is inexact.
	 */
	if (significand & ((1 << (VFP_DOUBLE_LOW_BITS + 1)) - 1))
		exceptions |= FPSCR_IXC;

	/*
	 * Do our rounding.
	 */
	significand += incr;

	/*
	 * Infinity?
	 */
	if (exponent >= 2046) {
		exceptions |= FPSCR_OFC | FPSCR_IXC;
		if (incr == 0) {
			vd->exponent = 2045;
			vd->significand = 0x7fffffffffffffffULL;
		} else {
			vd->exponent = 2047;		/* infinity */
			vd->significand = 0;
		}
	} else {
		if (significand >> (VFP_DOUBLE_LOW_BITS + 1) == 0)
			exponent = 0;
		if (exponent || significand > 0x8000000000000000ULL)
			underflow = 0;
		if (underflow)
			exceptions |= FPSCR_UFC;
		vd->exponent = exponent;
		vd->significand = significand >> 1;
	}

 pack:
	vfp_double_dump("pack: final", vd);
	{
		s64 d = vfp_double_pack(vd);
		pr_debug("VFP: %s: d(d%d)=%016llx exceptions=%08x\n", func,
			 dd, d, exceptions);
		vfp_put_double(d, dd);
	}
	return exceptions;
}

/*
 * Propagate the NaN, setting exceptions if it is signalling.  'n' is
 * always a NaN.  'm' may be a number, NaN or infinity.
 */
static u32
vfp_propagate_nan(struct vfp_double *vdd, struct vfp_double *vdn,
		  struct vfp_double *vdm, u32 fpscr)
{
	struct vfp_double *nan;
	int tn, tm = 0;

	tn = vfp_double_type(vdn);

	if (vdm)
		tm = vfp_double_type(vdm);

	if (fpscr & FPSCR_DEFAULT_NAN)
		/*
		 * Default NaN mode - always returns a quiet NaN
		 */
		nan = &vfp_double_default_qnan;
	else {
		/*
		 * Contemporary mode - select the first signalling
		 * NAN, or if neither are signalling, the first
		 * quiet NAN.
		 */
		if (tn == VFP_SNAN || (tm != VFP_SNAN && tn == VFP_QNAN))
			nan = vdn;
		else
			nan = vdm;
		/*
		 * Make the NaN quiet.
		 */
		nan->significand |= VFP_DOUBLE_SIGNIFICAND_QNAN;
	}

	*vdd = *nan;

	/*
	 * If one was a signalling NAN, raise invalid operation.
	 */
	return tn == VFP_SNAN || tm == VFP_SNAN ? FPSCR_IOC : VFP_NAN_FLAG;
}
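/*
 * For example, in contemporary (non-default-NaN) mode, an operation on
 * (SNaN, 3.0) copies the signalling NaN, sets its most significant
 * mantissa bit to make it quiet and raises FPSCR_IOC, while an
 * operation on two quiet NaNs simply returns the first of them with
 * only the internal VFP_NAN_FLAG set.
 */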
/*
 * Extended operations
 */
static u32 vfp_double_fabs(int dd, int unused, int dm, u32 fpscr)
{
	vfp_put_double(vfp_double_packed_abs(vfp_get_double(dm)), dd);
	return 0;
}

static u32 vfp_double_fcpy(int dd, int unused, int dm, u32 fpscr)
{
	vfp_put_double(vfp_get_double(dm), dd);
	return 0;
}

static u32 vfp_double_fneg(int dd, int unused, int dm, u32 fpscr)
{
	vfp_put_double(vfp_double_packed_negate(vfp_get_double(dm)), dd);
	return 0;
}

static u32 vfp_double_fsqrt(int dd, int unused, int dm, u32 fpscr)
{
	struct vfp_double vdm, vdd;
	int ret, tm;

	vfp_double_unpack(&vdm, vfp_get_double(dm));
	tm = vfp_double_type(&vdm);
	if (tm & (VFP_NAN|VFP_INFINITY)) {
		struct vfp_double *vdp = &vdd;

		if (tm & VFP_NAN)
			ret = vfp_propagate_nan(vdp, &vdm, NULL, fpscr);
		else if (vdm.sign == 0) {
 sqrt_copy:
			vdp = &vdm;
			ret = 0;
		} else {
 sqrt_invalid:
			vdp = &vfp_double_default_qnan;
			ret = FPSCR_IOC;
		}
		vfp_put_double(vfp_double_pack(vdp), dd);
		return ret;
	}

	/*
	 * sqrt(+/- 0) == +/- 0
	 */
	if (tm & VFP_ZERO)
		goto sqrt_copy;

	/*
	 * Normalise a denormalised number
	 */
	if (tm & VFP_DENORMAL)
		vfp_double_normalise_denormal(&vdm);

	/*
	 * sqrt(<0) = invalid
	 */
	if (vdm.sign)
		goto sqrt_invalid;

	vfp_double_dump("sqrt", &vdm);

	/*
	 * Estimate the square root.
	 */
	vdd.sign = 0;
	vdd.exponent = ((vdm.exponent - 1023) >> 1) + 1023;
	vdd.significand = (u64)vfp_estimate_sqrt_significand(vdm.exponent, vdm.significand >> 32) << 31;

	vfp_double_dump("sqrt estimate1", &vdd);

	vdm.significand >>= 1 + (vdm.exponent & 1);
	vdd.significand += 2 + vfp_estimate_div128to64(vdm.significand, 0, vdd.significand);

	vfp_double_dump("sqrt estimate2", &vdd);

	/*
	 * And now adjust.
	 */
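	/*
	 * The combined estimate should be good to within a few units
	 * in the last place.  When the low bits land near a rounding
	 * boundary, we verify it exactly: form the 128-bit remainder
	 * m - q*q, step q down until the remainder is non-negative,
	 * and fold any remaining inexactness into the sticky bit.
	 */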
	if ((vdd.significand & VFP_DOUBLE_LOW_BITS_MASK) <= 5) {
		if (vdd.significand < 2) {
			vdd.significand = ~0ULL;
		} else {
			u64 termh, terml, remh, reml;
			vdm.significand <<= 2;
			mul64to128(&termh, &terml, vdd.significand, vdd.significand);
			sub128(&remh, &reml, vdm.significand, 0, termh, terml);
			while ((s64)remh < 0) {
				vdd.significand -= 1;
				shift64left(&termh, &terml, vdd.significand);
				terml |= 1;
				add128(&remh, &reml, remh, reml, termh, terml);
			}
			vdd.significand |= (remh | reml) != 0;
		}
	}
	vdd.significand = vfp_shiftright64jamming(vdd.significand, 1);

	return vfp_double_normaliseround(dd, &vdd, fpscr, 0, "fsqrt");
}

/*
 * Equal	:= ZC
 * Less than	:= N
 * Greater than	:= C
 * Unordered	:= CV
 */
static u32 vfp_compare(int dd, int signal_on_qnan, int dm, u32 fpscr)
{
	s64 d, m;
	u32 ret = 0;

	m = vfp_get_double(dm);
	if (vfp_double_packed_exponent(m) == 2047 && vfp_double_packed_mantissa(m)) {
		ret |= FPSCR_C | FPSCR_V;
		if (signal_on_qnan || !(vfp_double_packed_mantissa(m) & (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1))))
			/*
			 * Signalling NaN, or signalling on quiet NaN
			 */
			ret |= FPSCR_IOC;
	}

	d = vfp_get_double(dd);
	if (vfp_double_packed_exponent(d) == 2047 && vfp_double_packed_mantissa(d)) {
		ret |= FPSCR_C | FPSCR_V;
		if (signal_on_qnan || !(vfp_double_packed_mantissa(d) & (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1))))
			/*
			 * Signalling NaN, or signalling on quiet NaN
			 */
			ret |= FPSCR_IOC;
	}

	if (ret == 0) {
		if (d == m || vfp_double_packed_abs(d | m) == 0) {
			/*
			 * equal
			 */
			ret |= FPSCR_Z | FPSCR_C;
		} else if (vfp_double_packed_sign(d ^ m)) {
			/*
			 * different signs
			 */
			if (vfp_double_packed_sign(d))
				/*
				 * d is negative, so d < m
				 */
				ret |= FPSCR_N;
			else
				/*
				 * d is positive, so d > m
				 */
				ret |= FPSCR_C;
		} else if ((vfp_double_packed_sign(d) != 0) ^ (d < m)) {
			/*
			 * d < m
			 */
			ret |= FPSCR_N;
		} else if ((vfp_double_packed_sign(d) != 0) ^ (d > m)) {
			/*
			 * d > m
			 */
			ret |= FPSCR_C;
		}
	}

	return ret;
}

static u32 vfp_double_fcmp(int dd, int unused, int dm, u32 fpscr)
{
	return vfp_compare(dd, 0, dm, fpscr);
}

static u32 vfp_double_fcmpe(int dd, int unused, int dm, u32 fpscr)
{
	return vfp_compare(dd, 1, dm, fpscr);
}

static u32 vfp_double_fcmpz(int dd, int unused, int dm, u32 fpscr)
{
	return vfp_compare(dd, 0, VFP_REG_ZERO, fpscr);
}

static u32 vfp_double_fcmpez(int dd, int unused, int dm, u32 fpscr)
{
	return vfp_compare(dd, 1, VFP_REG_ZERO, fpscr);
}

static u32 vfp_double_fcvts(int sd, int unused, int dm, u32 fpscr)
{
	struct vfp_double vdm;
	struct vfp_single vsd;
	int tm;
	u32 exceptions = 0;

	vfp_double_unpack(&vdm, vfp_get_double(dm));

	tm = vfp_double_type(&vdm);
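	/*
	 * Converting to single precision: the exponent is rebiased by
	 * 1023 - 127 = 896, and vfp_hi64to32jamming() narrows the
	 * significand to 32 bits, ORing any discarded low bits into
	 * the bottom ("sticky") bit so the final rounding still sees
	 * that the result is inexact.
	 */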
	/*
	 * If we have a signalling NaN, signal invalid operation.
	 */
	if (tm == VFP_SNAN)
		exceptions = FPSCR_IOC;

	if (tm & VFP_DENORMAL)
		vfp_double_normalise_denormal(&vdm);

	vsd.sign = vdm.sign;
	vsd.significand = vfp_hi64to32jamming(vdm.significand);

	/*
	 * If we have an infinity or a NaN, the exponent must be 255.
	 */
	if (tm & (VFP_INFINITY|VFP_NAN)) {
		vsd.exponent = 255;
		if (tm == VFP_QNAN)
			vsd.significand |= VFP_SINGLE_SIGNIFICAND_QNAN;
		goto pack_nan;
	} else if (tm & VFP_ZERO)
		vsd.exponent = 0;
	else
		vsd.exponent = vdm.exponent - (1023 - 127);

	return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fcvts");

 pack_nan:
	vfp_put_float(vfp_single_pack(&vsd), sd);
	return exceptions;
}

static u32 vfp_double_fuito(int dd, int unused, int dm, u32 fpscr)
{
	struct vfp_double vdm;
	u32 m = vfp_get_float(dm);

	vdm.sign = 0;
	vdm.exponent = 1023 + 63 - 1;
	vdm.significand = (u64)m;

	return vfp_double_normaliseround(dd, &vdm, fpscr, 0, "fuito");
}

static u32 vfp_double_fsito(int dd, int unused, int dm, u32 fpscr)
{
	struct vfp_double vdm;
	u32 m = vfp_get_float(dm);

	vdm.sign = (m & 0x80000000) >> 16;
	vdm.exponent = 1023 + 63 - 1;
	vdm.significand = vdm.sign ? -m : m;

	return vfp_double_normaliseround(dd, &vdm, fpscr, 0, "fsito");
}

static u32 vfp_double_ftoui(int sd, int unused, int dm, u32 fpscr)
{
	struct vfp_double vdm;
	u32 d, exceptions = 0;
	int rmode = fpscr & FPSCR_RMODE_MASK;
	int tm;

	vfp_double_unpack(&vdm, vfp_get_double(dm));

	/*
	 * Do we have a denormalised number?
	 */
	tm = vfp_double_type(&vdm);
	if (tm & VFP_DENORMAL)
		exceptions |= FPSCR_IDC;

	if (tm & VFP_NAN)
		vdm.sign = 0;

	if (vdm.exponent >= 1023 + 32) {
		d = vdm.sign ? 0 : 0xffffffff;
		exceptions = FPSCR_IOC;
	} else if (vdm.exponent >= 1023 - 1) {
		int shift = 1023 + 63 - vdm.exponent;
		u64 rem, incr = 0;

		/*
		 * 2^0 <= m < 2^32-2^8
		 */
		d = (vdm.significand << 1) >> shift;
		rem = vdm.significand << (65 - shift);

		if (rmode == FPSCR_ROUND_NEAREST) {
			incr = 0x8000000000000000ULL;
			if ((d & 1) == 0)
				incr -= 1;
		} else if (rmode == FPSCR_ROUND_TOZERO) {
			incr = 0;
		} else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vdm.sign != 0)) {
			incr = ~0ULL;
		}

		if ((rem + incr) < rem) {
			if (d < 0xffffffff)
				d += 1;
			else
				exceptions |= FPSCR_IOC;
		}

		if (d && vdm.sign) {
			d = 0;
			exceptions |= FPSCR_IOC;
		} else if (rem)
			exceptions |= FPSCR_IXC;
	} else {
		d = 0;
		if (vdm.exponent | vdm.significand) {
			exceptions |= FPSCR_IXC;
			if (rmode == FPSCR_ROUND_PLUSINF && vdm.sign == 0)
				d = 1;
			else if (rmode == FPSCR_ROUND_MINUSINF && vdm.sign) {
				d = 0;
				exceptions |= FPSCR_IOC;
			}
		}
	}

	pr_debug("VFP: ftoui: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);

	vfp_put_float(d, sd);

	return exceptions;
}

static u32 vfp_double_ftouiz(int sd, int unused, int dm, u32 fpscr)
{
	return vfp_double_ftoui(sd, unused, dm, FPSCR_ROUND_TOZERO);
}
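/*
 * The ftoui path above and the ftosi path below share the same trick:
 * with the unpacked significand holding its leading 1 at bit 62,
 * shifting by 1023 + 63 - exponent splits the value into an integer
 * part 'd' and a 64-bit fraction 'rem'.  Rounding then reduces to one
 * unsigned add: e.g. round-to-nearest-even uses incr =
 * 0x8000000000000000 (half an ULP), minus one when 'd' is already
 * even, so (rem + incr) carries out exactly when the fraction should
 * round up.
 */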
static u32 vfp_double_ftosi(int sd, int unused, int dm, u32 fpscr)
{
	struct vfp_double vdm;
	u32 d, exceptions = 0;
	int rmode = fpscr & FPSCR_RMODE_MASK;
	int tm;

	vfp_double_unpack(&vdm, vfp_get_double(dm));
	vfp_double_dump("VDM", &vdm);

	/*
	 * Do we have a denormalised number?
	 */
	tm = vfp_double_type(&vdm);
	if (tm & VFP_DENORMAL)
		exceptions |= FPSCR_IDC;

	if (tm & VFP_NAN) {
		d = 0;
		exceptions |= FPSCR_IOC;
	} else if (vdm.exponent >= 1023 + 32) {
		d = 0x7fffffff;
		if (vdm.sign)
			d = ~d;
		exceptions |= FPSCR_IOC;
	} else if (vdm.exponent >= 1023 - 1) {
		int shift = 1023 + 63 - vdm.exponent;	/* 58 */
		u64 rem, incr = 0;

		d = (vdm.significand << 1) >> shift;
		rem = vdm.significand << (65 - shift);

		if (rmode == FPSCR_ROUND_NEAREST) {
			incr = 0x8000000000000000ULL;
			if ((d & 1) == 0)
				incr -= 1;
		} else if (rmode == FPSCR_ROUND_TOZERO) {
			incr = 0;
		} else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vdm.sign != 0)) {
			incr = ~0ULL;
		}

		if ((rem + incr) < rem && d < 0xffffffff)
			d += 1;
		if (d > 0x7fffffff + (vdm.sign != 0)) {
			d = 0x7fffffff + (vdm.sign != 0);
			exceptions |= FPSCR_IOC;
		} else if (rem)
			exceptions |= FPSCR_IXC;

		if (vdm.sign)
			d = -d;
	} else {
		d = 0;
		if (vdm.exponent | vdm.significand) {
			exceptions |= FPSCR_IXC;
			if (rmode == FPSCR_ROUND_PLUSINF && vdm.sign == 0)
				d = 1;
			else if (rmode == FPSCR_ROUND_MINUSINF && vdm.sign)
				d = -1;
		}
	}

	pr_debug("VFP: ftosi: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);

	vfp_put_float((s32)d, sd);

	return exceptions;
}

static u32 vfp_double_ftosiz(int dd, int unused, int dm, u32 fpscr)
{
	return vfp_double_ftosi(dd, unused, dm, FPSCR_ROUND_TOZERO);
}

static struct op fops_ext[32] = {
	[FEXT_TO_IDX(FEXT_FCPY)]	= { vfp_double_fcpy,   0 },
	[FEXT_TO_IDX(FEXT_FABS)]	= { vfp_double_fabs,   0 },
	[FEXT_TO_IDX(FEXT_FNEG)]	= { vfp_double_fneg,   0 },
	[FEXT_TO_IDX(FEXT_FSQRT)]	= { vfp_double_fsqrt,  0 },
	[FEXT_TO_IDX(FEXT_FCMP)]	= { vfp_double_fcmp,   OP_SCALAR },
	[FEXT_TO_IDX(FEXT_FCMPE)]	= { vfp_double_fcmpe,  OP_SCALAR },
	[FEXT_TO_IDX(FEXT_FCMPZ)]	= { vfp_double_fcmpz,  OP_SCALAR },
	[FEXT_TO_IDX(FEXT_FCMPEZ)]	= { vfp_double_fcmpez, OP_SCALAR },
	[FEXT_TO_IDX(FEXT_FCVT)]	= { vfp_double_fcvts,  OP_SCALAR|OP_SD },
	[FEXT_TO_IDX(FEXT_FUITO)]	= { vfp_double_fuito,  OP_SCALAR|OP_SM },
	[FEXT_TO_IDX(FEXT_FSITO)]	= { vfp_double_fsito,  OP_SCALAR|OP_SM },
	[FEXT_TO_IDX(FEXT_FTOUI)]	= { vfp_double_ftoui,  OP_SCALAR|OP_SD },
	[FEXT_TO_IDX(FEXT_FTOUIZ)]	= { vfp_double_ftouiz, OP_SCALAR|OP_SD },
	[FEXT_TO_IDX(FEXT_FTOSI)]	= { vfp_double_ftosi,  OP_SCALAR|OP_SD },
	[FEXT_TO_IDX(FEXT_FTOSIZ)]	= { vfp_double_ftosiz, OP_SCALAR|OP_SD },
};

static u32
vfp_double_fadd_nonnumber(struct vfp_double *vdd, struct vfp_double *vdn,
			  struct vfp_double *vdm, u32 fpscr)
{
	struct vfp_double *vdp;
	u32 exceptions = 0;
	int tn, tm;

	tn = vfp_double_type(vdn);
	tm = vfp_double_type(vdm);
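	/*
	 * In brief: inf + inf of the same sign is that infinity;
	 * inf + (-inf) is invalid and yields the default QNaN;
	 * inf + number is the infinity; anything involving a NaN
	 * is handled by vfp_propagate_nan().
	 */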
	if (tn & tm & VFP_INFINITY) {
		/*
		 * Two infinities.  Are they different signs?
		 */
		if (vdn->sign ^ vdm->sign) {
			/*
			 * different signs -> invalid
			 */
			exceptions = FPSCR_IOC;
			vdp = &vfp_double_default_qnan;
		} else {
			/*
			 * same signs -> valid
			 */
			vdp = vdn;
		}
	} else if (tn & VFP_INFINITY && tm & VFP_NUMBER) {
		/*
		 * One infinity and one number -> infinity
		 */
		vdp = vdn;
	} else {
		/*
		 * 'n' is a NaN of some type
		 */
		return vfp_propagate_nan(vdd, vdn, vdm, fpscr);
	}
	*vdd = *vdp;
	return exceptions;
}

static u32
vfp_double_add(struct vfp_double *vdd, struct vfp_double *vdn,
	       struct vfp_double *vdm, u32 fpscr)
{
	u32 exp_diff;
	u64 m_sig;

	if (vdn->significand & (1ULL << 63) ||
	    vdm->significand & (1ULL << 63)) {
		pr_info("VFP: bad FP values in %s\n", __func__);
		vfp_double_dump("VDN", vdn);
		vfp_double_dump("VDM", vdm);
	}

	/*
	 * Ensure that 'n' is the largest magnitude number.  Note that
	 * if 'n' and 'm' have equal exponents, we do not swap them.
	 * This ensures that NaN propagation works correctly.
	 */
	if (vdn->exponent < vdm->exponent) {
		struct vfp_double *t = vdn;
		vdn = vdm;
		vdm = t;
	}

	/*
	 * Is 'n' an infinity or a NaN?  Note that 'm' may be a number,
	 * infinity or a NaN here.
	 */
	if (vdn->exponent == 2047)
		return vfp_double_fadd_nonnumber(vdd, vdn, vdm, fpscr);

	/*
	 * We have two proper numbers, where 'vdn' is the larger magnitude.
	 *
	 * Copy 'n' to 'd' before doing the arithmetic.
	 */
	*vdd = *vdn;

	/*
	 * Align 'm' with the result.
	 */
	exp_diff = vdn->exponent - vdm->exponent;
	m_sig = vfp_shiftright64jamming(vdm->significand, exp_diff);

	/*
	 * If the signs are different, we are really subtracting.
	 */
	if (vdn->sign ^ vdm->sign) {
		m_sig = vdn->significand - m_sig;
		if ((s64)m_sig < 0) {
			vdd->sign = vfp_sign_negate(vdd->sign);
			m_sig = -m_sig;
		} else if (m_sig == 0) {
			vdd->sign = (fpscr & FPSCR_RMODE_MASK) ==
				      FPSCR_ROUND_MINUSINF ? 0x8000 : 0;
		}
	} else {
		m_sig += vdn->significand;
	}
	vdd->significand = m_sig;

	return 0;
}

static u32
vfp_double_multiply(struct vfp_double *vdd, struct vfp_double *vdn,
		    struct vfp_double *vdm, u32 fpscr)
{
	vfp_double_dump("VDN", vdn);
	vfp_double_dump("VDM", vdm);

	/*
	 * Ensure that 'n' is the largest magnitude number.  Note that
	 * if 'n' and 'm' have equal exponents, we do not swap them.
	 * This ensures that NaN propagation works correctly.
	 */
	if (vdn->exponent < vdm->exponent) {
		struct vfp_double *t = vdn;
		vdn = vdm;
		vdm = t;
		pr_debug("VFP: swapping M <-> N\n");
	}

	vdd->sign = vdn->sign ^ vdm->sign;

	/*
	 * If 'n' is an infinity or NaN, handle it.  'm' may be anything.
	 */
	if (vdn->exponent == 2047) {
		if (vdn->significand || (vdm->exponent == 2047 && vdm->significand))
			return vfp_propagate_nan(vdd, vdn, vdm, fpscr);
		if ((vdm->exponent | vdm->significand) == 0) {
			*vdd = vfp_double_default_qnan;
			return FPSCR_IOC;
		}
		vdd->exponent = vdn->exponent;
		vdd->significand = 0;
		return 0;
	}

	/*
	 * If 'm' is zero, the result is always zero.  In this case,
	 * 'n' may be zero or a number, but it doesn't matter which.
	 */
	if ((vdm->exponent | vdm->significand) == 0) {
		vdd->exponent = 0;
		vdd->significand = 0;
		return 0;
	}
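	/*
	 * Both significands are normalised into [2^62, 2^63), so the
	 * high half of the 128-bit product computed by
	 * vfp_hi64multiply64() falls in [2^60, 2^62) - up to two bit
	 * positions below normalised.
	 */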
	/*
	 * We add 2 to the destination exponent for the same reason
	 * as the addition case - though this time we have +1 from
	 * each input operand.
	 */
	vdd->exponent = vdn->exponent + vdm->exponent - 1023 + 2;
	vdd->significand = vfp_hi64multiply64(vdn->significand, vdm->significand);

	vfp_double_dump("VDD", vdd);
	return 0;
}

#define NEG_MULTIPLY	(1 << 0)
#define NEG_SUBTRACT	(1 << 1)

static u32
vfp_double_multiply_accumulate(int dd, int dn, int dm, u32 fpscr, u32 negate, char *func)
{
	struct vfp_double vdd, vdp, vdn, vdm;
	u32 exceptions;

	vfp_double_unpack(&vdn, vfp_get_double(dn));
	if (vdn.exponent == 0 && vdn.significand)
		vfp_double_normalise_denormal(&vdn);

	vfp_double_unpack(&vdm, vfp_get_double(dm));
	if (vdm.exponent == 0 && vdm.significand)
		vfp_double_normalise_denormal(&vdm);

	exceptions = vfp_double_multiply(&vdp, &vdn, &vdm, fpscr);
	if (negate & NEG_MULTIPLY)
		vdp.sign = vfp_sign_negate(vdp.sign);

	vfp_double_unpack(&vdn, vfp_get_double(dd));
	if (vdn.exponent == 0 && vdn.significand)
		vfp_double_normalise_denormal(&vdn);
	if (negate & NEG_SUBTRACT)
		vdn.sign = vfp_sign_negate(vdn.sign);

	exceptions |= vfp_double_add(&vdd, &vdn, &vdp, fpscr);

	return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, func);
}

/*
 * Standard operations
 */

/*
 * dd = dd + (dn * dm)
 */
static u32 vfp_double_fmac(int dd, int dn, int dm, u32 fpscr)
{
	return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, 0, "fmac");
}

/*
 * dd = dd - (dn * dm)
 */
static u32 vfp_double_fnmac(int dd, int dn, int dm, u32 fpscr)
{
	return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, NEG_MULTIPLY, "fnmac");
}

/*
 * dd = -dd + (dn * dm)
 */
static u32 vfp_double_fmsc(int dd, int dn, int dm, u32 fpscr)
{
	return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, NEG_SUBTRACT, "fmsc");
}

/*
 * dd = -dd - (dn * dm)
 */
static u32 vfp_double_fnmsc(int dd, int dn, int dm, u32 fpscr)
{
	return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, NEG_SUBTRACT | NEG_MULTIPLY, "fnmsc");
}

/*
 * dd = dn * dm
 */
static u32 vfp_double_fmul(int dd, int dn, int dm, u32 fpscr)
{
	struct vfp_double vdd, vdn, vdm;
	u32 exceptions;

	vfp_double_unpack(&vdn, vfp_get_double(dn));
	if (vdn.exponent == 0 && vdn.significand)
		vfp_double_normalise_denormal(&vdn);

	vfp_double_unpack(&vdm, vfp_get_double(dm));
	if (vdm.exponent == 0 && vdm.significand)
		vfp_double_normalise_denormal(&vdm);

	exceptions = vfp_double_multiply(&vdd, &vdn, &vdm, fpscr);
	return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fmul");
}

/*
 * dd = -(dn * dm)
 */
static u32 vfp_double_fnmul(int dd, int dn, int dm, u32 fpscr)
{
	struct vfp_double vdd, vdn, vdm;
	u32 exceptions;

	vfp_double_unpack(&vdn, vfp_get_double(dn));
	if (vdn.exponent == 0 && vdn.significand)
		vfp_double_normalise_denormal(&vdn);

	vfp_double_unpack(&vdm, vfp_get_double(dm));
	if (vdm.exponent == 0 && vdm.significand)
		vfp_double_normalise_denormal(&vdm);

	exceptions = vfp_double_multiply(&vdd, &vdn, &vdm, fpscr);
	vdd.sign = vfp_sign_negate(vdd.sign);

	return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fnmul");
}
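/*
 * The two-operand helpers below all follow the same pattern: unpack
 * both operands, normalise any denormals (vfp_double_add() and
 * vfp_double_multiply() assume normalised significands), do the
 * arithmetic, then round and repack via vfp_double_normaliseround().
 */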
/*
 * dd = dn + dm
 */
static u32 vfp_double_fadd(int dd, int dn, int dm, u32 fpscr)
{
	struct vfp_double vdd, vdn, vdm;
	u32 exceptions;

	vfp_double_unpack(&vdn, vfp_get_double(dn));
	if (vdn.exponent == 0 && vdn.significand)
		vfp_double_normalise_denormal(&vdn);

	vfp_double_unpack(&vdm, vfp_get_double(dm));
	if (vdm.exponent == 0 && vdm.significand)
		vfp_double_normalise_denormal(&vdm);

	exceptions = vfp_double_add(&vdd, &vdn, &vdm, fpscr);

	return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fadd");
}

/*
 * dd = dn - dm
 */
static u32 vfp_double_fsub(int dd, int dn, int dm, u32 fpscr)
{
	struct vfp_double vdd, vdn, vdm;
	u32 exceptions;

	vfp_double_unpack(&vdn, vfp_get_double(dn));
	if (vdn.exponent == 0 && vdn.significand)
		vfp_double_normalise_denormal(&vdn);

	vfp_double_unpack(&vdm, vfp_get_double(dm));
	if (vdm.exponent == 0 && vdm.significand)
		vfp_double_normalise_denormal(&vdm);

	/*
	 * Subtraction is like addition, but with a negated operand.
	 */
	vdm.sign = vfp_sign_negate(vdm.sign);

	exceptions = vfp_double_add(&vdd, &vdn, &vdm, fpscr);

	return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fsub");
}

/*
 * dd = dn / dm
 */
static u32 vfp_double_fdiv(int dd, int dn, int dm, u32 fpscr)
{
	struct vfp_double vdd, vdn, vdm;
	u32 exceptions = 0;
	int tm, tn;

	vfp_double_unpack(&vdn, vfp_get_double(dn));
	vfp_double_unpack(&vdm, vfp_get_double(dm));

	vdd.sign = vdn.sign ^ vdm.sign;

	tn = vfp_double_type(&vdn);
	tm = vfp_double_type(&vdm);

	/*
	 * Is n a NaN?
	 */
	if (tn & VFP_NAN)
		goto vdn_nan;

	/*
	 * Is m a NaN?
	 */
	if (tm & VFP_NAN)
		goto vdm_nan;

	/*
	 * If n and m are infinity, the result is invalid.
	 * If n and m are zero, the result is invalid.
	 */
	if (tm & tn & (VFP_INFINITY|VFP_ZERO))
		goto invalid;

	/*
	 * If n is infinity, the result is infinity.
	 */
	if (tn & VFP_INFINITY)
		goto infinity;

	/*
	 * If m is zero, raise the div0 exception.
	 */
	if (tm & VFP_ZERO)
		goto divzero;

	/*
	 * If m is infinity, or n is zero, the result is zero.
	 */
	if (tm & VFP_INFINITY || tn & VFP_ZERO)
		goto zero;

	if (tn & VFP_DENORMAL)
		vfp_double_normalise_denormal(&vdn);
	if (tm & VFP_DENORMAL)
		vfp_double_normalise_denormal(&vdm);

	/*
	 * OK, we have two numbers, so we can perform division.
	 */
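	/*
	 * The quotient estimate from vfp_estimate_div128to64() is good
	 * to within a few units in the last place; when its low bits
	 * land near a rounding boundary, the exact 128-bit remainder
	 * n - q*m below decides whether to step the estimate down and
	 * whether the sticky bit must be set.
	 */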
	vdd.exponent = vdn.exponent - vdm.exponent + 1023 - 1;
	vdm.significand <<= 1;
	if (vdm.significand <= (2 * vdn.significand)) {
		vdn.significand >>= 1;
		vdd.exponent++;
	}
	vdd.significand = vfp_estimate_div128to64(vdn.significand, 0, vdm.significand);
	if ((vdd.significand & 0x1ff) <= 2) {
		u64 termh, terml, remh, reml;
		mul64to128(&termh, &terml, vdm.significand, vdd.significand);
		sub128(&remh, &reml, vdn.significand, 0, termh, terml);
		while ((s64)remh < 0) {
			vdd.significand -= 1;
			add128(&remh, &reml, remh, reml, 0, vdm.significand);
		}
		vdd.significand |= (reml != 0);
	}
	return vfp_double_normaliseround(dd, &vdd, fpscr, 0, "fdiv");

 vdn_nan:
	exceptions = vfp_propagate_nan(&vdd, &vdn, &vdm, fpscr);
 pack:
	vfp_put_double(vfp_double_pack(&vdd), dd);
	return exceptions;

 vdm_nan:
	exceptions = vfp_propagate_nan(&vdd, &vdm, &vdn, fpscr);
	goto pack;

 zero:
	vdd.exponent = 0;
	vdd.significand = 0;
	goto pack;

 divzero:
	exceptions = FPSCR_DZC;
 infinity:
	vdd.exponent = 2047;
	vdd.significand = 0;
	goto pack;

 invalid:
	vfp_put_double(vfp_double_pack(&vfp_double_default_qnan), dd);
	return FPSCR_IOC;
}

static struct op fops[16] = {
	[FOP_TO_IDX(FOP_FMAC)]	= { vfp_double_fmac,  0 },
	[FOP_TO_IDX(FOP_FNMAC)]	= { vfp_double_fnmac, 0 },
	[FOP_TO_IDX(FOP_FMSC)]	= { vfp_double_fmsc,  0 },
	[FOP_TO_IDX(FOP_FNMSC)]	= { vfp_double_fnmsc, 0 },
	[FOP_TO_IDX(FOP_FMUL)]	= { vfp_double_fmul,  0 },
	[FOP_TO_IDX(FOP_FNMUL)]	= { vfp_double_fnmul, 0 },
	[FOP_TO_IDX(FOP_FADD)]	= { vfp_double_fadd,  0 },
	[FOP_TO_IDX(FOP_FSUB)]	= { vfp_double_fsub,  0 },
	[FOP_TO_IDX(FOP_FDIV)]	= { vfp_double_fdiv,  0 },
};

#define FREG_BANK(x)	((x) & 0x0c)
#define FREG_IDX(x)	((x) & 3)

u32 vfp_double_cpdo(u32 inst, u32 fpscr)
{
	u32 op = inst & FOP_MASK;
	u32 exceptions = 0;
	unsigned int dest;
	unsigned int dn = vfp_get_dn(inst);
	unsigned int dm;
	unsigned int vecitr, veclen, vecstride;
	struct op *fop;

	vecstride = (1 + ((fpscr & FPSCR_STRIDE_MASK) == FPSCR_STRIDE_MASK));

	fop = (op == FOP_EXT) ? &fops_ext[FEXT_TO_IDX(inst)] : &fops[FOP_TO_IDX(op)];

	/*
	 * fcvtsd takes an sN register number as destination, not dN.
	 * It also always operates on scalars.
	 */
	if (fop->flags & OP_SD)
		dest = vfp_get_sd(inst);
	else
		dest = vfp_get_dd(inst);

	/*
	 * f[us]ito takes an sN operand, not a dN operand.
	 */
	if (fop->flags & OP_SM)
		dm = vfp_get_sm(inst);
	else
		dm = vfp_get_dm(inst);

	/*
	 * If the destination bank is zero, the vector length is always '1'.
	 * ARM DDI0100F C5.1.3, C5.3.2.
	 */
	if ((fop->flags & OP_SCALAR) || (FREG_BANK(dest) == 0))
		veclen = 0;
	else
		veclen = fpscr & FPSCR_LENGTH_MASK;

	pr_debug("VFP: vecstride=%u veclen=%u\n", vecstride,
		 (veclen >> FPSCR_LENGTH_BIT) + 1);

	if (!fop->fn)
		goto invalid;
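	/*
	 * Vector mode: the FPSCR LEN field selects how many elements
	 * to process and STRIDE how far apart they are.  For example,
	 * with LEN = 3 (four iterations) and a doubled stride, an op
	 * whose destination is d4 walks d4, d6, d4, d6 - register
	 * numbers wrap within their bank of four, as the
	 * FREG_BANK/FREG_IDX arithmetic below shows.
	 */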
	for (vecitr = 0; vecitr <= veclen; vecitr += 1 << FPSCR_LENGTH_BIT) {
		u32 except;
		char type;

		type = fop->flags & OP_SD ? 's' : 'd';
		if (op == FOP_EXT)
			pr_debug("VFP: itr%d (%c%u) = op[%u] (d%u)\n",
				 vecitr >> FPSCR_LENGTH_BIT,
				 type, dest, dn, dm);
		else
			pr_debug("VFP: itr%d (%c%u) = (d%u) op[%u] (d%u)\n",
				 vecitr >> FPSCR_LENGTH_BIT,
				 type, dest, dn, FOP_TO_IDX(op), dm);

		except = fop->fn(dest, dn, dm, fpscr);
		pr_debug("VFP: itr%d: exceptions=%08x\n",
			 vecitr >> FPSCR_LENGTH_BIT, except);

		exceptions |= except;

		/*
		 * CHECK: It appears to be undefined whether we stop when
		 * we encounter an exception.  We continue.
		 */
		dest = FREG_BANK(dest) + ((FREG_IDX(dest) + vecstride) & 3);
		dn = FREG_BANK(dn) + ((FREG_IDX(dn) + vecstride) & 3);
		if (FREG_BANK(dm) != 0)
			dm = FREG_BANK(dm) + ((FREG_IDX(dm) + vecstride) & 3);
	}
	return exceptions;

 invalid:
	return ~0;
}