fplsp.S (293709B)
1~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 2MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP 3M68000 Hi-Performance Microprocessor Division 4M68060 Software Package 5Production Release P1.00 -- October 10, 1994 6 7M68060 Software Package Copyright © 1993, 1994 Motorola Inc. All rights reserved. 8 9THE SOFTWARE is provided on an "AS IS" basis and without warranty. 10To the maximum extent permitted by applicable law, 11MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, 12INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE 13and any warranty against infringement with regard to the SOFTWARE 14(INCLUDING ANY MODIFIED VERSIONS THEREOF) and any accompanying written materials. 15 16To the maximum extent permitted by applicable law, 17IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER 18(INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, 19BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS) 20ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE. 21Motorola assumes no responsibility for the maintenance and support of the SOFTWARE. 22 23You are hereby granted a copyright license to use, modify, and distribute the SOFTWARE 24so long as this entire notice is retained without alteration in any modified and/or 25redistributed versions, and that such modified versions are clearly identified as such. 26No licenses are granted by implication, estoppel or otherwise under any patents 27or trademarks of Motorola, Inc. 28~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 29# 30# lfptop.s: 31# This file is appended to the top of the 060ILSP package 32# and contains the entry points into the package. The user, in 33# effect, branches to one of the branch table entries located here. 
34# 35 36 bra.l _facoss_ 37 short 0x0000 38 bra.l _facosd_ 39 short 0x0000 40 bra.l _facosx_ 41 short 0x0000 42 43 bra.l _fasins_ 44 short 0x0000 45 bra.l _fasind_ 46 short 0x0000 47 bra.l _fasinx_ 48 short 0x0000 49 50 bra.l _fatans_ 51 short 0x0000 52 bra.l _fatand_ 53 short 0x0000 54 bra.l _fatanx_ 55 short 0x0000 56 57 bra.l _fatanhs_ 58 short 0x0000 59 bra.l _fatanhd_ 60 short 0x0000 61 bra.l _fatanhx_ 62 short 0x0000 63 64 bra.l _fcoss_ 65 short 0x0000 66 bra.l _fcosd_ 67 short 0x0000 68 bra.l _fcosx_ 69 short 0x0000 70 71 bra.l _fcoshs_ 72 short 0x0000 73 bra.l _fcoshd_ 74 short 0x0000 75 bra.l _fcoshx_ 76 short 0x0000 77 78 bra.l _fetoxs_ 79 short 0x0000 80 bra.l _fetoxd_ 81 short 0x0000 82 bra.l _fetoxx_ 83 short 0x0000 84 85 bra.l _fetoxm1s_ 86 short 0x0000 87 bra.l _fetoxm1d_ 88 short 0x0000 89 bra.l _fetoxm1x_ 90 short 0x0000 91 92 bra.l _fgetexps_ 93 short 0x0000 94 bra.l _fgetexpd_ 95 short 0x0000 96 bra.l _fgetexpx_ 97 short 0x0000 98 99 bra.l _fgetmans_ 100 short 0x0000 101 bra.l _fgetmand_ 102 short 0x0000 103 bra.l _fgetmanx_ 104 short 0x0000 105 106 bra.l _flog10s_ 107 short 0x0000 108 bra.l _flog10d_ 109 short 0x0000 110 bra.l _flog10x_ 111 short 0x0000 112 113 bra.l _flog2s_ 114 short 0x0000 115 bra.l _flog2d_ 116 short 0x0000 117 bra.l _flog2x_ 118 short 0x0000 119 120 bra.l _flogns_ 121 short 0x0000 122 bra.l _flognd_ 123 short 0x0000 124 bra.l _flognx_ 125 short 0x0000 126 127 bra.l _flognp1s_ 128 short 0x0000 129 bra.l _flognp1d_ 130 short 0x0000 131 bra.l _flognp1x_ 132 short 0x0000 133 134 bra.l _fmods_ 135 short 0x0000 136 bra.l _fmodd_ 137 short 0x0000 138 bra.l _fmodx_ 139 short 0x0000 140 141 bra.l _frems_ 142 short 0x0000 143 bra.l _fremd_ 144 short 0x0000 145 bra.l _fremx_ 146 short 0x0000 147 148 bra.l _fscales_ 149 short 0x0000 150 bra.l _fscaled_ 151 short 0x0000 152 bra.l _fscalex_ 153 short 0x0000 154 155 bra.l _fsins_ 156 short 0x0000 157 bra.l _fsind_ 158 short 0x0000 159 bra.l _fsinx_ 160 short 0x0000 161 162 bra.l 
_fsincoss_ 163 short 0x0000 164 bra.l _fsincosd_ 165 short 0x0000 166 bra.l _fsincosx_ 167 short 0x0000 168 169 bra.l _fsinhs_ 170 short 0x0000 171 bra.l _fsinhd_ 172 short 0x0000 173 bra.l _fsinhx_ 174 short 0x0000 175 176 bra.l _ftans_ 177 short 0x0000 178 bra.l _ftand_ 179 short 0x0000 180 bra.l _ftanx_ 181 short 0x0000 182 183 bra.l _ftanhs_ 184 short 0x0000 185 bra.l _ftanhd_ 186 short 0x0000 187 bra.l _ftanhx_ 188 short 0x0000 189 190 bra.l _ftentoxs_ 191 short 0x0000 192 bra.l _ftentoxd_ 193 short 0x0000 194 bra.l _ftentoxx_ 195 short 0x0000 196 197 bra.l _ftwotoxs_ 198 short 0x0000 199 bra.l _ftwotoxd_ 200 short 0x0000 201 bra.l _ftwotoxx_ 202 short 0x0000 203 204 bra.l _fabss_ 205 short 0x0000 206 bra.l _fabsd_ 207 short 0x0000 208 bra.l _fabsx_ 209 short 0x0000 210 211 bra.l _fadds_ 212 short 0x0000 213 bra.l _faddd_ 214 short 0x0000 215 bra.l _faddx_ 216 short 0x0000 217 218 bra.l _fdivs_ 219 short 0x0000 220 bra.l _fdivd_ 221 short 0x0000 222 bra.l _fdivx_ 223 short 0x0000 224 225 bra.l _fints_ 226 short 0x0000 227 bra.l _fintd_ 228 short 0x0000 229 bra.l _fintx_ 230 short 0x0000 231 232 bra.l _fintrzs_ 233 short 0x0000 234 bra.l _fintrzd_ 235 short 0x0000 236 bra.l _fintrzx_ 237 short 0x0000 238 239 bra.l _fmuls_ 240 short 0x0000 241 bra.l _fmuld_ 242 short 0x0000 243 bra.l _fmulx_ 244 short 0x0000 245 246 bra.l _fnegs_ 247 short 0x0000 248 bra.l _fnegd_ 249 short 0x0000 250 bra.l _fnegx_ 251 short 0x0000 252 253 bra.l _fsqrts_ 254 short 0x0000 255 bra.l _fsqrtd_ 256 short 0x0000 257 bra.l _fsqrtx_ 258 short 0x0000 259 260 bra.l _fsubs_ 261 short 0x0000 262 bra.l _fsubd_ 263 short 0x0000 264 bra.l _fsubx_ 265 short 0x0000 266 267# leave room for future possible additions 268 align 0x400 269 270# 271# This file contains a set of define statements for constants 272# in order to promote readability within the corecode itself. 
273# 274 275set LOCAL_SIZE, 192 # stack frame size(bytes) 276set LV, -LOCAL_SIZE # stack offset 277 278set EXC_SR, 0x4 # stack status register 279set EXC_PC, 0x6 # stack pc 280set EXC_VOFF, 0xa # stacked vector offset 281set EXC_EA, 0xc # stacked <ea> 282 283set EXC_FP, 0x0 # frame pointer 284 285set EXC_AREGS, -68 # offset of all address regs 286set EXC_DREGS, -100 # offset of all data regs 287set EXC_FPREGS, -36 # offset of all fp regs 288 289set EXC_A7, EXC_AREGS+(7*4) # offset of saved a7 290set OLD_A7, EXC_AREGS+(6*4) # extra copy of saved a7 291set EXC_A6, EXC_AREGS+(6*4) # offset of saved a6 292set EXC_A5, EXC_AREGS+(5*4) 293set EXC_A4, EXC_AREGS+(4*4) 294set EXC_A3, EXC_AREGS+(3*4) 295set EXC_A2, EXC_AREGS+(2*4) 296set EXC_A1, EXC_AREGS+(1*4) 297set EXC_A0, EXC_AREGS+(0*4) 298set EXC_D7, EXC_DREGS+(7*4) 299set EXC_D6, EXC_DREGS+(6*4) 300set EXC_D5, EXC_DREGS+(5*4) 301set EXC_D4, EXC_DREGS+(4*4) 302set EXC_D3, EXC_DREGS+(3*4) 303set EXC_D2, EXC_DREGS+(2*4) 304set EXC_D1, EXC_DREGS+(1*4) 305set EXC_D0, EXC_DREGS+(0*4) 306 307set EXC_FP0, EXC_FPREGS+(0*12) # offset of saved fp0 308set EXC_FP1, EXC_FPREGS+(1*12) # offset of saved fp1 309set EXC_FP2, EXC_FPREGS+(2*12) # offset of saved fp2 (not used) 310 311set FP_SCR1, LV+80 # fp scratch 1 312set FP_SCR1_EX, FP_SCR1+0 313set FP_SCR1_SGN, FP_SCR1+2 314set FP_SCR1_HI, FP_SCR1+4 315set FP_SCR1_LO, FP_SCR1+8 316 317set FP_SCR0, LV+68 # fp scratch 0 318set FP_SCR0_EX, FP_SCR0+0 319set FP_SCR0_SGN, FP_SCR0+2 320set FP_SCR0_HI, FP_SCR0+4 321set FP_SCR0_LO, FP_SCR0+8 322 323set FP_DST, LV+56 # fp destination operand 324set FP_DST_EX, FP_DST+0 325set FP_DST_SGN, FP_DST+2 326set FP_DST_HI, FP_DST+4 327set FP_DST_LO, FP_DST+8 328 329set FP_SRC, LV+44 # fp source operand 330set FP_SRC_EX, FP_SRC+0 331set FP_SRC_SGN, FP_SRC+2 332set FP_SRC_HI, FP_SRC+4 333set FP_SRC_LO, FP_SRC+8 334 335set USER_FPIAR, LV+40 # FP instr address register 336 337set USER_FPSR, LV+36 # FP status register 338set FPSR_CC, USER_FPSR+0 # FPSR 
condition codes 339set FPSR_QBYTE, USER_FPSR+1 # FPSR quotient byte 340set FPSR_EXCEPT, USER_FPSR+2 # FPSR exception status byte 341set FPSR_AEXCEPT, USER_FPSR+3 # FPSR accrued exception byte 342 343set USER_FPCR, LV+32 # FP control register 344set FPCR_ENABLE, USER_FPCR+2 # FPCR exception enable 345set FPCR_MODE, USER_FPCR+3 # FPCR rounding mode control 346 347set L_SCR3, LV+28 # integer scratch 3 348set L_SCR2, LV+24 # integer scratch 2 349set L_SCR1, LV+20 # integer scratch 1 350 351set STORE_FLG, LV+19 # flag: operand store (ie. not fcmp/ftst) 352 353set EXC_TEMP2, LV+24 # temporary space 354set EXC_TEMP, LV+16 # temporary space 355 356set DTAG, LV+15 # destination operand type 357set STAG, LV+14 # source operand type 358 359set SPCOND_FLG, LV+10 # flag: special case (see below) 360 361set EXC_CC, LV+8 # saved condition codes 362set EXC_EXTWPTR, LV+4 # saved current PC (active) 363set EXC_EXTWORD, LV+2 # saved extension word 364set EXC_CMDREG, LV+2 # saved extension word 365set EXC_OPWORD, LV+0 # saved operation word 366 367################################ 368 369# Helpful macros 370 371set FTEMP, 0 # offsets within an 372set FTEMP_EX, 0 # extended precision 373set FTEMP_SGN, 2 # value saved in memory. 374set FTEMP_HI, 4 375set FTEMP_LO, 8 376set FTEMP_GRS, 12 377 378set LOCAL, 0 # offsets within an 379set LOCAL_EX, 0 # extended precision 380set LOCAL_SGN, 2 # value saved in memory. 381set LOCAL_HI, 4 382set LOCAL_LO, 8 383set LOCAL_GRS, 12 384 385set DST, 0 # offsets within an 386set DST_EX, 0 # extended precision 387set DST_HI, 4 # value saved in memory. 388set DST_LO, 8 389 390set SRC, 0 # offsets within an 391set SRC_EX, 0 # extended precision 392set SRC_HI, 4 # value saved in memory. 
393set SRC_LO, 8 394 395set SGL_LO, 0x3f81 # min sgl prec exponent 396set SGL_HI, 0x407e # max sgl prec exponent 397set DBL_LO, 0x3c01 # min dbl prec exponent 398set DBL_HI, 0x43fe # max dbl prec exponent 399set EXT_LO, 0x0 # min ext prec exponent 400set EXT_HI, 0x7ffe # max ext prec exponent 401 402set EXT_BIAS, 0x3fff # extended precision bias 403set SGL_BIAS, 0x007f # single precision bias 404set DBL_BIAS, 0x03ff # double precision bias 405 406set NORM, 0x00 # operand type for STAG/DTAG 407set ZERO, 0x01 # operand type for STAG/DTAG 408set INF, 0x02 # operand type for STAG/DTAG 409set QNAN, 0x03 # operand type for STAG/DTAG 410set DENORM, 0x04 # operand type for STAG/DTAG 411set SNAN, 0x05 # operand type for STAG/DTAG 412set UNNORM, 0x06 # operand type for STAG/DTAG 413 414################## 415# FPSR/FPCR bits # 416################## 417set neg_bit, 0x3 # negative result 418set z_bit, 0x2 # zero result 419set inf_bit, 0x1 # infinite result 420set nan_bit, 0x0 # NAN result 421 422set q_sn_bit, 0x7 # sign bit of quotient byte 423 424set bsun_bit, 7 # branch on unordered 425set snan_bit, 6 # signalling NAN 426set operr_bit, 5 # operand error 427set ovfl_bit, 4 # overflow 428set unfl_bit, 3 # underflow 429set dz_bit, 2 # divide by zero 430set inex2_bit, 1 # inexact result 2 431set inex1_bit, 0 # inexact result 1 432 433set aiop_bit, 7 # accrued illegal operation bit 434set aovfl_bit, 6 # accrued overflow bit 435set aunfl_bit, 5 # accrued underflow bit 436set adz_bit, 4 # accrued dz bit 437set ainex_bit, 3 # accrued inexact bit 438 439############################# 440# FPSR individual bit masks # 441############################# 442set neg_mask, 0x08000000 # negative bit mask (lw) 443set inf_mask, 0x02000000 # infinity bit mask (lw) 444set z_mask, 0x04000000 # zero bit mask (lw) 445set nan_mask, 0x01000000 # nan bit mask (lw) 446 447set neg_bmask, 0x08 # negative bit mask (byte) 448set inf_bmask, 0x02 # infinity bit mask (byte) 449set z_bmask, 0x04 # zero bit mask 
(byte) 450set nan_bmask, 0x01 # nan bit mask (byte) 451 452set bsun_mask, 0x00008000 # bsun exception mask 453set snan_mask, 0x00004000 # snan exception mask 454set operr_mask, 0x00002000 # operr exception mask 455set ovfl_mask, 0x00001000 # overflow exception mask 456set unfl_mask, 0x00000800 # underflow exception mask 457set dz_mask, 0x00000400 # dz exception mask 458set inex2_mask, 0x00000200 # inex2 exception mask 459set inex1_mask, 0x00000100 # inex1 exception mask 460 461set aiop_mask, 0x00000080 # accrued illegal operation 462set aovfl_mask, 0x00000040 # accrued overflow 463set aunfl_mask, 0x00000020 # accrued underflow 464set adz_mask, 0x00000010 # accrued divide by zero 465set ainex_mask, 0x00000008 # accrued inexact 466 467###################################### 468# FPSR combinations used in the FPSP # 469###################################### 470set dzinf_mask, inf_mask+dz_mask+adz_mask 471set opnan_mask, nan_mask+operr_mask+aiop_mask 472set nzi_mask, 0x01ffffff #clears N, Z, and I 473set unfinx_mask, unfl_mask+inex2_mask+aunfl_mask+ainex_mask 474set unf2inx_mask, unfl_mask+inex2_mask+ainex_mask 475set ovfinx_mask, ovfl_mask+inex2_mask+aovfl_mask+ainex_mask 476set inx1a_mask, inex1_mask+ainex_mask 477set inx2a_mask, inex2_mask+ainex_mask 478set snaniop_mask, nan_mask+snan_mask+aiop_mask 479set snaniop2_mask, snan_mask+aiop_mask 480set naniop_mask, nan_mask+aiop_mask 481set neginf_mask, neg_mask+inf_mask 482set infaiop_mask, inf_mask+aiop_mask 483set negz_mask, neg_mask+z_mask 484set opaop_mask, operr_mask+aiop_mask 485set unfl_inx_mask, unfl_mask+aunfl_mask+ainex_mask 486set ovfl_inx_mask, ovfl_mask+aovfl_mask+ainex_mask 487 488######### 489# misc. 
# 490######### 491set rnd_stky_bit, 29 # stky bit pos in longword 492 493set sign_bit, 0x7 # sign bit 494set signan_bit, 0x6 # signalling nan bit 495 496set sgl_thresh, 0x3f81 # minimum sgl exponent 497set dbl_thresh, 0x3c01 # minimum dbl exponent 498 499set x_mode, 0x0 # extended precision 500set s_mode, 0x4 # single precision 501set d_mode, 0x8 # double precision 502 503set rn_mode, 0x0 # round-to-nearest 504set rz_mode, 0x1 # round-to-zero 505set rm_mode, 0x2 # round-to-minus-infinity 506set rp_mode, 0x3 # round-to-plus-infinity 507 508set mantissalen, 64 # length of mantissa in bits 509 510set BYTE, 1 # len(byte) == 1 byte 511set WORD, 2 # len(word) == 2 bytes 512set LONG, 4 # len(longword) == 4 bytes 513 514set BSUN_VEC, 0xc0 # bsun vector offset 515set INEX_VEC, 0xc4 # inexact vector offset 516set DZ_VEC, 0xc8 # dz vector offset 517set UNFL_VEC, 0xcc # unfl vector offset 518set OPERR_VEC, 0xd0 # operr vector offset 519set OVFL_VEC, 0xd4 # ovfl vector offset 520set SNAN_VEC, 0xd8 # snan vector offset 521 522########################### 523# SPecial CONDition FLaGs # 524########################### 525set ftrapcc_flg, 0x01 # flag bit: ftrapcc exception 526set fbsun_flg, 0x02 # flag bit: bsun exception 527set mia7_flg, 0x04 # flag bit: (a7)+ <ea> 528set mda7_flg, 0x08 # flag bit: -(a7) <ea> 529set fmovm_flg, 0x40 # flag bit: fmovm instruction 530set immed_flg, 0x80 # flag bit: &<data> <ea> 531 532set ftrapcc_bit, 0x0 533set fbsun_bit, 0x1 534set mia7_bit, 0x2 535set mda7_bit, 0x3 536set immed_bit, 0x7 537 538################################## 539# TRANSCENDENTAL "LAST-OP" FLAGS # 540################################## 541set FMUL_OP, 0x0 # fmul instr performed last 542set FDIV_OP, 0x1 # fdiv performed last 543set FADD_OP, 0x2 # fadd performed last 544set FMOV_OP, 0x3 # fmov performed last 545 546############# 547# CONSTANTS # 548############# 549T1: long 0x40C62D38,0xD3D64634 # 16381 LOG2 LEAD 550T2: long 0x3D6F90AE,0xB1E75CC7 # 16381 LOG2 TRAIL 551 552PI: long 
0x40000000,0xC90FDAA2,0x2168C235,0x00000000 553PIBY2: long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000 554 555TWOBYPI: 556 long 0x3FE45F30,0x6DC9C883 557 558######################################################################### 559# MONADIC TEMPLATE # 560######################################################################### 561 global _fsins_ 562_fsins_: 563 link %a6,&-LOCAL_SIZE 564 565 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 566 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs 567 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 568 569 fmov.l &0x0,%fpcr # zero FPCR 570 571# 572# copy, convert, and tag input argument 573# 574 fmov.s 0x8(%a6),%fp0 # load sgl input 575 fmov.x %fp0,FP_SRC(%a6) 576 lea FP_SRC(%a6),%a0 577 bsr.l tag # fetch operand type 578 mov.b %d0,STAG(%a6) 579 mov.b %d0,%d1 580 581 andi.l &0x00ff00ff,USER_FPSR(%a6) 582 583 clr.l %d0 584 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec 585 586 tst.b %d1 587 bne.b _L0_2s 588 bsr.l ssin # operand is a NORM 589 bra.b _L0_6s 590_L0_2s: 591 cmpi.b %d1,&ZERO # is operand a ZERO? 592 bne.b _L0_3s # no 593 bsr.l src_zero # yes 594 bra.b _L0_6s 595_L0_3s: 596 cmpi.b %d1,&INF # is operand an INF? 597 bne.b _L0_4s # no 598 bsr.l t_operr # yes 599 bra.b _L0_6s 600_L0_4s: 601 cmpi.b %d1,&QNAN # is operand a QNAN? 
602 bne.b _L0_5s # no 603 bsr.l src_qnan # yes 604 bra.b _L0_6s 605_L0_5s: 606 bsr.l ssind # operand is a DENORM 607_L0_6s: 608 609# 610# Result is now in FP0 611# 612 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 613 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs 614 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 615 unlk %a6 616 rts 617 618 global _fsind_ 619_fsind_: 620 link %a6,&-LOCAL_SIZE 621 622 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 623 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs 624 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 625 626 fmov.l &0x0,%fpcr # zero FPCR 627 628# 629# copy, convert, and tag input argument 630# 631 fmov.d 0x8(%a6),%fp0 # load dbl input 632 fmov.x %fp0,FP_SRC(%a6) 633 lea FP_SRC(%a6),%a0 634 bsr.l tag # fetch operand type 635 mov.b %d0,STAG(%a6) 636 mov.b %d0,%d1 637 638 andi.l &0x00ff00ff,USER_FPSR(%a6) 639 640 clr.l %d0 641 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec 642 643 mov.b %d1,STAG(%a6) 644 tst.b %d1 645 bne.b _L0_2d 646 bsr.l ssin # operand is a NORM 647 bra.b _L0_6d 648_L0_2d: 649 cmpi.b %d1,&ZERO # is operand a ZERO? 650 bne.b _L0_3d # no 651 bsr.l src_zero # yes 652 bra.b _L0_6d 653_L0_3d: 654 cmpi.b %d1,&INF # is operand an INF? 655 bne.b _L0_4d # no 656 bsr.l t_operr # yes 657 bra.b _L0_6d 658_L0_4d: 659 cmpi.b %d1,&QNAN # is operand a QNAN? 
660 bne.b _L0_5d # no 661 bsr.l src_qnan # yes 662 bra.b _L0_6d 663_L0_5d: 664 bsr.l ssind # operand is a DENORM 665_L0_6d: 666 667# 668# Result is now in FP0 669# 670 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 671 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs 672 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 673 unlk %a6 674 rts 675 676 global _fsinx_ 677_fsinx_: 678 link %a6,&-LOCAL_SIZE 679 680 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 681 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs 682 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 683 684 fmov.l &0x0,%fpcr # zero FPCR 685 686# 687# copy, convert, and tag input argument 688# 689 lea FP_SRC(%a6),%a0 690 mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input 691 mov.l 0x8+0x4(%a6),0x4(%a0) 692 mov.l 0x8+0x8(%a6),0x8(%a0) 693 bsr.l tag # fetch operand type 694 mov.b %d0,STAG(%a6) 695 mov.b %d0,%d1 696 697 andi.l &0x00ff00ff,USER_FPSR(%a6) 698 699 clr.l %d0 700 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec 701 702 tst.b %d1 703 bne.b _L0_2x 704 bsr.l ssin # operand is a NORM 705 bra.b _L0_6x 706_L0_2x: 707 cmpi.b %d1,&ZERO # is operand a ZERO? 708 bne.b _L0_3x # no 709 bsr.l src_zero # yes 710 bra.b _L0_6x 711_L0_3x: 712 cmpi.b %d1,&INF # is operand an INF? 713 bne.b _L0_4x # no 714 bsr.l t_operr # yes 715 bra.b _L0_6x 716_L0_4x: 717 cmpi.b %d1,&QNAN # is operand a QNAN? 
718 bne.b _L0_5x # no 719 bsr.l src_qnan # yes 720 bra.b _L0_6x 721_L0_5x: 722 bsr.l ssind # operand is a DENORM 723_L0_6x: 724 725# 726# Result is now in FP0 727# 728 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 729 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs 730 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 731 unlk %a6 732 rts 733 734 735######################################################################### 736# MONADIC TEMPLATE # 737######################################################################### 738 global _fcoss_ 739_fcoss_: 740 link %a6,&-LOCAL_SIZE 741 742 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 743 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs 744 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 745 746 fmov.l &0x0,%fpcr # zero FPCR 747 748# 749# copy, convert, and tag input argument 750# 751 fmov.s 0x8(%a6),%fp0 # load sgl input 752 fmov.x %fp0,FP_SRC(%a6) 753 lea FP_SRC(%a6),%a0 754 bsr.l tag # fetch operand type 755 mov.b %d0,STAG(%a6) 756 mov.b %d0,%d1 757 758 andi.l &0x00ff00ff,USER_FPSR(%a6) 759 760 clr.l %d0 761 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec 762 763 tst.b %d1 764 bne.b _L1_2s 765 bsr.l scos # operand is a NORM 766 bra.b _L1_6s 767_L1_2s: 768 cmpi.b %d1,&ZERO # is operand a ZERO? 769 bne.b _L1_3s # no 770 bsr.l ld_pone # yes 771 bra.b _L1_6s 772_L1_3s: 773 cmpi.b %d1,&INF # is operand an INF? 774 bne.b _L1_4s # no 775 bsr.l t_operr # yes 776 bra.b _L1_6s 777_L1_4s: 778 cmpi.b %d1,&QNAN # is operand a QNAN? 
779 bne.b _L1_5s # no 780 bsr.l src_qnan # yes 781 bra.b _L1_6s 782_L1_5s: 783 bsr.l scosd # operand is a DENORM 784_L1_6s: 785 786# 787# Result is now in FP0 788# 789 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 790 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs 791 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 792 unlk %a6 793 rts 794 795 global _fcosd_ 796_fcosd_: 797 link %a6,&-LOCAL_SIZE 798 799 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 800 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs 801 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 802 803 fmov.l &0x0,%fpcr # zero FPCR 804 805# 806# copy, convert, and tag input argument 807# 808 fmov.d 0x8(%a6),%fp0 # load dbl input 809 fmov.x %fp0,FP_SRC(%a6) 810 lea FP_SRC(%a6),%a0 811 bsr.l tag # fetch operand type 812 mov.b %d0,STAG(%a6) 813 mov.b %d0,%d1 814 815 andi.l &0x00ff00ff,USER_FPSR(%a6) 816 817 clr.l %d0 818 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec 819 820 mov.b %d1,STAG(%a6) 821 tst.b %d1 822 bne.b _L1_2d 823 bsr.l scos # operand is a NORM 824 bra.b _L1_6d 825_L1_2d: 826 cmpi.b %d1,&ZERO # is operand a ZERO? 827 bne.b _L1_3d # no 828 bsr.l ld_pone # yes 829 bra.b _L1_6d 830_L1_3d: 831 cmpi.b %d1,&INF # is operand an INF? 832 bne.b _L1_4d # no 833 bsr.l t_operr # yes 834 bra.b _L1_6d 835_L1_4d: 836 cmpi.b %d1,&QNAN # is operand a QNAN? 
837 bne.b _L1_5d # no 838 bsr.l src_qnan # yes 839 bra.b _L1_6d 840_L1_5d: 841 bsr.l scosd # operand is a DENORM 842_L1_6d: 843 844# 845# Result is now in FP0 846# 847 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 848 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs 849 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 850 unlk %a6 851 rts 852 853 global _fcosx_ 854_fcosx_: 855 link %a6,&-LOCAL_SIZE 856 857 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 858 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs 859 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 860 861 fmov.l &0x0,%fpcr # zero FPCR 862 863# 864# copy, convert, and tag input argument 865# 866 lea FP_SRC(%a6),%a0 867 mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input 868 mov.l 0x8+0x4(%a6),0x4(%a0) 869 mov.l 0x8+0x8(%a6),0x8(%a0) 870 bsr.l tag # fetch operand type 871 mov.b %d0,STAG(%a6) 872 mov.b %d0,%d1 873 874 andi.l &0x00ff00ff,USER_FPSR(%a6) 875 876 clr.l %d0 877 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec 878 879 tst.b %d1 880 bne.b _L1_2x 881 bsr.l scos # operand is a NORM 882 bra.b _L1_6x 883_L1_2x: 884 cmpi.b %d1,&ZERO # is operand a ZERO? 885 bne.b _L1_3x # no 886 bsr.l ld_pone # yes 887 bra.b _L1_6x 888_L1_3x: 889 cmpi.b %d1,&INF # is operand an INF? 890 bne.b _L1_4x # no 891 bsr.l t_operr # yes 892 bra.b _L1_6x 893_L1_4x: 894 cmpi.b %d1,&QNAN # is operand a QNAN? 
895 bne.b _L1_5x # no 896 bsr.l src_qnan # yes 897 bra.b _L1_6x 898_L1_5x: 899 bsr.l scosd # operand is a DENORM 900_L1_6x: 901 902# 903# Result is now in FP0 904# 905 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 906 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs 907 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 908 unlk %a6 909 rts 910 911 912######################################################################### 913# MONADIC TEMPLATE # 914######################################################################### 915 global _fsinhs_ 916_fsinhs_: 917 link %a6,&-LOCAL_SIZE 918 919 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 920 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs 921 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 922 923 fmov.l &0x0,%fpcr # zero FPCR 924 925# 926# copy, convert, and tag input argument 927# 928 fmov.s 0x8(%a6),%fp0 # load sgl input 929 fmov.x %fp0,FP_SRC(%a6) 930 lea FP_SRC(%a6),%a0 931 bsr.l tag # fetch operand type 932 mov.b %d0,STAG(%a6) 933 mov.b %d0,%d1 934 935 andi.l &0x00ff00ff,USER_FPSR(%a6) 936 937 clr.l %d0 938 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec 939 940 tst.b %d1 941 bne.b _L2_2s 942 bsr.l ssinh # operand is a NORM 943 bra.b _L2_6s 944_L2_2s: 945 cmpi.b %d1,&ZERO # is operand a ZERO? 946 bne.b _L2_3s # no 947 bsr.l src_zero # yes 948 bra.b _L2_6s 949_L2_3s: 950 cmpi.b %d1,&INF # is operand an INF? 951 bne.b _L2_4s # no 952 bsr.l src_inf # yes 953 bra.b _L2_6s 954_L2_4s: 955 cmpi.b %d1,&QNAN # is operand a QNAN? 
956 bne.b _L2_5s # no 957 bsr.l src_qnan # yes 958 bra.b _L2_6s 959_L2_5s: 960 bsr.l ssinhd # operand is a DENORM 961_L2_6s: 962 963# 964# Result is now in FP0 965# 966 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 967 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs 968 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 969 unlk %a6 970 rts 971 972 global _fsinhd_ 973_fsinhd_: 974 link %a6,&-LOCAL_SIZE 975 976 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 977 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs 978 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 979 980 fmov.l &0x0,%fpcr # zero FPCR 981 982# 983# copy, convert, and tag input argument 984# 985 fmov.d 0x8(%a6),%fp0 # load dbl input 986 fmov.x %fp0,FP_SRC(%a6) 987 lea FP_SRC(%a6),%a0 988 bsr.l tag # fetch operand type 989 mov.b %d0,STAG(%a6) 990 mov.b %d0,%d1 991 992 andi.l &0x00ff00ff,USER_FPSR(%a6) 993 994 clr.l %d0 995 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec 996 997 mov.b %d1,STAG(%a6) 998 tst.b %d1 999 bne.b _L2_2d 1000 bsr.l ssinh # operand is a NORM 1001 bra.b _L2_6d 1002_L2_2d: 1003 cmpi.b %d1,&ZERO # is operand a ZERO? 1004 bne.b _L2_3d # no 1005 bsr.l src_zero # yes 1006 bra.b _L2_6d 1007_L2_3d: 1008 cmpi.b %d1,&INF # is operand an INF? 1009 bne.b _L2_4d # no 1010 bsr.l src_inf # yes 1011 bra.b _L2_6d 1012_L2_4d: 1013 cmpi.b %d1,&QNAN # is operand a QNAN? 
1014 bne.b _L2_5d # no 1015 bsr.l src_qnan # yes 1016 bra.b _L2_6d 1017_L2_5d: 1018 bsr.l ssinhd # operand is a DENORM 1019_L2_6d: 1020 1021# 1022# Result is now in FP0 1023# 1024 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 1025 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs 1026 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 1027 unlk %a6 1028 rts 1029 1030 global _fsinhx_ 1031_fsinhx_: 1032 link %a6,&-LOCAL_SIZE 1033 1034 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 1035 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs 1036 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 1037 1038 fmov.l &0x0,%fpcr # zero FPCR 1039 1040# 1041# copy, convert, and tag input argument 1042# 1043 lea FP_SRC(%a6),%a0 1044 mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input 1045 mov.l 0x8+0x4(%a6),0x4(%a0) 1046 mov.l 0x8+0x8(%a6),0x8(%a0) 1047 bsr.l tag # fetch operand type 1048 mov.b %d0,STAG(%a6) 1049 mov.b %d0,%d1 1050 1051 andi.l &0x00ff00ff,USER_FPSR(%a6) 1052 1053 clr.l %d0 1054 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec 1055 1056 tst.b %d1 1057 bne.b _L2_2x 1058 bsr.l ssinh # operand is a NORM 1059 bra.b _L2_6x 1060_L2_2x: 1061 cmpi.b %d1,&ZERO # is operand a ZERO? 1062 bne.b _L2_3x # no 1063 bsr.l src_zero # yes 1064 bra.b _L2_6x 1065_L2_3x: 1066 cmpi.b %d1,&INF # is operand an INF? 1067 bne.b _L2_4x # no 1068 bsr.l src_inf # yes 1069 bra.b _L2_6x 1070_L2_4x: 1071 cmpi.b %d1,&QNAN # is operand a QNAN? 
1072 bne.b _L2_5x # no 1073 bsr.l src_qnan # yes 1074 bra.b _L2_6x 1075_L2_5x: 1076 bsr.l ssinhd # operand is a DENORM 1077_L2_6x: 1078 1079# 1080# Result is now in FP0 1081# 1082 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 1083 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs 1084 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 1085 unlk %a6 1086 rts 1087 1088 1089######################################################################### 1090# MONADIC TEMPLATE # 1091######################################################################### 1092 global _flognp1s_ 1093_flognp1s_: 1094 link %a6,&-LOCAL_SIZE 1095 1096 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 1097 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs 1098 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 1099 1100 fmov.l &0x0,%fpcr # zero FPCR 1101 1102# 1103# copy, convert, and tag input argument 1104# 1105 fmov.s 0x8(%a6),%fp0 # load sgl input 1106 fmov.x %fp0,FP_SRC(%a6) 1107 lea FP_SRC(%a6),%a0 1108 bsr.l tag # fetch operand type 1109 mov.b %d0,STAG(%a6) 1110 mov.b %d0,%d1 1111 1112 andi.l &0x00ff00ff,USER_FPSR(%a6) 1113 1114 clr.l %d0 1115 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec 1116 1117 tst.b %d1 1118 bne.b _L3_2s 1119 bsr.l slognp1 # operand is a NORM 1120 bra.b _L3_6s 1121_L3_2s: 1122 cmpi.b %d1,&ZERO # is operand a ZERO? 1123 bne.b _L3_3s # no 1124 bsr.l src_zero # yes 1125 bra.b _L3_6s 1126_L3_3s: 1127 cmpi.b %d1,&INF # is operand an INF? 1128 bne.b _L3_4s # no 1129 bsr.l sopr_inf # yes 1130 bra.b _L3_6s 1131_L3_4s: 1132 cmpi.b %d1,&QNAN # is operand a QNAN? 
1133 bne.b _L3_5s # no 1134 bsr.l src_qnan # yes 1135 bra.b _L3_6s 1136_L3_5s: 1137 bsr.l slognp1d # operand is a DENORM 1138_L3_6s: 1139 1140# 1141# Result is now in FP0 1142# 1143 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 1144 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs 1145 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 1146 unlk %a6 1147 rts 1148 1149 global _flognp1d_ 1150_flognp1d_: 1151 link %a6,&-LOCAL_SIZE 1152 1153 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 1154 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs 1155 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 1156 1157 fmov.l &0x0,%fpcr # zero FPCR 1158 1159# 1160# copy, convert, and tag input argument 1161# 1162 fmov.d 0x8(%a6),%fp0 # load dbl input 1163 fmov.x %fp0,FP_SRC(%a6) 1164 lea FP_SRC(%a6),%a0 1165 bsr.l tag # fetch operand type 1166 mov.b %d0,STAG(%a6) 1167 mov.b %d0,%d1 1168 1169 andi.l &0x00ff00ff,USER_FPSR(%a6) 1170 1171 clr.l %d0 1172 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec 1173 1174 mov.b %d1,STAG(%a6) 1175 tst.b %d1 1176 bne.b _L3_2d 1177 bsr.l slognp1 # operand is a NORM 1178 bra.b _L3_6d 1179_L3_2d: 1180 cmpi.b %d1,&ZERO # is operand a ZERO? 1181 bne.b _L3_3d # no 1182 bsr.l src_zero # yes 1183 bra.b _L3_6d 1184_L3_3d: 1185 cmpi.b %d1,&INF # is operand an INF? 1186 bne.b _L3_4d # no 1187 bsr.l sopr_inf # yes 1188 bra.b _L3_6d 1189_L3_4d: 1190 cmpi.b %d1,&QNAN # is operand a QNAN? 
1191 bne.b _L3_5d # no 1192 bsr.l src_qnan # yes 1193 bra.b _L3_6d 1194_L3_5d: 1195 bsr.l slognp1d # operand is a DENORM 1196_L3_6d: 1197 1198# 1199# Result is now in FP0 1200# 1201 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 1202 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs 1203 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 1204 unlk %a6 1205 rts 1206 1207 global _flognp1x_ 1208_flognp1x_: 1209 link %a6,&-LOCAL_SIZE 1210 1211 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 1212 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs 1213 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 1214 1215 fmov.l &0x0,%fpcr # zero FPCR 1216 1217# 1218# copy, convert, and tag input argument 1219# 1220 lea FP_SRC(%a6),%a0 1221 mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input 1222 mov.l 0x8+0x4(%a6),0x4(%a0) 1223 mov.l 0x8+0x8(%a6),0x8(%a0) 1224 bsr.l tag # fetch operand type 1225 mov.b %d0,STAG(%a6) 1226 mov.b %d0,%d1 1227 1228 andi.l &0x00ff00ff,USER_FPSR(%a6) 1229 1230 clr.l %d0 1231 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec 1232 1233 tst.b %d1 1234 bne.b _L3_2x 1235 bsr.l slognp1 # operand is a NORM 1236 bra.b _L3_6x 1237_L3_2x: 1238 cmpi.b %d1,&ZERO # is operand a ZERO? 1239 bne.b _L3_3x # no 1240 bsr.l src_zero # yes 1241 bra.b _L3_6x 1242_L3_3x: 1243 cmpi.b %d1,&INF # is operand an INF? 1244 bne.b _L3_4x # no 1245 bsr.l sopr_inf # yes 1246 bra.b _L3_6x 1247_L3_4x: 1248 cmpi.b %d1,&QNAN # is operand a QNAN? 
1249 bne.b _L3_5x # no 1250 bsr.l src_qnan # yes 1251 bra.b _L3_6x 1252_L3_5x: 1253 bsr.l slognp1d # operand is a DENORM 1254_L3_6x: 1255 1256# 1257# Result is now in FP0 1258# 1259 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 1260 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs 1261 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 1262 unlk %a6 1263 rts 1264 1265 1266######################################################################### 1267# MONADIC TEMPLATE # 1268######################################################################### 1269 global _fetoxm1s_ 1270_fetoxm1s_: 1271 link %a6,&-LOCAL_SIZE 1272 1273 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 1274 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs 1275 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 1276 1277 fmov.l &0x0,%fpcr # zero FPCR 1278 1279# 1280# copy, convert, and tag input argument 1281# 1282 fmov.s 0x8(%a6),%fp0 # load sgl input 1283 fmov.x %fp0,FP_SRC(%a6) 1284 lea FP_SRC(%a6),%a0 1285 bsr.l tag # fetch operand type 1286 mov.b %d0,STAG(%a6) 1287 mov.b %d0,%d1 1288 1289 andi.l &0x00ff00ff,USER_FPSR(%a6) 1290 1291 clr.l %d0 1292 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec 1293 1294 tst.b %d1 1295 bne.b _L4_2s 1296 bsr.l setoxm1 # operand is a NORM 1297 bra.b _L4_6s 1298_L4_2s: 1299 cmpi.b %d1,&ZERO # is operand a ZERO? 1300 bne.b _L4_3s # no 1301 bsr.l src_zero # yes 1302 bra.b _L4_6s 1303_L4_3s: 1304 cmpi.b %d1,&INF # is operand an INF? 1305 bne.b _L4_4s # no 1306 bsr.l setoxm1i # yes 1307 bra.b _L4_6s 1308_L4_4s: 1309 cmpi.b %d1,&QNAN # is operand a QNAN? 
# ---- remainder of _fetoxm1s_: QNAN / fall-through dispatch and common exit ----
	bne.b		_L4_5s			# no
	bsr.l		src_qnan		# yes
	bra.b		_L4_6s
_L4_5s:
	bsr.l		setoxm1d		# operand is a DENORM
_L4_6s:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1 only; fp0 holds the result
	unlk		%a6
	rts

#
# _fetoxm1d_ -- entry taking a double-precision operand at 0x8(%a6).
#	The operand is converted to extended format in FP_SRC, classified
#	by tag(), and dispatched on the returned tag:
#	  NORM -> setoxm1     ZERO -> src_zero    INF -> setoxm1i
#	  QNAN -> src_qnan    other -> setoxm1d
#	%d0 carries the FPCR_MODE byte into the handler; result is in fp0.
#
	global		_fetoxm1d_
_fetoxm1d_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.d		0x8(%a6),%fp0		# load dbl input
	fmov.x		%fp0,FP_SRC(%a6)
	lea		FP_SRC(%a6),%a0
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# keep tag in d1; d0 is reused below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bytes 3 and 1 of USER_FPSR

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	mov.b		%d1,STAG(%a6)		# NOTE(review): same tag was already stored from %d0 above; looks redundant
	tst.b		%d1
	bne.b		_L4_2d
	bsr.l		setoxm1			# operand is a NORM
	bra.b		_L4_6d
_L4_2d:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L4_3d			# no
	bsr.l		src_zero		# yes
	bra.b		_L4_6d
_L4_3d:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L4_4d			# no
	bsr.l		setoxm1i		# yes
	bra.b		_L4_6d
_L4_4d:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# ---- remainder of _fetoxm1d_: QNAN / fall-through dispatch and common exit ----
	bne.b		_L4_5d			# no
	bsr.l		src_qnan		# yes
	bra.b		_L4_6d
_L4_5d:
	bsr.l		setoxm1d		# operand is a DENORM
_L4_6d:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1 only; fp0 holds the result
	unlk		%a6
	rts

#
# _fetoxm1x_ -- entry taking an extended-precision operand (three
#	longwords starting at 0x8(%a6)).
#	The operand is copied to FP_SRC, classified by tag(), and
#	dispatched on the returned tag:
#	  NORM -> setoxm1     ZERO -> src_zero    INF -> setoxm1i
#	  QNAN -> src_qnan    other -> setoxm1d
#	%d0 carries the FPCR_MODE byte into the handler; result is in fp0.
#
	global		_fetoxm1x_
_fetoxm1x_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	lea		FP_SRC(%a6),%a0
	mov.l		0x8+0x0(%a6),0x0(%a0)	# load ext input
	mov.l		0x8+0x4(%a6),0x4(%a0)
	mov.l		0x8+0x8(%a6),0x8(%a0)
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# keep tag in d1; d0 is reused below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bytes 3 and 1 of USER_FPSR

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1
	bne.b		_L4_2x
	bsr.l		setoxm1			# operand is a NORM
	bra.b		_L4_6x
_L4_2x:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L4_3x			# no
	bsr.l		src_zero		# yes
	bra.b		_L4_6x
_L4_3x:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L4_4x			# no
	bsr.l		setoxm1i		# yes
	bra.b		_L4_6x
_L4_4x:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# ---- remainder of _fetoxm1x_: QNAN / fall-through dispatch and common exit ----
	bne.b		_L4_5x			# no
	bsr.l		src_qnan		# yes
	bra.b		_L4_6x
_L4_5x:
	bsr.l		setoxm1d		# operand is a DENORM
_L4_6x:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1 only; fp0 holds the result
	unlk		%a6
	rts


#########################################################################
# MONADIC TEMPLATE							#
#########################################################################
#
# _ftanhs_ -- entry taking a single-precision operand at 0x8(%a6).
#	The operand is converted to extended format in FP_SRC, classified
#	by tag(), and dispatched on the returned tag:
#	  NORM -> stanh       ZERO -> src_zero    INF -> src_one
#	  QNAN -> src_qnan    other -> stanhd
#	%d0 carries the FPCR_MODE byte into the handler; result is in fp0.
#
	global		_ftanhs_
_ftanhs_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.s		0x8(%a6),%fp0		# load sgl input
	fmov.x		%fp0,FP_SRC(%a6)
	lea		FP_SRC(%a6),%a0
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# keep tag in d1; d0 is reused below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bytes 3 and 1 of USER_FPSR

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1
	bne.b		_L5_2s
	bsr.l		stanh			# operand is a NORM
	bra.b		_L5_6s
_L5_2s:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L5_3s			# no
	bsr.l		src_zero		# yes
	bra.b		_L5_6s
_L5_3s:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L5_4s			# no
	bsr.l		src_one			# yes
	bra.b		_L5_6s
_L5_4s:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# ---- remainder of _ftanhs_: QNAN / fall-through dispatch and common exit ----
	bne.b		_L5_5s			# no
	bsr.l		src_qnan		# yes
	bra.b		_L5_6s
_L5_5s:
	bsr.l		stanhd			# operand is a DENORM
_L5_6s:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1 only; fp0 holds the result
	unlk		%a6
	rts

#
# _ftanhd_ -- entry taking a double-precision operand at 0x8(%a6).
#	The operand is converted to extended format in FP_SRC, classified
#	by tag(), and dispatched on the returned tag:
#	  NORM -> stanh       ZERO -> src_zero    INF -> src_one
#	  QNAN -> src_qnan    other -> stanhd
#	%d0 carries the FPCR_MODE byte into the handler; result is in fp0.
#
	global		_ftanhd_
_ftanhd_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.d		0x8(%a6),%fp0		# load dbl input
	fmov.x		%fp0,FP_SRC(%a6)
	lea		FP_SRC(%a6),%a0
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# keep tag in d1; d0 is reused below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bytes 3 and 1 of USER_FPSR

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	mov.b		%d1,STAG(%a6)		# NOTE(review): same tag was already stored from %d0 above; looks redundant
	tst.b		%d1
	bne.b		_L5_2d
	bsr.l		stanh			# operand is a NORM
	bra.b		_L5_6d
_L5_2d:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L5_3d			# no
	bsr.l		src_zero		# yes
	bra.b		_L5_6d
_L5_3d:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L5_4d			# no
	bsr.l		src_one			# yes
	bra.b		_L5_6d
_L5_4d:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# ---- remainder of _ftanhd_: QNAN / fall-through dispatch and common exit ----
	bne.b		_L5_5d			# no
	bsr.l		src_qnan		# yes
	bra.b		_L5_6d
_L5_5d:
	bsr.l		stanhd			# operand is a DENORM
_L5_6d:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1 only; fp0 holds the result
	unlk		%a6
	rts

#
# _ftanhx_ -- entry taking an extended-precision operand (three
#	longwords starting at 0x8(%a6)).
#	The operand is copied to FP_SRC, classified by tag(), and
#	dispatched on the returned tag:
#	  NORM -> stanh       ZERO -> src_zero    INF -> src_one
#	  QNAN -> src_qnan    other -> stanhd
#	%d0 carries the FPCR_MODE byte into the handler; result is in fp0.
#
	global		_ftanhx_
_ftanhx_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	lea		FP_SRC(%a6),%a0
	mov.l		0x8+0x0(%a6),0x0(%a0)	# load ext input
	mov.l		0x8+0x4(%a6),0x4(%a0)
	mov.l		0x8+0x8(%a6),0x8(%a0)
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# keep tag in d1; d0 is reused below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bytes 3 and 1 of USER_FPSR

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1
	bne.b		_L5_2x
	bsr.l		stanh			# operand is a NORM
	bra.b		_L5_6x
_L5_2x:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L5_3x			# no
	bsr.l		src_zero		# yes
	bra.b		_L5_6x
_L5_3x:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L5_4x			# no
	bsr.l		src_one			# yes
	bra.b		_L5_6x
_L5_4x:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# ---- remainder of _ftanhx_: QNAN / fall-through dispatch and common exit ----
	bne.b		_L5_5x			# no
	bsr.l		src_qnan		# yes
	bra.b		_L5_6x
_L5_5x:
	bsr.l		stanhd			# operand is a DENORM
_L5_6x:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1 only; fp0 holds the result
	unlk		%a6
	rts


#########################################################################
# MONADIC TEMPLATE							#
#########################################################################
#
# _fatans_ -- entry taking a single-precision operand at 0x8(%a6).
#	The operand is converted to extended format in FP_SRC, classified
#	by tag(), and dispatched on the returned tag:
#	  NORM -> satan       ZERO -> src_zero    INF -> spi_2
#	  QNAN -> src_qnan    other -> satand
#	%d0 carries the FPCR_MODE byte into the handler; result is in fp0.
#
	global		_fatans_
_fatans_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.s		0x8(%a6),%fp0		# load sgl input
	fmov.x		%fp0,FP_SRC(%a6)
	lea		FP_SRC(%a6),%a0
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# keep tag in d1; d0 is reused below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bytes 3 and 1 of USER_FPSR

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1
	bne.b		_L6_2s
	bsr.l		satan			# operand is a NORM
	bra.b		_L6_6s
_L6_2s:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L6_3s			# no
	bsr.l		src_zero		# yes
	bra.b		_L6_6s
_L6_3s:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L6_4s			# no
	bsr.l		spi_2			# yes
	bra.b		_L6_6s
_L6_4s:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# ---- remainder of _fatans_: QNAN / fall-through dispatch and common exit ----
	bne.b		_L6_5s			# no
	bsr.l		src_qnan		# yes
	bra.b		_L6_6s
_L6_5s:
	bsr.l		satand			# operand is a DENORM
_L6_6s:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1 only; fp0 holds the result
	unlk		%a6
	rts

#
# _fatand_ -- entry taking a double-precision operand at 0x8(%a6).
#	The operand is converted to extended format in FP_SRC, classified
#	by tag(), and dispatched on the returned tag:
#	  NORM -> satan       ZERO -> src_zero    INF -> spi_2
#	  QNAN -> src_qnan    other -> satand
#	%d0 carries the FPCR_MODE byte into the handler; result is in fp0.
#
	global		_fatand_
_fatand_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.d		0x8(%a6),%fp0		# load dbl input
	fmov.x		%fp0,FP_SRC(%a6)
	lea		FP_SRC(%a6),%a0
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# keep tag in d1; d0 is reused below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bytes 3 and 1 of USER_FPSR

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	mov.b		%d1,STAG(%a6)		# NOTE(review): same tag was already stored from %d0 above; looks redundant
	tst.b		%d1
	bne.b		_L6_2d
	bsr.l		satan			# operand is a NORM
	bra.b		_L6_6d
_L6_2d:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L6_3d			# no
	bsr.l		src_zero		# yes
	bra.b		_L6_6d
_L6_3d:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L6_4d			# no
	bsr.l		spi_2			# yes
	bra.b		_L6_6d
_L6_4d:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# ---- remainder of _fatand_: QNAN / fall-through dispatch and common exit ----
	bne.b		_L6_5d			# no
	bsr.l		src_qnan		# yes
	bra.b		_L6_6d
_L6_5d:
	bsr.l		satand			# operand is a DENORM
_L6_6d:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1 only; fp0 holds the result
	unlk		%a6
	rts

#
# _fatanx_ -- entry taking an extended-precision operand (three
#	longwords starting at 0x8(%a6)).
#	The operand is copied to FP_SRC, classified by tag(), and
#	dispatched on the returned tag:
#	  NORM -> satan       ZERO -> src_zero    INF -> spi_2
#	  QNAN -> src_qnan    other -> satand
#	%d0 carries the FPCR_MODE byte into the handler; result is in fp0.
#
	global		_fatanx_
_fatanx_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	lea		FP_SRC(%a6),%a0
	mov.l		0x8+0x0(%a6),0x0(%a0)	# load ext input
	mov.l		0x8+0x4(%a6),0x4(%a0)
	mov.l		0x8+0x8(%a6),0x8(%a0)
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# keep tag in d1; d0 is reused below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bytes 3 and 1 of USER_FPSR

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1
	bne.b		_L6_2x
	bsr.l		satan			# operand is a NORM
	bra.b		_L6_6x
_L6_2x:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L6_3x			# no
	bsr.l		src_zero		# yes
	bra.b		_L6_6x
_L6_3x:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L6_4x			# no
	bsr.l		spi_2			# yes
	bra.b		_L6_6x
_L6_4x:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# ---- remainder of _fatanx_: QNAN / fall-through dispatch and common exit ----
	bne.b		_L6_5x			# no
	bsr.l		src_qnan		# yes
	bra.b		_L6_6x
_L6_5x:
	bsr.l		satand			# operand is a DENORM
_L6_6x:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1 only; fp0 holds the result
	unlk		%a6
	rts


#########################################################################
# MONADIC TEMPLATE							#
#########################################################################
#
# _fasins_ -- entry taking a single-precision operand at 0x8(%a6).
#	The operand is converted to extended format in FP_SRC, classified
#	by tag(), and dispatched on the returned tag:
#	  NORM -> sasin       ZERO -> src_zero    INF -> t_operr
#	  QNAN -> src_qnan    other -> sasind
#	%d0 carries the FPCR_MODE byte into the handler; result is in fp0.
#
	global		_fasins_
_fasins_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.s		0x8(%a6),%fp0		# load sgl input
	fmov.x		%fp0,FP_SRC(%a6)
	lea		FP_SRC(%a6),%a0
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# keep tag in d1; d0 is reused below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bytes 3 and 1 of USER_FPSR

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1
	bne.b		_L7_2s
	bsr.l		sasin			# operand is a NORM
	bra.b		_L7_6s
_L7_2s:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L7_3s			# no
	bsr.l		src_zero		# yes
	bra.b		_L7_6s
_L7_3s:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L7_4s			# no
	bsr.l		t_operr			# yes
	bra.b		_L7_6s
_L7_4s:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# ---- remainder of _fasins_: QNAN / fall-through dispatch and common exit ----
	bne.b		_L7_5s			# no
	bsr.l		src_qnan		# yes
	bra.b		_L7_6s
_L7_5s:
	bsr.l		sasind			# operand is a DENORM
_L7_6s:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1 only; fp0 holds the result
	unlk		%a6
	rts

#
# _fasind_ -- entry taking a double-precision operand at 0x8(%a6).
#	The operand is converted to extended format in FP_SRC, classified
#	by tag(), and dispatched on the returned tag:
#	  NORM -> sasin       ZERO -> src_zero    INF -> t_operr
#	  QNAN -> src_qnan    other -> sasind
#	%d0 carries the FPCR_MODE byte into the handler; result is in fp0.
#
	global		_fasind_
_fasind_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.d		0x8(%a6),%fp0		# load dbl input
	fmov.x		%fp0,FP_SRC(%a6)
	lea		FP_SRC(%a6),%a0
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# keep tag in d1; d0 is reused below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bytes 3 and 1 of USER_FPSR

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	mov.b		%d1,STAG(%a6)		# NOTE(review): same tag was already stored from %d0 above; looks redundant
	tst.b		%d1
	bne.b		_L7_2d
	bsr.l		sasin			# operand is a NORM
	bra.b		_L7_6d
_L7_2d:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L7_3d			# no
	bsr.l		src_zero		# yes
	bra.b		_L7_6d
_L7_3d:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L7_4d			# no
	bsr.l		t_operr			# yes
	bra.b		_L7_6d
_L7_4d:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# ---- remainder of _fasind_: QNAN / fall-through dispatch and common exit ----
	bne.b		_L7_5d			# no
	bsr.l		src_qnan		# yes
	bra.b		_L7_6d
_L7_5d:
	bsr.l		sasind			# operand is a DENORM
_L7_6d:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1 only; fp0 holds the result
	unlk		%a6
	rts

#
# _fasinx_ -- entry taking an extended-precision operand (three
#	longwords starting at 0x8(%a6)).
#	The operand is copied to FP_SRC, classified by tag(), and
#	dispatched on the returned tag:
#	  NORM -> sasin       ZERO -> src_zero    INF -> t_operr
#	  QNAN -> src_qnan    other -> sasind
#	%d0 carries the FPCR_MODE byte into the handler; result is in fp0.
#
	global		_fasinx_
_fasinx_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	lea		FP_SRC(%a6),%a0
	mov.l		0x8+0x0(%a6),0x0(%a0)	# load ext input
	mov.l		0x8+0x4(%a6),0x4(%a0)
	mov.l		0x8+0x8(%a6),0x8(%a0)
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# keep tag in d1; d0 is reused below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bytes 3 and 1 of USER_FPSR

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1
	bne.b		_L7_2x
	bsr.l		sasin			# operand is a NORM
	bra.b		_L7_6x
_L7_2x:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L7_3x			# no
	bsr.l		src_zero		# yes
	bra.b		_L7_6x
_L7_3x:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L7_4x			# no
	bsr.l		t_operr			# yes
	bra.b		_L7_6x
_L7_4x:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# ---- remainder of _fasinx_: QNAN / fall-through dispatch and common exit ----
	bne.b		_L7_5x			# no
	bsr.l		src_qnan		# yes
	bra.b		_L7_6x
_L7_5x:
	bsr.l		sasind			# operand is a DENORM
_L7_6x:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1 only; fp0 holds the result
	unlk		%a6
	rts


#########################################################################
# MONADIC TEMPLATE							#
#########################################################################
#
# _fatanhs_ -- entry taking a single-precision operand at 0x8(%a6).
#	The operand is converted to extended format in FP_SRC, classified
#	by tag(), and dispatched on the returned tag:
#	  NORM -> satanh      ZERO -> src_zero    INF -> t_operr
#	  QNAN -> src_qnan    other -> satanhd
#	%d0 carries the FPCR_MODE byte into the handler; result is in fp0.
#
	global		_fatanhs_
_fatanhs_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.s		0x8(%a6),%fp0		# load sgl input
	fmov.x		%fp0,FP_SRC(%a6)
	lea		FP_SRC(%a6),%a0
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# keep tag in d1; d0 is reused below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bytes 3 and 1 of USER_FPSR

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1
	bne.b		_L8_2s
	bsr.l		satanh			# operand is a NORM
	bra.b		_L8_6s
_L8_2s:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L8_3s			# no
	bsr.l		src_zero		# yes
	bra.b		_L8_6s
_L8_3s:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L8_4s			# no
	bsr.l		t_operr			# yes
	bra.b		_L8_6s
_L8_4s:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# ---- remainder of _fatanhs_: QNAN / fall-through dispatch and common exit ----
	bne.b		_L8_5s			# no
	bsr.l		src_qnan		# yes
	bra.b		_L8_6s
_L8_5s:
	bsr.l		satanhd			# operand is a DENORM
_L8_6s:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1 only; fp0 holds the result
	unlk		%a6
	rts

#
# _fatanhd_ -- entry taking a double-precision operand at 0x8(%a6).
#	The operand is converted to extended format in FP_SRC, classified
#	by tag(), and dispatched on the returned tag:
#	  NORM -> satanh      ZERO -> src_zero    INF -> t_operr
#	  QNAN -> src_qnan    other -> satanhd
#	%d0 carries the FPCR_MODE byte into the handler; result is in fp0.
#
	global		_fatanhd_
_fatanhd_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.d		0x8(%a6),%fp0		# load dbl input
	fmov.x		%fp0,FP_SRC(%a6)
	lea		FP_SRC(%a6),%a0
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# keep tag in d1; d0 is reused below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bytes 3 and 1 of USER_FPSR

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	mov.b		%d1,STAG(%a6)		# NOTE(review): same tag was already stored from %d0 above; looks redundant
	tst.b		%d1
	bne.b		_L8_2d
	bsr.l		satanh			# operand is a NORM
	bra.b		_L8_6d
_L8_2d:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L8_3d			# no
	bsr.l		src_zero		# yes
	bra.b		_L8_6d
_L8_3d:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L8_4d			# no
	bsr.l		t_operr			# yes
	bra.b		_L8_6d
_L8_4d:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# ---- remainder of _fatanhd_: QNAN / fall-through dispatch and common exit ----
	bne.b		_L8_5d			# no
	bsr.l		src_qnan		# yes
	bra.b		_L8_6d
_L8_5d:
	bsr.l		satanhd			# operand is a DENORM
_L8_6d:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1 only; fp0 holds the result
	unlk		%a6
	rts

#
# _fatanhx_ -- entry taking an extended-precision operand (three
#	longwords starting at 0x8(%a6)).
#	The operand is copied to FP_SRC, classified by tag(), and
#	dispatched on the returned tag:
#	  NORM -> satanh      ZERO -> src_zero    INF -> t_operr
#	  QNAN -> src_qnan    other -> satanhd
#	%d0 carries the FPCR_MODE byte into the handler; result is in fp0.
#
	global		_fatanhx_
_fatanhx_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	lea		FP_SRC(%a6),%a0
	mov.l		0x8+0x0(%a6),0x0(%a0)	# load ext input
	mov.l		0x8+0x4(%a6),0x4(%a0)
	mov.l		0x8+0x8(%a6),0x8(%a0)
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# keep tag in d1; d0 is reused below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bytes 3 and 1 of USER_FPSR

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1
	bne.b		_L8_2x
	bsr.l		satanh			# operand is a NORM
	bra.b		_L8_6x
_L8_2x:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L8_3x			# no
	bsr.l		src_zero		# yes
	bra.b		_L8_6x
_L8_3x:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L8_4x			# no
	bsr.l		t_operr			# yes
	bra.b		_L8_6x
_L8_4x:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# ---- remainder of _fatanhx_: QNAN / fall-through dispatch and common exit ----
	bne.b		_L8_5x			# no
	bsr.l		src_qnan		# yes
	bra.b		_L8_6x
_L8_5x:
	bsr.l		satanhd			# operand is a DENORM
_L8_6x:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1 only; fp0 holds the result
	unlk		%a6
	rts


#########################################################################
# MONADIC TEMPLATE							#
#########################################################################
#
# _ftans_ -- entry taking a single-precision operand at 0x8(%a6).
#	The operand is converted to extended format in FP_SRC, classified
#	by tag(), and dispatched on the returned tag:
#	  NORM -> stan        ZERO -> src_zero    INF -> t_operr
#	  QNAN -> src_qnan    other -> stand
#	%d0 carries the FPCR_MODE byte into the handler; result is in fp0.
#
	global		_ftans_
_ftans_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.s		0x8(%a6),%fp0		# load sgl input
	fmov.x		%fp0,FP_SRC(%a6)
	lea		FP_SRC(%a6),%a0
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# keep tag in d1; d0 is reused below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bytes 3 and 1 of USER_FPSR

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1
	bne.b		_L9_2s
	bsr.l		stan			# operand is a NORM
	bra.b		_L9_6s
_L9_2s:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L9_3s			# no
	bsr.l		src_zero		# yes
	bra.b		_L9_6s
_L9_3s:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L9_4s			# no
	bsr.l		t_operr			# yes
	bra.b		_L9_6s
_L9_4s:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# ---- remainder of _ftans_: QNAN / fall-through dispatch and common exit ----
	bne.b		_L9_5s			# no
	bsr.l		src_qnan		# yes
	bra.b		_L9_6s
_L9_5s:
	bsr.l		stand			# operand is a DENORM
_L9_6s:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1 only; fp0 holds the result
	unlk		%a6
	rts

#
# _ftand_ -- entry taking a double-precision operand at 0x8(%a6).
#	The operand is converted to extended format in FP_SRC, classified
#	by tag(), and dispatched on the returned tag:
#	  NORM -> stan        ZERO -> src_zero    INF -> t_operr
#	  QNAN -> src_qnan    other -> stand
#	%d0 carries the FPCR_MODE byte into the handler; result is in fp0.
#
	global		_ftand_
_ftand_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.d		0x8(%a6),%fp0		# load dbl input
	fmov.x		%fp0,FP_SRC(%a6)
	lea		FP_SRC(%a6),%a0
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# keep tag in d1; d0 is reused below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bytes 3 and 1 of USER_FPSR

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	mov.b		%d1,STAG(%a6)		# NOTE(review): same tag was already stored from %d0 above; looks redundant
	tst.b		%d1
	bne.b		_L9_2d
	bsr.l		stan			# operand is a NORM
	bra.b		_L9_6d
_L9_2d:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L9_3d			# no
	bsr.l		src_zero		# yes
	bra.b		_L9_6d
_L9_3d:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L9_4d			# no
	bsr.l		t_operr			# yes
	bra.b		_L9_6d
_L9_4d:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# ---- remainder of _ftand_: QNAN / fall-through dispatch and common exit ----
	bne.b		_L9_5d			# no
	bsr.l		src_qnan		# yes
	bra.b		_L9_6d
_L9_5d:
	bsr.l		stand			# operand is a DENORM
_L9_6d:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1 only; fp0 holds the result
	unlk		%a6
	rts

#
# _ftanx_ -- entry taking an extended-precision operand (three
#	longwords starting at 0x8(%a6)).
#	The operand is copied to FP_SRC, classified by tag(), and
#	dispatched on the returned tag:
#	  NORM -> stan        ZERO -> src_zero    INF -> t_operr
#	  QNAN -> src_qnan    other -> stand
#	%d0 carries the FPCR_MODE byte into the handler; result is in fp0.
#
	global		_ftanx_
_ftanx_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	lea		FP_SRC(%a6),%a0
	mov.l		0x8+0x0(%a6),0x0(%a0)	# load ext input
	mov.l		0x8+0x4(%a6),0x4(%a0)
	mov.l		0x8+0x8(%a6),0x8(%a0)
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# keep tag in d1; d0 is reused below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bytes 3 and 1 of USER_FPSR

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1
	bne.b		_L9_2x
	bsr.l		stan			# operand is a NORM
	bra.b		_L9_6x
_L9_2x:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L9_3x			# no
	bsr.l		src_zero		# yes
	bra.b		_L9_6x
_L9_3x:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L9_4x			# no
	bsr.l		t_operr			# yes
	bra.b		_L9_6x
_L9_4x:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# ---- remainder of _ftanx_: QNAN / fall-through dispatch and common exit ----
	bne.b		_L9_5x			# no
	bsr.l		src_qnan		# yes
	bra.b		_L9_6x
_L9_5x:
	bsr.l		stand			# operand is a DENORM
_L9_6x:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1 only; fp0 holds the result
	unlk		%a6
	rts


#########################################################################
# MONADIC TEMPLATE							#
#########################################################################
#
# _fetoxs_ -- entry taking a single-precision operand at 0x8(%a6).
#	The operand is converted to extended format in FP_SRC, classified
#	by tag(), and dispatched on the returned tag:
#	  NORM -> setox       ZERO -> ld_pone     INF -> szr_inf
#	  QNAN -> src_qnan    other -> setoxd
#	%d0 carries the FPCR_MODE byte into the handler; result is in fp0.
#
	global		_fetoxs_
_fetoxs_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.s		0x8(%a6),%fp0		# load sgl input
	fmov.x		%fp0,FP_SRC(%a6)
	lea		FP_SRC(%a6),%a0
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# keep tag in d1; d0 is reused below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bytes 3 and 1 of USER_FPSR

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1
	bne.b		_L10_2s
	bsr.l		setox			# operand is a NORM
	bra.b		_L10_6s
_L10_2s:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L10_3s			# no
	bsr.l		ld_pone			# yes
	bra.b		_L10_6s
_L10_3s:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L10_4s			# no
	bsr.l		szr_inf			# yes
	bra.b		_L10_6s
_L10_4s:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# ---- remainder of _fetoxs_: QNAN / fall-through dispatch and common exit ----
	bne.b		_L10_5s			# no
	bsr.l		src_qnan		# yes
	bra.b		_L10_6s
_L10_5s:
	bsr.l		setoxd			# operand is a DENORM
_L10_6s:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1 only; fp0 holds the result
	unlk		%a6
	rts

#
# _fetoxd_ -- entry taking a double-precision operand at 0x8(%a6).
#	The operand is converted to extended format in FP_SRC, classified
#	by tag(), and dispatched on the returned tag:
#	  NORM -> setox       ZERO -> ld_pone     INF -> szr_inf
#	  QNAN -> src_qnan    other -> setoxd
#	%d0 carries the FPCR_MODE byte into the handler; result is in fp0.
#
	global		_fetoxd_
_fetoxd_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.d		0x8(%a6),%fp0		# load dbl input
	fmov.x		%fp0,FP_SRC(%a6)
	lea		FP_SRC(%a6),%a0
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# keep tag in d1; d0 is reused below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bytes 3 and 1 of USER_FPSR

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	mov.b		%d1,STAG(%a6)		# NOTE(review): same tag was already stored from %d0 above; looks redundant
	tst.b		%d1
	bne.b		_L10_2d
	bsr.l		setox			# operand is a NORM
	bra.b		_L10_6d
_L10_2d:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L10_3d			# no
	bsr.l		ld_pone			# yes
	bra.b		_L10_6d
_L10_3d:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L10_4d			# no
	bsr.l		szr_inf			# yes
	bra.b		_L10_6d
_L10_4d:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# ---- remainder of _fetoxd_: QNAN / fall-through dispatch and common exit ----
	bne.b		_L10_5d			# no
	bsr.l		src_qnan		# yes
	bra.b		_L10_6d
_L10_5d:
	bsr.l		setoxd			# operand is a DENORM
_L10_6d:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1 only; fp0 holds the result
	unlk		%a6
	rts

#
# _fetoxx_ -- entry taking an extended-precision operand (three
#	longwords starting at 0x8(%a6)).
#	The operand is copied to FP_SRC, classified by tag(), and
#	dispatched on the returned tag:
#	  NORM -> setox       ZERO -> ld_pone     INF -> szr_inf
#	  QNAN -> src_qnan    other -> setoxd
#	%d0 carries the FPCR_MODE byte into the handler; result is in fp0.
#
	global		_fetoxx_
_fetoxx_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	lea		FP_SRC(%a6),%a0
	mov.l		0x8+0x0(%a6),0x0(%a0)	# load ext input
	mov.l		0x8+0x4(%a6),0x4(%a0)
	mov.l		0x8+0x8(%a6),0x8(%a0)
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# keep tag in d1; d0 is reused below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bytes 3 and 1 of USER_FPSR

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1
	bne.b		_L10_2x
	bsr.l		setox			# operand is a NORM
	bra.b		_L10_6x
_L10_2x:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L10_3x			# no
	bsr.l		ld_pone			# yes
	bra.b		_L10_6x
_L10_3x:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L10_4x			# no
	bsr.l		szr_inf			# yes
	bra.b		_L10_6x
_L10_4x:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# ---- remainder of _fetoxx_: QNAN / fall-through dispatch and common exit ----
	bne.b		_L10_5x			# no
	bsr.l		src_qnan		# yes
	bra.b		_L10_6x
_L10_5x:
	bsr.l		setoxd			# operand is a DENORM
_L10_6x:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1 only; fp0 holds the result
	unlk		%a6
	rts


#########################################################################
# MONADIC TEMPLATE							#
#########################################################################
#
# _ftwotoxs_ -- entry taking a single-precision operand at 0x8(%a6).
#	The operand is converted to extended format in FP_SRC, classified
#	by tag(), and dispatched on the returned tag:
#	  NORM -> stwotox     ZERO -> ld_pone     INF -> szr_inf
#	  QNAN -> src_qnan    other -> stwotoxd
#	%d0 carries the FPCR_MODE byte into the handler; result is in fp0.
#
	global		_ftwotoxs_
_ftwotoxs_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.s		0x8(%a6),%fp0		# load sgl input
	fmov.x		%fp0,FP_SRC(%a6)
	lea		FP_SRC(%a6),%a0
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# keep tag in d1; d0 is reused below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bytes 3 and 1 of USER_FPSR

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1
	bne.b		_L11_2s
	bsr.l		stwotox			# operand is a NORM
	bra.b		_L11_6s
_L11_2s:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L11_3s			# no
	bsr.l		ld_pone			# yes
	bra.b		_L11_6s
_L11_3s:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L11_4s			# no
	bsr.l		szr_inf			# yes
	bra.b		_L11_6s
_L11_4s:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# ---- remainder of _ftwotoxs_: QNAN / fall-through dispatch and common exit ----
	bne.b		_L11_5s			# no
	bsr.l		src_qnan		# yes
	bra.b		_L11_6s
_L11_5s:
	bsr.l		stwotoxd		# operand is a DENORM
_L11_6s:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1 only; fp0 holds the result
	unlk		%a6
	rts

#
# _ftwotoxd_ -- entry taking a double-precision operand at 0x8(%a6).
#	The operand is converted to extended format in FP_SRC, classified
#	by tag(), and dispatched on the returned tag:
#	  NORM -> stwotox     ZERO -> ld_pone     INF -> szr_inf
#	  QNAN -> src_qnan    other -> stwotoxd
#	%d0 carries the FPCR_MODE byte into the handler; result is in fp0.
#
	global		_ftwotoxd_
_ftwotoxd_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.d		0x8(%a6),%fp0		# load dbl input
	fmov.x		%fp0,FP_SRC(%a6)
	lea		FP_SRC(%a6),%a0
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# keep tag in d1; d0 is reused below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bytes 3 and 1 of USER_FPSR

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	mov.b		%d1,STAG(%a6)		# NOTE(review): same tag was already stored from %d0 above; looks redundant
	tst.b		%d1
	bne.b		_L11_2d
	bsr.l		stwotox			# operand is a NORM
	bra.b		_L11_6d
_L11_2d:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L11_3d			# no
	bsr.l		ld_pone			# yes
	bra.b		_L11_6d
_L11_3d:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L11_4d			# no
	bsr.l		szr_inf			# yes
	bra.b		_L11_6d
_L11_4d:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# (_ftwotoxd_, continued: QNAN arm and fall-through arm of the dispatch)
	bne.b		_L11_5d			# no
	bsr.l		src_qnan		# yes
	bra.b		_L11_6d
_L11_5d:
	bsr.l		stwotoxd		# operand is a DENORM
_L11_6d:

#
# Result is now in FP0 (so only fp1 is reloaded from the frame below)
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts

#
# _ftwotoxx_: extended-precision entry point. Copies the three-longword
# ext operand at 0x8(%a6) into FP_SRC, classifies it with tag(), then
# dispatches:
#	NORM -> stwotox, ZERO -> ld_pone, INF -> szr_inf,
#	QNAN -> src_qnan, anything else (DENORM) -> stwotoxd.
# Handlers are entered with a0 = &FP_SRC and d0 = FPCR_MODE byte.
#
	global		_ftwotoxx_
_ftwotoxx_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
# copy, convert, and tag input argument
#
	lea		FP_SRC(%a6),%a0
	mov.l		0x8+0x0(%a6),0x0(%a0)	# load ext input
	mov.l		0x8+0x4(%a6),0x4(%a0)
	mov.l		0x8+0x8(%a6),0x8(%a0)
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# keep tag; d0 is reused below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24 and 15-8

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1
	bne.b		_L11_2x
	bsr.l		stwotox			# operand is a NORM
	bra.b		_L11_6x
_L11_2x:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L11_3x			# no
	bsr.l		ld_pone			# yes
	bra.b		_L11_6x
_L11_3x:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L11_4x			# no
	bsr.l		szr_inf			# yes
	bra.b		_L11_6x
_L11_4x:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# (_ftwotoxx_, continued: QNAN arm and fall-through arm of the dispatch)
	bne.b		_L11_5x			# no
	bsr.l		src_qnan		# yes
	bra.b		_L11_6x
_L11_5x:
	bsr.l		stwotoxd		# operand is a DENORM
_L11_6x:

#
# Result is now in FP0 (so only fp1 is reloaded from the frame below)
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts


#########################################################################
# MONADIC TEMPLATE							#
#########################################################################
#
# _ftentoxs_: single-precision entry point. Widens the sgl operand at
# 0x8(%a6) into FP_SRC, classifies it with tag(), then dispatches:
#	NORM -> stentox, ZERO -> ld_pone, INF -> szr_inf,
#	QNAN -> src_qnan, anything else (DENORM) -> stentoxd.
# Handlers are entered with a0 = &FP_SRC and d0 = FPCR_MODE byte.
#
	global		_ftentoxs_
_ftentoxs_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
# copy, convert, and tag input argument
#
	fmov.s		0x8(%a6),%fp0		# load sgl input
	fmov.x		%fp0,FP_SRC(%a6)
	lea		FP_SRC(%a6),%a0
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# keep tag; d0 is reused below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24 and 15-8

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1
	bne.b		_L12_2s
	bsr.l		stentox			# operand is a NORM
	bra.b		_L12_6s
_L12_2s:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L12_3s			# no
	bsr.l		ld_pone			# yes
	bra.b		_L12_6s
_L12_3s:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L12_4s			# no
	bsr.l		szr_inf			# yes
	bra.b		_L12_6s
_L12_4s:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# (_ftentoxs_, continued: QNAN arm and fall-through arm of the dispatch)
	bne.b		_L12_5s			# no
	bsr.l		src_qnan		# yes
	bra.b		_L12_6s
_L12_5s:
	bsr.l		stentoxd		# operand is a DENORM
_L12_6s:

#
# Result is now in FP0 (so only fp1 is reloaded from the frame below)
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts

#
# _ftentoxd_: double-precision entry point. Widens the dbl operand at
# 0x8(%a6) into FP_SRC, classifies it with tag(), then dispatches:
#	NORM -> stentox, ZERO -> ld_pone, INF -> szr_inf,
#	QNAN -> src_qnan, anything else (DENORM) -> stentoxd.
# Handlers are entered with a0 = &FP_SRC and d0 = FPCR_MODE byte.
#
	global		_ftentoxd_
_ftentoxd_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
# copy, convert, and tag input argument
#
	fmov.d		0x8(%a6),%fp0		# load dbl input
	fmov.x		%fp0,FP_SRC(%a6)
	lea		FP_SRC(%a6),%a0
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# keep tag; d0 is reused below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24 and 15-8

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	mov.b		%d1,STAG(%a6)		# NOTE(review): repeats the STAG store above
	tst.b		%d1
	bne.b		_L12_2d
	bsr.l		stentox			# operand is a NORM
	bra.b		_L12_6d
_L12_2d:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L12_3d			# no
	bsr.l		ld_pone			# yes
	bra.b		_L12_6d
_L12_3d:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L12_4d			# no
	bsr.l		szr_inf			# yes
	bra.b		_L12_6d
_L12_4d:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# (_ftentoxd_, continued: QNAN arm and fall-through arm of the dispatch)
	bne.b		_L12_5d			# no
	bsr.l		src_qnan		# yes
	bra.b		_L12_6d
_L12_5d:
	bsr.l		stentoxd		# operand is a DENORM
_L12_6d:

#
# Result is now in FP0 (so only fp1 is reloaded from the frame below)
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts

#
# _ftentoxx_: extended-precision entry point. Copies the three-longword
# ext operand at 0x8(%a6) into FP_SRC, classifies it with tag(), then
# dispatches:
#	NORM -> stentox, ZERO -> ld_pone, INF -> szr_inf,
#	QNAN -> src_qnan, anything else (DENORM) -> stentoxd.
# Handlers are entered with a0 = &FP_SRC and d0 = FPCR_MODE byte.
#
	global		_ftentoxx_
_ftentoxx_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
# copy, convert, and tag input argument
#
	lea		FP_SRC(%a6),%a0
	mov.l		0x8+0x0(%a6),0x0(%a0)	# load ext input
	mov.l		0x8+0x4(%a6),0x4(%a0)
	mov.l		0x8+0x8(%a6),0x8(%a0)
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# keep tag; d0 is reused below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24 and 15-8

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1
	bne.b		_L12_2x
	bsr.l		stentox			# operand is a NORM
	bra.b		_L12_6x
_L12_2x:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L12_3x			# no
	bsr.l		ld_pone			# yes
	bra.b		_L12_6x
_L12_3x:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L12_4x			# no
	bsr.l		szr_inf			# yes
	bra.b		_L12_6x
_L12_4x:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# (_ftentoxx_, continued: QNAN arm and fall-through arm of the dispatch)
	bne.b		_L12_5x			# no
	bsr.l		src_qnan		# yes
	bra.b		_L12_6x
_L12_5x:
	bsr.l		stentoxd		# operand is a DENORM
_L12_6x:

#
# Result is now in FP0 (so only fp1 is reloaded from the frame below)
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts


#########################################################################
# MONADIC TEMPLATE							#
#########################################################################
#
# _flogns_: single-precision entry point. Widens the sgl operand at
# 0x8(%a6) into FP_SRC, classifies it with tag(), then dispatches:
#	NORM -> slogn, ZERO -> t_dz2, INF -> sopr_inf,
#	QNAN -> src_qnan, anything else (DENORM) -> slognd.
# Handlers are entered with a0 = &FP_SRC and d0 = FPCR_MODE byte.
#
	global		_flogns_
_flogns_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
# copy, convert, and tag input argument
#
	fmov.s		0x8(%a6),%fp0		# load sgl input
	fmov.x		%fp0,FP_SRC(%a6)
	lea		FP_SRC(%a6),%a0
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# keep tag; d0 is reused below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24 and 15-8

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1
	bne.b		_L13_2s
	bsr.l		slogn			# operand is a NORM
	bra.b		_L13_6s
_L13_2s:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L13_3s			# no
	bsr.l		t_dz2			# yes
	bra.b		_L13_6s
_L13_3s:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L13_4s			# no
	bsr.l		sopr_inf		# yes
	bra.b		_L13_6s
_L13_4s:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# (_flogns_, continued: QNAN arm and fall-through arm of the dispatch)
	bne.b		_L13_5s			# no
	bsr.l		src_qnan		# yes
	bra.b		_L13_6s
_L13_5s:
	bsr.l		slognd			# operand is a DENORM
_L13_6s:

#
# Result is now in FP0 (so only fp1 is reloaded from the frame below)
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts

#
# _flognd_: double-precision entry point. Widens the dbl operand at
# 0x8(%a6) into FP_SRC, classifies it with tag(), then dispatches:
#	NORM -> slogn, ZERO -> t_dz2, INF -> sopr_inf,
#	QNAN -> src_qnan, anything else (DENORM) -> slognd.
# Handlers are entered with a0 = &FP_SRC and d0 = FPCR_MODE byte.
#
	global		_flognd_
_flognd_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
# copy, convert, and tag input argument
#
	fmov.d		0x8(%a6),%fp0		# load dbl input
	fmov.x		%fp0,FP_SRC(%a6)
	lea		FP_SRC(%a6),%a0
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# keep tag; d0 is reused below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24 and 15-8

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	mov.b		%d1,STAG(%a6)		# NOTE(review): repeats the STAG store above
	tst.b		%d1
	bne.b		_L13_2d
	bsr.l		slogn			# operand is a NORM
	bra.b		_L13_6d
_L13_2d:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L13_3d			# no
	bsr.l		t_dz2			# yes
	bra.b		_L13_6d
_L13_3d:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L13_4d			# no
	bsr.l		sopr_inf		# yes
	bra.b		_L13_6d
_L13_4d:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# (_flognd_, continued: QNAN arm and fall-through arm of the dispatch)
	bne.b		_L13_5d			# no
	bsr.l		src_qnan		# yes
	bra.b		_L13_6d
_L13_5d:
	bsr.l		slognd			# operand is a DENORM
_L13_6d:

#
# Result is now in FP0 (so only fp1 is reloaded from the frame below)
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts

#
# _flognx_: extended-precision entry point. Copies the three-longword
# ext operand at 0x8(%a6) into FP_SRC, classifies it with tag(), then
# dispatches:
#	NORM -> slogn, ZERO -> t_dz2, INF -> sopr_inf,
#	QNAN -> src_qnan, anything else (DENORM) -> slognd.
# Handlers are entered with a0 = &FP_SRC and d0 = FPCR_MODE byte.
#
	global		_flognx_
_flognx_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
# copy, convert, and tag input argument
#
	lea		FP_SRC(%a6),%a0
	mov.l		0x8+0x0(%a6),0x0(%a0)	# load ext input
	mov.l		0x8+0x4(%a6),0x4(%a0)
	mov.l		0x8+0x8(%a6),0x8(%a0)
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# keep tag; d0 is reused below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24 and 15-8

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1
	bne.b		_L13_2x
	bsr.l		slogn			# operand is a NORM
	bra.b		_L13_6x
_L13_2x:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L13_3x			# no
	bsr.l		t_dz2			# yes
	bra.b		_L13_6x
_L13_3x:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L13_4x			# no
	bsr.l		sopr_inf		# yes
	bra.b		_L13_6x
_L13_4x:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# (_flognx_, continued: QNAN arm and fall-through arm of the dispatch)
	bne.b		_L13_5x			# no
	bsr.l		src_qnan		# yes
	bra.b		_L13_6x
_L13_5x:
	bsr.l		slognd			# operand is a DENORM
_L13_6x:

#
# Result is now in FP0 (so only fp1 is reloaded from the frame below)
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts


#########################################################################
# MONADIC TEMPLATE							#
#########################################################################
#
# _flog10s_: single-precision entry point. Widens the sgl operand at
# 0x8(%a6) into FP_SRC, classifies it with tag(), then dispatches:
#	NORM -> slog10, ZERO -> t_dz2, INF -> sopr_inf,
#	QNAN -> src_qnan, anything else (DENORM) -> slog10d.
# Handlers are entered with a0 = &FP_SRC and d0 = FPCR_MODE byte.
#
	global		_flog10s_
_flog10s_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
# copy, convert, and tag input argument
#
	fmov.s		0x8(%a6),%fp0		# load sgl input
	fmov.x		%fp0,FP_SRC(%a6)
	lea		FP_SRC(%a6),%a0
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# keep tag; d0 is reused below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24 and 15-8

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1
	bne.b		_L14_2s
	bsr.l		slog10			# operand is a NORM
	bra.b		_L14_6s
_L14_2s:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L14_3s			# no
	bsr.l		t_dz2			# yes
	bra.b		_L14_6s
_L14_3s:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L14_4s			# no
	bsr.l		sopr_inf		# yes
	bra.b		_L14_6s
_L14_4s:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# (_flog10s_, continued: QNAN arm and fall-through arm of the dispatch)
	bne.b		_L14_5s			# no
	bsr.l		src_qnan		# yes
	bra.b		_L14_6s
_L14_5s:
	bsr.l		slog10d			# operand is a DENORM
_L14_6s:

#
# Result is now in FP0 (so only fp1 is reloaded from the frame below)
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts

#
# _flog10d_: double-precision entry point. Widens the dbl operand at
# 0x8(%a6) into FP_SRC, classifies it with tag(), then dispatches:
#	NORM -> slog10, ZERO -> t_dz2, INF -> sopr_inf,
#	QNAN -> src_qnan, anything else (DENORM) -> slog10d.
# Handlers are entered with a0 = &FP_SRC and d0 = FPCR_MODE byte.
#
	global		_flog10d_
_flog10d_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
# copy, convert, and tag input argument
#
	fmov.d		0x8(%a6),%fp0		# load dbl input
	fmov.x		%fp0,FP_SRC(%a6)
	lea		FP_SRC(%a6),%a0
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# keep tag; d0 is reused below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24 and 15-8

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	mov.b		%d1,STAG(%a6)		# NOTE(review): repeats the STAG store above
	tst.b		%d1
	bne.b		_L14_2d
	bsr.l		slog10			# operand is a NORM
	bra.b		_L14_6d
_L14_2d:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L14_3d			# no
	bsr.l		t_dz2			# yes
	bra.b		_L14_6d
_L14_3d:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L14_4d			# no
	bsr.l		sopr_inf		# yes
	bra.b		_L14_6d
_L14_4d:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# (_flog10d_, continued: QNAN arm and fall-through arm of the dispatch)
	bne.b		_L14_5d			# no
	bsr.l		src_qnan		# yes
	bra.b		_L14_6d
_L14_5d:
	bsr.l		slog10d			# operand is a DENORM
_L14_6d:

#
# Result is now in FP0 (so only fp1 is reloaded from the frame below)
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts

#
# _flog10x_: extended-precision entry point. Copies the three-longword
# ext operand at 0x8(%a6) into FP_SRC, classifies it with tag(), then
# dispatches:
#	NORM -> slog10, ZERO -> t_dz2, INF -> sopr_inf,
#	QNAN -> src_qnan, anything else (DENORM) -> slog10d.
# Handlers are entered with a0 = &FP_SRC and d0 = FPCR_MODE byte.
#
	global		_flog10x_
_flog10x_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
# copy, convert, and tag input argument
#
	lea		FP_SRC(%a6),%a0
	mov.l		0x8+0x0(%a6),0x0(%a0)	# load ext input
	mov.l		0x8+0x4(%a6),0x4(%a0)
	mov.l		0x8+0x8(%a6),0x8(%a0)
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# keep tag; d0 is reused below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24 and 15-8

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1
	bne.b		_L14_2x
	bsr.l		slog10			# operand is a NORM
	bra.b		_L14_6x
_L14_2x:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L14_3x			# no
	bsr.l		t_dz2			# yes
	bra.b		_L14_6x
_L14_3x:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L14_4x			# no
	bsr.l		sopr_inf		# yes
	bra.b		_L14_6x
_L14_4x:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# (_flog10x_, continued: QNAN arm and fall-through arm of the dispatch)
	bne.b		_L14_5x			# no
	bsr.l		src_qnan		# yes
	bra.b		_L14_6x
_L14_5x:
	bsr.l		slog10d			# operand is a DENORM
_L14_6x:

#
# Result is now in FP0 (so only fp1 is reloaded from the frame below)
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts


#########################################################################
# MONADIC TEMPLATE							#
#########################################################################
#
# _flog2s_: single-precision entry point. Widens the sgl operand at
# 0x8(%a6) into FP_SRC, classifies it with tag(), then dispatches:
#	NORM -> slog2, ZERO -> t_dz2, INF -> sopr_inf,
#	QNAN -> src_qnan, anything else (DENORM) -> slog2d.
# Handlers are entered with a0 = &FP_SRC and d0 = FPCR_MODE byte.
#
	global		_flog2s_
_flog2s_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
# copy, convert, and tag input argument
#
	fmov.s		0x8(%a6),%fp0		# load sgl input
	fmov.x		%fp0,FP_SRC(%a6)
	lea		FP_SRC(%a6),%a0
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# keep tag; d0 is reused below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24 and 15-8

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1
	bne.b		_L15_2s
	bsr.l		slog2			# operand is a NORM
	bra.b		_L15_6s
_L15_2s:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L15_3s			# no
	bsr.l		t_dz2			# yes
	bra.b		_L15_6s
_L15_3s:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L15_4s			# no
	bsr.l		sopr_inf		# yes
	bra.b		_L15_6s
_L15_4s:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# (_flog2s_, continued: QNAN arm and fall-through arm of the dispatch)
	bne.b		_L15_5s			# no
	bsr.l		src_qnan		# yes
	bra.b		_L15_6s
_L15_5s:
	bsr.l		slog2d			# operand is a DENORM
_L15_6s:

#
# Result is now in FP0 (so only fp1 is reloaded from the frame below)
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts

#
# _flog2d_: double-precision entry point. Widens the dbl operand at
# 0x8(%a6) into FP_SRC, classifies it with tag(), then dispatches:
#	NORM -> slog2, ZERO -> t_dz2, INF -> sopr_inf,
#	QNAN -> src_qnan, anything else (DENORM) -> slog2d.
# Handlers are entered with a0 = &FP_SRC and d0 = FPCR_MODE byte.
#
	global		_flog2d_
_flog2d_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
# copy, convert, and tag input argument
#
	fmov.d		0x8(%a6),%fp0		# load dbl input
	fmov.x		%fp0,FP_SRC(%a6)
	lea		FP_SRC(%a6),%a0
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# keep tag; d0 is reused below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24 and 15-8

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	mov.b		%d1,STAG(%a6)		# NOTE(review): repeats the STAG store above
	tst.b		%d1
	bne.b		_L15_2d
	bsr.l		slog2			# operand is a NORM
	bra.b		_L15_6d
_L15_2d:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L15_3d			# no
	bsr.l		t_dz2			# yes
	bra.b		_L15_6d
_L15_3d:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L15_4d			# no
	bsr.l		sopr_inf		# yes
	bra.b		_L15_6d
_L15_4d:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# (_flog2d_, continued: QNAN arm and fall-through arm of the dispatch)
	bne.b		_L15_5d			# no
	bsr.l		src_qnan		# yes
	bra.b		_L15_6d
_L15_5d:
	bsr.l		slog2d			# operand is a DENORM
_L15_6d:

#
# Result is now in FP0 (so only fp1 is reloaded from the frame below)
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts

#
# _flog2x_: extended-precision entry point. Copies the three-longword
# ext operand at 0x8(%a6) into FP_SRC, classifies it with tag(), then
# dispatches:
#	NORM -> slog2, ZERO -> t_dz2, INF -> sopr_inf,
#	QNAN -> src_qnan, anything else (DENORM) -> slog2d.
# Handlers are entered with a0 = &FP_SRC and d0 = FPCR_MODE byte.
#
	global		_flog2x_
_flog2x_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
# copy, convert, and tag input argument
#
	lea		FP_SRC(%a6),%a0
	mov.l		0x8+0x0(%a6),0x0(%a0)	# load ext input
	mov.l		0x8+0x4(%a6),0x4(%a0)
	mov.l		0x8+0x8(%a6),0x8(%a0)
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# keep tag; d0 is reused below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24 and 15-8

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1
	bne.b		_L15_2x
	bsr.l		slog2			# operand is a NORM
	bra.b		_L15_6x
_L15_2x:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L15_3x			# no
	bsr.l		t_dz2			# yes
	bra.b		_L15_6x
_L15_3x:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L15_4x			# no
	bsr.l		sopr_inf		# yes
	bra.b		_L15_6x
_L15_4x:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# (_flog2x_, continued: QNAN arm and fall-through arm of the dispatch)
	bne.b		_L15_5x			# no
	bsr.l		src_qnan		# yes
	bra.b		_L15_6x
_L15_5x:
	bsr.l		slog2d			# operand is a DENORM
_L15_6x:

#
# Result is now in FP0 (so only fp1 is reloaded from the frame below)
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts


#########################################################################
# MONADIC TEMPLATE							#
#########################################################################
#
# _fcoshs_: single-precision entry point. Widens the sgl operand at
# 0x8(%a6) into FP_SRC, classifies it with tag(), then dispatches:
#	NORM -> scosh, ZERO -> ld_pone, INF -> ld_pinf,
#	QNAN -> src_qnan, anything else (DENORM) -> scoshd.
# Handlers are entered with a0 = &FP_SRC and d0 = FPCR_MODE byte.
#
	global		_fcoshs_
_fcoshs_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
# copy, convert, and tag input argument
#
	fmov.s		0x8(%a6),%fp0		# load sgl input
	fmov.x		%fp0,FP_SRC(%a6)
	lea		FP_SRC(%a6),%a0
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# keep tag; d0 is reused below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24 and 15-8

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1
	bne.b		_L16_2s
	bsr.l		scosh			# operand is a NORM
	bra.b		_L16_6s
_L16_2s:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L16_3s			# no
	bsr.l		ld_pone			# yes
	bra.b		_L16_6s
_L16_3s:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L16_4s			# no
	bsr.l		ld_pinf			# yes
	bra.b		_L16_6s
_L16_4s:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# (_fcoshs_, continued: QNAN arm and fall-through arm of the dispatch)
	bne.b		_L16_5s			# no
	bsr.l		src_qnan		# yes
	bra.b		_L16_6s
_L16_5s:
	bsr.l		scoshd			# operand is a DENORM
_L16_6s:

#
# Result is now in FP0 (so only fp1 is reloaded from the frame below)
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts

#
# _fcoshd_: double-precision entry point. Widens the dbl operand at
# 0x8(%a6) into FP_SRC, classifies it with tag(), then dispatches:
#	NORM -> scosh, ZERO -> ld_pone, INF -> ld_pinf,
#	QNAN -> src_qnan, anything else (DENORM) -> scoshd.
# Handlers are entered with a0 = &FP_SRC and d0 = FPCR_MODE byte.
#
	global		_fcoshd_
_fcoshd_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
# copy, convert, and tag input argument
#
	fmov.d		0x8(%a6),%fp0		# load dbl input
	fmov.x		%fp0,FP_SRC(%a6)
	lea		FP_SRC(%a6),%a0
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# keep tag; d0 is reused below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24 and 15-8

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	mov.b		%d1,STAG(%a6)		# NOTE(review): repeats the STAG store above
	tst.b		%d1
	bne.b		_L16_2d
	bsr.l		scosh			# operand is a NORM
	bra.b		_L16_6d
_L16_2d:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L16_3d			# no
	bsr.l		ld_pone			# yes
	bra.b		_L16_6d
_L16_3d:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L16_4d			# no
	bsr.l		ld_pinf			# yes
	bra.b		_L16_6d
_L16_4d:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# (_fcoshd_, continued: QNAN arm and fall-through arm of the dispatch)
	bne.b		_L16_5d			# no
	bsr.l		src_qnan		# yes
	bra.b		_L16_6d
_L16_5d:
	bsr.l		scoshd			# operand is a DENORM
_L16_6d:

#
# Result is now in FP0 (so only fp1 is reloaded from the frame below)
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts

#
# _fcoshx_: extended-precision entry point. Copies the three-longword
# ext operand at 0x8(%a6) into FP_SRC, classifies it with tag(), then
# dispatches:
#	NORM -> scosh, ZERO -> ld_pone, INF -> ld_pinf,
#	QNAN -> src_qnan, anything else (DENORM) -> scoshd.
# Handlers are entered with a0 = &FP_SRC and d0 = FPCR_MODE byte.
#
	global		_fcoshx_
_fcoshx_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
# copy, convert, and tag input argument
#
	lea		FP_SRC(%a6),%a0
	mov.l		0x8+0x0(%a6),0x0(%a0)	# load ext input
	mov.l		0x8+0x4(%a6),0x4(%a0)
	mov.l		0x8+0x8(%a6),0x8(%a0)
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# keep tag; d0 is reused below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24 and 15-8

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1
	bne.b		_L16_2x
	bsr.l		scosh			# operand is a NORM
	bra.b		_L16_6x
_L16_2x:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L16_3x			# no
	bsr.l		ld_pone			# yes
	bra.b		_L16_6x
_L16_3x:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L16_4x			# no
	bsr.l		ld_pinf			# yes
	bra.b		_L16_6x
_L16_4x:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# (_fcoshx_, continued: QNAN arm and fall-through arm of the dispatch)
	bne.b		_L16_5x			# no
	bsr.l		src_qnan		# yes
	bra.b		_L16_6x
_L16_5x:
	bsr.l		scoshd			# operand is a DENORM
_L16_6x:

#
# Result is now in FP0 (so only fp1 is reloaded from the frame below)
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts


#########################################################################
# MONADIC TEMPLATE							#
#########################################################################
#
# _facoss_: single-precision entry point. Widens the sgl operand at
# 0x8(%a6) into FP_SRC, classifies it with tag(), then dispatches:
#	NORM -> sacos, ZERO -> ld_ppi2, INF -> t_operr,
#	QNAN -> src_qnan, anything else (DENORM) -> sacosd.
# Handlers are entered with a0 = &FP_SRC and d0 = FPCR_MODE byte.
#
	global		_facoss_
_facoss_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
# copy, convert, and tag input argument
#
	fmov.s		0x8(%a6),%fp0		# load sgl input
	fmov.x		%fp0,FP_SRC(%a6)
	lea		FP_SRC(%a6),%a0
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# keep tag; d0 is reused below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24 and 15-8

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1
	bne.b		_L17_2s
	bsr.l		sacos			# operand is a NORM
	bra.b		_L17_6s
_L17_2s:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L17_3s			# no
	bsr.l		ld_ppi2			# yes
	bra.b		_L17_6s
_L17_3s:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L17_4s			# no
	bsr.l		t_operr			# yes
	bra.b		_L17_6s
_L17_4s:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# (_facoss_, continued: QNAN arm and fall-through arm of the dispatch)
	bne.b		_L17_5s			# no
	bsr.l		src_qnan		# yes
	bra.b		_L17_6s
_L17_5s:
	bsr.l		sacosd			# operand is a DENORM
_L17_6s:

#
# Result is now in FP0 (so only fp1 is reloaded from the frame below)
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts

#
# _facosd_: double-precision entry point. Widens the dbl operand at
# 0x8(%a6) into FP_SRC, classifies it with tag(), then dispatches:
#	NORM -> sacos, ZERO -> ld_ppi2, INF -> t_operr,
#	QNAN -> src_qnan, anything else (DENORM) -> sacosd.
# Handlers are entered with a0 = &FP_SRC and d0 = FPCR_MODE byte.
#
	global		_facosd_
_facosd_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
# copy, convert, and tag input argument
#
	fmov.d		0x8(%a6),%fp0		# load dbl input
	fmov.x		%fp0,FP_SRC(%a6)
	lea		FP_SRC(%a6),%a0
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# keep tag; d0 is reused below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24 and 15-8

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	mov.b		%d1,STAG(%a6)		# NOTE(review): repeats the STAG store above
	tst.b		%d1
	bne.b		_L17_2d
	bsr.l		sacos			# operand is a NORM
	bra.b		_L17_6d
_L17_2d:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L17_3d			# no
	bsr.l		ld_ppi2			# yes
	bra.b		_L17_6d
_L17_3d:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L17_4d			# no
	bsr.l		t_operr			# yes
	bra.b		_L17_6d
_L17_4d:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# (_facosd_, continued: QNAN arm and fall-through arm of the dispatch)
	bne.b		_L17_5d			# no
	bsr.l		src_qnan		# yes
	bra.b		_L17_6d
_L17_5d:
	bsr.l		sacosd			# operand is a DENORM
_L17_6d:

#
# Result is now in FP0 (so only fp1 is reloaded from the frame below)
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts

#
# _facosx_: extended-precision entry point. Copies the three-longword
# ext operand at 0x8(%a6) into FP_SRC, classifies it with tag(), then
# dispatches:
#	NORM -> sacos, ZERO -> ld_ppi2, INF -> t_operr,
#	QNAN -> src_qnan, anything else (DENORM) -> sacosd.
# Handlers are entered with a0 = &FP_SRC and d0 = FPCR_MODE byte.
#
	global		_facosx_
_facosx_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
# copy, convert, and tag input argument
#
	lea		FP_SRC(%a6),%a0
	mov.l		0x8+0x0(%a6),0x0(%a0)	# load ext input
	mov.l		0x8+0x4(%a6),0x4(%a0)
	mov.l		0x8+0x8(%a6),0x8(%a0)
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# keep tag; d0 is reused below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24 and 15-8

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1
	bne.b		_L17_2x
	bsr.l		sacos			# operand is a NORM
	bra.b		_L17_6x
_L17_2x:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L17_3x			# no
	bsr.l		ld_ppi2			# yes
	bra.b		_L17_6x
_L17_3x:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L17_4x			# no
	bsr.l		t_operr			# yes
	bra.b		_L17_6x
_L17_4x:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# (_facosx_, continued: QNAN arm and fall-through arm of the dispatch)
	bne.b		_L17_5x			# no
	bsr.l		src_qnan		# yes
	bra.b		_L17_6x
_L17_5x:
	bsr.l		sacosd			# operand is a DENORM
_L17_6x:

#
# Result is now in FP0 (so only fp1 is reloaded from the frame below)
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts


#########################################################################
# MONADIC TEMPLATE							#
#########################################################################
#
# _fgetexps_: single-precision entry point. Widens the sgl operand at
# 0x8(%a6) into FP_SRC, classifies it with tag(), then dispatches:
#	NORM -> sgetexp, ZERO -> src_zero, INF -> t_operr,
#	QNAN -> src_qnan, anything else (DENORM) -> sgetexpd.
# Handlers are entered with a0 = &FP_SRC and d0 = FPCR_MODE byte.
#
	global		_fgetexps_
_fgetexps_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
# copy, convert, and tag input argument
#
	fmov.s		0x8(%a6),%fp0		# load sgl input
	fmov.x		%fp0,FP_SRC(%a6)
	lea		FP_SRC(%a6),%a0
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# keep tag; d0 is reused below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24 and 15-8

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1
	bne.b		_L18_2s
	bsr.l		sgetexp			# operand is a NORM
	bra.b		_L18_6s
_L18_2s:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L18_3s			# no
	bsr.l		src_zero		# yes
	bra.b		_L18_6s
_L18_3s:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L18_4s			# no
	bsr.l		t_operr			# yes
	bra.b		_L18_6s
_L18_4s:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# (_fgetexps_, continued: QNAN arm and fall-through arm of the dispatch)
	bne.b		_L18_5s			# no
	bsr.l		src_qnan		# yes
	bra.b		_L18_6s
_L18_5s:
	bsr.l		sgetexpd		# operand is a DENORM
_L18_6s:

#
# Result is now in FP0 (so only fp1 is reloaded from the frame below)
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts

#
# _fgetexpd_: double-precision entry point. Widens the dbl operand at
# 0x8(%a6) into FP_SRC, classifies it with tag(), then dispatches:
#	NORM -> sgetexp, ZERO -> src_zero, INF -> t_operr,
#	QNAN -> src_qnan, anything else (DENORM) -> sgetexpd.
# Handlers are entered with a0 = &FP_SRC and d0 = FPCR_MODE byte.
#
	global		_fgetexpd_
_fgetexpd_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
# copy, convert, and tag input argument
#
	fmov.d		0x8(%a6),%fp0		# load dbl input
	fmov.x		%fp0,FP_SRC(%a6)
	lea		FP_SRC(%a6),%a0
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# keep tag; d0 is reused below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24 and 15-8

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	mov.b		%d1,STAG(%a6)		# NOTE(review): repeats the STAG store above
	tst.b		%d1
	bne.b		_L18_2d
	bsr.l		sgetexp			# operand is a NORM
	bra.b		_L18_6d
_L18_2d:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L18_3d			# no
	bsr.l		src_zero		# yes
	bra.b		_L18_6d
_L18_3d:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L18_4d			# no
	bsr.l		t_operr			# yes
	bra.b		_L18_6d
_L18_4d:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# (_fgetexpd_, continued: QNAN arm and fall-through arm of the dispatch)
	bne.b		_L18_5d			# no
	bsr.l		src_qnan		# yes
	bra.b		_L18_6d
_L18_5d:
	bsr.l		sgetexpd		# operand is a DENORM
_L18_6d:

#
# Result is now in FP0 (so only fp1 is reloaded from the frame below)
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts

#
# _fgetexpx_: extended-precision entry point. Copies the three-longword
# ext operand at 0x8(%a6) into FP_SRC, classifies it with tag(), then
# dispatches:
#	NORM -> sgetexp, ZERO -> src_zero, INF -> t_operr,
#	QNAN -> src_qnan, anything else (DENORM) -> sgetexpd.
# Handlers are entered with a0 = &FP_SRC and d0 = FPCR_MODE byte.
#
	global		_fgetexpx_
_fgetexpx_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
# copy, convert, and tag input argument
#
	lea		FP_SRC(%a6),%a0
	mov.l		0x8+0x0(%a6),0x0(%a0)	# load ext input
	mov.l		0x8+0x4(%a6),0x4(%a0)
	mov.l		0x8+0x8(%a6),0x8(%a0)
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# keep tag; d0 is reused below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24 and 15-8

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1
	bne.b		_L18_2x
	bsr.l		sgetexp			# operand is a NORM
	bra.b		_L18_6x
_L18_2x:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L18_3x			# no
	bsr.l		src_zero		# yes
	bra.b		_L18_6x
_L18_3x:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L18_4x			# no
	bsr.l		t_operr			# yes
	bra.b		_L18_6x
_L18_4x:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# QNAN tag falls through; any other tag goes to the DENORM handler
	bne.b		_L18_5x			# no
	bsr.l		src_qnan		# yes
	bra.b		_L18_6x
_L18_5x:
	bsr.l		sgetexpd		# operand is a DENORM
_L18_6x:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts


#########################################################################
# MONADIC TEMPLATE							#
#########################################################################
#
# _fgetmans_(): single-precision entry point.
# The sgl argument at 0x8(%a6) is widened into FP_SRC and classified by
# tag(); the handler selected by that tag is then called with
# d0 = FPCR_MODE byte. The result is left in fp0.
#
	global		_fgetmans_
_fgetmans_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.s		0x8(%a6),%fp0		# load sgl input
	fmov.x		%fp0,FP_SRC(%a6)
	lea		FP_SRC(%a6),%a0
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # keep bits 23-16, 7-0 only

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1
	bne.b		_L19_2s
	bsr.l		sgetman			# operand is a NORM
	bra.b		_L19_6s
_L19_2s:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L19_3s			# no
	bsr.l		src_zero		# yes
	bra.b		_L19_6s
_L19_3s:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L19_4s			# no
	bsr.l		t_operr			# yes
	bra.b		_L19_6s
_L19_4s:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# QNAN tag falls through; any other tag goes to the DENORM handler
	bne.b		_L19_5s			# no
	bsr.l		src_qnan		# yes
	bra.b		_L19_6s
_L19_5s:
	bsr.l		sgetmand		# operand is a DENORM
_L19_6s:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts

#
# _fgetmand_(): double-precision entry point.
# The dbl argument at 0x8(%a6) is widened into FP_SRC and classified by
# tag(); the handler selected by that tag is then called with
# d0 = FPCR_MODE byte. The result is left in fp0.
#
	global		_fgetmand_
_fgetmand_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.d		0x8(%a6),%fp0		# load dbl input
	fmov.x		%fp0,FP_SRC(%a6)
	lea		FP_SRC(%a6),%a0
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # keep bits 23-16, 7-0 only

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	mov.b		%d1,STAG(%a6)		# same tag as stored above
	tst.b		%d1
	bne.b		_L19_2d
	bsr.l		sgetman			# operand is a NORM
	bra.b		_L19_6d
_L19_2d:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L19_3d			# no
	bsr.l		src_zero		# yes
	bra.b		_L19_6d
_L19_3d:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L19_4d			# no
	bsr.l		t_operr			# yes
	bra.b		_L19_6d
_L19_4d:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# QNAN tag falls through; any other tag goes to the DENORM handler
	bne.b		_L19_5d			# no
	bsr.l		src_qnan		# yes
	bra.b		_L19_6d
_L19_5d:
	bsr.l		sgetmand		# operand is a DENORM
_L19_6d:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts

#
# _fgetmanx_(): extended-precision entry point.
# The 12-byte ext argument at 0x8(%a6) is copied into FP_SRC and
# classified by tag(); the handler selected by that tag is then called
# with d0 = FPCR_MODE byte. The result is left in fp0.
#
	global		_fgetmanx_
_fgetmanx_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	lea		FP_SRC(%a6),%a0
	mov.l		0x8+0x0(%a6),0x0(%a0)	# load ext input
	mov.l		0x8+0x4(%a6),0x4(%a0)
	mov.l		0x8+0x8(%a6),0x8(%a0)
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # keep bits 23-16, 7-0 only

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1
	bne.b		_L19_2x
	bsr.l		sgetman			# operand is a NORM
	bra.b		_L19_6x
_L19_2x:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L19_3x			# no
	bsr.l		src_zero		# yes
	bra.b		_L19_6x
_L19_3x:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L19_4x			# no
	bsr.l		t_operr			# yes
	bra.b		_L19_6x
_L19_4x:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# QNAN tag falls through; any other tag goes to the DENORM handler
	bne.b		_L19_5x			# no
	bsr.l		src_qnan		# yes
	bra.b		_L19_6x
_L19_5x:
	bsr.l		sgetmand		# operand is a DENORM
_L19_6x:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts


#########################################################################
# MONADIC TEMPLATE							#
#########################################################################
#
# _fsincoss_(): single-precision entry point.
# The sgl argument at 0x8(%a6) is widened into FP_SRC and classified by
# tag(); the handler selected by that tag is then called with
# d0 = FPCR_MODE byte. Unlike the other monadic entries, the epilogue
# exchanges fp0 and fp1 through the stack instead of reloading fp1.
#
	global		_fsincoss_
_fsincoss_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.s		0x8(%a6),%fp0		# load sgl input
	fmov.x		%fp0,FP_SRC(%a6)
	lea		FP_SRC(%a6),%a0
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # keep bits 23-16, 7-0 only

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1
	bne.b		_L20_2s
	bsr.l		ssincos			# operand is a NORM
	bra.b		_L20_6s
_L20_2s:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L20_3s			# no
	bsr.l		ssincosz		# yes
	bra.b		_L20_6s
_L20_3s:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L20_4s			# no
	bsr.l		ssincosi		# yes
	bra.b		_L20_6s
_L20_4s:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# QNAN tag falls through; any other tag goes to the DENORM handler
	bne.b		_L20_5s			# no
	bsr.l		ssincosqnan		# yes
	bra.b		_L20_6s
_L20_5s:
	bsr.l		ssincosd		# operand is a DENORM
_L20_6s:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		&0x03,-(%sp)		# store off fp0/fp1
	fmovm.x		(%sp)+,&0x40		# fp0 now in fp1
	fmovm.x		(%sp)+,&0x80		# fp1 now in fp0
	unlk		%a6
	rts

#
# _fsincosd_(): double-precision entry point.
# The dbl argument at 0x8(%a6) is widened into FP_SRC and classified by
# tag(); the handler selected by that tag is then called with
# d0 = FPCR_MODE byte. The epilogue exchanges fp0 and fp1 through the
# stack instead of reloading fp1.
#
	global		_fsincosd_
_fsincosd_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.d		0x8(%a6),%fp0		# load dbl input
	fmov.x		%fp0,FP_SRC(%a6)
	lea		FP_SRC(%a6),%a0
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # keep bits 23-16, 7-0 only

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	mov.b		%d1,STAG(%a6)		# same tag as stored above
	tst.b		%d1
	bne.b		_L20_2d
	bsr.l		ssincos			# operand is a NORM
	bra.b		_L20_6d
_L20_2d:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L20_3d			# no
	bsr.l		ssincosz		# yes
	bra.b		_L20_6d
_L20_3d:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L20_4d			# no
	bsr.l		ssincosi		# yes
	bra.b		_L20_6d
_L20_4d:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# QNAN tag falls through; any other tag goes to the DENORM handler
	bne.b		_L20_5d			# no
	bsr.l		ssincosqnan		# yes
	bra.b		_L20_6d
_L20_5d:
	bsr.l		ssincosd		# operand is a DENORM
_L20_6d:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		&0x03,-(%sp)		# store off fp0/fp1
	fmovm.x		(%sp)+,&0x40		# fp0 now in fp1
	fmovm.x		(%sp)+,&0x80		# fp1 now in fp0
	unlk		%a6
	rts

#
# _fsincosx_(): extended-precision entry point.
# The 12-byte ext argument at 0x8(%a6) is copied into FP_SRC and
# classified by tag(); the handler selected by that tag is then called
# with d0 = FPCR_MODE byte. The epilogue exchanges fp0 and fp1 through
# the stack instead of reloading fp1.
#
	global		_fsincosx_
_fsincosx_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	lea		FP_SRC(%a6),%a0
	mov.l		0x8+0x0(%a6),0x0(%a0)	# load ext input
	mov.l		0x8+0x4(%a6),0x4(%a0)
	mov.l		0x8+0x8(%a6),0x8(%a0)
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # keep bits 23-16, 7-0 only

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1
	bne.b		_L20_2x
	bsr.l		ssincos			# operand is a NORM
	bra.b		_L20_6x
_L20_2x:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L20_3x			# no
	bsr.l		ssincosz		# yes
	bra.b		_L20_6x
_L20_3x:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L20_4x			# no
	bsr.l		ssincosi		# yes
	bra.b		_L20_6x
_L20_4x:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# QNAN tag falls through; any other tag goes to the DENORM handler
	bne.b		_L20_5x			# no
	bsr.l		ssincosqnan		# yes
	bra.b		_L20_6x
_L20_5x:
	bsr.l		ssincosd		# operand is a DENORM
_L20_6x:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		&0x03,-(%sp)		# store off fp0/fp1
	fmovm.x		(%sp)+,&0x40		# fp0 now in fp1
	fmovm.x		(%sp)+,&0x80		# fp1 now in fp0
	unlk		%a6
	rts


#########################################################################
# DYADIC TEMPLATE							#
#########################################################################
#
# _frems_(): single-precision entry point.
# The dst argument at 0x8(%a6) and the src argument at 0xc(%a6) are
# widened into FP_DST / FP_SRC and tagged (DTAG / STAG). The handler is
# chosen here from the src tag alone and is called with a0 = &FP_SRC,
# a1 = &FP_DST and d0 = FPCR_MODE byte. The result is left in fp0.
#
	global		_frems_
_frems_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.s		0x8(%a6),%fp0		# load sgl dst
	fmov.x		%fp0,FP_DST(%a6)
	lea		FP_DST(%a6),%a0
	bsr.l		tag			# fetch operand type
	mov.b		%d0,DTAG(%a6)

	fmov.s		0xc(%a6),%fp0		# load sgl src
	fmov.x		%fp0,FP_SRC(%a6)
	lea		FP_SRC(%a6),%a0
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.l		%d0,%d1

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # keep bits 23-16, 7-0 only

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	lea		FP_SRC(%a6),%a0		# pass ptr to src
	lea		FP_DST(%a6),%a1		# pass ptr to dst

	tst.b		%d1
	bne.b		_L21_2s
	bsr.l		srem_snorm		# operand is a NORM
	bra.b		_L21_6s
_L21_2s:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L21_3s			# no
	bsr.l		srem_szero		# yes
	bra.b		_L21_6s
_L21_3s:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L21_4s			# no
	bsr.l		srem_sinf		# yes
	bra.b		_L21_6s
_L21_4s:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# QNAN tag falls through; any other tag goes to the DENORM handler
	bne.b		_L21_5s			# no
	bsr.l		sop_sqnan		# yes
	bra.b		_L21_6s
_L21_5s:
	bsr.l		srem_sdnrm		# operand is a DENORM
_L21_6s:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts

#
# _fremd_(): double-precision entry point.
# The dst argument at 0x8(%a6) and the src argument at 0x10(%a6) are
# widened into FP_DST / FP_SRC and tagged (DTAG / STAG). The handler is
# chosen here from the src tag alone and is called with a0 = &FP_SRC,
# a1 = &FP_DST and d0 = FPCR_MODE byte. The result is left in fp0.
#
	global		_fremd_
_fremd_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.d		0x8(%a6),%fp0		# load dbl dst
	fmov.x		%fp0,FP_DST(%a6)
	lea		FP_DST(%a6),%a0
	bsr.l		tag			# fetch operand type
	mov.b		%d0,DTAG(%a6)

	fmov.d		0x10(%a6),%fp0		# load dbl src
	fmov.x		%fp0,FP_SRC(%a6)
	lea		FP_SRC(%a6),%a0
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.l		%d0,%d1

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # keep bits 23-16, 7-0 only

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	lea		FP_SRC(%a6),%a0		# pass ptr to src
	lea		FP_DST(%a6),%a1		# pass ptr to dst

	tst.b		%d1
	bne.b		_L21_2d
	bsr.l		srem_snorm		# operand is a NORM
	bra.b		_L21_6d
_L21_2d:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L21_3d			# no
	bsr.l		srem_szero		# yes
	bra.b		_L21_6d
_L21_3d:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L21_4d			# no
	bsr.l		srem_sinf		# yes
	bra.b		_L21_6d
_L21_4d:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# QNAN tag falls through; any other tag goes to the DENORM handler
	bne.b		_L21_5d			# no
	bsr.l		sop_sqnan		# yes
	bra.b		_L21_6d
_L21_5d:
	bsr.l		srem_sdnrm		# operand is a DENORM
_L21_6d:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts

#
# _fremx_(): extended-precision entry point.
# The 12-byte dst argument at 0x8(%a6) and the 12-byte src argument at
# 0x14(%a6) are copied into FP_DST / FP_SRC and tagged (DTAG / STAG).
# The handler is chosen here from the src tag alone and is called with
# a0 = &FP_SRC, a1 = &FP_DST and d0 = FPCR_MODE byte. Result is in fp0.
#
	global		_fremx_
_fremx_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	lea		FP_DST(%a6),%a0
	mov.l		0x8+0x0(%a6),0x0(%a0)	# load ext dst
	mov.l		0x8+0x4(%a6),0x4(%a0)
	mov.l		0x8+0x8(%a6),0x8(%a0)
	bsr.l		tag			# fetch operand type
	mov.b		%d0,DTAG(%a6)

	lea		FP_SRC(%a6),%a0
	mov.l		0x14+0x0(%a6),0x0(%a0)	# load ext src
	mov.l		0x14+0x4(%a6),0x4(%a0)
	mov.l		0x14+0x8(%a6),0x8(%a0)
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.l		%d0,%d1

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # keep bits 23-16, 7-0 only

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	lea		FP_SRC(%a6),%a0		# pass ptr to src
	lea		FP_DST(%a6),%a1		# pass ptr to dst

	tst.b		%d1
	bne.b		_L21_2x
	bsr.l		srem_snorm		# operand is a NORM
	bra.b		_L21_6x
_L21_2x:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L21_3x			# no
	bsr.l		srem_szero		# yes
	bra.b		_L21_6x
_L21_3x:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L21_4x			# no
	bsr.l		srem_sinf		# yes
	bra.b		_L21_6x
_L21_4x:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# QNAN tag falls through; any other tag goes to the DENORM handler
	bne.b		_L21_5x			# no
	bsr.l		sop_sqnan		# yes
	bra.b		_L21_6x
_L21_5x:
	bsr.l		srem_sdnrm		# operand is a DENORM
_L21_6x:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts


#########################################################################
# DYADIC TEMPLATE							#
#########################################################################
#
# _fmods_(): single-precision entry point.
# The dst argument at 0x8(%a6) and the src argument at 0xc(%a6) are
# widened into FP_DST / FP_SRC and tagged (DTAG / STAG). The handler is
# chosen here from the src tag alone and is called with a0 = &FP_SRC,
# a1 = &FP_DST and d0 = FPCR_MODE byte. The result is left in fp0.
#
	global		_fmods_
_fmods_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.s		0x8(%a6),%fp0		# load sgl dst
	fmov.x		%fp0,FP_DST(%a6)
	lea		FP_DST(%a6),%a0
	bsr.l		tag			# fetch operand type
	mov.b		%d0,DTAG(%a6)

	fmov.s		0xc(%a6),%fp0		# load sgl src
	fmov.x		%fp0,FP_SRC(%a6)
	lea		FP_SRC(%a6),%a0
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.l		%d0,%d1

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # keep bits 23-16, 7-0 only

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	lea		FP_SRC(%a6),%a0		# pass ptr to src
	lea		FP_DST(%a6),%a1		# pass ptr to dst

	tst.b		%d1
	bne.b		_L22_2s
	bsr.l		smod_snorm		# operand is a NORM
	bra.b		_L22_6s
_L22_2s:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L22_3s			# no
	bsr.l		smod_szero		# yes
	bra.b		_L22_6s
_L22_3s:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L22_4s			# no
	bsr.l		smod_sinf		# yes
	bra.b		_L22_6s
_L22_4s:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# QNAN tag falls through; any other tag goes to the DENORM handler
	bne.b		_L22_5s			# no
	bsr.l		sop_sqnan		# yes
	bra.b		_L22_6s
_L22_5s:
	bsr.l		smod_sdnrm		# operand is a DENORM
_L22_6s:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts

#
# _fmodd_(): double-precision entry point.
# The dst argument at 0x8(%a6) and the src argument at 0x10(%a6) are
# widened into FP_DST / FP_SRC and tagged (DTAG / STAG). The handler is
# chosen here from the src tag alone and is called with a0 = &FP_SRC,
# a1 = &FP_DST and d0 = FPCR_MODE byte. The result is left in fp0.
#
	global		_fmodd_
_fmodd_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.d		0x8(%a6),%fp0		# load dbl dst
	fmov.x		%fp0,FP_DST(%a6)
	lea		FP_DST(%a6),%a0
	bsr.l		tag			# fetch operand type
	mov.b		%d0,DTAG(%a6)

	fmov.d		0x10(%a6),%fp0		# load dbl src
	fmov.x		%fp0,FP_SRC(%a6)
	lea		FP_SRC(%a6),%a0
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.l		%d0,%d1

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # keep bits 23-16, 7-0 only

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	lea		FP_SRC(%a6),%a0		# pass ptr to src
	lea		FP_DST(%a6),%a1		# pass ptr to dst

	tst.b		%d1
	bne.b		_L22_2d
	bsr.l		smod_snorm		# operand is a NORM
	bra.b		_L22_6d
_L22_2d:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L22_3d			# no
	bsr.l		smod_szero		# yes
	bra.b		_L22_6d
_L22_3d:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L22_4d			# no
	bsr.l		smod_sinf		# yes
	bra.b		_L22_6d
_L22_4d:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# QNAN tag falls through; any other tag goes to the DENORM handler
	bne.b		_L22_5d			# no
	bsr.l		sop_sqnan		# yes
	bra.b		_L22_6d
_L22_5d:
	bsr.l		smod_sdnrm		# operand is a DENORM
_L22_6d:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts

#
# _fmodx_(): extended-precision entry point.
# The 12-byte dst argument at 0x8(%a6) and the 12-byte src argument at
# 0x14(%a6) are copied into FP_DST / FP_SRC and tagged (DTAG / STAG).
# The handler is chosen here from the src tag alone and is called with
# a0 = &FP_SRC, a1 = &FP_DST and d0 = FPCR_MODE byte. Result is in fp0.
#
	global		_fmodx_
_fmodx_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	lea		FP_DST(%a6),%a0
	mov.l		0x8+0x0(%a6),0x0(%a0)	# load ext dst
	mov.l		0x8+0x4(%a6),0x4(%a0)
	mov.l		0x8+0x8(%a6),0x8(%a0)
	bsr.l		tag			# fetch operand type
	mov.b		%d0,DTAG(%a6)

	lea		FP_SRC(%a6),%a0
	mov.l		0x14+0x0(%a6),0x0(%a0)	# load ext src
	mov.l		0x14+0x4(%a6),0x4(%a0)
	mov.l		0x14+0x8(%a6),0x8(%a0)
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.l		%d0,%d1

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # keep bits 23-16, 7-0 only

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	lea		FP_SRC(%a6),%a0		# pass ptr to src
	lea		FP_DST(%a6),%a1		# pass ptr to dst

	tst.b		%d1
	bne.b		_L22_2x
	bsr.l		smod_snorm		# operand is a NORM
	bra.b		_L22_6x
_L22_2x:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L22_3x			# no
	bsr.l		smod_szero		# yes
	bra.b		_L22_6x
_L22_3x:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L22_4x			# no
	bsr.l		smod_sinf		# yes
	bra.b		_L22_6x
_L22_4x:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# QNAN tag falls through; any other tag goes to the DENORM handler
	bne.b		_L22_5x			# no
	bsr.l		sop_sqnan		# yes
	bra.b		_L22_6x
_L22_5x:
	bsr.l		smod_sdnrm		# operand is a DENORM
_L22_6x:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts


#########################################################################
# DYADIC TEMPLATE							#
#########################################################################
#
# _fscales_(): single-precision entry point.
# The dst argument at 0x8(%a6) and the src argument at 0xc(%a6) are
# widened into FP_DST / FP_SRC and tagged (DTAG / STAG). The handler is
# chosen here from the src tag alone and is called with a0 = &FP_SRC,
# a1 = &FP_DST and d0 = FPCR_MODE byte. The result is left in fp0.
#
	global		_fscales_
_fscales_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.s		0x8(%a6),%fp0		# load sgl dst
	fmov.x		%fp0,FP_DST(%a6)
	lea		FP_DST(%a6),%a0
	bsr.l		tag			# fetch operand type
	mov.b		%d0,DTAG(%a6)

	fmov.s		0xc(%a6),%fp0		# load sgl src
	fmov.x		%fp0,FP_SRC(%a6)
	lea		FP_SRC(%a6),%a0
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.l		%d0,%d1

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # keep bits 23-16, 7-0 only

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	lea		FP_SRC(%a6),%a0		# pass ptr to src
	lea		FP_DST(%a6),%a1		# pass ptr to dst

	tst.b		%d1
	bne.b		_L23_2s
	bsr.l		sscale_snorm		# operand is a NORM
	bra.b		_L23_6s
_L23_2s:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L23_3s			# no
	bsr.l		sscale_szero		# yes
	bra.b		_L23_6s
_L23_3s:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L23_4s			# no
	bsr.l		sscale_sinf		# yes
	bra.b		_L23_6s
_L23_4s:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# QNAN tag falls through; any other tag goes to the DENORM handler
	bne.b		_L23_5s			# no
	bsr.l		sop_sqnan		# yes
	bra.b		_L23_6s
_L23_5s:
	bsr.l		sscale_sdnrm		# operand is a DENORM
_L23_6s:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts

#
# _fscaled_(): double-precision entry point.
# The dst argument at 0x8(%a6) and the src argument at 0x10(%a6) are
# widened into FP_DST / FP_SRC and tagged (DTAG / STAG). The handler is
# chosen here from the src tag alone and is called with a0 = &FP_SRC,
# a1 = &FP_DST and d0 = FPCR_MODE byte. The result is left in fp0.
#
	global		_fscaled_
_fscaled_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.d		0x8(%a6),%fp0		# load dbl dst
	fmov.x		%fp0,FP_DST(%a6)
	lea		FP_DST(%a6),%a0
	bsr.l		tag			# fetch operand type
	mov.b		%d0,DTAG(%a6)

	fmov.d		0x10(%a6),%fp0		# load dbl src
	fmov.x		%fp0,FP_SRC(%a6)
	lea		FP_SRC(%a6),%a0
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.l		%d0,%d1

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # keep bits 23-16, 7-0 only

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	lea		FP_SRC(%a6),%a0		# pass ptr to src
	lea		FP_DST(%a6),%a1		# pass ptr to dst

	tst.b		%d1
	bne.b		_L23_2d
	bsr.l		sscale_snorm		# operand is a NORM
	bra.b		_L23_6d
_L23_2d:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L23_3d			# no
	bsr.l		sscale_szero		# yes
	bra.b		_L23_6d
_L23_3d:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L23_4d			# no
	bsr.l		sscale_sinf		# yes
	bra.b		_L23_6d
_L23_4d:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# QNAN tag falls through; any other tag goes to the DENORM handler
	bne.b		_L23_5d			# no
	bsr.l		sop_sqnan		# yes
	bra.b		_L23_6d
_L23_5d:
	bsr.l		sscale_sdnrm		# operand is a DENORM
_L23_6d:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts

#
# _fscalex_(): extended-precision entry point.
# The 12-byte dst argument at 0x8(%a6) and the 12-byte src argument at
# 0x14(%a6) are copied into FP_DST / FP_SRC and tagged (DTAG / STAG).
# The handler is chosen here from the src tag alone and is called with
# a0 = &FP_SRC, a1 = &FP_DST and d0 = FPCR_MODE byte. Result is in fp0.
#
	global		_fscalex_
_fscalex_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	lea		FP_DST(%a6),%a0
	mov.l		0x8+0x0(%a6),0x0(%a0)	# load ext dst
	mov.l		0x8+0x4(%a6),0x4(%a0)
	mov.l		0x8+0x8(%a6),0x8(%a0)
	bsr.l		tag			# fetch operand type
	mov.b		%d0,DTAG(%a6)

	lea		FP_SRC(%a6),%a0
	mov.l		0x14+0x0(%a6),0x0(%a0)	# load ext src
	mov.l		0x14+0x4(%a6),0x4(%a0)
	mov.l		0x14+0x8(%a6),0x8(%a0)
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.l		%d0,%d1

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # keep bits 23-16, 7-0 only

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	lea		FP_SRC(%a6),%a0		# pass ptr to src
	lea		FP_DST(%a6),%a1		# pass ptr to dst

	tst.b		%d1
	bne.b		_L23_2x
	bsr.l		sscale_snorm		# operand is a NORM
	bra.b		_L23_6x
_L23_2x:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L23_3x			# no
	bsr.l		sscale_szero		# yes
	bra.b		_L23_6x
_L23_3x:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L23_4x			# no
	bsr.l		sscale_sinf		# yes
	bra.b		_L23_6x
_L23_4x:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
# QNAN tag falls through; any other tag goes to the DENORM handler
	bne.b		_L23_5x			# no
	bsr.l		sop_sqnan		# yes
	bra.b		_L23_6x
_L23_5x:
	bsr.l		sscale_sdnrm		# operand is a DENORM
_L23_6x:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts


#########################################################################
# ssin():     computes the sine of a normalized input
# ssind():    computes the sine of a denormalized input
# scos():     computes the cosine of a normalized input
# scosd():    computes the cosine of a denormalized input
# ssincos():  computes the sine and cosine of a normalized input
# ssincosd(): computes the sine and cosine of a denormalized input
#
# INPUT ***************************************************************
#	a0 = pointer to extended precision input
#	d0 = round precision,mode
#
# OUTPUT **************************************************************
#	fp0 = sin(X) or cos(X)
#
#    For ssincos(X):
#	fp0 = sin(X)
#	fp1 = cos(X)
#
# ACCURACY and MONOTONICITY *******************************************
#	The returned result is within 1 ulp in 64 significant bits, i.e.
#	within 0.5001 ulp to 53 bits if the result is subsequently
#	rounded to double precision. The result is provably monotonic
#	in double precision.
#
# ALGORITHM ***********************************************************
#
#	SIN and COS:
#	1. If SIN is invoked, set AdjN := 0; otherwise, set AdjN := 1.
#
#	2. If |X| >= 15Pi or |X| < 2**(-40), go to 7.
#
#	3. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let
#		k = N mod 4, so in particular, k = 0,1,2,or 3.
#		Overwrite k by k := k + AdjN.
#
#	4. If k is even, go to 6.
#
#	5. (k is odd) Set j := (k-1)/2, sgn := (-1)**j.
#		Return sgn*cos(r) where cos(r) is approximated by an
#		even polynomial in r, 1 + r*r*(B1+s*(B2+ ... + s*B8)),
#		s = r*r.
#		Exit.
#
#	6. (k is even) Set j := k/2, sgn := (-1)**j. Return sgn*sin(r)
#		where sin(r) is approximated by an odd polynomial in r
#		r + r*s*(A1+s*(A2+ ... + s*A7)),	s = r*r.
#		Exit.
#
#	7. If |X| > 1, go to 9.
#
#	8. (|X|<2**(-40)) If SIN is invoked, return X;
#		otherwise return 1.
#
#	9. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi,
#		go back to 3.
#
#	SINCOS:
#	1. If |X| >= 15Pi or |X| < 2**(-40), go to 6.
#
#	2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let
#		k = N mod 4, so in particular, k = 0,1,2,or 3.
#
#	3. If k is even, go to 5.
#
#	4. (k is odd) Set j1 := (k-1)/2, j2 := j1 (EOR) (k mod 2), i.e.
#		j1 exclusive or with the l.s.b. of k.
#		sgn1 := (-1)**j1, sgn2 := (-1)**j2.
#		SIN(X) = sgn1 * cos(r) and COS(X) = sgn2*sin(r) where
#		sin(r) and cos(r) are computed as odd and even
#		polynomials in r, respectively. Exit
#
#	5. (k is even) Set j1 := k/2, sgn1 := (-1)**j1.
#		SIN(X) = sgn1 * sin(r) and COS(X) = sgn1*cos(r) where
#		sin(r) and cos(r) are computed as odd and even
#		polynomials in r, respectively. Exit
#
#	6. If |X| > 1, go to 8.
#
#	7. (|X|<2**(-40)) SIN(X) = X and COS(X) = 1. Exit.
#
#	8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi,
#		go back to 2.
#
#########################################################################

# Polynomial coefficients for sin(r) (A1..A7) and cos(r) (B1..B8).
SINA7:	long		0xBD6AAA77,0xCCC994F5
SINA6:	long		0x3DE61209,0x7AAE8DA1
SINA5:	long		0xBE5AE645,0x2A118AE4
SINA4:	long		0x3EC71DE3,0xA5341531
SINA3:	long		0xBF2A01A0,0x1A018B59,0x00000000,0x00000000
SINA2:	long		0x3FF80000,0x88888888,0x888859AF,0x00000000
SINA1:	long		0xBFFC0000,0xAAAAAAAA,0xAAAAAA99,0x00000000

COSB8:	long		0x3D2AC4D0,0xD6011EE3
COSB7:	long		0xBDA9396F,0x9F45AC19
COSB6:	long		0x3E21EED9,0x0612C972
COSB5:	long		0xBE927E4F,0xB79D9FCF
COSB4:	long		0x3EFA01A0,0x1A01D423,0x00000000,0x00000000
COSB3:	long		0xBFF50000,0xB60B60B6,0x0B61D438,0x00000000
COSB2:	long		0x3FFA0000,0xAAAAAAAA,0xAAAAAB5E
COSB1:	long		0xBF000000

# Frame-relative scratch names; several names share one scratch slot.
	set		INARG,FP_SCR0

	set		X,FP_SCR0
#	set		XDCARE,X+2
	set		XFRAC,X+4

	set		RPRIME,FP_SCR0
	set		SPRIME,FP_SCR1

	set		POSNEG1,L_SCR1
	set		TWOTO63,L_SCR1

	set		ENDFLAG,L_SCR2
	set		INT,L_SCR2

	set		ADJN,L_SCR3

############################################
# ssin: sine entry; sets AdjN := 0 and joins the common code at SINBGN.
	global		ssin
ssin:
	mov.l		&0,ADJN(%a6)		# SIN: SET ADJN TO 0
	bra.b		SINBGN

############################################
# scos: cosine entry; sets AdjN := 1 and falls through into SINBGN.
	global		scos
scos:
	mov.l		&1,ADJN(%a6)		# COS: SET ADJN TO 1

############################################
SINBGN:
#--CHECK IF |X| IS TOO SMALL OR LARGE

	fmov.x		(%a0),%fp0		# LOAD INPUT
	fmov.x		%fp0,X(%a6)		# save input at X

# "COMPACTIFY" X
	mov.l		(%a0),%d1		# put exp in hi word
	mov.w		4(%a0),%d1		# fetch hi(man)
	and.l		&0x7FFFFFFF,%d1		# strip sign

	cmpi.l		%d1,&0x3FD78000		# is |X| >= 2**(-40)?
	bge.b		SOK1			# yes; go check the upper bound
	bra.w		SINSM			# no; input is very small

SOK1:
	cmp.l		%d1,&0x4004BC7E		# is |X| < 15 PI?
	blt.b		SINMAIN			# yes; usual case
	bra.w		SREDUCEX		# no; input is very large

#--THIS IS THE USUAL CASE, |X| <= 15 PI.
#--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
SINMAIN:
	fmov.x		%fp0,%fp1
	fmul.d		TWOBYPI(%pc),%fp1	# X*2/PI

#--PITBL HAS 16-BYTE ENTRIES; PITBL+0x200 IS THE N = 0 ENTRY.
	lea		PITBL+0x200(%pc),%a1	# TABLE OF N*PI/2, N = -32,...,32

	fmov.l		%fp1,INT(%a6)		# CONVERT TO INTEGER

	mov.l		INT(%a6),%d1		# make a copy of N
	asl.l		&4,%d1			# N *= 16
	add.l		%d1,%a1			# tbl_addr = a1 + (N*16)

# A1 IS THE ADDRESS OF N*PIBY2
# ...WHICH IS IN TWO PIECES Y1 & Y2
	fsub.x		(%a1)+,%fp0		# X-Y1
	fsub.s		(%a1),%fp0		# fp0 = R = (X-Y1)-Y2

SINCONT:
#--continuation from REDUCEX

#--GET N+ADJN AND SEE IF SIN(R) OR COS(R) IS NEEDED
	mov.l		INT(%a6),%d1
	add.l		ADJN(%a6),%d1		# D1 = N+ADJN
	ror.l		&1,%d1			# N+ADJN WAS ODD IFF D1 IS NOW NEGATIVE
	cmp.l		%d1,&0
	blt.w		COSPOLY			# odd: use the cosine polynomial

#--LET J BE THE LEAST SIG. BIT OF D1 AS ROTATED ABOVE (I.E. BIT 1 OF
#--N+ADJN), LET SGN := (-1)**J.
#--THEN WE RETURN	SGN*SIN(R). SGN*SIN(R) IS COMPUTED BY
#--R' + R'*S*(A1 + S(A2 + S(A3 + S(A4 + ... + SA7)))), WHERE
#--R' = SGN*R, S=R*R. THIS CAN BE REWRITTEN AS
#--R' + R'*S*( [A1+T(A3+T(A5+TA7))] + [S(A2+T(A4+TA6))])
#--WHERE T=S*S.
#--NOTE THAT A3 THROUGH A7 ARE STORED IN DOUBLE PRECISION
#--WHILE A1 AND A2 ARE IN DOUBLE-EXTENDED FORMAT.
SINPOLY:
#--(fmovm register masks are bit-reversed between the -(sp) and (sp)+
#--forms, hence 0x0c on save and 0x30 on restore for the same fp2/fp3.)
	fmovm.x		&0x0c,-(%sp)		# save fp2/fp3

	fmov.x		%fp0,X(%a6)		# X IS R
	fmul.x		%fp0,%fp0		# FP0 IS S

	fmov.d		SINA7(%pc),%fp3
	fmov.d		SINA6(%pc),%fp2

	fmov.x		%fp0,%fp1
	fmul.x		%fp1,%fp1		# FP1 IS T

	ror.l		&1,%d1
	and.l		&0x80000000,%d1
# ...BIT J (SEE ABOVE) IS NOW ALONE IN THE SIGN POSITION OF D1
	eor.l		%d1,X(%a6)		# X IS NOW R'= SGN*R

	fmul.x		%fp1,%fp3		# TA7
	fmul.x		%fp1,%fp2		# TA6

	fadd.d		SINA5(%pc),%fp3		# A5+TA7
	fadd.d		SINA4(%pc),%fp2		# A4+TA6

	fmul.x		%fp1,%fp3		# T(A5+TA7)
	fmul.x		%fp1,%fp2		# T(A4+TA6)

	fadd.d		SINA3(%pc),%fp3		# A3+T(A5+TA7)
	fadd.x		SINA2(%pc),%fp2		# A2+T(A4+TA6)

	fmul.x		%fp3,%fp1		# T(A3+T(A5+TA7))

	fmul.x		%fp0,%fp2		# S(A2+T(A4+TA6))
	fadd.x		SINA1(%pc),%fp1		# A1+T(A3+T(A5+TA7))
	fmul.x		X(%a6),%fp0		# R'*S

	fadd.x		%fp2,%fp1		# [A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))]

	fmul.x		%fp1,%fp0		# SIN(R')-R'

	fmovm.x		(%sp)+,&0x30		# restore fp2/fp3

	fmov.l		%d0,%fpcr		# restore users round mode,prec
	fadd.x		X(%a6),%fp0		# last inst - possible exception set
	bra		t_inx2

#--LET J BE THE LEAST SIG. BIT OF D1 AS ROTATED AT SINCONT (I.E. BIT 1 OF
#--N+ADJN), LET SGN := (-1)**J.
#--THEN WE RETURN	SGN*COS(R). SGN*COS(R) IS COMPUTED BY
#--SGN + S'*(B1 + S(B2 + S(B3 + S(B4 + ... + SB8)))), WHERE
#--S=R*R AND S'=SGN*S. THIS CAN BE REWRITTEN AS
#--SGN + S'*([B1+T(B3+T(B5+TB7))] + [S(B2+T(B4+T(B6+TB8)))])
#--WHERE T=S*S.
#--NOTE THAT B4 THROUGH B8 ARE STORED IN DOUBLE PRECISION
#--WHILE B2 AND B3 ARE IN DOUBLE-EXTENDED FORMAT, B1 IS -1/2
#--AND IS THEREFORE STORED AS SINGLE PRECISION.
COSPOLY:
	fmovm.x		&0x0c,-(%sp)		# save fp2/fp3

	fmul.x		%fp0,%fp0		# FP0 IS S

	fmov.d		COSB8(%pc),%fp2
	fmov.d		COSB7(%pc),%fp3

	fmov.x		%fp0,%fp1
	fmul.x		%fp1,%fp1		# FP1 IS T

	fmov.x		%fp0,X(%a6)		# X IS S
	ror.l		&1,%d1
	and.l		&0x80000000,%d1
# ...BIT J (SEE ABOVE) IS NOW ALONE IN THE SIGN POSITION OF D1

	fmul.x		%fp1,%fp2		# TB8

	eor.l		%d1,X(%a6)		# X IS NOW S'= SGN*S
	and.l		&0x80000000,%d1

	fmul.x		%fp1,%fp3		# TB7

	or.l		&0x3F800000,%d1		# D1 IS SGN (+-1.0) IN SINGLE
	mov.l		%d1,POSNEG1(%a6)

	fadd.d		COSB6(%pc),%fp2		# B6+TB8
	fadd.d		COSB5(%pc),%fp3		# B5+TB7

	fmul.x		%fp1,%fp2		# T(B6+TB8)
	fmul.x		%fp1,%fp3		# T(B5+TB7)

	fadd.d		COSB4(%pc),%fp2		# B4+T(B6+TB8)
	fadd.x		COSB3(%pc),%fp3		# B3+T(B5+TB7)

	fmul.x		%fp1,%fp2		# T(B4+T(B6+TB8))
	fmul.x		%fp3,%fp1		# T(B3+T(B5+TB7))

	fadd.x		COSB2(%pc),%fp2		# B2+T(B4+T(B6+TB8))
	fadd.s		COSB1(%pc),%fp1		# B1+T(B3+T(B5+TB7))

	fmul.x		%fp2,%fp0		# S(B2+T(B4+T(B6+TB8)))

	fadd.x		%fp1,%fp0		# [B1+T(B3+...)] + [S(B2+...)]

	fmul.x		X(%a6),%fp0		# S' * (the sum above)

	fmovm.x		(%sp)+,&0x30		# restore fp2/fp3

	fmov.l		%d0,%fpcr		# restore users round mode,prec
	fadd.s		POSNEG1(%a6),%fp0	# last inst - possible exception set
	bra		t_inx2

##############################################

# SINe: Big OR Small?
#--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
#--IF |X| < 2**(-40), RETURN X OR 1.
#--NOTE(review): no branch to SINBORS is visible in this part of the file
#--and the preceding instruction is an unconditional bra, so this label
#--looks unreachable - confirm against the rest of the file.
SINBORS:
	cmp.l		%d1,&0x3FFF8000
	bgt.l		SREDUCEX

SINSM:
	mov.l		ADJN(%a6),%d1
	cmp.l		%d1,&0
	bgt.b		COSTINY			# ADJN > 0: cosine was requested

# here, the operation may underflow iff the precision is sgl or dbl.
# extended denorms are handled through another entry point.
SINTINY:
#--SIN(X) FOR TINY X: RETURN X ITSELF, MOVED UNDER THE USER'S FPCR.
#	mov.w		&0x0000,XDCARE(%a6)	# JUST IN CASE

	fmov.l		%d0,%fpcr		# restore users round mode,prec
	mov.b		&FMOV_OP,%d1		# last inst is MOVE
	fmov.x		X(%a6),%fp0		# last inst - possible exception set
	bra		t_catch

COSTINY:
#--COS(X) FOR TINY X: RETURN 1.0 PLUS A TINY NEGATIVE SINGLE
#--(0x80800000 = -2**(-126)), ADDED UNDER THE USER'S FPCR.
	fmov.s		&0x3F800000,%fp0	# fp0 = 1.0
	fmov.l		%d0,%fpcr		# restore users round mode,prec
	fadd.s		&0x80800000,%fp0	# last inst - possible exception set
	bra		t_pinx2

################################################
	global		ssind
#--SIN(X) = X FOR DENORMALIZED X
ssind:
	bra		t_extdnrm

############################################
	global		scosd
#--COS(X) = 1 FOR DENORMALIZED X
scosd:
	fmov.s		&0x3F800000,%fp0	# fp0 = 1.0
	bra		t_pinx2

##################################################

#--ssincos: COMPUTES BOTH SIN(X) AND COS(X). THE COSINE IS HANDED TO
#--sto_cos IN FP1; THE SINE IS LEFT IN FP0.
	global		ssincos
ssincos:
#--SET ADJN TO 4
	mov.l		&4,ADJN(%a6)

	fmov.x		(%a0),%fp0		# LOAD INPUT
	fmov.x		%fp0,X(%a6)		# save input at X

	mov.l		(%a0),%d1
	mov.w		4(%a0),%d1
	and.l		&0x7FFFFFFF,%d1		# COMPACTIFY X

	cmp.l		%d1,&0x3FD78000		# |X| >= 2**(-40)?
	bge.b		SCOK1			# yes
	bra.w		SCSM			# no; input is very small

SCOK1:
	cmp.l		%d1,&0x4004BC7E		# |X| < 15 PI?
	blt.b		SCMAIN			# yes
	bra.w		SREDUCEX		# no; input is very large


#--THIS IS THE USUAL CASE, |X| <= 15 PI.
#--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
SCMAIN:
	fmov.x		%fp0,%fp1

	fmul.d		TWOBYPI(%pc),%fp1	# X*2/PI

	lea		PITBL+0x200(%pc),%a1	# TABLE OF N*PI/2, N = -32,...,32

	fmov.l		%fp1,INT(%a6)		# CONVERT TO INTEGER

	mov.l		INT(%a6),%d1
	asl.l		&4,%d1			# N *= 16 (table entry size)
	add.l		%d1,%a1			# ADDRESS OF N*PIBY2, IN Y1, Y2

	fsub.x		(%a1)+,%fp0		# X-Y1
	fsub.s		(%a1),%fp0		# FP0 IS R = (X-Y1)-Y2

SCCONT:
#--continuation point from REDUCEX

	mov.l		INT(%a6),%d1
	ror.l		&1,%d1
	cmp.l		%d1,&0			# D1 < 0 IFF N IS ODD
	bge.w		NEVEN

SNODD:
#--N IS ODD: THE SINE POLYNOMIAL IN R GIVES COS(X) AND THE COSINE
#--POLYNOMIAL GIVES SIN(X), EACH WITH A SIGN DERIVED FROM N BELOW.
#--REGISTERS SAVED SO FAR: D0, A0, FP2.
#--NOTE(review): the line above is inherited; within this routine the
#--only save visible before this point is none, and fp2 is pushed next.
	fmovm.x		&0x04,-(%sp)		# save fp2

	fmov.x		%fp0,RPRIME(%a6)
	fmul.x		%fp0,%fp0		# FP0 IS S = R*R
	fmov.d		SINA7(%pc),%fp1		# A7
	fmov.d		COSB8(%pc),%fp2		# B8
	fmul.x		%fp0,%fp1		# SA7
	fmul.x		%fp0,%fp2		# SB8

#--D1 HOLDS N ROTATED RIGHT BY ONE (SIGN = BIT 0 OF N, BIT 0 = BIT 1 OF N).
#--D2 := (BIT 1 OF N) XOR (BIT 0 OF N) IN THE SIGN POSITION.
	mov.l		%d2,-(%sp)
	mov.l		%d1,%d2
	ror.l		&1,%d2
	and.l		&0x80000000,%d2
	eor.l		%d1,%d2
	and.l		&0x80000000,%d2

	fadd.d		SINA6(%pc),%fp1		# A6+SA7
	fadd.d		COSB7(%pc),%fp2		# B7+SB8

	fmul.x		%fp0,%fp1		# S(A6+SA7)
	eor.l		%d2,RPRIME(%a6)		# R' = R WITH SIGN FLIPPED PER D2
	mov.l		(%sp)+,%d2
	fmul.x		%fp0,%fp2		# S(B7+SB8)
	ror.l		&1,%d1
	and.l		&0x80000000,%d1		# D1 = BIT 1 OF N IN SIGN POSITION
	mov.l		&0x3F800000,POSNEG1(%a6)
	eor.l		%d1,POSNEG1(%a6)	# POSNEG1 = +-1.0 IN SINGLE

	fadd.d		SINA5(%pc),%fp1		# A5+S(A6+SA7)
	fadd.d		COSB6(%pc),%fp2		# B6+S(B7+SB8)

	fmul.x		%fp0,%fp1		# S(A5+S(A6+SA7))
	fmul.x		%fp0,%fp2		# S(B6+S(B7+SB8))
	fmov.x		%fp0,SPRIME(%a6)

	fadd.d		SINA4(%pc),%fp1		# A4+S(A5+S(A6+SA7))
	eor.l		%d1,SPRIME(%a6)		# S' = S WITH SIGN OF POSNEG1
	fadd.d		COSB5(%pc),%fp2		# B5+S(B6+S(B7+SB8))

	fmul.x		%fp0,%fp1		# S(A4+...)
	fmul.x		%fp0,%fp2		# S(B5+...)

	fadd.d		SINA3(%pc),%fp1		# A3+S(A4+...)
	fadd.d		COSB4(%pc),%fp2		# B4+S(B5+...)

	fmul.x		%fp0,%fp1		# S(A3+...)
	fmul.x		%fp0,%fp2		# S(B4+...)

	fadd.x		SINA2(%pc),%fp1		# A2+S(A3+...)
	fadd.x		COSB3(%pc),%fp2		# B3+S(B4+...)

	fmul.x		%fp0,%fp1		# S(A2+...)
	fmul.x		%fp0,%fp2		# S(B3+...)

	fadd.x		SINA1(%pc),%fp1		# A1+S(A2+...)
	fadd.x		COSB2(%pc),%fp2		# B2+S(B3+...)

	fmul.x		%fp0,%fp1		# S(A1+...)
	fmul.x		%fp2,%fp0		# S(B2+...)

	fmul.x		RPRIME(%a6),%fp1	# R'S(A1+...)
	fadd.s		COSB1(%pc),%fp0		# B1+S(B2...)
	fmul.x		SPRIME(%a6),%fp0	# S'(B1+S(B2+...))

	fmovm.x		(%sp)+,&0x20		# restore fp2

	fmov.l		%d0,%fpcr		# restore users round mode,prec
	fadd.x		RPRIME(%a6),%fp1	# COS(X)
	bsr		sto_cos			# store cosine result
	fadd.s		POSNEG1(%a6),%fp0	# SIN(X)
	bra		t_inx2

NEVEN:
#--REGISTERS SAVED SO FAR: FP2.
#--N IS EVEN (ssincos): ON ENTRY FP0 = R AND D1 = N ROTATED RIGHT BY ONE.
#--THE COSINE POLYNOMIAL IS BUILT IN FP1, THE SINE POLYNOMIAL IN FP2/FP0.
	fmovm.x		&0x04,-(%sp)		# save fp2

	fmov.x		%fp0,RPRIME(%a6)
	fmul.x		%fp0,%fp0		# FP0 IS S = R*R

	fmov.d		COSB8(%pc),%fp1		# B8
	fmov.d		SINA7(%pc),%fp2		# A7

	fmul.x		%fp0,%fp1		# SB8
	fmov.x		%fp0,SPRIME(%a6)
	fmul.x		%fp0,%fp2		# SA7

	ror.l		&1,%d1
	and.l		&0x80000000,%d1		# D1 = BIT 1 OF N IN SIGN POSITION

	fadd.d		COSB7(%pc),%fp1		# B7+SB8
	fadd.d		SINA6(%pc),%fp2		# A6+SA7

	eor.l		%d1,RPRIME(%a6)		# R' = SGN*R
	eor.l		%d1,SPRIME(%a6)		# S' = SGN*S

	fmul.x		%fp0,%fp1		# S(B7+SB8)

	or.l		&0x3F800000,%d1		# D1 IS SGN (+-1.0) IN SINGLE
	mov.l		%d1,POSNEG1(%a6)

	fmul.x		%fp0,%fp2		# S(A6+SA7)

	fadd.d		COSB6(%pc),%fp1		# B6+S(B7+SB8)
	fadd.d		SINA5(%pc),%fp2		# A5+S(A6+SA7)

	fmul.x		%fp0,%fp1		# S(B6+S(B7+SB8))
	fmul.x		%fp0,%fp2		# S(A5+S(A6+SA7))

	fadd.d		COSB5(%pc),%fp1		# B5+S(B6+S(B7+SB8))
	fadd.d		SINA4(%pc),%fp2		# A4+S(A5+S(A6+SA7))

	fmul.x		%fp0,%fp1		# S(B5+...)
	fmul.x		%fp0,%fp2		# S(A4+...)

	fadd.d		COSB4(%pc),%fp1		# B4+S(B5+...)
	fadd.d		SINA3(%pc),%fp2		# A3+S(A4+...)

	fmul.x		%fp0,%fp1		# S(B4+...)
	fmul.x		%fp0,%fp2		# S(A3+...)

	fadd.x		COSB3(%pc),%fp1		# B3+S(B4+...)
	fadd.x		SINA2(%pc),%fp2		# A2+S(A3+...)

	fmul.x		%fp0,%fp1		# S(B3+...)
	fmul.x		%fp0,%fp2		# S(A2+...)

	fadd.x		COSB2(%pc),%fp1		# B2+S(B3+...)
	fadd.x		SINA1(%pc),%fp2		# A1+S(A2+...)

	fmul.x		%fp0,%fp1		# S(B2+...)
	fmul.x		%fp2,%fp0		# S(A1+...)


	fadd.s		COSB1(%pc),%fp1		# B1+S(B2...)
	fmul.x		RPRIME(%a6),%fp0	# R'S(A1+...)
	fmul.x		SPRIME(%a6),%fp1	# S'(B1+S(B2+...))

	fmovm.x		(%sp)+,&0x20		# restore fp2

	fmov.l		%d0,%fpcr		# restore users round mode,prec
	fadd.s		POSNEG1(%a6),%fp1	# COS(X)
	bsr		sto_cos			# store cosine result
	fadd.x		RPRIME(%a6),%fp0	# SIN(X)
	bra		t_inx2

################################################

#--NOTE(review): no branch to SCBORS is visible in this part of the file
#--and the preceding instruction is an unconditional bra, so this label
#--looks unreachable - confirm against the rest of the file.
SCBORS:
	cmp.l		%d1,&0x3FFF8000
	bgt.w		SREDUCEX

################################################

SCSM:
#--TINY X: COS(X) IS 1.0 MINUS A TINY SINGLE (0x00800000 = 2**(-126)),
#--SIN(X) IS X ITSELF.
#	mov.w		&0x0000,XDCARE(%a6)
	fmov.s		&0x3F800000,%fp1	# fp1 = 1.0

	fmov.l		%d0,%fpcr		# restore users round mode,prec
	fsub.s		&0x00800000,%fp1
	bsr		sto_cos			# store cosine result
	fmov.l		%fpcr,%d0		# d0 must have fpcr,too
	mov.b		&FMOV_OP,%d1		# last inst is MOVE
	fmov.x		X(%a6),%fp0
	bra		t_catch

##############################################

	global		ssincosd
#--SIN AND COS OF X FOR DENORMALIZED X
ssincosd:
	mov.l		%d0,-(%sp)		# save d0
	fmov.s		&0x3F800000,%fp1	# cos = 1.0
	bsr		sto_cos			# store cosine result
	mov.l		(%sp)+,%d0		# restore d0
	bra		t_extdnrm

############################################

#--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
#--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
#--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
SREDUCEX:
	fmovm.x		&0x3c,-(%sp)		# save {fp2-fp5}
	mov.l		%d2,-(%sp)		# save d2
	fmov.s		&0x00000000,%fp1	# fp1 = 0

#--If compact form of abs(arg) in d1=$7ffeffff, argument is so large that
#--there is a danger of unwanted overflow in first LOOP iteration.  In this
#--case, reduce argument by one remainder step to make subsequent reduction
#--safe.
	cmp.l		%d1,&0x7ffeffff		# is arg dangerously large?
#--(condition codes here come from the compare of d1 with 0x7ffeffff)
	bne.b		SLOOP			# no

# yes; create 2**16383*PI/2
	mov.w		&0x7ffe,FP_SCR0_EX(%a6)
	mov.l		&0xc90fdaa2,FP_SCR0_HI(%a6)
	clr.l		FP_SCR0_LO(%a6)

# create low half of 2**16383*PI/2 at FP_SCR1
	mov.w		&0x7fdc,FP_SCR1_EX(%a6)
	mov.l		&0x85a308d3,FP_SCR1_HI(%a6)
	clr.l		FP_SCR1_LO(%a6)

	ftest.x		%fp0			# test sign of argument
	fblt.w		sred_neg

# positive arg: set the sign bit of both constants so the adds below subtract
	or.b		&0x80,FP_SCR0_EX(%a6)	# positive arg
	or.b		&0x80,FP_SCR1_EX(%a6)
sred_neg:
	fadd.x		FP_SCR0(%a6),%fp0	# high part of reduction is exact
	fmov.x		%fp0,%fp1		# save high result in fp1
	fadd.x		FP_SCR1(%a6),%fp0	# low part of reduction
	fsub.x		%fp0,%fp1		# determine low component of result
	fadd.x		FP_SCR1(%a6),%fp1	# fp0/fp1 are reduced argument.

#--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
#--integer quotient will be stored in N
#--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1)
SLOOP:
	fmov.x		%fp0,INARG(%a6)		# +-2**K * F, 1 <= F < 2
	mov.w		INARG(%a6),%d1		# d1.w = sign/exponent word of R
	mov.l		%d1,%a1			# save a copy of D1
	and.l		&0x00007FFF,%d1
	sub.l		&0x00003FFF,%d1		# d1 = K
	cmp.l		%d1,&28
	ble.b		SLASTLOOP
SCONTLOOP:
	sub.l		&27,%d1			# d1 = L := K-27
	mov.b		&0,ENDFLAG(%a6)
	bra.b		SWORK
SLASTLOOP:
	clr.l		%d1			# d1 = L := 0
	mov.b		&1,ENDFLAG(%a6)

SWORK:
#--FIND THE REMAINDER OF (R,r) W.R.T.	2**L * (PI/2). L IS SO CHOSEN
#--THAT	INT( X * (2/PI) / 2**(L) ) < 2**29.

#--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
#--2**L * (PIby2_1), 2**L * (PIby2_2)

	mov.l		&0x00003FFE,%d2		# BIASED EXP OF 2/PI
	sub.l		%d1,%d2			# BIASED EXP OF 2**(-L)*(2/PI)

	mov.l		&0xA2F9836E,FP_SCR0_HI(%a6)
	mov.l		&0x4E44152A,FP_SCR0_LO(%a6)
	mov.w		%d2,FP_SCR0_EX(%a6)	# FP_SCR0 = 2**(-L)*(2/PI)

	fmov.x		%fp0,%fp2
	fmul.x		FP_SCR0(%a6),%fp2	# fp2 = X * 2**(-L)*(2/PI)

#--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
#--FLOATING POINT FORMAT, THE TWO FMOVE'S	FMOVE.L FP <--> N
#--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
#--(SIGN(INARG)*2**63	+	FP2) - SIGN(INARG)*2**63 WILL GIVE
#--US THE DESIRED VALUE IN FLOATING POINT.
	mov.l		%a1,%d2			# d2.w = saved sign/exponent word
	swap		%d2			# move it to the high word
	and.l		&0x80000000,%d2
	or.l		&0x5F000000,%d2		# d2 = SIGN(INARG)*2**63 IN SGL
	mov.l		%d2,TWOTO63(%a6)
	fadd.s		TWOTO63(%a6),%fp2	# THE FRACTIONAL PART OF FP2 IS ROUNDED
	fsub.s		TWOTO63(%a6),%fp2	# fp2 = N
#	fint.x		%fp2

#--CREATING 2**(L)*Piby2_1 and 2**(L)*Piby2_2
	mov.l		%d1,%d2			# d2 = L

	add.l		&0x00003FFF,%d2		# BIASED EXP OF 2**L * (PI/2)
	mov.w		%d2,FP_SCR0_EX(%a6)
	mov.l		&0xC90FDAA2,FP_SCR0_HI(%a6)
	clr.l		FP_SCR0_LO(%a6)		# FP_SCR0 = 2**(L) * Piby2_1

	add.l		&0x00003FDD,%d1
	mov.w		%d1,FP_SCR1_EX(%a6)
	mov.l		&0x85A308D3,FP_SCR1_HI(%a6)
	clr.l		FP_SCR1_LO(%a6)		# FP_SCR1 = 2**(L) * Piby2_2

	mov.b		ENDFLAG(%a6),%d1	# d1.b = 1 on the last pass, else 0

#--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
#--P2 = 2**(L) * Piby2_2
	fmov.x		%fp2,%fp4		# fp4 = N
	fmul.x		FP_SCR0(%a6),%fp4	# fp4 = W = N*P1
	fmov.x		%fp2,%fp5		# fp5 = N
	fmul.x		FP_SCR1(%a6),%fp5	# fp5 = w = N*P2
	fmov.x		%fp4,%fp3		# fp3 = W = N*P1

#--we want P+p = W+w  but  |p| <= half ulp of P
#--Then, we need to compute  A := R-P   and  a := r-p
	fadd.x		%fp5,%fp3		# fp3 = P
	fsub.x		%fp3,%fp4		# fp4 = W-P

	fsub.x		%fp3,%fp0		# fp0 = A := R - P
	fadd.x		%fp5,%fp4		# fp4 = p = (W-P)+w

	fmov.x		%fp0,%fp3		# fp3 = A
	fsub.x		%fp4,%fp1		# fp1 = a := r - p

#--Now we need to normalize (A,a) to  "new (R,r)" where R+r = A+a but
#--|r| <= half ulp of R.
#--TAIL OF ONE REDUCTION PASS: FP0 = A, FP1 = a, FP3 = A, D1.B = ENDFLAG.
	fadd.x		%fp1,%fp0		# fp0 = R := A+a
#--No need to calculate r if this is the last loop
	cmp.b		%d1,&0
	bgt.w		SRESTORE

#--Need to calculate r
	fsub.x		%fp0,%fp3		# fp3 = A-R
	fadd.x		%fp3,%fp1		# fp1 = r := (A-R)+a
	bra.w		SLOOP

SRESTORE:
#--REDUCTION DONE: STORE N, UNDO THE SAVES MADE AT SREDUCEX, AND RESUME
#--THE CALLER'S PATH. ADJN IS 4 FOR ssincos AND 0/1 FOR ssin/scos.
	fmov.l		%fp2,INT(%a6)
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		(%sp)+,&0x3c		# restore {fp2-fp5}

	mov.l		ADJN(%a6),%d1
	cmp.l		%d1,&4

	blt.w		SINCONT			# ADJN < 4: sin or cos only
	bra.w		SCCONT			# otherwise: sincos

#########################################################################
# stan():  computes the tangent of a normalized input			#
# stand(): computes the tangent of a denormalized input			#
#									#
# INPUT *************************************************************** #
#	a0 = pointer to extended precision input			#
#	d0 = round precision,mode					#
#									#
# OUTPUT ************************************************************** #
#	fp0 = tan(X)							#
#									#
# ACCURACY and MONOTONICITY ******************************************* #
#	The returned result is within 3 ulp in 64 significant bit, i.e. #
#	within 0.5001 ulp to 53 bits if the result is subsequently	#
#	rounded to double precision. The result is provably monotonic	#
#	in double precision.						#
#									#
# ALGORITHM *********************************************************** #
#									#
#	1. If |X| >= 15Pi or |X| < 2**(-40), go to 6.			#
#									#
#	2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let	#
#	   k = N mod 2, so in particular, k = 0 or 1.			#
#									#
#	3. If k is odd, go to 5.					#
#									#
#	4. (k is even) Tan(X) = tan(r) and tan(r) is approximated by a	#
#	   rational function U/V where					#
#	   U = r + r*s*(P1 + s*(P2 + s*P3)), and			#
#	   V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r.		#
#	   Exit.							#
#									#
#	5. (k is odd) Tan(X) = -cot(r). Since tan(r) is approximated by #
#	   a rational function U/V where				#
#	   U = r + r*s*(P1 + s*(P2 + s*P3)), and			#
#	   V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r,		#
#	   -Cot(r) = -V/U. Exit.					#
#									#
#	6. If |X| > 1, go to 8.						#
#									#
#	7. (|X|<2**(-40)) Tan(X) = X. Exit.				#
#									#
#	8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back	#
#	   to 2.							#
#									#
#########################################################################

#--COEFFICIENTS OF THE RATIONAL APPROXIMATION U/V. THE TANGENT CODE READS
#--TANQ4, TANP3 AND TANQ3 AS DOUBLE (.d) AND THE REST AS EXTENDED (.x).
TANQ4:
	long		0x3EA0B759,0xF50F8688
TANP3:
	long		0xBEF2BAA5,0xA8924F04

TANQ3:
	long		0xBF346F59,0xB39BA65F,0x00000000,0x00000000

TANP2:
	long		0x3FF60000,0xE073D3FC,0x199C4A00,0x00000000

TANQ2:
	long		0x3FF90000,0xD23CD684,0x15D95FA1,0x00000000

TANP1:
	long		0xBFFC0000,0x8895A6C5,0xFB423BCA,0x00000000

TANQ1:
	long		0xBFFD0000,0xEEF57E0D,0xA84BC8CE,0x00000000

INVTWOPI:
	long		0x3FFC0000,0xA2F9836E,0x4E44152A,0x00000000

TWOPI1:
	long		0x40010000,0xC90FDAA2,0x00000000,0x00000000
TWOPI2:
	long		0x3FDF0000,0x85A308D4,0x00000000,0x00000000

#--N*PI/2, -32 <= N <= 32, IN A LEADING TERM IN EXT. AND TRAILING
#--TERM IN SGL. NOTE THAT PI IS 64-BIT LONG, THUS N*PI/2 IS AT
#--MOST 69 BITS LONG.
#	global		PITBL
#--EACH ENTRY IS 16 BYTES: THREE LONGS OF EXTENDED-PRECISION LEADING TERM
#--FOLLOWED BY ONE LONG OF SINGLE-PRECISION TRAILING TERM. ENTRIES RUN
#--FROM N = -32 TO N = 32, SO THE N = 0 ENTRY SITS AT PITBL+0x200.
PITBL:
	long		0xC0040000,0xC90FDAA2,0x2168C235,0x21800000
	long		0xC0040000,0xC2C75BCD,0x105D7C23,0xA0D00000
	long		0xC0040000,0xBC7EDCF7,0xFF523611,0xA1E80000
	long		0xC0040000,0xB6365E22,0xEE46F000,0x21480000
	long		0xC0040000,0xAFEDDF4D,0xDD3BA9EE,0xA1200000
	long		0xC0040000,0xA9A56078,0xCC3063DD,0x21FC0000
	long		0xC0040000,0xA35CE1A3,0xBB251DCB,0x21100000
	long		0xC0040000,0x9D1462CE,0xAA19D7B9,0xA1580000
	long		0xC0040000,0x96CBE3F9,0x990E91A8,0x21E00000
	long		0xC0040000,0x90836524,0x88034B96,0x20B00000
	long		0xC0040000,0x8A3AE64F,0x76F80584,0xA1880000
	long		0xC0040000,0x83F2677A,0x65ECBF73,0x21C40000
	long		0xC0030000,0xFB53D14A,0xA9C2F2C2,0x20000000
	long		0xC0030000,0xEEC2D3A0,0x87AC669F,0x21380000
	long		0xC0030000,0xE231D5F6,0x6595DA7B,0xA1300000
	long		0xC0030000,0xD5A0D84C,0x437F4E58,0x9FC00000
	long		0xC0030000,0xC90FDAA2,0x2168C235,0x21000000
	long		0xC0030000,0xBC7EDCF7,0xFF523611,0xA1680000
	long		0xC0030000,0xAFEDDF4D,0xDD3BA9EE,0xA0A00000
	long		0xC0030000,0xA35CE1A3,0xBB251DCB,0x20900000
	long		0xC0030000,0x96CBE3F9,0x990E91A8,0x21600000
	long		0xC0030000,0x8A3AE64F,0x76F80584,0xA1080000
	long		0xC0020000,0xFB53D14A,0xA9C2F2C2,0x1F800000
	long		0xC0020000,0xE231D5F6,0x6595DA7B,0xA0B00000
	long		0xC0020000,0xC90FDAA2,0x2168C235,0x20800000
	long		0xC0020000,0xAFEDDF4D,0xDD3BA9EE,0xA0200000
	long		0xC0020000,0x96CBE3F9,0x990E91A8,0x20E00000
	long		0xC0010000,0xFB53D14A,0xA9C2F2C2,0x1F000000
	long		0xC0010000,0xC90FDAA2,0x2168C235,0x20000000
	long		0xC0010000,0x96CBE3F9,0x990E91A8,0x20600000
	long		0xC0000000,0xC90FDAA2,0x2168C235,0x1F800000
	long		0xBFFF0000,0xC90FDAA2,0x2168C235,0x1F000000
	long		0x00000000,0x00000000,0x00000000,0x00000000
	long		0x3FFF0000,0xC90FDAA2,0x2168C235,0x9F000000
	long		0x40000000,0xC90FDAA2,0x2168C235,0x9F800000
	long		0x40010000,0x96CBE3F9,0x990E91A8,0xA0600000
	long		0x40010000,0xC90FDAA2,0x2168C235,0xA0000000
	long		0x40010000,0xFB53D14A,0xA9C2F2C2,0x9F000000
	long		0x40020000,0x96CBE3F9,0x990E91A8,0xA0E00000
	long		0x40020000,0xAFEDDF4D,0xDD3BA9EE,0x20200000
	long		0x40020000,0xC90FDAA2,0x2168C235,0xA0800000
	long		0x40020000,0xE231D5F6,0x6595DA7B,0x20B00000
	long		0x40020000,0xFB53D14A,0xA9C2F2C2,0x9F800000
	long		0x40030000,0x8A3AE64F,0x76F80584,0x21080000
	long		0x40030000,0x96CBE3F9,0x990E91A8,0xA1600000
	long		0x40030000,0xA35CE1A3,0xBB251DCB,0xA0900000
	long		0x40030000,0xAFEDDF4D,0xDD3BA9EE,0x20A00000
	long		0x40030000,0xBC7EDCF7,0xFF523611,0x21680000
	long		0x40030000,0xC90FDAA2,0x2168C235,0xA1000000
	long		0x40030000,0xD5A0D84C,0x437F4E58,0x1FC00000
	long		0x40030000,0xE231D5F6,0x6595DA7B,0x21300000
	long		0x40030000,0xEEC2D3A0,0x87AC669F,0xA1380000
	long		0x40030000,0xFB53D14A,0xA9C2F2C2,0xA0000000
	long		0x40040000,0x83F2677A,0x65ECBF73,0xA1C40000
	long		0x40040000,0x8A3AE64F,0x76F80584,0x21880000
	long		0x40040000,0x90836524,0x88034B96,0xA0B00000
	long		0x40040000,0x96CBE3F9,0x990E91A8,0xA1E00000
	long		0x40040000,0x9D1462CE,0xAA19D7B9,0x21580000
	long		0x40040000,0xA35CE1A3,0xBB251DCB,0xA1100000
	long		0x40040000,0xA9A56078,0xCC3063DD,0xA1FC0000
	long		0x40040000,0xAFEDDF4D,0xDD3BA9EE,0x21200000
	long		0x40040000,0xB6365E22,0xEE46F000,0xA1480000
	long		0x40040000,0xBC7EDCF7,0xFF523611,0x21E80000
	long		0x40040000,0xC2C75BCD,0x105D7C23,0x20D00000
	long		0x40040000,0xC90FDAA2,0x2168C235,0xA1800000

#--SCRATCH-AREA ALIASES FOR THE TANGENT CODE. TWOTO63 AND INT SHARE L_SCR1
#--HERE.
	set		INARG,FP_SCR0

	set		TWOTO63,L_SCR1
	set		INT,L_SCR1
	set		ENDFLAG,L_SCR2

#--stan: TANGENT ENTRY POINT. LOADS X, BUILDS THE COMPACT FORM OF |X| IN
#--D1 AND PICKS THE TINY, TABLE-DRIVEN OR GENERAL-REDUCTION PATH.
	global		stan
stan:
	fmov.x		(%a0),%fp0		# LOAD INPUT

	mov.l		(%a0),%d1		# exponent word into the high word
	mov.w		4(%a0),%d1		# top 16 mantissa bits into the low word
	and.l		&0x7FFFFFFF,%d1		# strip sign

	cmp.l		%d1,&0x3FD78000		# |X| >= 2**(-40)?
	bge.b		TANOK1			# yes
	bra.w		TANSM			# no; input is very small
TANOK1:
	cmp.l		%d1,&0x4004BC7E		# |X| < 15 PI?
#--(condition codes here come from the compare of d1 with 0x4004BC7E)
	blt.b		TANMAIN			# yes; |X| < 15 PI
	bra.w		REDUCEX			# no; input is very large

TANMAIN:
#--THIS IS THE USUAL CASE, |X| <= 15 PI.
#--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
	fmov.x		%fp0,%fp1
	fmul.d		TWOBYPI(%pc),%fp1	# X*2/PI

	lea.l		PITBL+0x200(%pc),%a1	# TABLE OF N*PI/2, N = -32,...,32

	fmov.l		%fp1,%d1		# CONVERT TO INTEGER

	asl.l		&4,%d1			# N *= 16 (table entry size)
	add.l		%d1,%a1			# ADDRESS N*PIBY2 IN Y1, Y2

	fsub.x		(%a1)+,%fp0		# X-Y1

	fsub.s		(%a1),%fp0		# FP0 IS R = (X-Y1)-Y2

#--D1 IS N*16, SO BIT 4 IS BIT 0 OF N; ROTATE IT INTO THE SIGN POSITION.
	ror.l		&5,%d1
	and.l		&0x80000000,%d1		# N WAS ODD IFF D1 < 0

TANCONT:
#--ENTERED WITH FP0 = R AND THE SIGN OF D1 SET IFF N IS ODD.
	fmovm.x		&0x0c,-(%sp)		# save fp2,fp3

	cmp.l		%d1,&0
	blt.w		NODD

#--N EVEN: TAN(X) = U/V
	fmov.x		%fp0,%fp1
	fmul.x		%fp1,%fp1		# S = R*R

	fmov.d		TANQ4(%pc),%fp3
	fmov.d		TANP3(%pc),%fp2

	fmul.x		%fp1,%fp3		# SQ4
	fmul.x		%fp1,%fp2		# SP3

	fadd.d		TANQ3(%pc),%fp3		# Q3+SQ4
	fadd.x		TANP2(%pc),%fp2		# P2+SP3

	fmul.x		%fp1,%fp3		# S(Q3+SQ4)
	fmul.x		%fp1,%fp2		# S(P2+SP3)

	fadd.x		TANQ2(%pc),%fp3		# Q2+S(Q3+SQ4)
	fadd.x		TANP1(%pc),%fp2		# P1+S(P2+SP3)

	fmul.x		%fp1,%fp3		# S(Q2+S(Q3+SQ4))
	fmul.x		%fp1,%fp2		# S(P1+S(P2+SP3))

	fadd.x		TANQ1(%pc),%fp3		# Q1+S(Q2+S(Q3+SQ4))
	fmul.x		%fp0,%fp2		# RS(P1+S(P2+SP3))

	fmul.x		%fp3,%fp1		# S(Q1+S(Q2+S(Q3+SQ4)))

	fadd.x		%fp2,%fp0		# R+RS(P1+S(P2+SP3))

	fadd.s		&0x3F800000,%fp1	# 1+S(Q1+...)

	fmovm.x		(%sp)+,&0x30		# restore fp2,fp3

	fmov.l		%d0,%fpcr		# restore users round mode,prec
	fdiv.x		%fp1,%fp0		# last inst - possible exception set
	bra		t_inx2

NODD:
#--N ODD: TAN(X) = -V/U. U IS BUILT IN FP1 AND V IN FP0.
	fmov.x		%fp0,%fp1
	fmul.x		%fp0,%fp0		# S = R*R

	fmov.d		TANQ4(%pc),%fp3
	fmov.d		TANP3(%pc),%fp2

	fmul.x		%fp0,%fp3		# SQ4
	fmul.x		%fp0,%fp2		# SP3

	fadd.d		TANQ3(%pc),%fp3		# Q3+SQ4
	fadd.x		TANP2(%pc),%fp2		# P2+SP3

	fmul.x		%fp0,%fp3		# S(Q3+SQ4)
	fmul.x		%fp0,%fp2		# S(P2+SP3)

	fadd.x		TANQ2(%pc),%fp3		# Q2+S(Q3+SQ4)
	fadd.x		TANP1(%pc),%fp2		# P1+S(P2+SP3)

	fmul.x		%fp0,%fp3		# S(Q2+S(Q3+SQ4))
	fmul.x		%fp0,%fp2		# S(P1+S(P2+SP3))

	fadd.x		TANQ1(%pc),%fp3		# Q1+S(Q2+S(Q3+SQ4))
	fmul.x		%fp1,%fp2		# RS(P1+S(P2+SP3))

	fmul.x		%fp3,%fp0		# S(Q1+S(Q2+S(Q3+SQ4)))

	fadd.x		%fp2,%fp1		# R+RS(P1+S(P2+SP3))
	fadd.s		&0x3F800000,%fp0	# 1+S(Q1+...)

	fmovm.x		(%sp)+,&0x30		# restore fp2,fp3

	fmov.x		%fp1,-(%sp)		# push U
	eor.l		&0x80000000,(%sp)	# flip its sign on the stack: -U

	fmov.l		%d0,%fpcr		# restore users round mode,prec
	fdiv.x		(%sp)+,%fp0		# last inst - possible exception set
	bra		t_inx2

TANBORS:
#--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
#--IF |X| < 2**(-40), RETURN X OR 1.
#--NOTE(review): no branch to TANBORS is visible in this part of the file
#--and the preceding instruction is an unconditional bra, so this label
#--looks unreachable - confirm against the rest of the file.
	cmp.l		%d1,&0x3FFF8000
	bgt.b		REDUCEX

TANSM:
#--TAN(X) FOR TINY X: RETURN X ITSELF, MOVED UNDER THE USER'S FPCR.
	fmov.x		%fp0,-(%sp)
	fmov.l		%d0,%fpcr		# restore users round mode,prec
	mov.b		&FMOV_OP,%d1		# last inst is MOVE
	fmov.x		(%sp)+,%fp0		# last inst - possible exception set
	bra		t_catch

	global		stand
#--TAN(X) = X FOR DENORMALIZED X
stand:
	bra		t_extdnrm

#--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
#--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
#--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
REDUCEX:
#--GENERAL ARGUMENT REDUCTION FOR THE TANGENT CODE. ON ENTRY FP0 = X AND
#--D1 = COMPACT FORM OF |X|. EXITS THROUGH RESTORE TO TANCONT.
	fmovm.x		&0x3c,-(%sp)		# save {fp2-fp5}
	mov.l		%d2,-(%sp)		# save d2
	fmov.s		&0x00000000,%fp1	# fp1 = 0

#--If compact form of abs(arg) in d1=$7ffeffff, argument is so large that
#--there is a danger of unwanted overflow in first LOOP iteration.  In this
#--case, reduce argument by one remainder step to make subsequent reduction
#--safe.
	cmp.l		%d1,&0x7ffeffff		# is arg dangerously large?
	bne.b		LOOP			# no

# yes; create 2**16383*PI/2
	mov.w		&0x7ffe,FP_SCR0_EX(%a6)
	mov.l		&0xc90fdaa2,FP_SCR0_HI(%a6)
	clr.l		FP_SCR0_LO(%a6)

# create low half of 2**16383*PI/2 at FP_SCR1
	mov.w		&0x7fdc,FP_SCR1_EX(%a6)
	mov.l		&0x85a308d3,FP_SCR1_HI(%a6)
	clr.l		FP_SCR1_LO(%a6)

	ftest.x		%fp0			# test sign of argument
	fblt.w		red_neg

# positive arg: set the sign bit of both constants so the adds below subtract
	or.b		&0x80,FP_SCR0_EX(%a6)	# positive arg
	or.b		&0x80,FP_SCR1_EX(%a6)
red_neg:
	fadd.x		FP_SCR0(%a6),%fp0	# high part of reduction is exact
	fmov.x		%fp0,%fp1		# save high result in fp1
	fadd.x		FP_SCR1(%a6),%fp0	# low part of reduction
	fsub.x		%fp0,%fp1		# determine low component of result
	fadd.x		FP_SCR1(%a6),%fp1	# fp0/fp1 are reduced argument.

#--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
#--integer quotient will be stored in N
#--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1)
LOOP:
	fmov.x		%fp0,INARG(%a6)		# +-2**K * F, 1 <= F < 2
	mov.w		INARG(%a6),%d1		# d1.w = sign/exponent word of R
	mov.l		%d1,%a1			# save a copy of D1
	and.l		&0x00007FFF,%d1
	sub.l		&0x00003FFF,%d1		# d1 = K
	cmp.l		%d1,&28
	ble.b		LASTLOOP
CONTLOOP:
	sub.l		&27,%d1			# d1 = L := K-27
	mov.b		&0,ENDFLAG(%a6)
	bra.b		WORK
LASTLOOP:
	clr.l		%d1			# d1 = L := 0
	mov.b		&1,ENDFLAG(%a6)

WORK:
#--FIND THE REMAINDER OF (R,r) W.R.T.	2**L * (PI/2). L IS SO CHOSEN
#--THAT	INT( X * (2/PI) / 2**(L) ) < 2**29.

#--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
#--2**L * (PIby2_1), 2**L * (PIby2_2)

	mov.l		&0x00003FFE,%d2		# BIASED EXP OF 2/PI
	sub.l		%d1,%d2			# BIASED EXP OF 2**(-L)*(2/PI)

	mov.l		&0xA2F9836E,FP_SCR0_HI(%a6)
	mov.l		&0x4E44152A,FP_SCR0_LO(%a6)
	mov.w		%d2,FP_SCR0_EX(%a6)	# FP_SCR0 = 2**(-L)*(2/PI)

	fmov.x		%fp0,%fp2
	fmul.x		FP_SCR0(%a6),%fp2	# fp2 = X * 2**(-L)*(2/PI)

#--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
#--FLOATING POINT FORMAT, THE TWO FMOVE'S	FMOVE.L FP <--> N
#--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
#--(SIGN(INARG)*2**63	+	FP2) - SIGN(INARG)*2**63 WILL GIVE
#--US THE DESIRED VALUE IN FLOATING POINT.
	mov.l		%a1,%d2			# d2.w = saved sign/exponent word
	swap		%d2			# move it to the high word
	and.l		&0x80000000,%d2
	or.l		&0x5F000000,%d2		# d2 = SIGN(INARG)*2**63 IN SGL
	mov.l		%d2,TWOTO63(%a6)
	fadd.s		TWOTO63(%a6),%fp2	# THE FRACTIONAL PART OF FP2 IS ROUNDED
	fsub.s		TWOTO63(%a6),%fp2	# fp2 = N
#	fintrz.x	%fp2,%fp2

#--CREATING 2**(L)*Piby2_1 and 2**(L)*Piby2_2
	mov.l		%d1,%d2			# d2 = L

	add.l		&0x00003FFF,%d2		# BIASED EXP OF 2**L * (PI/2)
	mov.w		%d2,FP_SCR0_EX(%a6)
	mov.l		&0xC90FDAA2,FP_SCR0_HI(%a6)
	clr.l		FP_SCR0_LO(%a6)		# FP_SCR0 = 2**(L) * Piby2_1

	add.l		&0x00003FDD,%d1
	mov.w		%d1,FP_SCR1_EX(%a6)
	mov.l		&0x85A308D3,FP_SCR1_HI(%a6)
	clr.l		FP_SCR1_LO(%a6)		# FP_SCR1 = 2**(L) * Piby2_2

	mov.b		ENDFLAG(%a6),%d1	# d1.b = 1 on the last pass, else 0

#--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
#--P2 = 2**(L) * Piby2_2
	fmov.x		%fp2,%fp4		# fp4 = N
	fmul.x		FP_SCR0(%a6),%fp4	# fp4 = W = N*P1
	fmov.x		%fp2,%fp5		# fp5 = N
	fmul.x		FP_SCR1(%a6),%fp5	# fp5 = w = N*P2
	fmov.x		%fp4,%fp3		# fp3 = W = N*P1

#--we want P+p = W+w  but  |p| <= half ulp of P
#--Then, we need to compute  A := R-P   and  a := r-p
	fadd.x		%fp5,%fp3		# fp3 = P
	fsub.x		%fp3,%fp4		# fp4 = W-P

	fsub.x		%fp3,%fp0		# fp0 = A := R - P
	fadd.x		%fp5,%fp4		# fp4 = p = (W-P)+w

	fmov.x		%fp0,%fp3		# fp3 = A
	fsub.x		%fp4,%fp1		# fp1 = a := r - p

#--Now we need to normalize (A,a) to  "new (R,r)" where R+r = A+a but
#--|r| <= half ulp of R.
	fadd.x		%fp1,%fp0		# fp0 = R := A+a
#--No need to calculate r if this is the last loop
	cmp.b		%d1,&0
	bgt.w		RESTORE

#--Need to calculate r
	fsub.x		%fp0,%fp3		# fp3 = A-R
	fadd.x		%fp3,%fp1		# fp1 = r := (A-R)+a
	bra.w		LOOP

RESTORE:
#--REDUCTION DONE: STORE N, UNDO THE SAVES MADE AT REDUCEX, AND HAND
#--TANCONT THE PARITY OF N IN THE SIGN BIT OF D1.
	fmov.l		%fp2,INT(%a6)
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		(%sp)+,&0x3c		# restore {fp2-fp5}

	mov.l		INT(%a6),%d1
	ror.l		&1,%d1			# bit 0 of N -> sign bit

	bra.w		TANCONT

#########################################################################
# satan():  computes the arctangent of a normalized number		#
# satand(): computes the arctangent of a denormalized number		#
#									#
# INPUT *************************************************************** #
#	a0 = pointer to extended precision input			#
#	d0 = round precision,mode					#
#									#
# OUTPUT ************************************************************** #
#	fp0 = arctan(X)							#
#									#
# ACCURACY and MONOTONICITY ******************************************* #
#	The returned result is within 2 ulps in	64 significant bit,	#
#	i.e. within 0.5001 ulp to 53 bits if the result is subsequently	#
#	rounded to double precision. The result is provably monotonic	#
#	in double precision.						#
#									#
# ALGORITHM *********************************************************** #
#	Step 1. If |X| >= 16 or |X| < 1/16, go to Step 5.		#
#									#
#	Step 2. Let X = sgn * 2**k * 1.xxxxxxxx...x.			#
#		Note that k = -4, -3,..., or 3.				#
#		Define F = sgn * 2**k * 1.xxxx1, i.e. the first 5	#
#		significant bits of X with a bit-1 attached at the 6-th	#
#		bit position. Define u to be u = (X-F) / (1 + X*F).	#
#									#
#	Step 3. Approximate arctan(u) by a polynomial poly.
# 6063# # 6064# Step 4. Return arctan(F) + poly, arctan(F) is fetched from a # 6065# table of values calculated beforehand. Exit. # 6066# # 6067# Step 5. If |X| >= 16, go to Step 7. # 6068# # 6069# Step 6. Approximate arctan(X) by an odd polynomial in X. Exit. # 6070# # 6071# Step 7. Define X' = -1/X. Approximate arctan(X') by an odd # 6072# polynomial in X'. # 6073# Arctan(X) = sign(X)*Pi/2 + arctan(X'). Exit. # 6074# # 6075######################################################################### 6076 6077ATANA3: long 0xBFF6687E,0x314987D8 6078ATANA2: long 0x4002AC69,0x34A26DB3 6079ATANA1: long 0xBFC2476F,0x4E1DA28E 6080 6081ATANB6: long 0x3FB34444,0x7F876989 6082ATANB5: long 0xBFB744EE,0x7FAF45DB 6083ATANB4: long 0x3FBC71C6,0x46940220 6084ATANB3: long 0xBFC24924,0x921872F9 6085ATANB2: long 0x3FC99999,0x99998FA9 6086ATANB1: long 0xBFD55555,0x55555555 6087 6088ATANC5: long 0xBFB70BF3,0x98539E6A 6089ATANC4: long 0x3FBC7187,0x962D1D7D 6090ATANC3: long 0xBFC24924,0x827107B8 6091ATANC2: long 0x3FC99999,0x9996263E 6092ATANC1: long 0xBFD55555,0x55555536 6093 6094PPIBY2: long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000 6095NPIBY2: long 0xBFFF0000,0xC90FDAA2,0x2168C235,0x00000000 6096 6097PTINY: long 0x00010000,0x80000000,0x00000000,0x00000000 6098NTINY: long 0x80010000,0x80000000,0x00000000,0x00000000 6099 6100ATANTBL: 6101 long 0x3FFB0000,0x83D152C5,0x060B7A51,0x00000000 6102 long 0x3FFB0000,0x8BC85445,0x65498B8B,0x00000000 6103 long 0x3FFB0000,0x93BE4060,0x17626B0D,0x00000000 6104 long 0x3FFB0000,0x9BB3078D,0x35AEC202,0x00000000 6105 long 0x3FFB0000,0xA3A69A52,0x5DDCE7DE,0x00000000 6106 long 0x3FFB0000,0xAB98E943,0x62765619,0x00000000 6107 long 0x3FFB0000,0xB389E502,0xF9C59862,0x00000000 6108 long 0x3FFB0000,0xBB797E43,0x6B09E6FB,0x00000000 6109 long 0x3FFB0000,0xC367A5C7,0x39E5F446,0x00000000 6110 long 0x3FFB0000,0xCB544C61,0xCFF7D5C6,0x00000000 6111 long 0x3FFB0000,0xD33F62F8,0x2488533E,0x00000000 6112 long 0x3FFB0000,0xDB28DA81,0x62404C77,0x00000000 6113 long 
0x3FFB0000,0xE310A407,0x8AD34F18,0x00000000 6114 long 0x3FFB0000,0xEAF6B0A8,0x188EE1EB,0x00000000 6115 long 0x3FFB0000,0xF2DAF194,0x9DBE79D5,0x00000000 6116 long 0x3FFB0000,0xFABD5813,0x61D47E3E,0x00000000 6117 long 0x3FFC0000,0x8346AC21,0x0959ECC4,0x00000000 6118 long 0x3FFC0000,0x8B232A08,0x304282D8,0x00000000 6119 long 0x3FFC0000,0x92FB70B8,0xD29AE2F9,0x00000000 6120 long 0x3FFC0000,0x9ACF476F,0x5CCD1CB4,0x00000000 6121 long 0x3FFC0000,0xA29E7630,0x4954F23F,0x00000000 6122 long 0x3FFC0000,0xAA68C5D0,0x8AB85230,0x00000000 6123 long 0x3FFC0000,0xB22DFFFD,0x9D539F83,0x00000000 6124 long 0x3FFC0000,0xB9EDEF45,0x3E900EA5,0x00000000 6125 long 0x3FFC0000,0xC1A85F1C,0xC75E3EA5,0x00000000 6126 long 0x3FFC0000,0xC95D1BE8,0x28138DE6,0x00000000 6127 long 0x3FFC0000,0xD10BF300,0x840D2DE4,0x00000000 6128 long 0x3FFC0000,0xD8B4B2BA,0x6BC05E7A,0x00000000 6129 long 0x3FFC0000,0xE0572A6B,0xB42335F6,0x00000000 6130 long 0x3FFC0000,0xE7F32A70,0xEA9CAA8F,0x00000000 6131 long 0x3FFC0000,0xEF888432,0x64ECEFAA,0x00000000 6132 long 0x3FFC0000,0xF7170A28,0xECC06666,0x00000000 6133 long 0x3FFD0000,0x812FD288,0x332DAD32,0x00000000 6134 long 0x3FFD0000,0x88A8D1B1,0x218E4D64,0x00000000 6135 long 0x3FFD0000,0x9012AB3F,0x23E4AEE8,0x00000000 6136 long 0x3FFD0000,0x976CC3D4,0x11E7F1B9,0x00000000 6137 long 0x3FFD0000,0x9EB68949,0x3889A227,0x00000000 6138 long 0x3FFD0000,0xA5EF72C3,0x4487361B,0x00000000 6139 long 0x3FFD0000,0xAD1700BA,0xF07A7227,0x00000000 6140 long 0x3FFD0000,0xB42CBCFA,0xFD37EFB7,0x00000000 6141 long 0x3FFD0000,0xBB303A94,0x0BA80F89,0x00000000 6142 long 0x3FFD0000,0xC22115C6,0xFCAEBBAF,0x00000000 6143 long 0x3FFD0000,0xC8FEF3E6,0x86331221,0x00000000 6144 long 0x3FFD0000,0xCFC98330,0xB4000C70,0x00000000 6145 long 0x3FFD0000,0xD6807AA1,0x102C5BF9,0x00000000 6146 long 0x3FFD0000,0xDD2399BC,0x31252AA3,0x00000000 6147 long 0x3FFD0000,0xE3B2A855,0x6B8FC517,0x00000000 6148 long 0x3FFD0000,0xEA2D764F,0x64315989,0x00000000 6149 long 0x3FFD0000,0xF3BF5BF8,0xBAD1A21D,0x00000000 6150 long 
0x3FFE0000,0x801CE39E,0x0D205C9A,0x00000000 6151 long 0x3FFE0000,0x8630A2DA,0xDA1ED066,0x00000000 6152 long 0x3FFE0000,0x8C1AD445,0xF3E09B8C,0x00000000 6153 long 0x3FFE0000,0x91DB8F16,0x64F350E2,0x00000000 6154 long 0x3FFE0000,0x97731420,0x365E538C,0x00000000 6155 long 0x3FFE0000,0x9CE1C8E6,0xA0B8CDBA,0x00000000 6156 long 0x3FFE0000,0xA22832DB,0xCADAAE09,0x00000000 6157 long 0x3FFE0000,0xA746F2DD,0xB7602294,0x00000000 6158 long 0x3FFE0000,0xAC3EC0FB,0x997DD6A2,0x00000000 6159 long 0x3FFE0000,0xB110688A,0xEBDC6F6A,0x00000000 6160 long 0x3FFE0000,0xB5BCC490,0x59ECC4B0,0x00000000 6161 long 0x3FFE0000,0xBA44BC7D,0xD470782F,0x00000000 6162 long 0x3FFE0000,0xBEA94144,0xFD049AAC,0x00000000 6163 long 0x3FFE0000,0xC2EB4ABB,0x661628B6,0x00000000 6164 long 0x3FFE0000,0xC70BD54C,0xE602EE14,0x00000000 6165 long 0x3FFE0000,0xCD000549,0xADEC7159,0x00000000 6166 long 0x3FFE0000,0xD48457D2,0xD8EA4EA3,0x00000000 6167 long 0x3FFE0000,0xDB948DA7,0x12DECE3B,0x00000000 6168 long 0x3FFE0000,0xE23855F9,0x69E8096A,0x00000000 6169 long 0x3FFE0000,0xE8771129,0xC4353259,0x00000000 6170 long 0x3FFE0000,0xEE57C16E,0x0D379C0D,0x00000000 6171 long 0x3FFE0000,0xF3E10211,0xA87C3779,0x00000000 6172 long 0x3FFE0000,0xF919039D,0x758B8D41,0x00000000 6173 long 0x3FFE0000,0xFE058B8F,0x64935FB3,0x00000000 6174 long 0x3FFF0000,0x8155FB49,0x7B685D04,0x00000000 6175 long 0x3FFF0000,0x83889E35,0x49D108E1,0x00000000 6176 long 0x3FFF0000,0x859CFA76,0x511D724B,0x00000000 6177 long 0x3FFF0000,0x87952ECF,0xFF8131E7,0x00000000 6178 long 0x3FFF0000,0x89732FD1,0x9557641B,0x00000000 6179 long 0x3FFF0000,0x8B38CAD1,0x01932A35,0x00000000 6180 long 0x3FFF0000,0x8CE7A8D8,0x301EE6B5,0x00000000 6181 long 0x3FFF0000,0x8F46A39E,0x2EAE5281,0x00000000 6182 long 0x3FFF0000,0x922DA7D7,0x91888487,0x00000000 6183 long 0x3FFF0000,0x94D19FCB,0xDEDF5241,0x00000000 6184 long 0x3FFF0000,0x973AB944,0x19D2A08B,0x00000000 6185 long 0x3FFF0000,0x996FF00E,0x08E10B96,0x00000000 6186 long 0x3FFF0000,0x9B773F95,0x12321DA7,0x00000000 6187 long 
0x3FFF0000,0x9D55CC32,0x0F935624,0x00000000 6188 long 0x3FFF0000,0x9F100575,0x006CC571,0x00000000 6189 long 0x3FFF0000,0xA0A9C290,0xD97CC06C,0x00000000 6190 long 0x3FFF0000,0xA22659EB,0xEBC0630A,0x00000000 6191 long 0x3FFF0000,0xA388B4AF,0xF6EF0EC9,0x00000000 6192 long 0x3FFF0000,0xA4D35F10,0x61D292C4,0x00000000 6193 long 0x3FFF0000,0xA60895DC,0xFBE3187E,0x00000000 6194 long 0x3FFF0000,0xA72A51DC,0x7367BEAC,0x00000000 6195 long 0x3FFF0000,0xA83A5153,0x0956168F,0x00000000 6196 long 0x3FFF0000,0xA93A2007,0x7539546E,0x00000000 6197 long 0x3FFF0000,0xAA9E7245,0x023B2605,0x00000000 6198 long 0x3FFF0000,0xAC4C84BA,0x6FE4D58F,0x00000000 6199 long 0x3FFF0000,0xADCE4A4A,0x606B9712,0x00000000 6200 long 0x3FFF0000,0xAF2A2DCD,0x8D263C9C,0x00000000 6201 long 0x3FFF0000,0xB0656F81,0xF22265C7,0x00000000 6202 long 0x3FFF0000,0xB1846515,0x0F71496A,0x00000000 6203 long 0x3FFF0000,0xB28AAA15,0x6F9ADA35,0x00000000 6204 long 0x3FFF0000,0xB37B44FF,0x3766B895,0x00000000 6205 long 0x3FFF0000,0xB458C3DC,0xE9630433,0x00000000 6206 long 0x3FFF0000,0xB525529D,0x562246BD,0x00000000 6207 long 0x3FFF0000,0xB5E2CCA9,0x5F9D88CC,0x00000000 6208 long 0x3FFF0000,0xB692CADA,0x7ACA1ADA,0x00000000 6209 long 0x3FFF0000,0xB736AEA7,0xA6925838,0x00000000 6210 long 0x3FFF0000,0xB7CFAB28,0x7E9F7B36,0x00000000 6211 long 0x3FFF0000,0xB85ECC66,0xCB219835,0x00000000 6212 long 0x3FFF0000,0xB8E4FD5A,0x20A593DA,0x00000000 6213 long 0x3FFF0000,0xB99F41F6,0x4AFF9BB5,0x00000000 6214 long 0x3FFF0000,0xBA7F1E17,0x842BBE7B,0x00000000 6215 long 0x3FFF0000,0xBB471285,0x7637E17D,0x00000000 6216 long 0x3FFF0000,0xBBFABE8A,0x4788DF6F,0x00000000 6217 long 0x3FFF0000,0xBC9D0FAD,0x2B689D79,0x00000000 6218 long 0x3FFF0000,0xBD306A39,0x471ECD86,0x00000000 6219 long 0x3FFF0000,0xBDB6C731,0x856AF18A,0x00000000 6220 long 0x3FFF0000,0xBE31CAC5,0x02E80D70,0x00000000 6221 long 0x3FFF0000,0xBEA2D55C,0xE33194E2,0x00000000 6222 long 0x3FFF0000,0xBF0B10B7,0xC03128F0,0x00000000 6223 long 0x3FFF0000,0xBF6B7A18,0xDACB778D,0x00000000 6224 long 
0x3FFF0000,0xBFC4EA46,0x63FA18F6,0x00000000 6225 long 0x3FFF0000,0xC0181BDE,0x8B89A454,0x00000000 6226 long 0x3FFF0000,0xC065B066,0xCFBF6439,0x00000000 6227 long 0x3FFF0000,0xC0AE345F,0x56340AE6,0x00000000 6228 long 0x3FFF0000,0xC0F22291,0x9CB9E6A7,0x00000000 6229 6230 set X,FP_SCR0 6231 set XDCARE,X+2 6232 set XFRAC,X+4 6233 set XFRACLO,X+8 6234 6235 set ATANF,FP_SCR1 6236 set ATANFHI,ATANF+4 6237 set ATANFLO,ATANF+8 6238 6239 global satan 6240#--ENTRY POINT FOR ATAN(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S 6241satan: 6242 fmov.x (%a0),%fp0 # LOAD INPUT 6243 6244 mov.l (%a0),%d1 6245 mov.w 4(%a0),%d1 6246 fmov.x %fp0,X(%a6) 6247 and.l &0x7FFFFFFF,%d1 6248 6249 cmp.l %d1,&0x3FFB8000 # |X| >= 1/16? 6250 bge.b ATANOK1 6251 bra.w ATANSM 6252 6253ATANOK1: 6254 cmp.l %d1,&0x4002FFFF # |X| < 16 ? 6255 ble.b ATANMAIN 6256 bra.w ATANBIG 6257 6258#--THE MOST LIKELY CASE, |X| IN [1/16, 16). WE USE TABLE TECHNIQUE 6259#--THE IDEA IS ATAN(X) = ATAN(F) + ATAN( [X-F] / [1+XF] ). 6260#--SO IF F IS CHOSEN TO BE CLOSE TO X AND ATAN(F) IS STORED IN 6261#--A TABLE, ALL WE NEED IS TO APPROXIMATE ATAN(U) WHERE 6262#--U = (X-F)/(1+XF) IS SMALL (REMEMBER F IS CLOSE TO X). IT IS 6263#--TRUE THAT A DIVIDE IS NOW NEEDED, BUT THE APPROXIMATION FOR 6264#--ATAN(U) IS A VERY SHORT POLYNOMIAL AND THE INDEXING TO 6265#--FETCH F AND SAVING OF REGISTERS CAN BE ALL HIDED UNDER THE 6266#--DIVIDE. IN THE END THIS METHOD IS MUCH FASTER THAN A TRADITIONAL 6267#--ONE. NOTE ALSO THAT THE TRADITIONAL SCHEME THAT APPROXIMATE 6268#--ATAN(X) DIRECTLY WILL NEED TO USE A RATIONAL APPROXIMATION 6269#--(DIVISION NEEDED) ANYWAY BECAUSE A POLYNOMIAL APPROXIMATION 6270#--WILL INVOLVE A VERY LONG POLYNOMIAL. 6271 6272#--NOW WE SEE X AS +-2^K * 1.BBBBBBB....B <- 1. + 63 BITS 6273#--WE CHOSE F TO BE +-2^K * 1.BBBB1 6274#--THAT IS IT MATCHES THE EXPONENT AND FIRST 5 BITS OF X, THE 6275#--SIXTH BITS IS SET TO BE 1. SINCE K = -4, -3, ..., 3, THERE 6276#--ARE ONLY 8 TIMES 16 = 2^7 = 128 |F|'S. 
SINCE ATAN(-|F|) IS 6277#-- -ATAN(|F|), WE NEED TO STORE ONLY ATAN(|F|). 6278 6279ATANMAIN: 6280 6281 and.l &0xF8000000,XFRAC(%a6) # FIRST 5 BITS 6282 or.l &0x04000000,XFRAC(%a6) # SET 6-TH BIT TO 1 6283 mov.l &0x00000000,XFRACLO(%a6) # LOCATION OF X IS NOW F 6284 6285 fmov.x %fp0,%fp1 # FP1 IS X 6286 fmul.x X(%a6),%fp1 # FP1 IS X*F, NOTE THAT X*F > 0 6287 fsub.x X(%a6),%fp0 # FP0 IS X-F 6288 fadd.s &0x3F800000,%fp1 # FP1 IS 1 + X*F 6289 fdiv.x %fp1,%fp0 # FP0 IS U = (X-F)/(1+X*F) 6290 6291#--WHILE THE DIVISION IS TAKING ITS TIME, WE FETCH ATAN(|F|) 6292#--CREATE ATAN(F) AND STORE IT IN ATANF, AND 6293#--SAVE REGISTERS FP2. 6294 6295 mov.l %d2,-(%sp) # SAVE d2 TEMPORARILY 6296 mov.l %d1,%d2 # THE EXP AND 16 BITS OF X 6297 and.l &0x00007800,%d1 # 4 VARYING BITS OF F'S FRACTION 6298 and.l &0x7FFF0000,%d2 # EXPONENT OF F 6299 sub.l &0x3FFB0000,%d2 # K+4 6300 asr.l &1,%d2 6301 add.l %d2,%d1 # THE 7 BITS IDENTIFYING F 6302 asr.l &7,%d1 # INDEX INTO TBL OF ATAN(|F|) 6303 lea ATANTBL(%pc),%a1 6304 add.l %d1,%a1 # ADDRESS OF ATAN(|F|) 6305 mov.l (%a1)+,ATANF(%a6) 6306 mov.l (%a1)+,ATANFHI(%a6) 6307 mov.l (%a1)+,ATANFLO(%a6) # ATANF IS NOW ATAN(|F|) 6308 mov.l X(%a6),%d1 # LOAD SIGN AND EXPO. AGAIN 6309 and.l &0x80000000,%d1 # SIGN(F) 6310 or.l %d1,ATANF(%a6) # ATANF IS NOW SIGN(F)*ATAN(|F|) 6311 mov.l (%sp)+,%d2 # RESTORE d2 6312 6313#--THAT'S ALL I HAVE TO DO FOR NOW, 6314#--BUT ALAS, THE DIVIDE IS STILL CRANKING! 6315 6316#--U IN FP0, WE ARE NOW READY TO COMPUTE ATAN(U) AS 6317#--U + A1*U*V*(A2 + V*(A3 + V)), V = U*U 6318#--THE POLYNOMIAL MAY LOOK STRANGE, BUT IS NEVERTHELESS CORRECT. 6319#--THE NATURAL FORM IS U + U*V*(A1 + V*(A2 + V*A3)) 6320#--WHAT WE HAVE HERE IS MERELY A1 = A3, A2 = A1/A3, A3 = A2/A3. 6321#--THE REASON FOR THIS REARRANGEMENT IS TO MAKE THE INDEPENDENT 6322#--PARTS A1*U*V AND (A2 + ... 
STUFF) MORE LOAD-BALANCED 6323 6324 fmovm.x &0x04,-(%sp) # save fp2 6325 6326 fmov.x %fp0,%fp1 6327 fmul.x %fp1,%fp1 6328 fmov.d ATANA3(%pc),%fp2 6329 fadd.x %fp1,%fp2 # A3+V 6330 fmul.x %fp1,%fp2 # V*(A3+V) 6331 fmul.x %fp0,%fp1 # U*V 6332 fadd.d ATANA2(%pc),%fp2 # A2+V*(A3+V) 6333 fmul.d ATANA1(%pc),%fp1 # A1*U*V 6334 fmul.x %fp2,%fp1 # A1*U*V*(A2+V*(A3+V)) 6335 fadd.x %fp1,%fp0 # ATAN(U), FP1 RELEASED 6336 6337 fmovm.x (%sp)+,&0x20 # restore fp2 6338 6339 fmov.l %d0,%fpcr # restore users rnd mode,prec 6340 fadd.x ATANF(%a6),%fp0 # ATAN(X) 6341 bra t_inx2 6342 6343ATANBORS: 6344#--|X| IS IN d0 IN COMPACT FORM. FP1, d0 SAVED. 6345#--FP0 IS X AND |X| <= 1/16 OR |X| >= 16. 6346 cmp.l %d1,&0x3FFF8000 6347 bgt.w ATANBIG # I.E. |X| >= 16 6348 6349ATANSM: 6350#--|X| <= 1/16 6351#--IF |X| < 2^(-40), RETURN X AS ANSWER. OTHERWISE, APPROXIMATE 6352#--ATAN(X) BY X + X*Y*(B1+Y*(B2+Y*(B3+Y*(B4+Y*(B5+Y*B6))))) 6353#--WHICH IS X + X*Y*( [B1+Z*(B3+Z*B5)] + [Y*(B2+Z*(B4+Z*B6)] ) 6354#--WHERE Y = X*X, AND Z = Y*Y. 
6355 6356 cmp.l %d1,&0x3FD78000 6357 blt.w ATANTINY 6358 6359#--COMPUTE POLYNOMIAL 6360 fmovm.x &0x0c,-(%sp) # save fp2/fp3 6361 6362 fmul.x %fp0,%fp0 # FPO IS Y = X*X 6363 6364 fmov.x %fp0,%fp1 6365 fmul.x %fp1,%fp1 # FP1 IS Z = Y*Y 6366 6367 fmov.d ATANB6(%pc),%fp2 6368 fmov.d ATANB5(%pc),%fp3 6369 6370 fmul.x %fp1,%fp2 # Z*B6 6371 fmul.x %fp1,%fp3 # Z*B5 6372 6373 fadd.d ATANB4(%pc),%fp2 # B4+Z*B6 6374 fadd.d ATANB3(%pc),%fp3 # B3+Z*B5 6375 6376 fmul.x %fp1,%fp2 # Z*(B4+Z*B6) 6377 fmul.x %fp3,%fp1 # Z*(B3+Z*B5) 6378 6379 fadd.d ATANB2(%pc),%fp2 # B2+Z*(B4+Z*B6) 6380 fadd.d ATANB1(%pc),%fp1 # B1+Z*(B3+Z*B5) 6381 6382 fmul.x %fp0,%fp2 # Y*(B2+Z*(B4+Z*B6)) 6383 fmul.x X(%a6),%fp0 # X*Y 6384 6385 fadd.x %fp2,%fp1 # [B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))] 6386 6387 fmul.x %fp1,%fp0 # X*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]) 6388 6389 fmovm.x (%sp)+,&0x30 # restore fp2/fp3 6390 6391 fmov.l %d0,%fpcr # restore users rnd mode,prec 6392 fadd.x X(%a6),%fp0 6393 bra t_inx2 6394 6395ATANTINY: 6396#--|X| < 2^(-40), ATAN(X) = X 6397 6398 fmov.l %d0,%fpcr # restore users rnd mode,prec 6399 mov.b &FMOV_OP,%d1 # last inst is MOVE 6400 fmov.x X(%a6),%fp0 # last inst - possible exception set 6401 6402 bra t_catch 6403 6404ATANBIG: 6405#--IF |X| > 2^(100), RETURN SIGN(X)*(PI/2 - TINY). OTHERWISE, 6406#--RETURN SIGN(X)*PI/2 + ATAN(-1/X). 6407 cmp.l %d1,&0x40638000 6408 bgt.w ATANHUGE 6409 6410#--APPROXIMATE ATAN(-1/X) BY 6411#--X'+X'*Y*(C1+Y*(C2+Y*(C3+Y*(C4+Y*C5)))), X' = -1/X, Y = X'*X' 6412#--THIS CAN BE RE-WRITTEN AS 6413#--X'+X'*Y*( [C1+Z*(C3+Z*C5)] + [Y*(C2+Z*C4)] ), Z = Y*Y. 
6414 6415 fmovm.x &0x0c,-(%sp) # save fp2/fp3 6416 6417 fmov.s &0xBF800000,%fp1 # LOAD -1 6418 fdiv.x %fp0,%fp1 # FP1 IS -1/X 6419 6420#--DIVIDE IS STILL CRANKING 6421 6422 fmov.x %fp1,%fp0 # FP0 IS X' 6423 fmul.x %fp0,%fp0 # FP0 IS Y = X'*X' 6424 fmov.x %fp1,X(%a6) # X IS REALLY X' 6425 6426 fmov.x %fp0,%fp1 6427 fmul.x %fp1,%fp1 # FP1 IS Z = Y*Y 6428 6429 fmov.d ATANC5(%pc),%fp3 6430 fmov.d ATANC4(%pc),%fp2 6431 6432 fmul.x %fp1,%fp3 # Z*C5 6433 fmul.x %fp1,%fp2 # Z*B4 6434 6435 fadd.d ATANC3(%pc),%fp3 # C3+Z*C5 6436 fadd.d ATANC2(%pc),%fp2 # C2+Z*C4 6437 6438 fmul.x %fp3,%fp1 # Z*(C3+Z*C5), FP3 RELEASED 6439 fmul.x %fp0,%fp2 # Y*(C2+Z*C4) 6440 6441 fadd.d ATANC1(%pc),%fp1 # C1+Z*(C3+Z*C5) 6442 fmul.x X(%a6),%fp0 # X'*Y 6443 6444 fadd.x %fp2,%fp1 # [Y*(C2+Z*C4)]+[C1+Z*(C3+Z*C5)] 6445 6446 fmul.x %fp1,%fp0 # X'*Y*([B1+Z*(B3+Z*B5)] 6447# ... +[Y*(B2+Z*(B4+Z*B6))]) 6448 fadd.x X(%a6),%fp0 6449 6450 fmovm.x (%sp)+,&0x30 # restore fp2/fp3 6451 6452 fmov.l %d0,%fpcr # restore users rnd mode,prec 6453 tst.b (%a0) 6454 bpl.b pos_big 6455 6456neg_big: 6457 fadd.x NPIBY2(%pc),%fp0 6458 bra t_minx2 6459 6460pos_big: 6461 fadd.x PPIBY2(%pc),%fp0 6462 bra t_pinx2 6463 6464ATANHUGE: 6465#--RETURN SIGN(X)*(PIBY2 - TINY) = SIGN(X)*PIBY2 - SIGN(X)*TINY 6466 tst.b (%a0) 6467 bpl.b pos_huge 6468 6469neg_huge: 6470 fmov.x NPIBY2(%pc),%fp0 6471 fmov.l %d0,%fpcr 6472 fadd.x PTINY(%pc),%fp0 6473 bra t_minx2 6474 6475pos_huge: 6476 fmov.x PPIBY2(%pc),%fp0 6477 fmov.l %d0,%fpcr 6478 fadd.x NTINY(%pc),%fp0 6479 bra t_pinx2 6480 6481 global satand 6482#--ENTRY POINT FOR ATAN(X) FOR DENORMALIZED ARGUMENT 6483satand: 6484 bra t_extdnrm 6485 6486######################################################################### 6487# sasin(): computes the inverse sine of a normalized input # 6488# sasind(): computes the inverse sine of a denormalized input # 6489# # 6490# INPUT *************************************************************** # 6491# a0 = pointer to extended precision input # 6492# d0 = 
#	     round precision,mode					#
#									#
# OUTPUT **************************************************************	#
#	fp0 = arcsin(X)							#
#									#
# ACCURACY and MONOTONICITY *******************************************	#
#	The returned result is within 3 ulps in 64 significant bit,	#
#	i.e. within 0.5001 ulp to 53 bits if the result is subsequently	#
#	rounded to double precision. The result is provably monotonic	#
#	in double precision.						#
#									#
# ALGORITHM ***********************************************************	#
#									#
#	ASIN								#
#	1. If |X| >= 1, go to 3.					#
#									#
#	2. (|X| < 1) Calculate asin(X) by				#
#		z := sqrt( [1-X][1+X] )					#
#		asin(X) = atan( x / z ).				#
#		Exit.							#
#									#
#	3. If |X| > 1, go to 5.						#
#									#
#	4. (|X| = 1) sgn := sign(X), return asin(X) := sgn * Pi/2. Exit.#
#									#
#	5. (|X| > 1) Generate an invalid operation by 0 * infinity.	#
#		Exit.							#
#									#
#########################################################################

#--d1 HOLDS |X| IN COMPACT FORM (SIGN/EXPONENT WORD IN THE UPPER 16 BITS,
#--TOP 16 FRACTION BITS IN THE LOWER 16, SIGN CLEARED), SO THE RANGE
#--CHECKS BELOW ARE PLAIN INTEGER COMPARES. fp1 IS SCRATCH; fp2 IS SAVED
#--AND RESTORED AROUND ITS ONE USE.

	global		sasin
sasin:
	fmov.x		(%a0),%fp0		# LOAD INPUT

	mov.l		(%a0),%d1
	mov.w		4(%a0),%d1
	and.l		&0x7FFFFFFF,%d1
	cmp.l		%d1,&0x3FFF8000		# |X| >= 1 ?
	bge.b		ASINBIG

# This catch is added here for the '060 QSP. Originally, the call to
# satan() would handle this case by causing the exception which would
# not be caught until gen_except(). Now, with the exceptions being
# detected inside of satan(), the exception would have been handled there
# instead of inside sasin() as expected.
	cmp.l		%d1,&0x3FD78000		# |X| < 2^(-40) ?
	blt.w		ASINTINY

#--THIS IS THE USUAL CASE, |X| < 1
#--ASIN(X) = ATAN( X / SQRT( (1-X)(1+X) ) )

ASINMAIN:
	fmov.s		&0x3F800000,%fp1
	fsub.x		%fp0,%fp1		# 1-X
	fmovm.x		&0x4,-(%sp)		# {fp2}
	fmov.s		&0x3F800000,%fp2
	fadd.x		%fp0,%fp2		# 1+X
	fmul.x		%fp2,%fp1		# (1+X)(1-X)
	fmovm.x		(%sp)+,&0x20		# {fp2}
	fsqrt.x		%fp1			# SQRT([1-X][1+X])
	fdiv.x		%fp1,%fp0		# X/SQRT([1-X][1+X])
#--satan() READS ITS OPERAND THROUGH a0, SO THE QUOTIENT IS SPILLED TO
#--THE STACK AS A 12-BYTE EXTENDED VALUE AND POPPED AGAIN AFTERWARDS.
	fmovm.x		&0x01,-(%sp)		# save X/SQRT(...)
	lea		(%sp),%a0		# pass ptr to X/SQRT(...)
	bsr		satan
	add.l		&0xc,%sp		# clear X/SQRT(...) from stack
	bra		t_inx2

ASINBIG:
	fabs.x		%fp0			# |X|
	fcmp.s		%fp0,&0x3F800000
	fbgt		t_operr			# cause an operr exception

#--|X| = 1, ASIN(X) = +- PI/2.
ASINONE:
	fmov.x		PIBY2(%pc),%fp0
	mov.l		(%a0),%d1
	and.l		&0x80000000,%d1		# SIGN BIT OF X
	or.l		&0x3F800000,%d1		# +-1 IN SGL FORMAT
	mov.l		%d1,-(%sp)		# push SIGN(X) IN SGL-FMT
	fmov.l		%d0,%fpcr
	fmul.s		(%sp)+,%fp0
	bra		t_inx2

#--|X| < 2^(-40), ASIN(X) = X
ASINTINY:
	fmov.l		%d0,%fpcr		# restore users rnd mode,prec
	mov.b		&FMOV_OP,%d1		# last inst is MOVE
	fmov.x		(%a0),%fp0		# last inst - possible exception
	bra		t_catch

	global		sasind
#--ASIN(X) = X FOR DENORMALIZED X
sasind:
	bra		t_extdnrm

#########################################################################
# sacos():  computes the inverse cosine of a normalized input		#
# sacosd(): computes the inverse cosine of a denormalized input		#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision input			#
#	d0 = round precision,mode					#
#									#
# OUTPUT **************************************************************	#
#	fp0 = arccos(X)							#
#									#
# ACCURACY and MONOTONICITY *******************************************	#
#	The returned result is within 3 ulps in 64 
#	significant bit,						#
#	i.e. within 0.5001 ulp to 53 bits if the result is subsequently	#
#	rounded to double precision. The result is provably monotonic	#
#	in double precision.						#
#									#
# ALGORITHM ***********************************************************	#
#									#
#	ACOS								#
#	1. If |X| >= 1, go to 3.					#
#									#
#	2. (|X| < 1) Calculate acos(X) by				#
#		z := (1-X) / (1+X)					#
#		acos(X) = 2 * atan( sqrt(z) ).				#
#		Exit.							#
#									#
#	3. If |X| > 1, go to 5.						#
#									#
#	4. (|X| = 1) If X > 0, return 0. Otherwise, return Pi. Exit.	#
#									#
#	5. (|X| > 1) Generate an invalid operation by 0 * infinity.	#
#		Exit.							#
#									#
#########################################################################

	global		sacos
sacos:
	fmov.x		(%a0),%fp0		# LOAD INPUT

	mov.l		(%a0),%d1		# pack exp w/ upper 16 fraction
	mov.w		4(%a0),%d1
	and.l		&0x7FFFFFFF,%d1
	cmp.l		%d1,&0x3FFF8000		# |X| >= 1 ?
	bge.b		ACOSBIG

#--THIS IS THE USUAL CASE, |X| < 1
#--ACOS(X) = 2 * ATAN( SQRT( (1-X)/(1+X) ) )

ACOSMAIN:
	fmov.s		&0x3F800000,%fp1
	fadd.x		%fp0,%fp1		# 1+X
	fneg.x		%fp0			# -X
	fadd.s		&0x3F800000,%fp0	# 1-X
	fdiv.x		%fp1,%fp0		# (1-X)/(1+X)
	fsqrt.x		%fp0			# SQRT((1-X)/(1+X))
#--satan() COPIES d0 INTO FPCR BEFORE ITS LAST OPERATION. THE USER'S
#--VALUE IS PARKED ON THE STACK AND d0 IS ZEROED FOR THE CALL, SO THE
#--USER'S ROUNDING IS APPLIED ONLY TO THE FINAL DOUBLING BELOW.
	mov.l		%d0,-(%sp)		# save original users fpcr
	clr.l		%d0
	fmovm.x		&0x01,-(%sp)		# save SQRT(...) to stack
	lea		(%sp),%a0		# pass ptr to sqrt
	bsr		satan			# ATAN(SQRT([1-X]/[1+X]))
	add.l		&0xc,%sp		# clear SQRT(...) from stack

	fmov.l		(%sp)+,%fpcr		# restore users round prec,mode
	fadd.x		%fp0,%fp0		# 2 * ATAN( STUFF )
	bra		t_pinx2

ACOSBIG:
	fabs.x		%fp0
	fcmp.s		%fp0,&0x3F800000
	fbgt		t_operr			# cause an operr exception

#--|X| = 1, ACOS(X) = 0 OR PI
	tst.b		(%a0)			# is X positive or negative?
	bpl.b		ACOSP1

#--X = -1
#Returns PI and inexact exception
ACOSM1:
	fmov.x		PI(%pc),%fp0		# load PI
	fmov.l		%d0,%fpcr		# load round mode,prec
	fadd.s		&0x00800000,%fp0	# add a small value
	bra		t_pinx2

ACOSP1:
	bra		ld_pzero		# answer is positive zero

	global		sacosd
#--ACOS(X) = PI/2 FOR DENORMALIZED X
sacosd:
	fmov.l		%d0,%fpcr		# load user's rnd mode/prec
	fmov.x		PIBY2(%pc),%fp0
	bra		t_pinx2

#########################################################################
# setox():    computes the exponential for a normalized input		#
# setoxd():   computes the exponential for a denormalized input		#
# setoxm1():  computes the exponential minus 1 for a normalized input	#
# setoxm1d(): computes the exponential minus 1 for a denormalized input	#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision input			#
#	d0 = round precision,mode					#
#									#
# OUTPUT **************************************************************	#
#	fp0 = exp(X) or exp(X)-1					#
#									#
# ACCURACY and MONOTONICITY *******************************************	#
#	The returned result is within 0.85 ulps in 64 significant bit,	#
#	i.e. within 0.5001 ulp to 53 bits if the result is subsequently	#
#	rounded to double precision. The result is provably monotonic	#
#	in double precision.						#
#									#
# ALGORITHM and IMPLEMENTATION ****************************************	#
#									#
#	setoxd								#
#	------								#
#	Step 1.	Set ans := 1.0						#
#									#
#	Step 2.	Return ans := ans + sign(X)*2^(-126). Exit.		#
#	Notes:	This will always generate one exception -- inexact.	#
#									#
#									#
#	setox								#
#	-----								#
#									#
#	Step 1.	Filter out extreme cases of input argument.		#
#		1.1	If |X| >= 2^(-65), go to Step 1.3.		#
#		1.2	Go to Step 7.					#
#		1.3	If |X| < 16380 log(2), go to Step 2.		#
#		1.4	Go to Step 8.					#
#	Notes:	The usual case should take the branches 1.1 -> 1.3 -> 2.#
#		To avoid the use of floating-point comparisons, a	#
#		compact representation of |X| is used. This format is a	#
#		32-bit integer, the upper (more significant) 16 bits	#
#		are the sign and biased exponent field of |X|; the	#
#		lower 16 bits are the 16 most significant fraction	#
#		(including the explicit bit) bits of |X|. Consequently,	#
#		the comparisons in Steps 1.1 and 1.3 can be performed	#
#		by integer comparison. Note also that the constant	#
#		16380 log(2) used in Step 1.3 is also in the compact	#
#		form. Thus taking the branch to Step 2 guarantees	#
#		|X| < 16380 log(2). There is no harm to have a small	#
#		number of cases where |X| is less than, but close to,	#
#		16380 log(2) and the branch to Step 8 is taken.		#
#									#
#	Step 2.	Calculate N = round-to-nearest-int( X * 64/log2 ).	#
#		2.1	Set AdjFlag := 0 (indicates the branch 1.3 -> 2	#
#			was taken)					#
#		2.2	N := round-to-nearest-integer( X * 64/log2 ).	#
#		2.3	Calculate J = N mod 64; so J = 0,1,2,...,	#
#			or 63.						#
#		2.4	Calculate M = (N - J)/64; so N = 64M + J.	#
#		2.5	Calculate the address of the stored value of	#
#			2^(J/64).					#
#		2.6	Create the value Scale = 2^M.			#
#	Notes:	The calculation in 2.2 is really performed by		#
#			Z := X * constant				#
#			N := round-to-nearest-integer(Z)		#
#		where							#
#			constant := single-precision( 64/log 2 ).	#
#									#
#		Using a single-precision constant avoids memory		#
#		access. Another effect of using a single-precision	#
#		"constant" is that the calculated value Z is		#
#									#
#			Z = X*(64/log2)*(1+eps), |eps| <= 2^(-24).	#
#									#
#		This error has to be considered later in Steps 3 and 4.	#
#									#
#	Step 3.	Calculate X - N*log2/64.				#
#		3.1	R := X + N*L1,					#
#				where L1 := single-precision(-log2/64).	#
# 6760# 3.2 R := R + N*L2, # 6761# L2 := extended-precision(-log2/64 - L1).# 6762# Notes: a) The way L1 and L2 are chosen ensures L1+L2 # 6763# approximate the value -log2/64 to 88 bits of accuracy. # 6764# b) N*L1 is exact because N is no longer than 22 bits # 6765# and L1 is no longer than 24 bits. # 6766# c) The calculation X+N*L1 is also exact due to # 6767# cancellation. Thus, R is practically X+N(L1+L2) to full # 6768# 64 bits. # 6769# d) It is important to estimate how large can |R| be # 6770# after Step 3.2. # 6771# # 6772# N = rnd-to-int( X*64/log2 (1+eps) ), |eps|<=2^(-24) # 6773# X*64/log2 (1+eps) = N + f, |f| <= 0.5 # 6774# X*64/log2 - N = f - eps*X 64/log2 # 6775# X - N*log2/64 = f*log2/64 - eps*X # 6776# # 6777# # 6778# Now |X| <= 16446 log2, thus # 6779# # 6780# |X - N*log2/64| <= (0.5 + 16446/2^(18))*log2/64 # 6781# <= 0.57 log2/64. # 6782# This bound will be used in Step 4. # 6783# # 6784# Step 4. Approximate exp(R)-1 by a polynomial # 6785# p = R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5)))) # 6786# Notes: a) In order to reduce memory access, the coefficients # 6787# are made as "short" as possible: A1 (which is 1/2), A4 # 6788# and A5 are single precision; A2 and A3 are double # 6789# precision. # 6790# b) Even with the restrictions above, # 6791# |p - (exp(R)-1)| < 2^(-68.8) for all |R| <= 0.0062. # 6792# Note that 0.0062 is slightly bigger than 0.57 log2/64. # 6793# c) To fully utilize the pipeline, p is separated into # 6794# two independent pieces of roughly equal complexities # 6795# p = [ R + R*S*(A2 + S*A4) ] + # 6796# [ S*(A1 + S*(A3 + S*A5)) ] # 6797# where S = R*R. # 6798# # 6799# Step 5. Compute 2^(J/64)*exp(R) = 2^(J/64)*(1+p) by # 6800# ans := T + ( T*p + t) # 6801# where T and t are the stored values for 2^(J/64). # 6802# Notes: 2^(J/64) is stored as T and t where T+t approximates # 6803# 2^(J/64) to roughly 85 bits; T is in extended precision # 6804# and t is in single precision. 
#		Note also that T is					#
#		rounded to 62 bits so that the last two bits of T are	#
#		zero. The reason for such a special form is that T-1,	#
#		T-2, and T-8 will all be exact --- a property that will	#
#		give much more accurate computation of the function	#
#		EXPM1.							#
#									#
#	Step 6.	Reconstruction of exp(X)				#
#			exp(X) = 2^M * 2^(J/64) * exp(R).		#
#		6.1	If AdjFlag = 0, go to 6.3			#
#		6.2	ans := ans * AdjScale				#
#		6.3	Restore the user FPCR				#
#		6.4	Return ans := ans * Scale. Exit.		#
#	Notes:	If AdjFlag = 0, we have X = Mlog2 + Jlog2/64 + R,	#
#		|M| <= 16380, and Scale = 2^M. Moreover, exp(X) will	#
#		neither overflow nor underflow. If AdjFlag = 1, that	#
#		means that						#
#			X = (M1+M)log2 + Jlog2/64 + R, |M1+M| >= 16380.#
#		Hence, exp(X) may overflow or underflow or neither.	#
#		When that is the case, AdjScale = 2^(M1) where M1 is	#
#		approximately M. Thus 6.2 will never cause		#
#		over/underflow. Possible exception in 6.4 is overflow	#
#		or underflow. The inexact exception is not generated in	#
#		6.4. Although one can argue that the inexact flag	#
#		should always be raised, simulating that exception	#
#		costs more than the flag is worth in practical uses.	#
#									#
#	Step 7.	Return 1 + X.						#
#		7.1	ans := X					#
#		7.2	Restore user FPCR.				#
#		7.3	Return ans := 1 + ans. Exit			#
#	Notes:	For non-zero X, the inexact exception will always be	#
#		raised by 7.3. That is the only exception raised by 7.3.#
#		Note also that we use the FMOVEM instruction to move X	#
#		in Step 7.1 to avoid unnecessary trapping. (Although	#
#		the FMOVEM may not seem relevant since X is normalized,	#
#		the precaution will be useful in the library version of	#
#		this code where the separate entry for denormalized	#
#		inputs will be done away with.)				#
#									#
#	Step 8.	Handle exp(X) where |X| >= 16380log2.			#
#		8.1	If |X| > 16480 log2, go to Step 9.		#
# 6846# (mimic 2.2 - 2.6) # 6847# 8.2 N := round-to-integer( X * 64/log2 ) # 6848# 8.3 Calculate J = N mod 64, J = 0,1,...,63 # 6849# 8.4 K := (N-J)/64, M1 := truncate(K/2), M = K-M1, # 6850# AdjFlag := 1. # 6851# 8.5 Calculate the address of the stored value # 6852# 2^(J/64). # 6853# 8.6 Create the values Scale = 2^M, AdjScale = 2^M1. # 6854# 8.7 Go to Step 3. # 6855# Notes: Refer to notes for 2.2 - 2.6. # 6856# # 6857# Step 9. Handle exp(X), |X| > 16480 log2. # 6858# 9.1 If X < 0, go to 9.3 # 6859# 9.2 ans := Huge, go to 9.4 # 6860# 9.3 ans := Tiny. # 6861# 9.4 Restore user FPCR. # 6862# 9.5 Return ans := ans * ans. Exit. # 6863# Notes: Exp(X) will surely overflow or underflow, depending on # 6864# X's sign. "Huge" and "Tiny" are respectively large/tiny # 6865# extended-precision numbers whose square over/underflow # 6866# with an inexact result. Thus, 9.5 always raises the # 6867# inexact together with either overflow or underflow. # 6868# # 6869# setoxm1d # 6870# -------- # 6871# # 6872# Step 1. Set ans := 0 # 6873# # 6874# Step 2. Return ans := X + ans. Exit. # 6875# Notes: This will return X with the appropriate rounding # 6876# precision prescribed by the user FPCR. # 6877# # 6878# setoxm1 # 6879# ------- # 6880# # 6881# Step 1. Check |X| # 6882# 1.1 If |X| >= 1/4, go to Step 1.3. # 6883# 1.2 Go to Step 7. # 6884# 1.3 If |X| < 70 log(2), go to Step 2. # 6885# 1.4 Go to Step 10. # 6886# Notes: The usual case should take the branches 1.1 -> 1.3 -> 2.# 6887# However, it is conceivable |X| can be small very often # 6888# because EXPM1 is intended to evaluate exp(X)-1 # 6889# accurately when |X| is small. For further details on # 6890# the comparisons, see the notes on Step 1 of setox. # 6891# # 6892# Step 2. Calculate N = round-to-nearest-int( X * 64/log2 ). # 6893# 2.1 N := round-to-nearest-integer( X * 64/log2 ). # 6894# 2.2 Calculate J = N mod 64; so J = 0,1,2,..., # 6895# or 63. # 6896# 2.3 Calculate M = (N - J)/64; so N = 64M + J. 
# 6897# 2.4 Calculate the address of the stored value of # 6898# 2^(J/64). # 6899# 2.5 Create the values Sc = 2^M and # 6900# OnebySc := -2^(-M). # 6901# Notes: See the notes on Step 2 of setox. # 6902# # 6903# Step 3. Calculate X - N*log2/64. # 6904# 3.1 R := X + N*L1, # 6905# where L1 := single-precision(-log2/64). # 6906# 3.2 R := R + N*L2, # 6907# L2 := extended-precision(-log2/64 - L1).# 6908# Notes: Applying the analysis of Step 3 of setox in this case # 6909# shows that |R| <= 0.0055 (note that |X| <= 70 log2 in # 6910# this case). # 6911# # 6912# Step 4. Approximate exp(R)-1 by a polynomial # 6913# p = R+R*R*(A1+R*(A2+R*(A3+R*(A4+R*(A5+R*A6))))) # 6914# Notes: a) In order to reduce memory access, the coefficients # 6915# are made as "short" as possible: A1 (which is 1/2), A5 # 6916# and A6 are single precision; A2, A3 and A4 are double # 6917# precision. # 6918# b) Even with the restriction above, # 6919# |p - (exp(R)-1)| < |R| * 2^(-72.7) # 6920# for all |R| <= 0.0055. # 6921# c) To fully utilize the pipeline, p is separated into # 6922# two independent pieces of roughly equal complexity # 6923# p = [ R*S*(A2 + S*(A4 + S*A6)) ] + # 6924# [ R + S*(A1 + S*(A3 + S*A5)) ] # 6925# where S = R*R. # 6926# # 6927# Step 5. Compute 2^(J/64)*p by # 6928# p := T*p # 6929# where T and t are the stored values for 2^(J/64). # 6930# Notes: 2^(J/64) is stored as T and t where T+t approximates # 6931# 2^(J/64) to roughly 85 bits; T is in extended precision # 6932# and t is in single precision. Note also that T is # 6933# rounded to 62 bits so that the last two bits of T are # 6934# zero. The reason for such a special form is that T-1, # 6935# T-2, and T-8 will all be exact --- a property that will # 6936# be exploited in Step 6 below. The total relative error # 6937# in p is no bigger than 2^(-67.7) compared to the final # 6938# result. # 6939# # 6940# Step 6. Reconstruction of exp(X)-1 # 6941# exp(X)-1 = 2^M * ( 2^(J/64) + p - 2^(-M) ). 
#		6.1	If M <= 63, go to Step 6.3.			#
#		6.2	ans := T + (p + (t + OnebySc)). Go to 6.6	#
#		6.3	If M >= -3, go to 6.5.				#
#		6.4	ans := (T + (p + t)) + OnebySc. Go to 6.6	#
#		6.5	ans := (T + OnebySc) + (p + t).			#
#		6.6	Restore user FPCR.				#
#		6.7	Return ans := Sc * ans. Exit.			#
#	Notes:	The various arrangements of the expressions give	#
#		accurate evaluations.					#
#									#
#	Step 7.	exp(X)-1 for |X| < 1/4.					#
#		7.1	If |X| >= 2^(-65), go to Step 9.		#
#		7.2	Go to Step 8.					#
#									#
#	Step 8.	Calculate exp(X)-1, |X| < 2^(-65).			#
#		8.1	If |X| < 2^(-16312), goto 8.3			#
#		8.2	Restore FPCR; return ans := X - 2^(-16382).	#
#			Exit.						#
#		8.3	X := X * 2^(140).				#
#		8.4	Restore FPCR; ans := ans - 2^(-16382).		#
#			Return ans := ans*2^(-140). Exit		#
#	Notes:	The idea is to return "X - tiny" under the user		#
#		precision and rounding modes. To avoid unnecessary	#
#		inefficiency, we stay away from denormalized numbers	#
#		the best we can. For |X| >= 2^(-16312), the		#
#		straightforward 8.2 generates the inexact exception as	#
#		the case warrants.					#
#									#
#	Step 9.	Calculate exp(X)-1, |X| < 1/4, by a polynomial		#
#			p = X + X*X*(B1 + X*(B2 + ... + X*B12))		#
#	Notes:	a) In order to reduce memory access, the coefficients	#
#		are made as "short" as possible: B1 (which is 1/2), B9	#
#		to B12 are single precision; B3 to B8 are double	#
#		precision; and B2 is double extended.			#
#		b) Even with the restriction above,			#
#			|p - (exp(X)-1)| < |X| 2^(-70.6)		#
#		for all |X| <= 0.251.					#
#		Note that 0.251 is slightly bigger than 1/4.		#
#		c) To fully preserve accuracy, the polynomial is	#
#		computed as						#
#			X + ( S*B1 + Q ) where S = X*X and		#
#			Q = X*S*(B2 + X*(B3 + ... + X*B12))		#
#		d) To fully utilize the pipeline, Q is separated into	#
#		two independent pieces of roughly equal complexity	#
#			Q = [ X*S*(B2 + S*(B4 + ... 
#			+ S*B12)) ] +					#
#		    [ S*S*(B3 + S*(B5 + ... + S*B11)) ]			#
#									#
#	Step 10. Calculate exp(X)-1 for |X| >= 70 log 2.		#
#		10.1 If X >= 70log2 , exp(X) - 1 = exp(X) for all	#
#		practical purposes. Therefore, go to Step 1 of setox.	#
#		10.2 If X <= -70log2, exp(X) - 1 = -1 for all practical	#
#		purposes.						#
#		ans := -1						#
#		Restore user FPCR					#
#		Return ans := ans + 2^(-126). Exit.			#
#	Notes:	10.2 will always create an inexact and return -1 + tiny	#
#		in the user rounding precision and mode.		#
#									#
#########################################################################

#--Constants shared by setox and setoxm1.
#--L2 is the extended-precision tail of -log2/64: the code multiplies N by
#--the single-precision lead L1 (immediate 0xBC317218) and by L2, with
#--L1+L2 = -log2/64.
L2:	long		0x3FDC0000,0x82E30865,0x4361C4C6,0x00000000

#--setox polynomial coefficients A3 and A2, read with fadd.d (double).
EEXPA3:	long		0x3FA55555,0x55554CC1
EEXPA2:	long		0x3FC55555,0x55554A54

#--setoxm1 (|X| >= 1/4) polynomial coefficients A4, A3, A2, read with fadd.d.
EM1A4:	long		0x3F811111,0x11174385
EM1A3:	long		0x3FA55555,0x55554F5A

#--Only the first two longs of EM1A2 are read (fadd.d); the two trailing
#--zero longs look like padding -- TODO confirm.
EM1A2:	long		0x3FC55555,0x55555555,0x00000000,0x00000000

#--setoxm1 (|X| < 1/4) polynomial coefficients B8..B3, read with fadd.d.
EM1B8:	long		0x3EC71DE3,0xA5774682
EM1B7:	long		0x3EFA01A0,0x19D7CB68

EM1B6:	long		0x3F2A01A0,0x1A019DF3
EM1B5:	long		0x3F56C16C,0x16C170E2

EM1B4:	long		0x3F811111,0x11111111
EM1B3:	long		0x3FA55555,0x55555555

#--B2 is read with fadd.x (extended); the fourth long is not part of the
#--12-byte extended value.
EM1B2:	long		0x3FFC0000,0xAAAAAAAA,0xAAAAAAAB
	long		0x00000000

#--Double-precision 2^(140) and 2^(-140), used by the EM12TINY path to
#--scale a tiny X up before the subtraction and back down afterwards.
TWO140:	long		0x48B00000,0x00000000
TWON140:
	long		0x37300000,0x00000000

#--EEXPTBL: table of 2^(J/64), J = 0..63, 16 bytes per entry (the code
#--scales J with "lsl.l &4").  Each entry is a 12-byte extended value T
#--followed by a 4-byte single-precision correction t; the code reads T
#--with fmov.x/fadd.x and t with fadd.s/fmov.s at offset 12.
EEXPTBL:
	long		0x3FFF0000,0x80000000,0x00000000,0x00000000
	long		0x3FFF0000,0x8164D1F3,0xBC030774,0x9F841A9B
	long		0x3FFF0000,0x82CD8698,0xAC2BA1D8,0x9FC1D5B9
	long		0x3FFF0000,0x843A28C3,0xACDE4048,0xA0728369
	long		0x3FFF0000,0x85AAC367,0xCC487B14,0x1FC5C95C
	long		0x3FFF0000,0x871F6196,0x9E8D1010,0x1EE85C9F
	long		0x3FFF0000,0x88980E80,0x92DA8528,0x9FA20729
	long		0x3FFF0000,0x8A14D575,0x496EFD9C,0xA07BF9AF
	long		0x3FFF0000,0x8B95C1E3,0xEA8BD6E8,0xA0020DCF
	long		0x3FFF0000,0x8D1ADF5B,0x7E5BA9E4,0x205A63DA
	long		0x3FFF0000,0x8EA4398B,0x45CD53C0,0x1EB70051
	long		0x3FFF0000,0x9031DC43,0x1466B1DC,0x1F6EB029
	long		0x3FFF0000,0x91C3D373,0xAB11C338,0xA0781494
	long		0x3FFF0000,0x935A2B2F,0x13E6E92C,0x9EB319B0
	long		0x3FFF0000,0x94F4EFA8,0xFEF70960,0x2017457D
	long		0x3FFF0000,0x96942D37,0x20185A00,0x1F11D537
	long		0x3FFF0000,0x9837F051,0x8DB8A970,0x9FB952DD
	long		0x3FFF0000,0x99E04593,0x20B7FA64,0x1FE43087
	long		0x3FFF0000,0x9B8D39B9,0xD54E5538,0x1FA2A818
	long		0x3FFF0000,0x9D3ED9A7,0x2CFFB750,0x1FDE494D
	long		0x3FFF0000,0x9EF53260,0x91A111AC,0x20504890
	long		0x3FFF0000,0xA0B0510F,0xB9714FC4,0xA073691C
	long		0x3FFF0000,0xA2704303,0x0C496818,0x1F9B7A05
	long		0x3FFF0000,0xA43515AE,0x09E680A0,0xA0797126
	long		0x3FFF0000,0xA5FED6A9,0xB15138EC,0xA071A140
	long		0x3FFF0000,0xA7CD93B4,0xE9653568,0x204F62DA
	long		0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x1F283C4A
	long		0x3FFF0000,0xAB7A39B5,0xA93ED338,0x9F9A7FDC
	long		0x3FFF0000,0xAD583EEA,0x42A14AC8,0xA05B3FAC
	long		0x3FFF0000,0xAF3B78AD,0x690A4374,0x1FDF2610
	long		0x3FFF0000,0xB123F581,0xD2AC2590,0x9F705F90
	long		0x3FFF0000,0xB311C412,0xA9112488,0x201F678A
	long		0x3FFF0000,0xB504F333,0xF9DE6484,0x1F32FB13
	long		0x3FFF0000,0xB6FD91E3,0x28D17790,0x20038B30
	long		0x3FFF0000,0xB8FBAF47,0x62FB9EE8,0x200DC3CC
	long		0x3FFF0000,0xBAFF5AB2,0x133E45FC,0x9F8B2AE6
	long		0x3FFF0000,0xBD08A39F,0x580C36C0,0xA02BBF70
	long		0x3FFF0000,0xBF1799B6,0x7A731084,0xA00BF518
	long		0x3FFF0000,0xC12C4CCA,0x66709458,0xA041DD41
	long		0x3FFF0000,0xC346CCDA,0x24976408,0x9FDF137B
	long		0x3FFF0000,0xC5672A11,0x5506DADC,0x201F1568
	long		0x3FFF0000,0xC78D74C8,0xABB9B15C,0x1FC13A2E
	long		0x3FFF0000,0xC9B9BD86,0x6E2F27A4,0xA03F8F03
	long		0x3FFF0000,0xCBEC14FE,0xF2727C5C,0x1FF4907D
	long		0x3FFF0000,0xCE248C15,0x1F8480E4,0x9E6E53E4
	long		0x3FFF0000,0xD06333DA,0xEF2B2594,0x1FD6D45C
	long		0x3FFF0000,0xD2A81D91,0xF12AE45C,0xA076EDB9
	long		0x3FFF0000,0xD4F35AAB,0xCFEDFA20,0x9FA6DE21
	long		0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x1EE69A2F
	long		0x3FFF0000,0xD99D15C2,0x78AFD7B4,0x207F439F
	long		0x3FFF0000,0xDBFBB797,0xDAF23754,0x201EC207
	long		0x3FFF0000,0xDE60F482,0x5E0E9124,0x9E8BE175
	long		0x3FFF0000,0xE0CCDEEC,0x2A94E110,0x20032C4B
	long		0x3FFF0000,0xE33F8972,0xBE8A5A50,0x2004DFF5
	long		0x3FFF0000,0xE5B906E7,0x7C8348A8,0x1E72F47A
	long		0x3FFF0000,0xE8396A50,0x3C4BDC68,0x1F722F22
	long		0x3FFF0000,0xEAC0C6E7,0xDD243930,0xA017E945
	long		0x3FFF0000,0xED4F301E,0xD9942B84,0x1F401A5B
	long		0x3FFF0000,0xEFE4B99B,0xDCDAF5CC,0x9FB9A9E3
	long		0x3FFF0000,0xF281773C,0x59FFB138,0x20744C05
	long		0x3FFF0000,0xF5257D15,0x2486CC2C,0x1F773A19
	long		0x3FFF0000,0xF7D0DF73,0x0AD13BB8,0x1FFE90D5
	long		0x3FFF0000,0xFA83B2DB,0x722A033C,0xA041ED22
	long		0x3FFF0000,0xFD3E0C0C,0xF486C174,0x1F853F3A

#--Scratch aliases (all a6-relative).  SCALE/SC share FP_SCR0 and
#--ADJSCALE/ONEBYSC share FP_SCR1: the first name of each pair is used by
#--setox, the second by setoxm1.
	set		ADJFLAG,L_SCR2
	set		SCALE,FP_SCR0
	set		ADJSCALE,FP_SCR1
	set		SC,FP_SCR0
	set		ONEBYSC,FP_SCR1

#--setox: EXP(X) for a normalized operand.
#--  In:   a0 = pointer to the extended-precision operand X
#--        d0 = value written to FPCR just before the final operation
#--             (the user's rounding precision/mode)
#--  Out:  the result is computed into fp0; every path ends by branching
#--        to an exit handler (t_catch, t_pinx2, t_unfl2 or t_ovfl2) that
#--        is defined outside this section.
#--  Uses: d1, a1, fp1 as scratch; fp2/fp3 are saved on the stack and
#--        restored; L_SCR1, ADJFLAG, SCALE and ADJSCALE in the a6 frame.
#--  Before "bra t_catch", d1 is loaded with a tag naming the final FP
#--  operation (FMUL_OP here); presumably t_catch consumes it -- confirm.
	global		setox
setox:
#--entry point for EXP(X), here X is finite, non-zero, and not NaN's

#--Step 1.
	mov.l		(%a0),%d1		# load part of input X
	and.l		&0x7FFF0000,%d1		# biased expo. of X
	cmp.l		%d1,&0x3FBE0000		# 2^(-65)
	bge.b		EXPC1			# normal case
	bra		EXPSM

EXPC1:
#--The case |X| >= 2^(-65)
#--(also entered from EM1BIG in setoxm1 for large positive X)
	mov.w		4(%a0),%d1		# expo. and partial sig. of |X|
	cmp.l		%d1,&0x400CB167		# 16380 log2 trunc. 16 bits
	blt.b		EXPMAIN			# normal case
	bra		EEXPBIG

EXPMAIN:
#--Step 2.
#--This is the normal branch:	2^(-65) <= |X| < 16380 log2.
	fmov.x		(%a0),%fp0		# load input from (a0)

	fmov.x		%fp0,%fp1
	fmul.s		&0x42B8AA3B,%fp0	# 64/log2 * X
	fmovm.x		&0xc,-(%sp)		# save fp2 {%fp2/%fp3}
	mov.l		&0,ADJFLAG(%a6)		# no extra scale factor needed
	fmov.l		%fp0,%d1		# N = int( X * 64/log2 )
	lea		EEXPTBL(%pc),%a1
	fmov.l		%d1,%fp0		# convert to floating-format

	mov.l		%d1,L_SCR1(%a6)		# save N temporarily
	and.l		&0x3F,%d1		# d1 is J = N mod 64
	lsl.l		&4,%d1			# J * 16 = byte offset into EEXPTBL
	add.l		%d1,%a1			# address of 2^(J/64)
	mov.l		L_SCR1(%a6),%d1
	asr.l		&6,%d1			# d1 is M
	add.w		&0x3FFF,%d1		# biased expo. of 2^(M)
	mov.w		L2(%pc),L_SCR1(%a6)	# prefetch L2, no need in CB

EXPCONT1:
#--Step 3.
#--fp2,fp3 saved on the stack. fp0 is N, fp1 is X,
#--a1 points to 2^(J/64), d1 is biased expo. of 2^(M)
	fmov.x		%fp0,%fp2
	fmul.s		&0xBC317218,%fp0	# N * L1, L1 = lead(-log2/64)
	fmul.x		L2(%pc),%fp2		# N * L2, L1+L2 = -log2/64
	fadd.x		%fp1,%fp0		# X + N*L1
	fadd.x		%fp2,%fp0		# fp0 is R, reduced arg.

#--Step 4.
#--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL
#-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5))))
#--TO FULLY UTILIZE THE PIPELINE, WE COMPUTE S = R*R
#--[R+R*S*(A2+S*A4)] + [S*(A1+S*(A3+S*A5))]

	fmov.x		%fp0,%fp1
	fmul.x		%fp1,%fp1		# fp1 IS S = R*R

	fmov.s		&0x3AB60B70,%fp2	# fp2 IS A5

	fmul.x		%fp1,%fp2		# fp2 IS S*A5
	fmov.x		%fp1,%fp3
	fmul.s		&0x3C088895,%fp3	# fp3 IS S*A4

	fadd.d		EEXPA3(%pc),%fp2	# fp2 IS A3+S*A5
	fadd.d		EEXPA2(%pc),%fp3	# fp3 IS A2+S*A4

	fmul.x		%fp1,%fp2		# fp2 IS S*(A3+S*A5)
	mov.w		%d1,SCALE(%a6)		# SCALE is 2^(M) in extended
	mov.l		&0x80000000,SCALE+4(%a6)
	clr.l		SCALE+8(%a6)

	fmul.x		%fp1,%fp3		# fp3 IS S*(A2+S*A4)

	fadd.s		&0x3F000000,%fp2	# fp2 IS A1+S*(A3+S*A5)
	fmul.x		%fp0,%fp3		# fp3 IS R*S*(A2+S*A4)

	fmul.x		%fp1,%fp2		# fp2 IS S*(A1+S*(A3+S*A5))
	fadd.x		%fp3,%fp0		# fp0 IS R+R*S*(A2+S*A4)

	fmov.x		(%a1)+,%fp1		# fp1 is lead. pt. of 2^(J/64); a1 now at t
	fadd.x		%fp2,%fp0		# fp0 is EXP(R) - 1

#--Step 5
#--final reconstruction process
#--EXP(X) = 2^M * ( 2^(J/64) + 2^(J/64)*(EXP(R)-1) )

	fmul.x		%fp1,%fp0		# 2^(J/64)*(Exp(R)-1)
	fmovm.x		(%sp)+,&0x30		# fp2 restored {%fp2/%fp3}
	fadd.s		(%a1),%fp0		# accurate 2^(J/64)

	fadd.x		%fp1,%fp0		# 2^(J/64) + 2^(J/64)*...
	mov.l		ADJFLAG(%a6),%d1

#--Step 6
	tst.l		%d1			# was the scale split (EEXPBIG path)?
	beq.b		NORMAL
ADJUST:
	fmul.x		ADJSCALE(%a6),%fp0	# apply the 2^(M1) half first
NORMAL:
	fmov.l		%d0,%fpcr		# restore user FPCR
	mov.b		&FMUL_OP,%d1		# last inst is MUL
	fmul.x		SCALE(%a6),%fp0		# multiply 2^(M)
	bra		t_catch

EXPSM:
#--Step 7
#--|X| < 2^(-65): the answer is 1+X evaluated under the user's FPCR
	fmovm.x		(%a0),&0x80		# load X
	fmov.l		%d0,%fpcr
	fadd.s		&0x3F800000,%fp0	# 1+X in user mode
	bra		t_pinx2

EEXPBIG:
#--Step 8
	cmp.l		%d1,&0x400CB27C		# 16480 log2
	bgt.b		EXP2BIG
#--Steps 8.2 -- 8.6
#--Same reduction as EXPMAIN, but 2^(K) is split into 2^(M1) * 2^(M)
#--(M1 = K >> 1, M = K - M1) and ADJFLAG is set so that Step 6 applies
#--both factors.
	fmov.x		(%a0),%fp0		# load input from (a0)

	fmov.x		%fp0,%fp1
	fmul.s		&0x42B8AA3B,%fp0	# 64/log2 * X
	fmovm.x		&0xc,-(%sp)		# save fp2 {%fp2/%fp3}
	mov.l		&1,ADJFLAG(%a6)
	fmov.l		%fp0,%d1		# N = int( X * 64/log2 )
	lea		EEXPTBL(%pc),%a1
	fmov.l		%d1,%fp0		# convert to floating-format
	mov.l		%d1,L_SCR1(%a6)		# save N temporarily
	and.l		&0x3F,%d1		# d1 is J = N mod 64
	lsl.l		&4,%d1			# J * 16 = byte offset into EEXPTBL
	add.l		%d1,%a1			# address of 2^(J/64)
	mov.l		L_SCR1(%a6),%d1
	asr.l		&6,%d1			# d1 is K
	mov.l		%d1,L_SCR1(%a6)		# save K temporarily
	asr.l		&1,%d1			# d1 is M1
	sub.l		%d1,L_SCR1(%a6)		# L_SCR1 is M = K - M1
	add.w		&0x3FFF,%d1		# biased expo. of 2^(M1)
	mov.w		%d1,ADJSCALE(%a6)	# ADJSCALE := 2^(M1)
	mov.l		&0x80000000,ADJSCALE+4(%a6)
	clr.l		ADJSCALE+8(%a6)
	mov.l		L_SCR1(%a6),%d1		# d1 is M
	add.w		&0x3FFF,%d1		# biased expo. of 2^(M)
	bra.w		EXPCONT1		# go back to Step 3

EXP2BIG:
#--Step 9
#--|X| beyond the EEXPBIG bound: hand off to the underflow or overflow
#--exit according to the sign of X
	tst.b		(%a0)			# is X positive or negative?
	bmi		t_unfl2
	bra		t_ovfl2

#--setoxd: EXP(X) for a denormalized operand.
#--  In:   a0 = pointer to X, d0 = value written to FPCR
#--  Computes 1 + sign(X)*2^(-126) under the user's FPCR and exits
#--  through t_pinx2.
	global		setoxd
setoxd:
#--entry point for EXP(X), X is denormalized
	mov.l		(%a0),-(%sp)
	andi.l		&0x80000000,(%sp)	# keep only the sign of X
	ori.l		&0x00800000,(%sp)	# sign(X)*2^(-126)

	fmov.s		&0x3F800000,%fp0	# fp0 is 1.0

	fmov.l		%d0,%fpcr
	fadd.s		(%sp)+,%fp0		# 1 + sign(X)*2^(-126); pops the temp
	bra		t_pinx2

#--setoxm1: EXPM1(X) = exp(X)-1 for a normalized operand.
#--  In:   a0 = pointer to the extended-precision operand X
#--        d0 = value written to FPCR just before the final operation
#--  Out:  the result is computed into fp0; paths end by branching to
#--        t_inx2, t_catch or t_minx2 (defined outside this section), or
#--        by joining setox at EXPC1 for large positive X.
#--  Uses: d1, a1, fp1 as scratch; fp2/fp3 are saved on the stack and
#--        restored; L_SCR1, SC and ONEBYSC in the a6 frame.
	global		setoxm1
setoxm1:
#--entry point for EXPM1(X), here X is finite, non-zero, non-NaN

#--Step 1.
#--Step 1.1
	mov.l		(%a0),%d1		# load part of input X
	and.l		&0x7FFF0000,%d1		# biased expo. of X
	cmp.l		%d1,&0x3FFD0000		# 1/4
	bge.b		EM1CON1			# |X| >= 1/4
	bra		EM1SM

EM1CON1:
#--Step 1.3
#--The case |X| >= 1/4
	mov.w		4(%a0),%d1		# expo. and partial sig. of |X|
	cmp.l		%d1,&0x4004C215		# 70log2 rounded up to 16 bits
	ble.b		EM1MAIN			# 1/4 <= |X| <= 70log2
	bra		EM1BIG

EM1MAIN:
#--Step 2.
#--This is the case:	1/4 <= |X| <= 70 log2.
	fmov.x		(%a0),%fp0		# load input from (a0)

	fmov.x		%fp0,%fp1
	fmul.s		&0x42B8AA3B,%fp0	# 64/log2 * X
	fmovm.x		&0xc,-(%sp)		# save fp2 {%fp2/%fp3}
	fmov.l		%fp0,%d1		# N = int( X * 64/log2 )
	lea		EEXPTBL(%pc),%a1
	fmov.l		%d1,%fp0		# convert to floating-format

	mov.l		%d1,L_SCR1(%a6)		# save N temporarily
	and.l		&0x3F,%d1		# d1 is J = N mod 64
	lsl.l		&4,%d1			# J * 16 = byte offset into EEXPTBL
	add.l		%d1,%a1			# address of 2^(J/64)
	mov.l		L_SCR1(%a6),%d1
	asr.l		&6,%d1			# d1 is M
	mov.l		%d1,L_SCR1(%a6)		# save a copy of M

#--Step 3.
#--fp2,fp3 saved on the stack. fp0 is N, fp1 is X,
#--a1 points to 2^(J/64), d1 and L_SCR1 both contain M
	fmov.x		%fp0,%fp2
	fmul.s		&0xBC317218,%fp0	# N * L1, L1 = lead(-log2/64)
	fmul.x		L2(%pc),%fp2		# N * L2, L1+L2 = -log2/64
	fadd.x		%fp1,%fp0		# X + N*L1
	fadd.x		%fp2,%fp0		# fp0 is R, reduced arg.
	add.w		&0x3FFF,%d1		# d1 is biased expo. of 2^M

#--Step 4.
#--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL
#-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*(A5 + R*A6)))))
#--TO FULLY UTILIZE THE PIPELINE, WE COMPUTE S = R*R
#--[R*S*(A2+S*(A4+S*A6))] + [R+S*(A1+S*(A3+S*A5))]

	fmov.x		%fp0,%fp1
	fmul.x		%fp1,%fp1		# fp1 IS S = R*R

	fmov.s		&0x3950097B,%fp2	# fp2 IS A6

	fmul.x		%fp1,%fp2		# fp2 IS S*A6
	fmov.x		%fp1,%fp3
	fmul.s		&0x3AB60B6A,%fp3	# fp3 IS S*A5

	fadd.d		EM1A4(%pc),%fp2		# fp2 IS A4+S*A6
	fadd.d		EM1A3(%pc),%fp3		# fp3 IS A3+S*A5
	mov.w		%d1,SC(%a6)		# SC is 2^(M) in extended
	mov.l		&0x80000000,SC+4(%a6)
	clr.l		SC+8(%a6)

	fmul.x		%fp1,%fp2		# fp2 IS S*(A4+S*A6)
	mov.l		L_SCR1(%a6),%d1		# d1 is M
	neg.w		%d1			# d1 is -M
	fmul.x		%fp1,%fp3		# fp3 IS S*(A3+S*A5)
	add.w		&0x3FFF,%d1		# biased expo. of 2^(-M)
	fadd.d		EM1A2(%pc),%fp2		# fp2 IS A2+S*(A4+S*A6)
	fadd.s		&0x3F000000,%fp3	# fp3 IS A1+S*(A3+S*A5)

	fmul.x		%fp1,%fp2		# fp2 IS S*(A2+S*(A4+S*A6))
	or.w		&0x8000,%d1		# signed/expo. of -2^(-M)
	mov.w		%d1,ONEBYSC(%a6)	# OnebySc is -2^(-M)
	mov.l		&0x80000000,ONEBYSC+4(%a6)
	clr.l		ONEBYSC+8(%a6)
	fmul.x		%fp3,%fp1		# fp1 IS S*(A1+S*(A3+S*A5))

	fmul.x		%fp0,%fp2		# fp2 IS R*S*(A2+S*(A4+S*A6))
	fadd.x		%fp1,%fp0		# fp0 IS R+S*(A1+S*(A3+S*A5))

	fadd.x		%fp2,%fp0		# fp0 IS EXP(R)-1

	fmovm.x		(%sp)+,&0x30		# fp2 restored {%fp2/%fp3}

#--Step 5
#--Compute 2^(J/64)*p

	fmul.x		(%a1),%fp0		# 2^(J/64)*(Exp(R)-1)

#--Step 6
#--The three orderings below add the same terms (T, t, p, OnebySc); which
#--one is used depends on M, per the routine's algorithm description.
#--Step 6.1
	mov.l		L_SCR1(%a6),%d1		# retrieve M
	cmp.l		%d1,&63
	ble.b		MLE63
#--Step 6.2 M >= 64
	fmov.s		12(%a1),%fp1		# fp1 is t
	fadd.x		ONEBYSC(%a6),%fp1	# fp1 is t+OnebySc
	fadd.x		%fp1,%fp0		# p+(t+OnebySc), fp1 released
	fadd.x		(%a1),%fp0		# T+(p+(t+OnebySc))
	bra		EM1SCALE
MLE63:
#--Step 6.3 M <= 63
	cmp.l		%d1,&-3
	bge.b		MGEN3
MLTN3:
#--Step 6.4 M <= -4
	fadd.s		12(%a1),%fp0		# p+t
	fadd.x		(%a1),%fp0		# T+(p+t)
	fadd.x		ONEBYSC(%a6),%fp0	# OnebySc + (T+(p+t))
	bra		EM1SCALE
MGEN3:
#--Step 6.5 -3 <= M <= 63
	fmov.x		(%a1)+,%fp1		# fp1 is T; a1 now at t
	fadd.s		(%a1),%fp0		# fp0 is p+t
	fadd.x		ONEBYSC(%a6),%fp1	# fp1 is T+OnebySc
	fadd.x		%fp1,%fp0		# (T+OnebySc)+(p+t)

EM1SCALE:
#--Step 6.6
	fmov.l		%d0,%fpcr		# final multiply runs under user FPCR
	fmul.x		SC(%a6),%fp0		# ans := Sc * ans
	bra		t_inx2

EM1SM:
#--Step 7 |X| < 1/4.
#--d1 still holds the biased exponent of X from Step 1.1
	cmp.l		%d1,&0x3FBE0000		# 2^(-65)
	bge.b		EM1POLY

EM1TINY:
#--Step 8 |X| < 2^(-65)
#--NOTE(review): with the 0x3FFF exponent bias used elsewhere in this
#--routine, 0x0033 encodes 2^(-16332), not the 2^(-16312) quoted here and
#--in the algorithm description -- confirm which is intended.
	cmp.l		%d1,&0x00330000		# 2^(-16312)
	blt.b		EM12TINY
#--Step 8.2
	mov.l		&0x80010000,SC(%a6)	# SC is -2^(-16382)
	mov.l		&0x80000000,SC+4(%a6)
	clr.l		SC+8(%a6)
	fmov.x		(%a0),%fp0
	fmov.l		%d0,%fpcr
	mov.b		&FADD_OP,%d1		# last inst is ADD
	fadd.x		SC(%a6),%fp0		# X - 2^(-16382) in user mode
	bra		t_catch

EM12TINY:
#--Step 8.3
#--Scale X up by 2^(140), subtract 2^(-16382), then scale back down by
#--2^(-140) under the user's FPCR.
	fmov.x		(%a0),%fp0
	fmul.d		TWO140(%pc),%fp0	# X * 2^(140)
	mov.l		&0x80010000,SC(%a6)	# SC is -2^(-16382)
	mov.l		&0x80000000,SC+4(%a6)
	clr.l		SC+8(%a6)
	fadd.x		SC(%a6),%fp0
	fmov.l		%d0,%fpcr
	mov.b		&FMUL_OP,%d1		# last inst is MUL
	fmul.d		TWON140(%pc),%fp0	# * 2^(-140)
	bra		t_catch

EM1POLY:
#--Step 9	exp(X)-1 by a simple polynomial
#--Two chains are interleaved: fp1 carries the even coefficients
#--(B12, B10, ..., B2) and fp2 the odd ones (B11, B9, ..., B3).
	fmov.x		(%a0),%fp0		# fp0 is X
	fmul.x		%fp0,%fp0		# fp0 is S := X*X
	fmovm.x		&0xc,-(%sp)		# save fp2 {%fp2/%fp3}
	fmov.s		&0x2F30CAA8,%fp1	# fp1 is B12
	fmul.x		%fp0,%fp1		# fp1 is S*B12
	fmov.s		&0x310F8290,%fp2	# fp2 is B11
	fadd.s		&0x32D73220,%fp1	# fp1 is B10+S*B12

	fmul.x		%fp0,%fp2		# fp2 is S*B11
	fmul.x		%fp0,%fp1		# fp1 is S*(B10 + ...

	fadd.s		&0x3493F281,%fp2	# fp2 is B9+S*...
	fadd.d		EM1B8(%pc),%fp1		# fp1 is B8+S*...

	fmul.x		%fp0,%fp2		# fp2 is S*(B9+...
	fmul.x		%fp0,%fp1		# fp1 is S*(B8+...

	fadd.d		EM1B7(%pc),%fp2		# fp2 is B7+S*...
	fadd.d		EM1B6(%pc),%fp1		# fp1 is B6+S*...

	fmul.x		%fp0,%fp2		# fp2 is S*(B7+...
	fmul.x		%fp0,%fp1		# fp1 is S*(B6+...

	fadd.d		EM1B5(%pc),%fp2		# fp2 is B5+S*...
	fadd.d		EM1B4(%pc),%fp1		# fp1 is B4+S*...

	fmul.x		%fp0,%fp2		# fp2 is S*(B5+...
	fmul.x		%fp0,%fp1		# fp1 is S*(B4+...

	fadd.d		EM1B3(%pc),%fp2		# fp2 is B3+S*...
	fadd.x		EM1B2(%pc),%fp1		# fp1 is B2+S*...

	fmul.x		%fp0,%fp2		# fp2 is S*(B3+...
	fmul.x		%fp0,%fp1		# fp1 is S*(B2+...

	fmul.x		%fp0,%fp2		# fp2 is S*S*(B3+...)
	fmul.x		(%a0),%fp1		# fp1 is X*S*(B2...

	fmul.s		&0x3F000000,%fp0	# fp0 is S*B1
	fadd.x		%fp2,%fp1		# fp1 is Q

	fmovm.x		(%sp)+,&0x30		# fp2 restored {%fp2/%fp3}

	fadd.x		%fp1,%fp0		# fp0 is S*B1+Q

	fmov.l		%d0,%fpcr
	fadd.x		(%a0),%fp0		# X + (S*B1+Q) in user mode
	bra		t_inx2

EM1BIG:
#--Step 10	|X| > 70 log2
	mov.l		(%a0),%d1		# sign/expo. long of X
	cmp.l		%d1,&0
	bgt.w		EXPC1			# X > 0: continue in setox at EXPC1
#--Step 10.2
	fmov.s		&0xBF800000,%fp0	# fp0 is -1
	fmov.l		%d0,%fpcr
	fadd.s		&0x00800000,%fp0	# -1 + 2^(-126)
	bra		t_minx2

#--setoxm1d: EXPM1(X) for a denormalized operand; all work is delegated
#--to t_extdnrm (defined outside this section).
	global		setoxm1d
setoxm1d:
#--entry point for EXPM1(X), here X is denormalized
#--Step 0.
	bra		t_extdnrm

#########################################################################
# sgetexp():  returns the exponent portion of the input argument.	#
#	      The exponent bias is removed and the exponent value is	#
#	      returned as an extended precision number in fp0.		#
# sgetexpd(): handles denormalized numbers.				#
#									#
# sgetman():  extracts the mantissa of the input argument. The		#
#	      mantissa is converted to an extended precision number w/	#
#	      an exponent of $3fff and is returned in fp0. The range of	#
#	      the result is [1.0 - 2.0).				#
# sgetmand(): handles denormalized numbers.				#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision input			#
#									#
# OUTPUT **************************************************************	#
#	fp0 = exponent(X) or mantissa(X)				#
#									#
#########################################################################

#--sgetexp: unbiased exponent of a normalized operand, returned in fp0.
#--d0 is used as scratch.  FPSR_CC(%a6) is written only when the unbiased
#--exponent is negative.
	global		sgetexp
sgetexp:
	mov.w		SRC_EX(%a0),%d0		# get the exponent
	bclr		&0xf,%d0		# clear the sign bit
	subi.w		&0x3fff,%d0		# subtract off the bias
	fmov.w		%d0,%fp0		# return exp in fp0
	blt.b		sgetexpn		# it's negative (ccodes are from subi.w)
	rts

sgetexpn:
	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
	rts

#--sgetexpd: same result for a denormalized operand.  The operand is
#--normalized by norm (defined elsewhere); the code below treats d0 on
#--return as the shift amount -- presumably norm's return value, confirm.
#--The result is always flagged negative.
	global		sgetexpd
sgetexpd:
	bsr.l		norm			# normalize
	neg.w		%d0			# new exp = -(shft amt)
	subi.w		&0x3fff,%d0		# subtract off the bias
	fmov.w		%d0,%fp0		# return exp in fp0
	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
	rts

#--sgetman: mantissa of a normalized operand, returned in fp0 with the
#--operand's sign and a biased exponent of 0x3fff.  d0 and FP_SCR0 are
#--used as scratch; the input at (a0) is not modified.
	global		sgetman
sgetman:
	mov.w		SRC_EX(%a0),%d0		# get the exp
	ori.w		&0x7fff,%d0		# set all exponent bits, keep the sign
	bclr		&0xe,%d0		# clear bit 14: expo. becomes +-3fff

# here, we build the result in a tmp location so as not to disturb the input
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6) # copy to tmp loc
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6) # copy to tmp loc
	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
	fmov.x		FP_SCR0(%a6),%fp0	# put new value back in fp0
	bmi.b		sgetmann		# it's negative (ccodes are from mov.w)
	rts

sgetmann:
	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
	rts

#
# For denormalized numbers, shift the mantissa until the j-bit = 1,
# then load the exponent with +/- $3fff.
#
	global		sgetmand
sgetmand:
	bsr.l		norm			# normalize exponent
	bra.b		sgetman			# then share the normalized path

#########################################################################
# scosh():  computes the hyperbolic cosine of a normalized input	#
# scoshd(): computes the hyperbolic cosine of a denormalized input	#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision input			#
#	d0 = round precision,mode					#
#									#
# OUTPUT **************************************************************	#
#	fp0 = cosh(X)							#
#									#
# ACCURACY and MONOTONICITY *******************************************	#
#	The returned result is within 3 ulps in 64 significant bit,	#
#	i.e. within 0.5001 ulp to 53 bits if the result is subsequently	#
#	rounded to double precision. The result is provably monotonic	#
#	in double precision.						#
#									#
# ALGORITHM ***********************************************************	#
#									#
#	COSH								#
#	1. If |X| > 16380 log2, go to 3.				#
#									#
#	2. (|X| <= 16380 log2) Cosh(X) is obtained by the formulae	#
#		y = |X|, z = exp(y), and				#
#		cosh(X) = (1/2)*( z + 1/z ).				#
#		Exit.							#
#									#
#	3. (|X| > 16380 log2). If |X| > 16480 log2, go to 5.		#
#									#
#	4. (16380 log2 < |X| <= 16480 log2)				#
#		cosh(X) = exp(|X|)/2.					#
#		However, invoking exp(|X|) may cause premature		#
#		overflow. Thus, we calculate cosh(X) as follows:	#
#		Y	:= |X|						#
#		Fact	:= 2**(16380)					#
#		Y'	:= Y - 16381 log2				#
#		cosh(X) := Fact * exp(Y').				#
#		Exit.							#
#									#
#	5. (|X| > 16480 log2) cosh(X) must overflow. Return		#
#		Huge*Huge to generate overflow and an infinity with	#
#		the appropriate sign. Huge is the largest finite number	#
#		in extended format. Exit.				#
#									#
#########################################################################

#--Extended-precision 2^(16380): the scale factor "Fact" applied on the
#--COSHBIG path.
TWO16380:
	long		0x7FFB0000,0x80000000,0x00000000,0x00000000

#--scosh: cosh(X) for a normalized operand.
#--  In:   a0 = pointer to X, d0 = value written to FPCR before the final
#--        operation (user rounding precision/mode)
#--  Out:  the result is computed into fp0; exits through t_catch or
#--        t_ovfl2 (defined outside this section).
#--  The inner call to setox is made with d0 = 0; the caller's d0 is
#--  parked on the stack around the call.  T1/T2 are defined elsewhere;
#--  per the comments below they hold the lead and trail parts of
#--  16381 log2.
	global		scosh
scosh:
	fmov.x		(%a0),%fp0		# LOAD INPUT

	mov.l		(%a0),%d1
	mov.w		4(%a0),%d1		# d1 = sign/expo. : top 16 sig. bits
	and.l		&0x7FFFFFFF,%d1		# drop the sign
	cmp.l		%d1,&0x400CB167		# same bound setox labels 16380 log2
	bgt.b		COSHBIG

#--THIS IS THE USUAL CASE, |X| < 16380 LOG2
#--COSH(X) = (1/2) * ( EXP(X) + 1/EXP(X) )

	fabs.x		%fp0			# |X|

	mov.l		%d0,-(%sp)		# park caller's d0
	clr.l		%d0			# call setox with d0 = 0
	fmovm.x		&0x01,-(%sp)		# save |X| to stack
	lea		(%sp),%a0		# pass ptr to |X|
	bsr		setox			# FP0 IS EXP(|X|)
	add.l		&0xc,%sp		# erase |X| from stack
	fmul.s		&0x3F000000,%fp0	# (1/2)EXP(|X|)
	mov.l		(%sp)+,%d0		# recover caller's d0

	fmov.s		&0x3E800000,%fp1	# (1/4)
	fdiv.x		%fp0,%fp1		# 1/(2 EXP(|X|))

	fmov.l		%d0,%fpcr
	mov.b		&FADD_OP,%d1		# last inst is ADD
	fadd.x		%fp1,%fp0
	bra		t_catch

COSHBIG:
#--NOTE(review): 0x400CB2B3 differs from the 0x400CB27C bound that setox
#--labels "16480 log2"; the 16480 log2 figure in the algorithm
#--description may be approximate for this routine -- confirm.
	cmp.l		%d1,&0x400CB2B3
	bgt.b		COSHHUGE

	fabs.x		%fp0
	fsub.d		T1(%pc),%fp0		# (|X|-16381LOG2_LEAD)
	fsub.d		T2(%pc),%fp0		# |X| - 16381 LOG2, ACCURATE

	mov.l		%d0,-(%sp)		# park caller's d0
	clr.l		%d0			# call setox with d0 = 0
	fmovm.x		&0x01,-(%sp)		# save fp0 to stack
	lea		(%sp),%a0		# pass ptr to fp0
	bsr		setox
	add.l		&0xc,%sp		# clear fp0 from stack
	mov.l		(%sp)+,%d0		# recover caller's d0

	fmov.l		%d0,%fpcr
	mov.b		&FMUL_OP,%d1		# last inst is MUL
	fmul.x		TWO16380(%pc),%fp0	# Fact * exp(Y')
	bra		t_catch

COSHHUGE:
	bra		t_ovfl2

#--scoshd: cosh(X) for a denormalized operand.  Computes 1 + 2^(-126)
#--under the user's FPCR and exits through t_pinx2.
	global		scoshd
#--COSH(X) = 1 FOR DENORMALIZED X
scoshd:
	fmov.s		&0x3F800000,%fp0	# fp0 is 1.0

	fmov.l		%d0,%fpcr
	fadd.s		&0x00800000,%fp0	# 1 + 2^(-126)
	bra		t_pinx2

#########################################################################
# ssinh():  computes the hyperbolic sine of a normalized input		#
# ssinhd(): computes the hyperbolic sine of a denormalized input	#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to
extended precision input # 7684# d0 = round precision,mode # 7685# # 7686# OUTPUT ************************************************************** # 7687# fp0 = sinh(X) # 7688# # 7689# ACCURACY and MONOTONICITY ******************************************* # 7690# The returned result is within 3 ulps in 64 significant bit, # 7691# i.e. within 0.5001 ulp to 53 bits if the result is subsequently # 7692# rounded to double precision. The result is provably monotonic # 7693# in double precision. # 7694# # 7695# ALGORITHM *********************************************************** # 7696# # 7697# SINH # 7698# 1. If |X| > 16380 log2, go to 3. # 7699# # 7700# 2. (|X| <= 16380 log2) Sinh(X) is obtained by the formula # 7701# y = |X|, sgn = sign(X), and z = expm1(Y), # 7702# sinh(X) = sgn*(1/2)*( z + z/(1+z) ). # 7703# Exit. # 7704# # 7705# 3. If |X| > 16480 log2, go to 5. # 7706# # 7707# 4. (16380 log2 < |X| <= 16480 log2) # 7708# sinh(X) = sign(X) * exp(|X|)/2. # 7709# However, invoking exp(|X|) may cause premature overflow. # 7710# Thus, we calculate sinh(X) as follows: # 7711# Y := |X| # 7712# sgn := sign(X) # 7713# sgnFact := sgn * 2**(16380) # 7714# Y' := Y - 16381 log2 # 7715# sinh(X) := sgnFact * exp(Y'). # 7716# Exit. # 7717# # 7718# 5. (|X| > 16480 log2) sinh(X) must overflow. Return # 7719# sign(X)*Huge*Huge to generate overflow and an infinity with # 7720# the appropriate sign. Huge is the largest finite number in # 7721# extended format. Exit. 
#									#
#########################################################################

#--ssinh: sinh(X) for a normalized operand.
#--  In:   a0 = pointer to X, d0 = value written to FPCR before the final
#--        operation (user rounding precision/mode)
#--  Out:  the result is computed into fp0; exits through t_catch or
#--        t_ovfl (defined outside this section).
#--  a1 carries the compacted operand (sign/expo. : top 16 sig. bits) so
#--  that the sign of X can be re-applied after working on |X|.  T1/T2
#--  are defined elsewhere; per the comments below they hold the lead and
#--  trail parts of 16381 log2.
	global		ssinh
ssinh:
	fmov.x		(%a0),%fp0		# LOAD INPUT

	mov.l		(%a0),%d1
	mov.w		4(%a0),%d1
	mov.l		%d1,%a1			# save (compacted) operand
	and.l		&0x7FFFFFFF,%d1		# drop the sign
	cmp.l		%d1,&0x400CB167		# same bound setox labels 16380 log2
	bgt.b		SINHBIG

#--THIS IS THE USUAL CASE, |X| < 16380 LOG2
#--Y = |X|, Z = EXPM1(Y), SINH(X) = SIGN(X)*(1/2)*( Z + Z/(1+Z) )

	fabs.x		%fp0			# Y = |X|

	movm.l		&0x8040,-(%sp)		# {a1/d0}
	fmovm.x		&0x01,-(%sp)		# save Y on stack
	lea		(%sp),%a0		# pass ptr to Y
	clr.l		%d0			# call setoxm1 with d0 = 0
	bsr		setoxm1			# FP0 IS Z = EXPM1(Y)
	add.l		&0xc,%sp		# clear Y from stack
	fmov.l		&0,%fpcr		# FPCR = 0 for the intermediate steps
	movm.l		(%sp)+,&0x0201		# {a1/d0}

	fmov.x		%fp0,%fp1
	fadd.s		&0x3F800000,%fp1	# 1+Z
	fmov.x		%fp0,-(%sp)		# push Z
	fdiv.x		%fp1,%fp0		# Z/(1+Z)
	mov.l		%a1,%d1
	and.l		&0x80000000,%d1		# sign of X
	or.l		&0x3F000000,%d1		# d1 = sign(X)*(1/2) in sgl fmt
	fadd.x		(%sp)+,%fp0		# Z + Z/(1+Z); pops Z
	mov.l		%d1,-(%sp)		# push sign(X)*(1/2)

	fmov.l		%d0,%fpcr
	mov.b		&FMUL_OP,%d1		# last inst is MUL
	fmul.s		(%sp)+,%fp0		# last fp inst - possible exceptions set
	bra		t_catch

SINHBIG:
	cmp.l		%d1,&0x400CB2B3
	bgt		t_ovfl
	fabs.x		%fp0
	fsub.d		T1(%pc),%fp0		# (|X|-16381LOG2_LEAD)
#--build sgnFact = sign(X) * 2^(16380) on the stack as a 12-byte extended
#--value (pushed low long first); it is consumed by the final fmul.x
	mov.l		&0,-(%sp)
	mov.l		&0x80000000,-(%sp)
	mov.l		%a1,%d1
	and.l		&0x80000000,%d1
	or.l		&0x7FFB0000,%d1
	mov.l		%d1,-(%sp)		# EXTENDED FMT
	fsub.d		T2(%pc),%fp0		# |X| - 16381 LOG2, ACCURATE

	mov.l		%d0,-(%sp)		# park caller's d0
	clr.l		%d0			# call setox with d0 = 0
	fmovm.x		&0x01,-(%sp)		# save fp0 on stack
	lea		(%sp),%a0		# pass ptr to fp0
	bsr		setox
	add.l		&0xc,%sp		# clear fp0 from stack

	mov.l		(%sp)+,%d0		# recover caller's d0
	fmov.l		%d0,%fpcr
	mov.b		&FMUL_OP,%d1		# last inst is MUL
	fmul.x		(%sp)+,%fp0		# possible exception; pops sgnFact
	bra		t_catch

#--ssinhd: sinh(X) for a denormalized operand; all work is delegated to
#--t_extdnrm (defined outside this section).
	global		ssinhd
#--SINH(X) = X FOR DENORMALIZED X
ssinhd:
	bra		t_extdnrm

#########################################################################
7797# stanh(): computes the hyperbolic tangent of a normalized input # 7798# stanhd(): computes the hyperbolic tangent of a denormalized input # 7799# # 7800# INPUT *************************************************************** # 7801# a0 = pointer to extended precision input # 7802# d0 = round precision,mode # 7803# # 7804# OUTPUT ************************************************************** # 7805# fp0 = tanh(X) # 7806# # 7807# ACCURACY and MONOTONICITY ******************************************* # 7808# The returned result is within 3 ulps in 64 significant bit, # 7809# i.e. within 0.5001 ulp to 53 bits if the result is subsequently # 7810# rounded to double precision. The result is provably monotonic # 7811# in double precision. # 7812# # 7813# ALGORITHM *********************************************************** # 7814# # 7815# TANH # 7816# 1. If |X| >= (5/2) log2 or |X| <= 2**(-40), go to 3. # 7817# # 7818# 2. (2**(-40) < |X| < (5/2) log2) Calculate tanh(X) by # 7819# sgn := sign(X), y := 2|X|, z := expm1(Y), and # 7820# tanh(X) = sgn*( z/(2+z) ). # 7821# Exit. # 7822# # 7823# 3. (|X| <= 2**(-40) or |X| >= (5/2) log2). If |X| < 1, # 7824# go to 7. # 7825# # 7826# 4. (|X| >= (5/2) log2) If |X| >= 50 log2, go to 6. # 7827# # 7828# 5. ((5/2) log2 <= |X| < 50 log2) Calculate tanh(X) by # 7829# sgn := sign(X), y := 2|X|, z := exp(Y), # 7830# tanh(X) = sgn - [ sgn*2/(1+z) ]. # 7831# Exit. # 7832# # 7833# 6. (|X| >= 50 log2) Tanh(X) = +-1 (round to nearest). Thus, we # 7834# calculate Tanh(X) by # 7835# sgn := sign(X), Tiny := 2**(-126), # 7836# tanh(X) := sgn - sgn*Tiny. # 7837# Exit. # 7838# # 7839# 7. (|X| < 2**(-40)). Tanh(X) = X. Exit. 
#									#
#########################################################################

#--Scratch aliases (a6-relative).  X and V share FP_SCR0: X holds the
#--operand / Y = 2|X| before the call to setox or setoxm1, V holds the
#--signed divisor afterwards.
	set		X,FP_SCR0
	set		XFRAC,X+4

	set		SGN,L_SCR3

	set		V,FP_SCR0

#--stanh: tanh(X) for a normalized operand.
#--  In:   a0 = pointer to X, d0 = value written to FPCR before the final
#--        operation (user rounding precision/mode)
#--  Out:  the result is computed into fp0; exits through t_inx2 or
#--        t_catch (defined outside this section).
#--  Uses: d1, fp1 and the X/V/SGN scratch slots; the inner calls to
#--        setox/setoxm1 are made with d0 = 0, with the caller's d0
#--        parked on the stack.
	global		stanh
stanh:
	fmov.x		(%a0),%fp0		# LOAD INPUT

	fmov.x		%fp0,X(%a6)
	mov.l		(%a0),%d1
	mov.w		4(%a0),%d1		# d1 = sign/expo. : top 16 sig. bits
	mov.l		%d1,X(%a6)		# first long of X now holds compacted form
	and.l		&0x7FFFFFFF,%d1		# drop the sign
	cmp.l		%d1, &0x3fd78000	# is |X| < 2^(-40)?
	blt.w		TANHBORS		# yes
	cmp.l		%d1, &0x3fffddce	# is |X| > (5/2)LOG2?
	bgt.w		TANHBORS		# yes

#--THIS IS THE USUAL CASE
#--Y = 2|X|, Z = EXPM1(Y), TANH(X) = SIGN(X) * Z / (Z+2).

	mov.l		X(%a6),%d1
	mov.l		%d1,SGN(%a6)
	and.l		&0x7FFF0000,%d1		# keep expo. only: |X|, low word cleared
	add.l		&0x00010000,%d1		# EXPONENT OF 2|X|
	mov.l		%d1,X(%a6)
	and.l		&0x80000000,SGN(%a6)	# SGN = sign bit of X
	fmov.x		X(%a6),%fp0		# FP0 IS Y = 2|X|

	mov.l		%d0,-(%sp)		# park caller's d0
	clr.l		%d0			# call setoxm1 with d0 = 0
	fmovm.x		&0x1,-(%sp)		# save Y on stack
	lea		(%sp),%a0		# pass ptr to Y
	bsr		setoxm1			# FP0 IS Z = EXPM1(Y)
	add.l		&0xc,%sp		# clear Y from stack
	mov.l		(%sp)+,%d0		# recover caller's d0

	fmov.x		%fp0,%fp1
	fadd.s		&0x40000000,%fp1	# Z+2
	mov.l		SGN(%a6),%d1
	fmov.x		%fp1,V(%a6)
	eor.l		%d1,V(%a6)		# V = SIGN(X)*(Z+2): flip V's sign bit

	fmov.l		%d0,%fpcr		# restore users round prec,mode
	fdiv.x		V(%a6),%fp0		# Z / (SIGN(X)*(Z+2))
	bra		t_inx2

TANHBORS:
#--|X| < 2^(-40) or |X| > (5/2) log2; d1 is the compacted |X|
	cmp.l		%d1,&0x3FFF8000		# compacted form of 1.0
	blt.w		TANHSM			# |X| < 1, hence the tiny case

	cmp.l		%d1,&0x40048AA1		# 50 log2 bound (see algorithm step 4)
	bgt.w		TANHHUGE

#-- (5/2) LOG2 < |X| < 50 LOG2,
#--TANH(X) = 1 - (2/[EXP(2X)+1]). LET Y = 2|X|, SGN = SIGN(X),
#--TANH(X) = SGN - SGN*2/[EXP(Y)+1].

	mov.l		X(%a6),%d1
	mov.l		%d1,SGN(%a6)
	and.l		&0x7FFF0000,%d1
	add.l		&0x00010000,%d1		# EXPO OF 2|X|
	mov.l		%d1,X(%a6)		# Y = 2|X|
	and.l		&0x80000000,SGN(%a6)
#--NOTE(review): the next load of d1 looks dead -- setox starts by
#--overwriting d1 and SGN is reloaded after the call.
	mov.l		SGN(%a6),%d1
	fmov.x		X(%a6),%fp0		# Y = 2|X|

	mov.l		%d0,-(%sp)		# park caller's d0
	clr.l		%d0			# call setox with d0 = 0
	fmovm.x		&0x01,-(%sp)		# save Y on stack
	lea		(%sp),%a0		# pass ptr to Y
	bsr		setox			# FP0 IS EXP(Y)
	add.l		&0xc,%sp		# clear Y from stack
	mov.l		(%sp)+,%d0		# recover caller's d0
	mov.l		SGN(%a6),%d1
	fadd.s		&0x3F800000,%fp0	# EXP(Y)+1

	eor.l		&0xC0000000,%d1		# -SIGN(X)*2
	fmov.s		%d1,%fp1		# -SIGN(X)*2 IN SGL FMT
	fdiv.x		%fp0,%fp1		# -SIGN(X)2 / [EXP(Y)+1 ]

	mov.l		SGN(%a6),%d1
	or.l		&0x3F800000,%d1		# SGN
	fmov.s		%d1,%fp0		# SGN IN SGL FMT

	fmov.l		%d0,%fpcr		# restore users round prec,mode
	mov.b		&FADD_OP,%d1		# last inst is ADD
	fadd.x		%fp1,%fp0		# SGN - SGN*2/[EXP(Y)+1]
	bra		t_inx2

TANHSM:
#--tiny |X|: return X itself, moved under the user's FPCR
	fmov.l		%d0,%fpcr		# restore users round prec,mode
	mov.b		&FMOV_OP,%d1		# last inst is MOVE
	fmov.x		X(%a6),%fp0		# last inst - possible exception set
	bra		t_catch

#---RETURN SGN(X) - SGN(X)EPS
TANHHUGE:
	mov.l		X(%a6),%d1
	and.l		&0x80000000,%d1
	or.l		&0x3F800000,%d1		# SIGN(X)*1.0 in sgl fmt
	fmov.s		%d1,%fp0
	and.l		&0x80000000,%d1
	eor.l		&0x80800000,%d1		# -SIGN(X)*EPS

	fmov.l		%d0,%fpcr		# restore users round prec,mode
	fadd.s		%d1,%fp0
	bra		t_inx2

#--stanhd: tanh(X) for a denormalized operand; all work is delegated to
#--t_extdnrm (defined outside this section).
	global		stanhd
#--TANH(X) = X FOR DENORMALIZED X
stanhd:
	bra		t_extdnrm

#########################################################################
# slogn():    computes the natural logarithm of a normalized input	#
# slognd():   computes the natural logarithm of a denormalized input	#
# slognp1():  computes the log(1+X) of a normalized input		#
# slognp1d(): computes the log(1+X) of a denormalized input		#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision input			#
#	d0
= round precision,mode # 7969# # 7970# OUTPUT ************************************************************** # 7971# fp0 = log(X) or log(1+X) # 7972# # 7973# ACCURACY and MONOTONICITY ******************************************* # 7974# The returned result is within 2 ulps in 64 significant bit, # 7975# i.e. within 0.5001 ulp to 53 bits if the result is subsequently # 7976# rounded to double precision. The result is provably monotonic # 7977# in double precision. # 7978# # 7979# ALGORITHM *********************************************************** # 7980# LOGN: # 7981# Step 1. If |X-1| < 1/16, approximate log(X) by an odd # 7982# polynomial in u, where u = 2(X-1)/(X+1). Otherwise, # 7983# move on to Step 2. # 7984# # 7985# Step 2. X = 2**k * Y where 1 <= Y < 2. Define F to be the first # 7986# seven significant bits of Y plus 2**(-7), i.e. # 7987# F = 1.xxxxxx1 in base 2 where the six "x" match those # 7988# of Y. Note that |Y-F| <= 2**(-7). # 7989# # 7990# Step 3. Define u = (Y-F)/F. Approximate log(1+u) by a # 7991# polynomial in u, log(1+u) = poly. # 7992# # 7993# Step 4. Reconstruct # 7994# log(X) = log( 2**k * Y ) = k*log(2) + log(F) + log(1+u) # 7995# by k*log(2) + (log(F) + poly). The values of log(F) are # 7996# calculated beforehand and stored in the program. # 7997# # 7998# lognp1: # 7999# Step 1: If |X| < 1/16, approximate log(1+X) by an odd # 8000# polynomial in u where u = 2X/(2+X). Otherwise, move on # 8001# to Step 2. # 8002# # 8003# Step 2: Let 1+X = 2**k * Y, where 1 <= Y < 2. Define F as done # 8004# in Step 2 of the algorithm for LOGN and compute # 8005# log(1+X) as k*log(2) + log(F) + poly where poly # 8006# approximates log(1+u), u = (Y-F)/F. # 8007# # 8008# Implementation Notes: # 8009# Note 1. There are 64 different possible values for F, thus 64 # 8010# log(F)'s need to be tabulated. Moreover, the values of # 8011# 1/F are also tabulated so that the division in (Y-F)/F # 8012# can be performed by a multiplication. # 8013# # 8014# Note 2. 
In Step 2 of lognp1, in order to preserved accuracy, # 8015# the value Y-F has to be calculated carefully when # 8016# 1/2 <= X < 3/2. # 8017# # 8018# Note 3. To fully exploit the pipeline, polynomials are usually # 8019# separated into two parts evaluated independently before # 8020# being added up. # 8021# # 8022######################################################################### 8023LOGOF2: 8024 long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000 8025 8026one: 8027 long 0x3F800000 8028zero: 8029 long 0x00000000 8030infty: 8031 long 0x7F800000 8032negone: 8033 long 0xBF800000 8034 8035LOGA6: 8036 long 0x3FC2499A,0xB5E4040B 8037LOGA5: 8038 long 0xBFC555B5,0x848CB7DB 8039 8040LOGA4: 8041 long 0x3FC99999,0x987D8730 8042LOGA3: 8043 long 0xBFCFFFFF,0xFF6F7E97 8044 8045LOGA2: 8046 long 0x3FD55555,0x555555A4 8047LOGA1: 8048 long 0xBFE00000,0x00000008 8049 8050LOGB5: 8051 long 0x3F175496,0xADD7DAD6 8052LOGB4: 8053 long 0x3F3C71C2,0xFE80C7E0 8054 8055LOGB3: 8056 long 0x3F624924,0x928BCCFF 8057LOGB2: 8058 long 0x3F899999,0x999995EC 8059 8060LOGB1: 8061 long 0x3FB55555,0x55555555 8062TWO: 8063 long 0x40000000,0x00000000 8064 8065LTHOLD: 8066 long 0x3f990000,0x80000000,0x00000000,0x00000000 8067 8068LOGTBL: 8069 long 0x3FFE0000,0xFE03F80F,0xE03F80FE,0x00000000 8070 long 0x3FF70000,0xFF015358,0x833C47E2,0x00000000 8071 long 0x3FFE0000,0xFA232CF2,0x52138AC0,0x00000000 8072 long 0x3FF90000,0xBDC8D83E,0xAD88D549,0x00000000 8073 long 0x3FFE0000,0xF6603D98,0x0F6603DA,0x00000000 8074 long 0x3FFA0000,0x9CF43DCF,0xF5EAFD48,0x00000000 8075 long 0x3FFE0000,0xF2B9D648,0x0F2B9D65,0x00000000 8076 long 0x3FFA0000,0xDA16EB88,0xCB8DF614,0x00000000 8077 long 0x3FFE0000,0xEF2EB71F,0xC4345238,0x00000000 8078 long 0x3FFB0000,0x8B29B775,0x1BD70743,0x00000000 8079 long 0x3FFE0000,0xEBBDB2A5,0xC1619C8C,0x00000000 8080 long 0x3FFB0000,0xA8D839F8,0x30C1FB49,0x00000000 8081 long 0x3FFE0000,0xE865AC7B,0x7603A197,0x00000000 8082 long 0x3FFB0000,0xC61A2EB1,0x8CD907AD,0x00000000 8083 long 
0x3FFE0000,0xE525982A,0xF70C880E,0x00000000 8084 long 0x3FFB0000,0xE2F2A47A,0xDE3A18AF,0x00000000 8085 long 0x3FFE0000,0xE1FC780E,0x1FC780E2,0x00000000 8086 long 0x3FFB0000,0xFF64898E,0xDF55D551,0x00000000 8087 long 0x3FFE0000,0xDEE95C4C,0xA037BA57,0x00000000 8088 long 0x3FFC0000,0x8DB956A9,0x7B3D0148,0x00000000 8089 long 0x3FFE0000,0xDBEB61EE,0xD19C5958,0x00000000 8090 long 0x3FFC0000,0x9B8FE100,0xF47BA1DE,0x00000000 8091 long 0x3FFE0000,0xD901B203,0x6406C80E,0x00000000 8092 long 0x3FFC0000,0xA9372F1D,0x0DA1BD17,0x00000000 8093 long 0x3FFE0000,0xD62B80D6,0x2B80D62C,0x00000000 8094 long 0x3FFC0000,0xB6B07F38,0xCE90E46B,0x00000000 8095 long 0x3FFE0000,0xD3680D36,0x80D3680D,0x00000000 8096 long 0x3FFC0000,0xC3FD0329,0x06488481,0x00000000 8097 long 0x3FFE0000,0xD0B69FCB,0xD2580D0B,0x00000000 8098 long 0x3FFC0000,0xD11DE0FF,0x15AB18CA,0x00000000 8099 long 0x3FFE0000,0xCE168A77,0x25080CE1,0x00000000 8100 long 0x3FFC0000,0xDE1433A1,0x6C66B150,0x00000000 8101 long 0x3FFE0000,0xCB8727C0,0x65C393E0,0x00000000 8102 long 0x3FFC0000,0xEAE10B5A,0x7DDC8ADD,0x00000000 8103 long 0x3FFE0000,0xC907DA4E,0x871146AD,0x00000000 8104 long 0x3FFC0000,0xF7856E5E,0xE2C9B291,0x00000000 8105 long 0x3FFE0000,0xC6980C69,0x80C6980C,0x00000000 8106 long 0x3FFD0000,0x82012CA5,0xA68206D7,0x00000000 8107 long 0x3FFE0000,0xC4372F85,0x5D824CA6,0x00000000 8108 long 0x3FFD0000,0x882C5FCD,0x7256A8C5,0x00000000 8109 long 0x3FFE0000,0xC1E4BBD5,0x95F6E947,0x00000000 8110 long 0x3FFD0000,0x8E44C60B,0x4CCFD7DE,0x00000000 8111 long 0x3FFE0000,0xBFA02FE8,0x0BFA02FF,0x00000000 8112 long 0x3FFD0000,0x944AD09E,0xF4351AF6,0x00000000 8113 long 0x3FFE0000,0xBD691047,0x07661AA3,0x00000000 8114 long 0x3FFD0000,0x9A3EECD4,0xC3EAA6B2,0x00000000 8115 long 0x3FFE0000,0xBB3EE721,0xA54D880C,0x00000000 8116 long 0x3FFD0000,0xA0218434,0x353F1DE8,0x00000000 8117 long 0x3FFE0000,0xB92143FA,0x36F5E02E,0x00000000 8118 long 0x3FFD0000,0xA5F2FCAB,0xBBC506DA,0x00000000 8119 long 0x3FFE0000,0xB70FBB5A,0x19BE3659,0x00000000 8120 long 
0x3FFD0000,0xABB3B8BA,0x2AD362A5,0x00000000 8121 long 0x3FFE0000,0xB509E68A,0x9B94821F,0x00000000 8122 long 0x3FFD0000,0xB1641795,0xCE3CA97B,0x00000000 8123 long 0x3FFE0000,0xB30F6352,0x8917C80B,0x00000000 8124 long 0x3FFD0000,0xB7047551,0x5D0F1C61,0x00000000 8125 long 0x3FFE0000,0xB11FD3B8,0x0B11FD3C,0x00000000 8126 long 0x3FFD0000,0xBC952AFE,0xEA3D13E1,0x00000000 8127 long 0x3FFE0000,0xAF3ADDC6,0x80AF3ADE,0x00000000 8128 long 0x3FFD0000,0xC2168ED0,0xF458BA4A,0x00000000 8129 long 0x3FFE0000,0xAD602B58,0x0AD602B6,0x00000000 8130 long 0x3FFD0000,0xC788F439,0xB3163BF1,0x00000000 8131 long 0x3FFE0000,0xAB8F69E2,0x8359CD11,0x00000000 8132 long 0x3FFD0000,0xCCECAC08,0xBF04565D,0x00000000 8133 long 0x3FFE0000,0xA9C84A47,0xA07F5638,0x00000000 8134 long 0x3FFD0000,0xD2420487,0x2DD85160,0x00000000 8135 long 0x3FFE0000,0xA80A80A8,0x0A80A80B,0x00000000 8136 long 0x3FFD0000,0xD7894992,0x3BC3588A,0x00000000 8137 long 0x3FFE0000,0xA655C439,0x2D7B73A8,0x00000000 8138 long 0x3FFD0000,0xDCC2C4B4,0x9887DACC,0x00000000 8139 long 0x3FFE0000,0xA4A9CF1D,0x96833751,0x00000000 8140 long 0x3FFD0000,0xE1EEBD3E,0x6D6A6B9E,0x00000000 8141 long 0x3FFE0000,0xA3065E3F,0xAE7CD0E0,0x00000000 8142 long 0x3FFD0000,0xE70D785C,0x2F9F5BDC,0x00000000 8143 long 0x3FFE0000,0xA16B312E,0xA8FC377D,0x00000000 8144 long 0x3FFD0000,0xEC1F392C,0x5179F283,0x00000000 8145 long 0x3FFE0000,0x9FD809FD,0x809FD80A,0x00000000 8146 long 0x3FFD0000,0xF12440D3,0xE36130E6,0x00000000 8147 long 0x3FFE0000,0x9E4CAD23,0xDD5F3A20,0x00000000 8148 long 0x3FFD0000,0xF61CCE92,0x346600BB,0x00000000 8149 long 0x3FFE0000,0x9CC8E160,0xC3FB19B9,0x00000000 8150 long 0x3FFD0000,0xFB091FD3,0x8145630A,0x00000000 8151 long 0x3FFE0000,0x9B4C6F9E,0xF03A3CAA,0x00000000 8152 long 0x3FFD0000,0xFFE97042,0xBFA4C2AD,0x00000000 8153 long 0x3FFE0000,0x99D722DA,0xBDE58F06,0x00000000 8154 long 0x3FFE0000,0x825EFCED,0x49369330,0x00000000 8155 long 0x3FFE0000,0x9868C809,0x868C8098,0x00000000 8156 long 0x3FFE0000,0x84C37A7A,0xB9A905C9,0x00000000 8157 long 
0x3FFE0000,0x97012E02,0x5C04B809,0x00000000 8158 long 0x3FFE0000,0x87224C2E,0x8E645FB7,0x00000000 8159 long 0x3FFE0000,0x95A02568,0x095A0257,0x00000000 8160 long 0x3FFE0000,0x897B8CAC,0x9F7DE298,0x00000000 8161 long 0x3FFE0000,0x94458094,0x45809446,0x00000000 8162 long 0x3FFE0000,0x8BCF55DE,0xC4CD05FE,0x00000000 8163 long 0x3FFE0000,0x92F11384,0x0497889C,0x00000000 8164 long 0x3FFE0000,0x8E1DC0FB,0x89E125E5,0x00000000 8165 long 0x3FFE0000,0x91A2B3C4,0xD5E6F809,0x00000000 8166 long 0x3FFE0000,0x9066E68C,0x955B6C9B,0x00000000 8167 long 0x3FFE0000,0x905A3863,0x3E06C43B,0x00000000 8168 long 0x3FFE0000,0x92AADE74,0xC7BE59E0,0x00000000 8169 long 0x3FFE0000,0x8F1779D9,0xFDC3A219,0x00000000 8170 long 0x3FFE0000,0x94E9BFF6,0x15845643,0x00000000 8171 long 0x3FFE0000,0x8DDA5202,0x37694809,0x00000000 8172 long 0x3FFE0000,0x9723A1B7,0x20134203,0x00000000 8173 long 0x3FFE0000,0x8CA29C04,0x6514E023,0x00000000 8174 long 0x3FFE0000,0x995899C8,0x90EB8990,0x00000000 8175 long 0x3FFE0000,0x8B70344A,0x139BC75A,0x00000000 8176 long 0x3FFE0000,0x9B88BDAA,0x3A3DAE2F,0x00000000 8177 long 0x3FFE0000,0x8A42F870,0x5669DB46,0x00000000 8178 long 0x3FFE0000,0x9DB4224F,0xFFE1157C,0x00000000 8179 long 0x3FFE0000,0x891AC73A,0xE9819B50,0x00000000 8180 long 0x3FFE0000,0x9FDADC26,0x8B7A12DA,0x00000000 8181 long 0x3FFE0000,0x87F78087,0xF78087F8,0x00000000 8182 long 0x3FFE0000,0xA1FCFF17,0xCE733BD4,0x00000000 8183 long 0x3FFE0000,0x86D90544,0x7A34ACC6,0x00000000 8184 long 0x3FFE0000,0xA41A9E8F,0x5446FB9F,0x00000000 8185 long 0x3FFE0000,0x85BF3761,0x2CEE3C9B,0x00000000 8186 long 0x3FFE0000,0xA633CD7E,0x6771CD8B,0x00000000 8187 long 0x3FFE0000,0x84A9F9C8,0x084A9F9D,0x00000000 8188 long 0x3FFE0000,0xA8489E60,0x0B435A5E,0x00000000 8189 long 0x3FFE0000,0x83993052,0x3FBE3368,0x00000000 8190 long 0x3FFE0000,0xAA59233C,0xCCA4BD49,0x00000000 8191 long 0x3FFE0000,0x828CBFBE,0xB9A020A3,0x00000000 8192 long 0x3FFE0000,0xAC656DAE,0x6BCC4985,0x00000000 8193 long 0x3FFE0000,0x81848DA8,0xFAF0D277,0x00000000 8194 long 
0x3FFE0000,0xAE6D8EE3,0x60BB2468,0x00000000	# operands of the "long" that ends the preceding line
	long		0x3FFE0000,0x80808080,0x80808081,0x00000000
	long		0x3FFE0000,0xB07197A2,0x3C46C654,0x00000000

# Scratch aliases used by the log routines.  X, KLOG2 and SAVEU all name
# FP_SCR0, F names FP_SCR1 and ADJK names L_SCR1.
	set		ADJK,L_SCR1

	set		X,FP_SCR0
	set		XDCARE,X+2
	set		XFRAC,X+4

	set		F,FP_SCR1
	set		FFRAC,F+4

	set		KLOG2,FP_SCR0

	set		SAVEU,FP_SCR0

	global		slogn
#--ENTRY POINT FOR LOG(X) FOR X FINITE, NON-ZERO, NOT NAN'S
# In:  a0 -> extended precision operand.
# Loads the operand into fp0, stores a zero exponent adjustment in ADJK
# and falls through to LOGBGN.
slogn:
	fmov.x		(%a0),%fp0		# LOAD INPUT
	mov.l		&0x00000000,ADJK(%a6)

LOGBGN:
#--FPCR SAVED AND CLEARED, INPUT IS 2^(ADJK)*FP0, FP0 CONTAINS
#--A FINITE, NON-ZERO, NORMALIZED NUMBER.

# d1 = first longword of the operand with its low word replaced by the
# word at offset 4 (the leading 16 bits of the fraction longword).  The
# range compares below are done on this compact form.
	mov.l		(%a0),%d1
	mov.w		4(%a0),%d1

# copy the three operand longwords into the X scratch area
	mov.l		(%a0),X(%a6)
	mov.l		4(%a0),X+4(%a6)
	mov.l		8(%a0),X+8(%a6)

	cmp.l		%d1,&0			# CHECK IF X IS NEGATIVE
	blt.w		LOGNEG			# LOG OF NEGATIVE ARGUMENT IS INVALID
# X IS POSITIVE, CHECK IF X IS NEAR 1
	cmp.l		%d1,&0x3ffef07d		# IS X < 15/16?
	blt.b		LOGMAIN			# YES
	cmp.l		%d1,&0x3fff8841		# IS X > 17/16?
	ble.w		LOGNEAR1		# NO

LOGMAIN:
#--THIS SHOULD BE THE USUAL CASE, X NOT VERY CLOSE TO 1

#--X = 2^(K) * Y, 1 <= Y < 2. THUS, Y = 1.XXXXXXXX....XX IN BINARY.
#--WE DEFINE F = 1.XXXXXX1, I.E. FIRST 7 BITS OF Y AND ATTACH A 1.
#--THE IDEA IS THAT LOG(X) = K*LOG2 + LOG(Y)
#--			 = K*LOG2 + LOG(F) + LOG(1 + (Y-F)/F).
#--NOTE THAT U = (Y-F)/F IS VERY SMALL AND THUS APPROXIMATING
#--LOG(1+U) CAN BE VERY EFFICIENT.
#--ALSO NOTE THAT THE VALUE 1/F IS STORED IN A TABLE SO THAT NO
#--DIVISION IS NEEDED TO CALCULATE (Y-F)/F.

#--GET K, Y, F, AND ADDRESS OF 1/F.
	asr.l		&8,%d1
	asr.l		&8,%d1			# SHIFTED 16 BITS, BIASED EXPO. OF X
	sub.l		&0x3FFF,%d1		# THIS IS K
	add.l		ADJK(%a6),%d1		# ADJUST K, ORIGINAL INPUT MAY BE DENORM.
# LOGMAIN, continued.  On arrival d1 holds K (unbiased exponent plus ADJK)
# and X(a6) holds a copy of the operand.  This stretch builds Y and F,
# computes the LOGTBL offset of 1/F, forms Y-F in fp0 and then evaluates
# the first part of the LOG(1+U) polynomial (LP1CONT1).
	lea		LOGTBL(%pc),%a0		# BASE ADDRESS OF 1/F AND LOG(F)
	fmov.l		%d1,%fp1		# CONVERT K TO FLOATING-POINT FORMAT

#--WHILE THE CONVERSION IS GOING ON, WE GET F AND ADDRESS OF 1/F
	mov.l		&0x3FFF0000,X(%a6)	# X IS NOW Y, I.E. 2^(-K)*X
	mov.l		XFRAC(%a6),FFRAC(%a6)
	and.l		&0xFE000000,FFRAC(%a6)	# FIRST 7 BITS OF Y
	or.l		&0x01000000,FFRAC(%a6)	# GET F: ATTACH A 1 AT THE EIGHTH BIT
	mov.l		FFRAC(%a6),%d1		# READY TO GET ADDRESS OF 1/F
	and.l		&0x7E000000,%d1		# keep the six fraction bits that select F
	asr.l		&8,%d1
	asr.l		&8,%d1
	asr.l		&4,%d1			# SHIFTED 20, D1 IS THE DISPLACEMENT
						# (index * 32: a 16-byte 1/F entry
						#  followed by a 16-byte LOG(F) entry)
	add.l		%d1,%a0			# A0 IS THE ADDRESS FOR 1/F

	fmov.x		X(%a6),%fp0
	mov.l		&0x3fff0000,F(%a6)	# F gets the same exponent word as Y
	clr.l		F+8(%a6)		# low fraction longword of F is zero
	fsub.x		F(%a6),%fp0		# Y-F
	fmovm.x		&0xc,-(%sp)		# SAVE FP2-3 WHILE FP0 IS NOT READY
#--SUMMARY: FP0 IS Y-F, A0 IS ADDRESS OF 1/F, FP1 IS K
#--REGISTERS SAVED: FPCR, FP1, FP2
# NOTE(review): the only save visible in this routine is the fp2/fp3 push
# just above; the list on the previous line could not be confirmed here.

LP1CONT1:
#--AN RE-ENTRY POINT FOR LOGNP1
	fmul.x		(%a0),%fp0		# FP0 IS U = (Y-F)/F
	fmul.x		LOGOF2(%pc),%fp1	# GET K*LOG2 WHILE FP0 IS NOT READY
	fmov.x		%fp0,%fp2
	fmul.x		%fp2,%fp2		# FP2 IS V=U*U
	fmov.x		%fp1,KLOG2(%a6)		# PUT K*LOG2 IN MEMORY, FREE FP1

#--LOG(1+U) IS APPROXIMATED BY
#--U + V*(A1+U*(A2+U*(A3+U*(A4+U*(A5+U*A6))))) WHICH IS
#--[U + V*(A1+V*(A3+V*A5))]  +  [U*V*(A2+V*(A4+V*A6))]

# fp1 accumulates the even-index chain (A6,A4,A2), fp2 the odd-index
# chain (A5,A3,A1); fp3 keeps V for both.
	fmov.x		%fp2,%fp3
	fmov.x		%fp2,%fp1

	fmul.d		LOGA6(%pc),%fp1		# V*A6
	fmul.d		LOGA5(%pc),%fp2		# V*A5

	fadd.d		LOGA4(%pc),%fp1		# A4+V*A6
	fadd.d		LOGA3(%pc),%fp2		# A3+V*A5

	fmul.x		%fp3,%fp1		# V*(A4+V*A6)
	fmul.x		%fp3,%fp2		# V*(A3+V*A5)

	fadd.d		LOGA2(%pc),%fp1		# A2+V*(A4+V*A6)
	fadd.d		LOGA1(%pc),%fp2		# A1+V*(A3+V*A5)

	fmul.x		%fp3,%fp1		# V*(A2+V*(A4+V*A6))
	add.l		&16,%a0			# ADDRESS OF LOG(F)
	fmul.x		%fp3,%fp2		# V*(A1+V*(A3+V*A5))

	fmul.x		%fp0,%fp1		# U*V*(A2+V*(A4+V*A6))
	fadd.x		%fp2,%fp0		# U+V*(A1+V*(A3+V*A5))

	fadd.x		(%a0),%fp1		# LOG(F)+U*V*(A2+V*(A4+V*A6))
# Tail of LP1CONT1: pop fp2/fp3, combine the two partial sums, load the
# caller's round precision,mode from d0 into FPCR and do the last add.
	fmovm.x		(%sp)+,&0x30		# RESTORE FP2-3
	fadd.x		%fp1,%fp0		# FP0 IS LOG(F) + LOG(1+U)

	fmov.l		%d0,%fpcr		# d0 = round precision,mode passed in
	fadd.x		KLOG2(%a6),%fp0		# FINAL ADD
	bra		t_inx2


# LOGNEAR1 is reached from LOGBGN when the compact form of X lies between
# the 15/16 and 17/16 bounds tested there.
LOGNEAR1:

# if the input is exactly equal to one, then exit through ld_pzero.
# if these 2 lines weren't here, the correct answer would be returned
# but the INEX2 bit would be set.
	fcmp.b		%fp0,&0x1		# is it equal to one?
	fbeq.l		ld_pzero		# yes

#--REGISTERS SAVED: FPCR, FP1. FP0 CONTAINS THE INPUT.
	fmov.x		%fp0,%fp1
	fsub.s		one(%pc),%fp1		# FP1 IS X-1
	fadd.s		one(%pc),%fp0		# FP0 IS X+1
	fadd.x		%fp1,%fp1		# FP1 IS 2(X-1)
#--LOG(X) = LOG(1+U/2)-LOG(1-U/2) WHICH IS AN ODD POLYNOMIAL
#--IN U, U = 2(X-1)/(X+1) = FP1/FP0

LP1CONT2:
#--THIS IS AN RE-ENTRY POINT FOR LOGNP1
# In: fp1 = numerator, fp0 = denominator of U.
	fdiv.x		%fp0,%fp1		# FP1 IS U
	fmovm.x		&0xc,-(%sp)		# SAVE FP2-3
#--REGISTERS SAVED ARE NOW FPCR,FP1,FP2,FP3
#--LET V=U*U, W=V*V, CALCULATE
#--U + U*V*(B1 + V*(B2 + V*(B3 + V*(B4 + V*B5)))) BY
#--U + U*V*(  [B1 + W*(B3 + W*B5)]  +  [V*(B2 + W*B4)]  )
	fmov.x		%fp1,%fp0
	fmul.x		%fp0,%fp0		# FP0 IS V
	fmov.x		%fp1,SAVEU(%a6)		# STORE U IN MEMORY, FREE FP1
	fmov.x		%fp0,%fp1
	fmul.x		%fp1,%fp1		# FP1 IS W

	fmov.d		LOGB5(%pc),%fp3
	fmov.d		LOGB4(%pc),%fp2

	fmul.x		%fp1,%fp3		# W*B5
	fmul.x		%fp1,%fp2		# W*B4

	fadd.d		LOGB3(%pc),%fp3		# B3+W*B5
	fadd.d		LOGB2(%pc),%fp2		# B2+W*B4

	fmul.x		%fp3,%fp1		# W*(B3+W*B5), FP3 RELEASED

	fmul.x		%fp0,%fp2		# V*(B2+W*B4)

	fadd.d		LOGB1(%pc),%fp1		# B1+W*(B3+W*B5)
	fmul.x		SAVEU(%a6),%fp0		# FP0 IS U*V

	fadd.x		%fp2,%fp1		# B1+W*(B3+W*B5) + V*(B2+W*B4), FP2 RELEASED
	fmovm.x		(%sp)+,&0x30		# FP2-3 RESTORED

	fmul.x		%fp1,%fp0		# U*V*( [B1+W*(B3+W*B5)] + [V*(B2+W*B4)] )

	fmov.l		%d0,%fpcr		# d0 = round precision,mode passed in
	fadd.x		SAVEU(%a6),%fp0		# add U; last FP instruction before exit
	bra		t_inx2

#--REGISTERS SAVED FPCR.
#--LOG(-VE) IS INVALID
LOGNEG:
	bra		t_operr

	global		slognd
# In:  a0 -> denormalized extended precision operand (assumed non-zero).
# Shifts the 64-bit fraction left until its top bit is set, stores the
# shifted value in X(a6), stores minus the shift count in ADJK, reloads
# fp0, points a0 at X(a6) and joins LOGBGN.
slognd:
#--ENTRY POINT FOR LOG(X) FOR DENORMALIZED INPUT

	mov.l		&-100,ADJK(%a6)		# INPUT = 2^(ADJK) * FP0
						# (ADJK is rewritten with -k on both
						#  paths below before it is read)

#----normalize the input value by left shifting k bits (k to be determined
#----below), adjusting exponent and storing -k to ADJK
#----the value TWOTO100 is no longer needed.
#----Note that this code assumes the denormalized input is NON-ZERO.

	movm.l		&0x3f00,-(%sp)		# save some registers  {d2-d7}
	mov.l		(%a0),%d3		# D3 is exponent of smallest norm. #
	mov.l		4(%a0),%d4
	mov.l		8(%a0),%d5		# (D4,D5) is (Hi_X,Lo_X)
	clr.l		%d2			# D2 used for holding K

	tst.l		%d4
	bne.b		Hi_not0

# High fraction longword is zero: move the low longword up (a shift of
# 32) and then shift further by the bit offset bfffo reports.
Hi_0:
	mov.l		%d5,%d4
	clr.l		%d5
	mov.l		&32,%d2
	clr.l		%d6
	bfffo		%d4{&0:&32},%d6
	lsl.l		%d6,%d4
	add.l		%d6,%d2			# (D3,D4,D5) is normalized

	mov.l		%d3,X(%a6)
	mov.l		%d4,XFRAC(%a6)
	mov.l		%d5,XFRAC+4(%a6)
	neg.l		%d2
	mov.l		%d2,ADJK(%a6)		# ADJK = -k
	fmov.x		X(%a6),%fp0
	movm.l		(%sp)+,&0xfc		# restore registers {d2-d7}
	lea		X(%a6),%a0		# LOGBGN reads the operand through a0
	bra.w		LOGBGN			# begin regular log(X)

# High fraction longword is non-zero: shift the (D4,D5) pair left by the
# bfffo offset, moving the bits shifted out of D5 into the bottom of D4.
Hi_not0:
	clr.l		%d6
	bfffo		%d4{&0:&32},%d6		# find first 1
	mov.l		%d6,%d2			# get k
	lsl.l		%d6,%d4
	mov.l		%d5,%d7			# a copy of D5
	lsl.l		%d6,%d5
	neg.l		%d6
	add.l		&32,%d6			# d6 = 32 - k
	lsr.l		%d6,%d7			# bits of D5 that cross into D4
	or.l		%d7,%d4			# (D3,D4,D5) normalized

	mov.l		%d3,X(%a6)
	mov.l		%d4,XFRAC(%a6)
	mov.l		%d5,XFRAC+4(%a6)
	neg.l		%d2
	mov.l		%d2,ADJK(%a6)		# ADJK = -k
	fmov.x		X(%a6),%fp0
	movm.l		(%sp)+,&0xfc		# restore registers {d2-d7}
	lea		X(%a6),%a0		# LOGBGN reads the operand through a0
	bra.w		LOGBGN			# begin regular log(X)

	global		slognp1
#--ENTRY POINT FOR LOG(1+X) FOR X FINITE, NON-ZERO, NOT NAN'S
# In:  a0 -> extended precision operand, d0 = round precision,mode.
# If |X| is not greater than LTHOLD the operand itself is returned via
# t_catch; otherwise control continues at LP1REAL.
slognp1:
	fmov.x		(%a0),%fp0		# LOAD INPUT
	fabs.x		%fp0			# test magnitude
	fcmp.x		%fp0,LTHOLD(%pc)	# compare with min threshold
	fbgt.w		LP1REAL			# if greater, continue
	fmov.l		%d0,%fpcr
	mov.b		&FMOV_OP,%d1		# last inst is MOVE
	fmov.x		(%a0),%fp0		# return signed argument
	bra		t_catch

# fp1 keeps the original input Z; fp0 and X(a6) hold X = round(1+Z).
# d1 becomes the compact form of X and selects one of three paths:
# LP1NEG0 (X <= 0), LOGMAIN (X outside the two bounds tested), or
# LP1NEAR1 by fall-through.
LP1REAL:
	fmov.x		(%a0),%fp0		# LOAD INPUT
	mov.l		&0x00000000,ADJK(%a6)
	fmov.x		%fp0,%fp1		# FP1 IS INPUT Z
	fadd.s		one(%pc),%fp0		# X := ROUND(1+Z)
	fmov.x		%fp0,X(%a6)
	mov.w		XFRAC(%a6),XDCARE(%a6)	# leading fraction word into X+2
	mov.l		X(%a6),%d1		# d1 = compact form of X
	cmp.l		%d1,&0
	ble.w		LP1NEG0			# LOG OF ZERO OR -VE
	cmp.l		%d1,&0x3ffe8000		# IS BOUNDS [1/2,3/2]?
	blt.w		LOGMAIN
	cmp.l		%d1,&0x3fffc000
	bgt.w		LOGMAIN
#--IF 1+Z > 3/2 OR 1+Z < 1/2, THEN X, WHICH IS ROUNDING 1+Z,
#--CONTAINS AT LEAST 63 BITS OF INFORMATION OF Z. IN THAT CASE,
#--SIMPLY INVOKE LOG(X) FOR LOG(1+Z).

LP1NEAR1:
#--NEXT SEE IF EXP(-1/16) < X < EXP(1/16)
	cmp.l		%d1,&0x3ffef07d
	blt.w		LP1CARE
	cmp.l		%d1,&0x3fff8841
	bgt.w		LP1CARE

LP1ONE16:
#--EXP(-1/16) < X < EXP(1/16). LOG(1+Z) = LOG(1+U/2) - LOG(1-U/2)
#--WHERE U = 2Z/(2+Z) = 2Z/(1+X).
	fadd.x		%fp1,%fp1		# FP1 IS 2Z
	fadd.s		one(%pc),%fp0		# FP0 IS 1+X
#--U = FP1/FP0
	bra.w		LP1CONT2

LP1CARE:
#--HERE WE USE THE USUAL TABLE DRIVEN APPROACH. CARE HAS TO BE
#--TAKEN BECAUSE 1+Z CAN HAVE 67 BITS OF INFORMATION AND WE MUST
#--PRESERVE ALL THE INFORMATION. BECAUSE 1+Z IS IN [1/2,3/2],
#--THERE ARE ONLY TWO CASES.
#--CASE 1: 1+Z < 1, THEN K = -1 AND Y-F = (2-F) + 2Z
#--CASE 2: 1+Z > 1, THEN K = 0  AND Y-F = (1-F) + Z
#--ON RETURNING TO LP1CONT1, WE MUST HAVE K IN FP1, ADDRESS OF
#--(1/F) IN A0, Y-F IN FP0, AND FP2 SAVED.
# LP1CARE, code part.  d1 is still the compact form of X = round(1+Z)
# and fp1 still holds Z.  Build F from the leading fraction bits of X,
# then take KISNEG1 when X is below the 0x3FFF8000 bound, else KISZERO.

	mov.l		XFRAC(%a6),FFRAC(%a6)
	and.l		&0xFE000000,FFRAC(%a6)
	or.l		&0x01000000,FFRAC(%a6)	# F OBTAINED
	cmp.l		%d1,&0x3FFF8000		# SEE IF 1+Z > 1
	bge.b		KISZERO

# K = -1: fp0 = (2-F) + 2Z, a0 -> 1/F entry in LOGTBL, fp1 = -1.
KISNEG1:
	fmov.s		TWO(%pc),%fp0
	mov.l		&0x3fff0000,F(%a6)
	clr.l		F+8(%a6)
	fsub.x		F(%a6),%fp0		# 2-F
	mov.l		FFRAC(%a6),%d1
	and.l		&0x7E000000,%d1
	asr.l		&8,%d1
	asr.l		&8,%d1
	asr.l		&4,%d1			# D1 CONTAINS DISPLACEMENT FOR 1/F
	fadd.x		%fp1,%fp1		# GET 2Z
	fmovm.x		&0xc,-(%sp)		# SAVE FP2  {%fp2/%fp3}
	fadd.x		%fp1,%fp0		# FP0 IS Y-F = (2-F)+2Z
	lea		LOGTBL(%pc),%a0		# A0 IS ADDRESS OF 1/F
	add.l		%d1,%a0
	fmov.s		negone(%pc),%fp1	# FP1 IS K = -1
	bra.w		LP1CONT1

# K = 0: fp0 = (1-F) + Z, a0 -> 1/F entry in LOGTBL, fp1 = 0.
KISZERO:
	fmov.s		one(%pc),%fp0
	mov.l		&0x3fff0000,F(%a6)
	clr.l		F+8(%a6)
	fsub.x		F(%a6),%fp0		# 1-F
	mov.l		FFRAC(%a6),%d1
	and.l		&0x7E000000,%d1
	asr.l		&8,%d1
	asr.l		&8,%d1
	asr.l		&4,%d1			# d1 = displacement of the 1/F entry
	fadd.x		%fp1,%fp0		# FP0 IS Y-F
	fmovm.x		&0xc,-(%sp)		# FP2 SAVED {%fp2/%fp3}
	lea		LOGTBL(%pc),%a0
	add.l		%d1,%a0			# A0 IS ADDRESS OF 1/F
	fmov.s		zero(%pc),%fp1		# FP1 IS K = 0
	bra.w		LP1CONT1

LP1NEG0:
#--FPCR SAVED. D1 IS X IN COMPACT FORM.
# LP1NEG0 body: d1 (compact form of 1+Z) is <= 0 here.  A negative value
# goes to LP1NEG; otherwise fall into LP1ZERO.
	cmp.l		%d1,&0
	blt.b		LP1NEG
# 1+Z is zero: load -1.0, load d0 into FPCR and exit through t_dz.
LP1ZERO:
	fmov.s		negone(%pc),%fp0

	fmov.l		%d0,%fpcr
	bra		t_dz

# 1+Z is negative: load 0.0, load d0 into FPCR and exit through t_operr.
LP1NEG:
	fmov.s		zero(%pc),%fp0

	fmov.l		%d0,%fpcr
	bra		t_operr

	global		slognp1d
#--ENTRY POINT FOR LOG(1+Z) FOR DENORMALIZED INPUT
# Simply return the denorm
slognp1d:
	bra		t_extdnrm

#########################################################################
# satanh():  computes the inverse hyperbolic tangent of a norm input	#
# satanhd(): computes the inverse hyperbolic tangent of a denorm input	#
#									#
# INPUT *************************************************************** #
#	a0 = pointer to extended precision input			#
#	d0 = round precision,mode					#
#									#
# OUTPUT ************************************************************** #
#	fp0 = arctanh(X)						#
#									#
# ACCURACY and MONOTONICITY ******************************************* #
#	The returned result is within 3 ulps in 64 significant bit,	#
#	i.e. within 0.5001 ulp to 53 bits if the result is subsequently	#
#	rounded to double precision. The result is provably monotonic	#
#	in double precision.						#
#									#
# ALGORITHM *********************************************************** #
#									#
#	ATANH								#
#	1. If |X| >= 1, go to 3.					#
#									#
#	2. (|X| < 1) Calculate atanh(X) by				#
#		sgn := sign(X)						#
#		y := |X|						#
#		z := 2y/(1-y)						#
#		atanh(X) := sgn * (1/2) * logp1(z)			#
#		Exit.							#
#									#
#	3. If |X| > 1, go to 5.						#
#									#
#	4. (|X| = 1) Generate infinity with an appropriate sign and	#
#	   divide-by-zero by						#
#		sgn := sign(X)						#
#		atanh(X) := sgn / (+0).					#
#		Exit.							#
#									#
#	5. (|X| > 1) Generate an invalid operation by 0 * infinity.	#
#		Exit.							#
#									#
#########################################################################

	global		satanh
# In:  a0 -> extended precision operand, d0 = round precision,mode.
# Stack use around the slognp1 call, pushed in this order:
#   sign(X)*0.5 as a single, the caller's d0, then the 12-byte Z.
satanh:
	mov.l		(%a0),%d1
	mov.w		4(%a0),%d1
	and.l		&0x7FFFFFFF,%d1		# d1 = compact form of |X|
	cmp.l		%d1,&0x3FFF8000
	bge.b		ATANHBIG

#--THIS IS THE USUAL CASE, |X| < 1
#--Y = |X|, Z = 2Y/(1-Y), ATANH(X) = SIGN(X) * (1/2) * LOG1P(Z).

	fabs.x		(%a0),%fp0		# Y = |X|
	fmov.x		%fp0,%fp1
	fneg.x		%fp1			# -Y
	fadd.x		%fp0,%fp0		# 2Y
	fadd.s		&0x3F800000,%fp1	# 1-Y
	fdiv.x		%fp1,%fp0		# 2Y/(1-Y)
	mov.l		(%a0),%d1
	and.l		&0x80000000,%d1
	or.l		&0x3F000000,%d1		# SIGN(X)*HALF
	mov.l		%d1,-(%sp)		# kept on the stack for the final fmul.s

	mov.l		%d0,-(%sp)		# save rnd prec,mode
	clr.l		%d0			# pass ext prec,RN
	fmovm.x		&0x01,-(%sp)		# save Z on stack
	lea		(%sp),%a0		# pass ptr to Z
	bsr		slognp1			# LOG1P(Z)
	add.l		&0xc,%sp		# clear Z from stack

	mov.l		(%sp)+,%d0		# fetch old prec,mode
	fmov.l		%d0,%fpcr		# load it
	mov.b		&FMUL_OP,%d1		# last inst is MUL
	fmul.s		(%sp)+,%fp0		# multiply by SIGN(X)*HALF and pop it
	bra		t_catch

# |X| >= 1: greater than one exits through t_operr, otherwise t_dz.
ATANHBIG:
	fabs.x		(%a0),%fp0		# |X|
	fcmp.s		%fp0,&0x3F800000
	fbgt		t_operr
	bra		t_dz

	global		satanhd
#--ATANH(X) = X FOR DENORMALIZED X
satanhd:
	bra		t_extdnrm

#########################################################################
# slog10():  computes the base-10 logarithm of a normalized input	#
# slog10d(): computes the base-10 logarithm of a denormalized input	#
# slog2():   computes the base-2 logarithm of a normalized input	#
# slog2d():  computes the base-2 logarithm of a denormalized input	#
#									#
# INPUT *************************************************************** #
#	a0 = pointer to extended precision input			#
#	d0 = round precision,mode					#
#									#
# OUTPUT ************************************************************** #
#	fp0 = log_10(X) or log_2(X)					#
#									#
# ACCURACY and MONOTONICITY ******************************************* #
#	The returned result is within 1.7 ulps in 64 significant bit,	#
#	i.e. within 0.5003 ulp to 53 bits if the result is subsequently	#
#	rounded to double precision. The result is provably monotonic	#
#	in double precision.						#
#									#
# ALGORITHM *********************************************************** #
#									#
#	slog10d:							#
#									#
#	Step 0.	If X < 0, create a NaN and raise the invalid operation	#
#		flag. Otherwise, save FPCR in D1; set FpCR to default.	#
#	Notes:  Default means round-to-nearest mode, no floating-point	#
#		traps, and precision control = double extended.		#
#									#
#	Step 1. Call slognd to obtain Y = log(X), the natural log of X.	#
#	Notes:  Even if X is denormalized, log(X) is always normalized.	#
#									#
#	Step 2.  Compute log_10(X) = log(X) * (1/log(10)).		#
#		2.1 Restore the user FPCR				#
#		2.2 Return ans := Y * INV_L10.				#
#									#
#	slog10:								#
#									#
#	Step 0. If X < 0, create a NaN and raise the invalid operation	#
#		flag. Otherwise, save FPCR in D1; set FpCR to default.	#
#	Notes:  Default means round-to-nearest mode, no floating-point	#
#		traps, and precision control = double extended.		#
#									#
#	Step 1. Call sLogN to obtain Y = log(X), the natural log of X.	#
#									#
#	Step 2.   Compute log_10(X) = log(X) * (1/log(10)).		#
#		2.1  Restore the user FPCR				#
#		2.2  Return ans := Y * INV_L10.				#
#									#
#	sLog2d:								#
#									#
#	Step 0. If X < 0, create a NaN and raise the invalid operation	#
#		flag. Otherwise, save FPCR in D1; set FpCR to default.	#
#	Notes:  Default means round-to-nearest mode, no floating-point	#
#		traps, and precision control = double extended.		#
#									#
#	Step 1. Call slognd to obtain Y = log(X), the natural log of X.	#
#	Notes:  Even if X is denormalized, log(X) is always normalized.	#
#									#
#	Step 2.   Compute log_2(X) = log(X) * (1/log(2)).		#
#		2.1  Restore the user FPCR				#
#		2.2  Return ans := Y * INV_L2.				#
#									#
#	sLog2:								#
#									#
#	Step 0. If X < 0, create a NaN and raise the invalid operation	#
#		flag. Otherwise, save FPCR in D1; set FpCR to default.	#
#	Notes:  Default means round-to-nearest mode, no floating-point	#
#		traps, and precision control = double extended.		#
#									#
#	Step 1. If X is not an integer power of two, i.e., X != 2^k,	#
#		go to Step 3.						#
#									#
#	Step 2.   Return k.						#
#		2.1  Get integer k, X = 2^k.				#
#		2.2  Restore the user FPCR.				#
#		2.3  Return ans := convert-to-double-extended(k).	#
#									#
#	Step 3. Call sLogN to obtain Y = log(X), the natural log of X.	#
#									#
#	Step 4.   Compute log_2(X) = log(X) * (1/log(2)).		#
#		4.1  Restore the user FPCR				#
#		4.2  Return ans := Y * INV_L2.				#
#									#
#########################################################################

# Extended precision multipliers applied to the natural log below
# (named 1/log(10) and 1/log(2) in the algorithm notes above).
INV_L10:
	long		0x3FFD0000,0xDE5BD8A9,0x37287195,0x00000000

INV_L2:
	long		0x3FFF0000,0xB8AA3B29,0x5C17F0BC,0x00000000

	global		slog10
#--entry point for Log10(X), X is normalized
# In:  a0 -> extended precision operand, d0 = round precision,mode.
# The caller's d0 is parked on the stack while slogn runs with d0 = 0
# and is then popped straight into FPCR before the scaling multiply.
slog10:
	fmov.b		&0x1,%fp0
	fcmp.x		%fp0,(%a0)		# if operand == 1,
	fbeq.l		ld_pzero		# return an EXACT zero

	mov.l		(%a0),%d1
	blt.w		invalid			# sign bit set
	mov.l		%d0,-(%sp)
	clr.l		%d0
	bsr		slogn			# log(X), X normal.
	fmov.l		(%sp)+,%fpcr
	fmul.x		INV_L10(%pc),%fp0
	bra		t_inx2

	global		slog10d
#--entry point for Log10(X), X is denormalized
# Same sequence as slog10 without the X == 1 test; calls slognd and
# exits through t_minx2.
slog10d:
	mov.l		(%a0),%d1
	blt.w		invalid			# sign bit set
	mov.l		%d0,-(%sp)
	clr.l		%d0
	bsr		slognd			# log(X), X denorm.
	fmov.l		(%sp)+,%fpcr
	fmul.x		INV_L10(%pc),%fp0
	bra		t_minx2

	global		slog2
#--entry point for Log2(X), X is normalized
# X is treated as 2^k only when the low fraction longword is zero and
# the high fraction longword is zero apart from its top bit.
slog2:
	mov.l		(%a0),%d1
	blt.w		invalid			# sign bit set

	mov.l		8(%a0),%d1
	bne.b		continue		# X is not 2^k

	mov.l		4(%a0),%d1
	and.l		&0x7FFFFFFF,%d1
	bne.b		continue

#--X = 2^k.
# slog2, X = 2^k case: k = biased exponent - 0x3FFF.  k == 0 exits via
# ld_pzero; otherwise d0 is loaded into FPCR, the integer k is converted
# into fp0 and the routine exits through t_inx2.
	mov.w		(%a0),%d1
	and.l		&0x00007FFF,%d1
	sub.l		&0x3FFF,%d1
	beq.l		ld_pzero
	fmov.l		%d0,%fpcr
	fmov.l		%d1,%fp0
	bra		t_inx2

# slog2, general case: natural log via slogn (called with d0 = 0), then
# multiply by INV_L2 under the caller's round precision,mode.
continue:
	mov.l		%d0,-(%sp)
	clr.l		%d0
	bsr		slogn			# log(X), X normal.
	fmov.l		(%sp)+,%fpcr
	fmul.x		INV_L2(%pc),%fp0
	bra		t_inx2

# shared target for the sign-bit tests in slog10/slog10d/slog2/slog2d
invalid:
	bra		t_operr

	global		slog2d
#--entry point for Log2(X), X is denormalized
# Same sequence as "continue" but calls slognd and exits via t_minx2.
slog2d:
	mov.l		(%a0),%d1
	blt.w		invalid			# sign bit set
	mov.l		%d0,-(%sp)
	clr.l		%d0
	bsr		slognd			# log(X), X denorm.
	fmov.l		(%sp)+,%fpcr
	fmul.x		INV_L2(%pc),%fp0
	bra		t_minx2

#########################################################################
# stwotox():  computes 2**X for a normalized input			#
# stwotoxd(): computes 2**X for a denormalized input			#
# stentox():  computes 10**X for a normalized input			#
# stentoxd(): computes 10**X for a denormalized input			#
#									#
# INPUT *************************************************************** #
#	a0 = pointer to extended precision input			#
#	d0 = round precision,mode					#
#									#
# OUTPUT ************************************************************** #
#	fp0 = 2**X or 10**X						#
#									#
# ACCURACY and MONOTONICITY ******************************************* #
#	The returned result is within 2 ulps in 64 significant bit,	#
#	i.e. within 0.5001 ulp to 53 bits if the result is subsequently	#
#	rounded to double precision. The result is provably monotonic	#
#	in double precision.						#
#									#
# ALGORITHM *********************************************************** #
#									#
#	twotox								#
#	1. If |X| > 16480, go to ExpBig.				#
#									#
#	2. If |X| < 2**(-70), go to ExpSm.				#
#									#
#	3. Decompose X as X = N/64 + r where |r| <= 1/128. Furthermore	#
#	   decompose N as						#
#		 N = 64(M + M') + j,  j = 0,1,2,...,63.			#
#									#
#	4. Overwrite r := r * log2. Then				#
#		 2**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r).		#
# 8841# Go to expr to compute that expression. # 8842# # 8843# tentox # 8844# 1. If |X| > 16480*log_10(2) (base 10 log of 2), go to ExpBig. # 8845# # 8846# 2. If |X| < 2**(-70), go to ExpSm. # 8847# # 8848# 3. Set y := X*log_2(10)*64 (base 2 log of 10). Set # 8849# N := round-to-int(y). Decompose N as # 8850# N = 64(M + M') + j, j = 0,1,2,...,63. # 8851# # 8852# 4. Define r as # 8853# r := ((X - N*L1)-N*L2) * L10 # 8854# where L1, L2 are the leading and trailing parts of # 8855# log_10(2)/64 and L10 is the natural log of 10. Then # 8856# 10**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r). # 8857# Go to expr to compute that expression. # 8858# # 8859# expr # 8860# 1. Fetch 2**(j/64) from table as Fact1 and Fact2. # 8861# # 8862# 2. Overwrite Fact1 and Fact2 by # 8863# Fact1 := 2**(M) * Fact1 # 8864# Fact2 := 2**(M) * Fact2 # 8865# Thus Fact1 + Fact2 = 2**(M) * 2**(j/64). # 8866# # 8867# 3. Calculate P where 1 + P approximates exp(r): # 8868# P = r + r*r*(A1+r*(A2+...+r*A5)). # 8869# # 8870# 4. Let AdjFact := 2**(M'). Return # 8871# AdjFact * ( Fact1 + ((Fact1*P) + Fact2) ). # 8872# Exit. # 8873# # 8874# ExpBig # 8875# 1. Generate overflow by Huge * Huge if X > 0; otherwise, # 8876# generate underflow by Tiny * Tiny. # 8877# # 8878# ExpSm # 8879# 1. Return 1 + X. 
#
#									#
#########################################################################

#--Constants shared by stwotox/stentox and their common tail "expr".
L2TEN64:
	long		0x406A934F,0x0979A371	# 64LOG10/LOG2
L10TWO1:
	long		0x3F734413,0x509F8000	# LOG2/64LOG10

#--Extended-precision term multiplied by N in stentox as the
#--"(LOG2/64LOG10)_TRAIL" part of the argument reduction.
L10TWO2:
	long		0xBFCD0000,0xC0219DC1,0xDA994FD2,0x00000000

#--Extended-precision multipliers that turn the reduced argument into R
#--(stentox uses LOG10, approx. ln(10); stwotox uses LOG2, approx. ln(2)).
LOG10:	long		0x40000000,0x935D8DDD,0xAAA8AC17,0x00000000

LOG2:	long		0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000

#--Double-precision polynomial coefficients used by "expr" to form
#--EXP(R)-1 (approx. 1/720, 1/120, 1/24, 1/6 and exactly 1/2).
EXPA5:	long		0x3F56C16D,0x6F7BD0B2
EXPA4:	long		0x3F811112,0x302C712C
EXPA3:	long		0x3FA55555,0x55554CC1
EXPA2:	long		0x3FC55555,0x55554A54
EXPA1:	long		0x3FE00000,0x00000000,0x00000000,0x00000000

#--TABLE OF 2^(J/64), J = 0..63, 16 BYTES PER ENTRY.
#--The first three longwords of an entry are copied to FACT1; the two
#--16-bit halves of the fourth longword are copied to the exponent word
#--and the top 16 mantissa bits of FACT2 (see stwotox/stentox below).
TEXPTBL:
	long		0x3FFF0000,0x80000000,0x00000000,0x3F738000
	long		0x3FFF0000,0x8164D1F3,0xBC030773,0x3FBEF7CA
	long		0x3FFF0000,0x82CD8698,0xAC2BA1D7,0x3FBDF8A9
	long		0x3FFF0000,0x843A28C3,0xACDE4046,0x3FBCD7C9
	long		0x3FFF0000,0x85AAC367,0xCC487B15,0xBFBDE8DA
	long		0x3FFF0000,0x871F6196,0x9E8D1010,0x3FBDE85C
	long		0x3FFF0000,0x88980E80,0x92DA8527,0x3FBEBBF1
	long		0x3FFF0000,0x8A14D575,0x496EFD9A,0x3FBB80CA
	long		0x3FFF0000,0x8B95C1E3,0xEA8BD6E7,0xBFBA8373
	long		0x3FFF0000,0x8D1ADF5B,0x7E5BA9E6,0xBFBE9670
	long		0x3FFF0000,0x8EA4398B,0x45CD53C0,0x3FBDB700
	long		0x3FFF0000,0x9031DC43,0x1466B1DC,0x3FBEEEB0
	long		0x3FFF0000,0x91C3D373,0xAB11C336,0x3FBBFD6D
	long		0x3FFF0000,0x935A2B2F,0x13E6E92C,0xBFBDB319
	long		0x3FFF0000,0x94F4EFA8,0xFEF70961,0x3FBDBA2B
	long		0x3FFF0000,0x96942D37,0x20185A00,0x3FBE91D5
	long		0x3FFF0000,0x9837F051,0x8DB8A96F,0x3FBE8D5A
	long		0x3FFF0000,0x99E04593,0x20B7FA65,0xBFBCDE7B
	long		0x3FFF0000,0x9B8D39B9,0xD54E5539,0xBFBEBAAF
	long		0x3FFF0000,0x9D3ED9A7,0x2CFFB751,0xBFBD86DA
	long		0x3FFF0000,0x9EF53260,0x91A111AE,0xBFBEBEDD
	long		0x3FFF0000,0xA0B0510F,0xB9714FC2,0x3FBCC96E
	long		0x3FFF0000,0xA2704303,0x0C496819,0xBFBEC90B
	long		0x3FFF0000,0xA43515AE,0x09E6809E,0x3FBBD1DB
	long		0x3FFF0000,0xA5FED6A9,0xB15138EA,0x3FBCE5EB
	long		0x3FFF0000,0xA7CD93B4,0xE965356A,0xBFBEC274
	long		0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x3FBEA83C
	long		0x3FFF0000,0xAB7A39B5,0xA93ED337,0x3FBECB00
	long		0x3FFF0000,0xAD583EEA,0x42A14AC6,0x3FBE9301
	long		0x3FFF0000,0xAF3B78AD,0x690A4375,0xBFBD8367
	long		0x3FFF0000,0xB123F581,0xD2AC2590,0xBFBEF05F
	long		0x3FFF0000,0xB311C412,0xA9112489,0x3FBDFB3C
	long		0x3FFF0000,0xB504F333,0xF9DE6484,0x3FBEB2FB
	long		0x3FFF0000,0xB6FD91E3,0x28D17791,0x3FBAE2CB
	long		0x3FFF0000,0xB8FBAF47,0x62FB9EE9,0x3FBCDC3C
	long		0x3FFF0000,0xBAFF5AB2,0x133E45FB,0x3FBEE9AA
	long		0x3FFF0000,0xBD08A39F,0x580C36BF,0xBFBEAEFD
	long		0x3FFF0000,0xBF1799B6,0x7A731083,0xBFBCBF51
	long		0x3FFF0000,0xC12C4CCA,0x66709456,0x3FBEF88A
	long		0x3FFF0000,0xC346CCDA,0x24976407,0x3FBD83B2
	long		0x3FFF0000,0xC5672A11,0x5506DADD,0x3FBDF8AB
	long		0x3FFF0000,0xC78D74C8,0xABB9B15D,0xBFBDFB17
	long		0x3FFF0000,0xC9B9BD86,0x6E2F27A3,0xBFBEFE3C
	long		0x3FFF0000,0xCBEC14FE,0xF2727C5D,0xBFBBB6F8
	long		0x3FFF0000,0xCE248C15,0x1F8480E4,0xBFBCEE53
	long		0x3FFF0000,0xD06333DA,0xEF2B2595,0xBFBDA4AE
	long		0x3FFF0000,0xD2A81D91,0xF12AE45A,0x3FBC9124
	long		0x3FFF0000,0xD4F35AAB,0xCFEDFA1F,0x3FBEB243
	long		0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x3FBDE69A
	long		0x3FFF0000,0xD99D15C2,0x78AFD7B6,0xBFB8BC61
	long		0x3FFF0000,0xDBFBB797,0xDAF23755,0x3FBDF610
	long		0x3FFF0000,0xDE60F482,0x5E0E9124,0xBFBD8BE1
	long		0x3FFF0000,0xE0CCDEEC,0x2A94E111,0x3FBACB12
	long		0x3FFF0000,0xE33F8972,0xBE8A5A51,0x3FBB9BFE
	long		0x3FFF0000,0xE5B906E7,0x7C8348A8,0x3FBCF2F4
	long		0x3FFF0000,0xE8396A50,0x3C4BDC68,0x3FBEF22F
	long		0x3FFF0000,0xEAC0C6E7,0xDD24392F,0xBFBDBF4A
	long		0x3FFF0000,0xED4F301E,0xD9942B84,0x3FBEC01A
	long		0x3FFF0000,0xEFE4B99B,0xDCDAF5CB,0x3FBE8CAC
	long		0x3FFF0000,0xF281773C,0x59FFB13A,0xBFBCBB3F
	long		0x3FFF0000,0xF5257D15,0x2486CC2C,0x3FBEF73A
	long		0x3FFF0000,0xF7D0DF73,0x0AD13BB9,0xBFB8B795
	long		0x3FFF0000,0xFA83B2DB,0x722A033A,0x3FBEF84B
	long		0x3FFF0000,0xFD3E0C0C,0xF486C175,0xBFBEF581

#--Scratch-area aliases. NOTE: X, ADJFACT and FACT1 all name FP_SCR0, so
#--each one is only valid until the next one is written.
	set		INT,L_SCR1

	set		X,FP_SCR0
	set		XDCARE,X+2
	set		XFRAC,X+4

	set		ADJFACT,FP_SCR0

	set		FACT1,FP_SCR0
	set		FACT1HI,FACT1+4
	set		FACT1LOW,FACT1+8

	set		FACT2,FP_SCR1
	set		FACT2HI,FACT2+4
	set		FACT2LOW,FACT2+8

	global		stwotox
#--ENTRY POINT FOR 2**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
#--IN:  a0 = pointer to extended-precision X; d0 = value later written to
#--     the fpcr ("users round prec,mode").
#--OUT: fp0 = result, returned through t_catch/t_pinx2/t_ovfl2/t_unfl2.
stwotox:
	fmovm.x		(%a0),&0x80		# LOAD INPUT

# d1 = compact form of X: sign/exponent word in the upper half, top 16
# mantissa bits in the lower half; the sign is then masked off.
	mov.l		(%a0),%d1
	mov.w		4(%a0),%d1
	fmov.x		%fp0,X(%a6)
	and.l		&0x7FFFFFFF,%d1

	cmp.l		%d1,&0x3FB98000		# |X| >= 2**(-70)?
	bge.b		TWOOK1
	bra.w		EXPBORS

TWOOK1:
	cmp.l		%d1,&0x400D80C0		# |X| > 16480?
	ble.b		TWOMAIN
	bra.w		EXPBORS

TWOMAIN:
#--USUAL CASE, 2^(-70) <= |X| <= 16480

	fmov.x		%fp0,%fp1
	fmul.s		&0x42800000,%fp1	# 64 * X
	fmov.l		%fp1,INT(%a6)		# N = ROUND-TO-INT(64 X)
	mov.l		%d2,-(%sp)		# save d2 (restored in expr)
	lea		TEXPTBL(%pc),%a1	# LOAD ADDRESS OF TABLE OF 2^(J/64)
	fmov.l		INT(%a6),%fp1		# N --> FLOATING FMT
	mov.l		INT(%a6),%d1
	mov.l		%d1,%d2
	and.l		&0x3F,%d1		# d1 IS J
	asl.l		&4,%d1			# DISPLACEMENT FOR 2^(J/64)
	add.l		%d1,%a1			# ADDRESS FOR 2^(J/64)
	asr.l		&6,%d2			# d2 IS L, N = 64L + J
	mov.l		%d2,%d1
	asr.l		&1,%d1			# d1 IS M
	sub.l		%d1,%d2			# d2 IS M', N = 64(M+M') + J
	add.l		&0x3FFF,%d2		# d2 = biased exponent of 2^(M')

#--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
#--d1 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
#--d2 IS THE BIASED EXPONENT OF 2^(M'); ADJFACT IS BUILT FROM IT IN expr.
#--SAVED ON THE STACK SO FAR: d2. fp2/fp3 ARE PUSHED NEXT.

	fmovm.x		&0x0c,-(%sp)		# save fp2/fp3

	fmul.s		&0x3C800000,%fp1	# (1/64)*N
	mov.l		(%a1)+,FACT1(%a6)
	mov.l		(%a1)+,FACT1HI(%a6)
	mov.l		(%a1)+,FACT1LOW(%a6)
	mov.w		(%a1)+,FACT2(%a6)

	fsub.x		%fp1,%fp0		# X - (1/64)*INT(64 X)

	mov.w		(%a1)+,FACT2HI(%a6)
	clr.w		FACT2HI+2(%a6)
	clr.l		FACT2LOW(%a6)
	add.w		%d1,FACT1(%a6)		# scale FACT1 by 2^M
	fmul.x		LOG2(%pc),%fp0		# FP0 IS R
	add.w		%d1,FACT2(%a6)		# scale FACT2 by 2^M

	bra.w		expr

EXPBORS:
#--REACHED WHEN |X| IS BELOW 2^(-70) OR ABOVE THE ROUTINE'S UPPER BOUND.
#--d0 STILL HOLDS THE VALUE TO BE WRITTEN TO THE FPCR; NOTHING IS STACKED.
	cmp.l		%d1,&0x3FFF8000
	bgt.b		TEXPBIG

#--|X| IS SMALL, RETURN 1 + X

	fmov.l		%d0,%fpcr		# restore users round prec,mode
	fadd.s		&0x3F800000,%fp0	# RETURN 1 + X
	bra		t_pinx2

TEXPBIG:
#--|X| IS LARGE, GENERATE OVERFLOW IF X > 0; ELSE GENERATE UNDERFLOW
#--NOTHING HAS BEEN PUSHED ON THE STACK ON THIS PATH.
	mov.l		X(%a6),%d1		# sign/exponent longword of X
	cmp.l		%d1,&0
	blt.b		EXPNEG

	bra		t_ovfl2			# t_ovfl expects positive value

EXPNEG:
	bra		t_unfl2			# t_unfl expects positive value

	global		stwotoxd
stwotoxd:
#--ENTRY POINT FOR 2**(X) FOR DENORMALIZED ARGUMENT
#--The result is 1 plus a tiny single-precision number carrying X's sign:
#--d1 = first longword of X (sign + zero exponent) with 0x00800001 OR'ed in.

	fmov.l		%d0,%fpcr		# set user's rounding mode/precision
	fmov.s		&0x3F800000,%fp0	# RETURN 1 + X
	mov.l		(%a0),%d1
	or.l		&0x00800001,%d1
	fadd.s		%d1,%fp0
	bra		t_pinx2

	global		stentox
#--ENTRY POINT FOR 10**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
#--IN:  a0 = pointer to extended-precision X; d0 = value later written to
#--     the fpcr ("users round prec,mode").
#--OUT: fp0 = result, returned through t_catch/t_pinx2/t_ovfl2/t_unfl2.
stentox:
	fmovm.x		(%a0),&0x80		# LOAD INPUT

# d1 = compact form of |X| (exponent word : top 16 mantissa bits)
	mov.l		(%a0),%d1
	mov.w		4(%a0),%d1
	fmov.x		%fp0,X(%a6)
	and.l		&0x7FFFFFFF,%d1

	cmp.l		%d1,&0x3FB98000		# |X| >= 2**(-70)?
	bge.b		TENOK1
	bra.w		EXPBORS

TENOK1:
	cmp.l		%d1,&0x400B9B07		# |X| <= 16480*log2/log10 ?
	ble.b		TENMAIN
	bra.w		EXPBORS

TENMAIN:
#--USUAL CASE, 2^(-70) <= |X| <= 16480 LOG 2 / LOG 10

	fmov.x		%fp0,%fp1
	fmul.d		L2TEN64(%pc),%fp1	# X*64*LOG10/LOG2
	fmov.l		%fp1,INT(%a6)		# N=INT(X*64*LOG10/LOG2)
	mov.l		%d2,-(%sp)		# save d2 (restored in expr)
	lea		TEXPTBL(%pc),%a1	# LOAD ADDRESS OF TABLE OF 2^(J/64)
	fmov.l		INT(%a6),%fp1		# N --> FLOATING FMT
	mov.l		INT(%a6),%d1
	mov.l		%d1,%d2
	and.l		&0x3F,%d1		# d1 IS J
	asl.l		&4,%d1			# DISPLACEMENT FOR 2^(J/64)
	add.l		%d1,%a1			# ADDRESS FOR 2^(J/64)
	asr.l		&6,%d2			# d2 IS L, N = 64L + J
	mov.l		%d2,%d1
	asr.l		&1,%d1			# d1 IS M
	sub.l		%d1,%d2			# d2 IS M', N = 64(M+M') + J
	add.l		&0x3FFF,%d2		# d2 = biased exponent of 2^(M')

#--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
#--d1 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
#--d2 IS THE BIASED EXPONENT OF 2^(M'); ADJFACT IS BUILT FROM IT IN expr.
#--SAVED ON THE STACK SO FAR: d2. fp2/fp3 ARE PUSHED NEXT.
	fmovm.x		&0x0c,-(%sp)		# save fp2/fp3

	fmov.x		%fp1,%fp2

	fmul.d		L10TWO1(%pc),%fp1	# N*(LOG2/64LOG10)_LEAD
	mov.l		(%a1)+,FACT1(%a6)

	fmul.x		L10TWO2(%pc),%fp2	# N*(LOG2/64LOG10)_TRAIL

	mov.l		(%a1)+,FACT1HI(%a6)
	mov.l		(%a1)+,FACT1LOW(%a6)
	fsub.x		%fp1,%fp0		# X - N L_LEAD
	mov.w		(%a1)+,FACT2(%a6)

	fsub.x		%fp2,%fp0		# X - N L_TRAIL

	mov.w		(%a1)+,FACT2HI(%a6)
	clr.w		FACT2HI+2(%a6)
	clr.l		FACT2LOW(%a6)

	fmul.x		LOG10(%pc),%fp0		# FP0 IS R
	add.w		%d1,FACT1(%a6)		# scale FACT1 by 2^M
	add.w		%d1,FACT2(%a6)		# scale FACT2 by 2^M

expr:
#--COMMON TAIL OF stwotox AND stentox.
#--ON THE STACK (TOP FIRST): fp2/fp3, THEN d2.
#--d2 HOLDS THE BIASED EXPONENT OF 2**(M'), FACT1 + FACT2 = 2**(M) * 2**(J/64).
#--FP0 IS R. THE FOLLOWING CODE COMPUTES
#--	2**(M'+M) * 2**(J/64) * EXP(R)

	fmov.x		%fp0,%fp1
	fmul.x		%fp1,%fp1		# FP1 IS S = R*R

	fmov.d		EXPA5(%pc),%fp2		# FP2 IS A5
	fmov.d		EXPA4(%pc),%fp3		# FP3 IS A4

	fmul.x		%fp1,%fp2		# FP2 IS S*A5
	fmul.x		%fp1,%fp3		# FP3 IS S*A4

	fadd.d		EXPA3(%pc),%fp2		# FP2 IS A3+S*A5
	fadd.d		EXPA2(%pc),%fp3		# FP3 IS A2+S*A4

	fmul.x		%fp1,%fp2		# FP2 IS S*(A3+S*A5)
	fmul.x		%fp1,%fp3		# FP3 IS S*(A2+S*A4)

	fadd.d		EXPA1(%pc),%fp2		# FP2 IS A1+S*(A3+S*A5)
	fmul.x		%fp0,%fp3		# FP3 IS R*S*(A2+S*A4)

	fmul.x		%fp1,%fp2		# FP2 IS S*(A1+S*(A3+S*A5))
	fadd.x		%fp3,%fp0		# FP0 IS R+R*S*(A2+S*A4)
	fadd.x		%fp2,%fp0		# FP0 IS EXP(R) - 1

	fmovm.x		(%sp)+,&0x30		# restore fp2/fp3

#--FINAL RECONSTRUCTION PROCESS
#--EXP(X) = 2^M*2^(J/64) + 2^M*2^(J/64)*(EXP(R)-1)  -  (1 OR 0)

	fmul.x		FACT1(%a6),%fp0
	fadd.x		FACT2(%a6),%fp0
	fadd.x		FACT1(%a6),%fp0

# FACT1 is no longer needed, so its storage (FP_SCR0) is reused to build
# ADJFACT = 2^(M') as an extended number: exponent d2, mantissa 0x80000000:0.
	fmov.l		%d0,%fpcr		# restore users round prec,mode
	mov.w		%d2,ADJFACT(%a6)	# INSERT EXPONENT
	mov.l		(%sp)+,%d2		# restore d2
	mov.l		&0x80000000,ADJFACT+4(%a6)
	clr.l		ADJFACT+8(%a6)
	mov.b		&FMUL_OP,%d1		# last inst is MUL
	fmul.x		ADJFACT(%a6),%fp0	# FINAL ADJUSTMENT
	bra		t_catch

	global		stentoxd
stentoxd:
#--ENTRY POINT FOR 10**(X) FOR DENORMALIZED ARGUMENT
#--Same sequence as stwotoxd: 1 plus a tiny single-precision number that
#--carries the sign of X.

	fmov.l		%d0,%fpcr		# set user's rounding mode/precision
	fmov.s		&0x3F800000,%fp0	# RETURN 1 + X
	mov.l		(%a0),%d1
	or.l		&0x00800001,%d1
	fadd.s		%d1,%fp0
	bra		t_pinx2

#########################################################################
# sscale(): computes the destination operand scaled by the source	#
#	    operand. If the absolute value of the source operand is	#
#	    >= 2^14, an overflow or underflow is returned.
#
#									#
# INPUT *************************************************************** #
#	a0  = pointer to double-extended source operand X		#
#	a1  = pointer to double-extended destination operand Y		#
#									#
# OUTPUT ************************************************************** #
#	fp0 =  scale(X,Y)						#
#									#
#########################################################################

set	SIGN,		L_SCR1

# On entry d0 holds the control bits that are later written to the fpcr;
# they are kept on the stack until one of the exit paths pops them.
	global		sscale
sscale:
	mov.l		%d0,-(%sp)		# store off ctrl bits for now

	mov.w		DST_EX(%a1),%d1		# get dst exponent
	smi.b		SIGN(%a6)		# use SIGN to hold dst sign
	andi.l		&0x00007fff,%d1		# strip sign from dst exp

	mov.w		SRC_EX(%a0),%d0		# check src bounds
	andi.w		&0x7fff,%d0		# clr src sign bit
	cmpi.w		%d0,&0x3fff		# is src ~ ZERO?
	blt.w		src_small		# yes
	cmpi.w		%d0,&0x400c		# no; is src too big?
	bgt.w		src_out			# yes

#
# Source is within 2^14 range.
#
src_ok:
	fintrz.x	SRC(%a0),%fp0		# calc int of src
	fmov.l		%fp0,%d0		# int src to d0
# don't want any accrued bits from the fintrz showing up later since
# we may need to read the fpsr for the last fp op in t_catch2().
	fmov.l		&0x0,%fpsr

	tst.b		DST_HI(%a1)		# is dst denormalized?
	bmi.b		sok_norm		# no; msb of mantissa is set

# the dst is a DENORM. normalize the DENORM and add the adjustment to
# the src value. then, jump to the norm part of the routine.
sok_dnrm:
	mov.l		%d0,-(%sp)		# save src for now

	mov.w		DST_EX(%a1),FP_SCR0_EX(%a6) # make a copy
	mov.l		DST_HI(%a1),FP_SCR0_HI(%a6)
	mov.l		DST_LO(%a1),FP_SCR0_LO(%a6)

	lea		FP_SCR0(%a6),%a0	# pass ptr to DENORM
	bsr.l		norm			# normalize the DENORM
	neg.l		%d0			# negate the value norm left in d0
	add.l		(%sp)+,%d0		# add adjustment to src

	fmovm.x		FP_SCR0(%a6),&0x80	# load normalized DENORM

	cmpi.w		%d0,&-0x3fff		# is the shft amt really low?
	bge.b		sok_norm2		# thank goodness no

# the multiply factor that we're trying to create should be a denorm
# for the multiply to work. therefore, we're going to actually do a
# multiply with a denorm which will cause an unimplemented data type
# exception to be put into the machine which will be caught and corrected
# later. we don't do this with the DENORMs above because this method
# is slower. but, don't fret, I don't see it being used much either.
# The extended-precision factor is built on the stack: low mantissa is
# pushed first, then high mantissa, then a zero exponent longword.
	fmov.l		(%sp)+,%fpcr		# restore user fpcr
	mov.l		&0x80000000,%d1		# load normalized mantissa
	subi.l		&-0x3fff,%d0		# how many should we shift?
	neg.l		%d0			# make it positive
	cmpi.b		%d0,&0x20		# is it >= 32?
	bge.b		sok_dnrm_32		# yes
	lsr.l		%d0,%d1			# no; bit stays in upper lw
	clr.l		-(%sp)			# insert zero low mantissa
	mov.l		%d1,-(%sp)		# insert new high mantissa
	clr.l		-(%sp)			# make zero exponent
	bra.b		sok_norm_cont
sok_dnrm_32:
	subi.b		&0x20,%d0		# get shift count
	lsr.l		%d0,%d1			# make low mantissa longword
	mov.l		%d1,-(%sp)		# insert new low mantissa
	clr.l		-(%sp)			# insert zero high mantissa
	clr.l		-(%sp)			# make zero exponent
	bra.b		sok_norm_cont

# the dst is normalized (sok_norm), or was a DENORM whose adjusted shift
# amount is still >= -0x3fff (sok_norm2). build a normalized power of
# two on the stack whose biased exponent is d0 + 0x3fff.
sok_norm:
	fmovm.x		DST(%a1),&0x80		# load fp0 with normalized dst
sok_norm2:
	fmov.l		(%sp)+,%fpcr		# restore user fpcr

	addi.w		&0x3fff,%d0		# turn src amt into exp value
	swap		%d0			# put exponent in high word
	clr.l		-(%sp)			# insert zero low mantissa
	mov.l		&0x80000000,-(%sp)	# insert new high mantissa
	mov.l		%d0,-(%sp)		# insert new exponent

sok_norm_cont:
	fmov.l		%fpcr,%d0		# d0 needs fpcr for t_catch2
	mov.b		&FMUL_OP,%d1		# last inst is MUL
	fmul.x		(%sp)+,%fp0		# do the multiply
	bra		t_catch2		# catch any exceptions

#
# Source is outside of 2^14 range.  Test the sign and branch
# to the appropriate exception handler.
#
src_out:
	mov.l		(%sp)+,%d0		# restore ctrl bits
	exg		%a0,%a1			# swap src,dst ptrs
	tst.b		SRC_EX(%a1)		# is src negative?
	bmi		t_unfl			# yes; underflow
	bra		t_ovfl_sc		# no; overflow

#
# The source input is below 1, so we check for denormalized numbers
# and set unfl.
#
src_small:
	tst.b		DST_HI(%a1)		# is dst denormalized?
	bpl.b		ssmall_done		# yes

	mov.l		(%sp)+,%d0
	fmov.l		%d0,%fpcr		# no; load control bits
	mov.b		&FMOV_OP,%d1		# last inst is MOVE
	fmov.x		DST(%a1),%fp0		# simply return dest
	bra		t_catch2
ssmall_done:
	mov.l		(%sp)+,%d0		# load control bits into d0
	mov.l		%a1,%a0			# pass ptr to dst
	bra		t_resdnrm

#########################################################################
# smod(): computes the fp MOD of the input values X,Y.			#
# srem(): computes the fp (IEEE) REM of the input values X,Y.		#
#									#
# INPUT *************************************************************** #
#	a0 = pointer to extended precision source operand; the code	#
#	     reads it through SRC_EX/SRC_HI/SRC_LO and treats it as Y	#
#	a1 = pointer to extended precision destination operand; the	#
#	     code reads it through DST_EX/DST_HI/DST_LO as X		#
#	d0 = round precision,mode					#
#									#
#	The input operands X and Y can be either normalized or		#
#	denormalized.							#
#									#
# OUTPUT ************************************************************** #
#      fp0 = FREM(X,Y) or FMOD(X,Y)					#
#									#
# ALGORITHM *********************************************************** #
#									#
#       Step 1.  Save and strip signs of X and Y: signX := sign(X),	#
#                signY := sign(Y), X := |X|, Y := |Y|,			#
#                signQ := signX EOR signY. Record whether MOD or REM	#
#                is requested.						#
#									#
#       Step 2.  Set L := expo(X)-expo(Y), k := 0, Q := 0.		#
#                If (L < 0) then					#
#                   R := X, go to Step 4.				#
#                else							#
#                   R := 2^(-L)X, j := L.				#
#                endif							#
#									#
#       Step 3.  Perform MOD(X,Y)					#
#            3.1 If R = Y, go to Step 9.
#
#            3.2 If R > Y, then { R := R - Y, Q := Q + 1}		#
#            3.3 If j = 0, go to Step 4.				#
#            3.4 k := k + 1, j := j - 1, Q := 2Q, R := 2R. Go to	#
#                Step 3.1.						#
#									#
#       Step 4.  At this point, R = X - QY = MOD(X,Y). Set		#
#                Last_Subtract := false (used in Step 7 below). If	#
#                MOD is requested, go to Step 6.			#
#									#
#       Step 5.  R = MOD(X,Y), but REM(X,Y) is requested.		#
#            5.1 If R < Y/2, then R = MOD(X,Y) = REM(X,Y). Go to	#
#                Step 6.						#
#            5.2 If R > Y/2, then { set Last_Subtract := true,		#
#                Q := Q + 1, Y := signY*Y }. Go to Step 6.		#
#            5.3 This is the tricky case of R = Y/2. If Q is odd,	#
#                then { Q := Q + 1, signX := -signX }.			#
#									#
#       Step 6.  R := signX*R.						#
#									#
#       Step 7.  If Last_Subtract = true, R := R - Y.			#
#									#
#       Step 8.  Return signQ, last 7 bits of Q, and R as required.	#
#									#
#       Step 9.  At this point, R = 2^(-j)*X - Q Y = Y. Thus,		#
#                X = 2^(j)*(Q+1)Y. set Q := 2^(j)*(Q+1),		#
#                R := 0. Return signQ, last 7 bits of Q, and R.		#
#									#
#########################################################################

# Scratch-area layout used by smod/srem. L_SCR1 itself holds the biased
# exponent of Y (see Init).
	set		Mod_Flag,L_SCR3
	set		Sc_Flag,L_SCR3+1

	set		SignY,L_SCR2
	set		SignX,L_SCR2+2
	set		SignQ,L_SCR3+2

	set		Y,FP_SCR0
	set		Y_Hi,Y+4
	set		Y_Lo,Y+8

	set		R,FP_SCR1
	set		R_Hi,R+4
	set		R_Lo,R+8

# Extended-precision constant (exponent 0x0001, mantissa 0x80000000:0) that
# the result is multiplied by at Restore when Sc_Flag is set.
Scale:
	long		0x00010000,0x80000000,0x00000000,0x00000000

# smod: clear the quotient byte, save the control bits, Mod_Flag := 0 (MOD).
	global		smod
smod:
	clr.b		FPSR_QBYTE(%a6)
	mov.l		%d0,-(%sp)		# save ctrl bits
	clr.b		Mod_Flag(%a6)
	bra.b		Mod_Rem

# srem: same as smod but Mod_Flag := 1 (REM).
	global		srem
srem:
	clr.b		FPSR_QBYTE(%a6)
	mov.l		%d0,-(%sp)		# save ctrl bits
	mov.b		&0x1,Mod_Flag(%a6)

Mod_Rem:
#..Save sign of X and Y
	movm.l		&0x3f00,-(%sp)		# save data registers (d2-d7)
	mov.w		SRC_EX(%a0),%d3
	mov.w		%d3,SignY(%a6)
	and.l		&0x00007FFF,%d3		# Y := |Y|

#
	mov.l		SRC_HI(%a0),%d4
	mov.l		SRC_LO(%a0),%d5		# (D3,D4,D5) is |Y|

	tst.l		%d3
	bne.b		Y_Normal

# Y has a zero exponent: normalize its mantissa by hand.
	mov.l		&0x00003FFE,%d3		# $3FFD + 1
	tst.l		%d4
	bne.b		HiY_not0

HiY_0:
	mov.l		%d5,%d4
	clr.l		%d5
	sub.l		&32,%d3
	clr.l		%d6
	bfffo		%d4{&0:&32},%d6
	lsl.l		%d6,%d4
	sub.l		%d6,%d3			# (D3,D4,D5) is normalized
#						...with bias $7FFD
	bra.b		Chk_X

HiY_not0:
	clr.l		%d6
	bfffo		%d4{&0:&32},%d6
	sub.l		%d6,%d3
	lsl.l		%d6,%d4
	mov.l		%d5,%d7			# a copy of D5
	lsl.l		%d6,%d5
	neg.l		%d6
	add.l		&32,%d6
	lsr.l		%d6,%d7
	or.l		%d7,%d4			# (D3,D4,D5) normalized
#						...with bias $7FFD
	bra.b		Chk_X

Y_Normal:
	add.l		&0x00003FFE,%d3		# (D3,D4,D5) normalized
#						...with bias $7FFD

Chk_X:
	mov.w		DST_EX(%a1),%d0
	mov.w		%d0,SignX(%a6)
	mov.w		SignY(%a6),%d1
	eor.l		%d0,%d1
	and.l		&0x00008000,%d1
	mov.w		%d1,SignQ(%a6)		# sign(Q) obtained
	and.l		&0x00007FFF,%d0
	mov.l		DST_HI(%a1),%d1
	mov.l		DST_LO(%a1),%d2		# (D0,D1,D2) is |X|
	tst.l		%d0
	bne.b		X_Normal
# X has a zero exponent: normalize its mantissa by hand.
	mov.l		&0x00003FFE,%d0
	tst.l		%d1
	bne.b		HiX_not0

HiX_0:
	mov.l		%d2,%d1
	clr.l		%d2
	sub.l		&32,%d0
	clr.l		%d6
	bfffo		%d1{&0:&32},%d6
	lsl.l		%d6,%d1
	sub.l		%d6,%d0			# (D0,D1,D2) is normalized
#						...with bias $7FFD
	bra.b		Init

HiX_not0:
	clr.l		%d6
	bfffo		%d1{&0:&32},%d6
	sub.l		%d6,%d0
	lsl.l		%d6,%d1
	mov.l		%d2,%d7			# a copy of D2
	lsl.l		%d6,%d2
	neg.l		%d6
	add.l		&32,%d6
	lsr.l		%d6,%d7
	or.l		%d7,%d1			# (D0,D1,D2) normalized
#						...with bias $7FFD
	bra.b		Init

X_Normal:
	add.l		&0x00003FFE,%d0		# (D0,D1,D2) normalized
#						...with bias $7FFD

Init:
#
	mov.l		%d3,L_SCR1(%a6)		# save biased exp(Y)
	mov.l		%d0,-(%sp)		# save biased exp(X)
	sub.l		%d3,%d0			# L := expo(X)-expo(Y)

	clr.l		%d6			# D6 := carry <- 0
	clr.l		%d3			# D3 is Q
	mov.l		&0,%a1			# A1 is k; j+k=L, Q=0

#..(Carry,D1,D2) is R
	tst.l		%d0
	bge.b		Mod_Loop_pre

#..expo(X) < expo(Y). Thus X = mod(X,Y)
#
	mov.l		(%sp)+,%d0		# restore d0
	bra.w		Get_Mod

Mod_Loop_pre:
	addq.l		&0x4,%sp		# erase exp(X)
#..At this point  R = 2^(-L)X; Q = 0; k = 0; and  k+j = L
Mod_Loop:
	tst.l		%d6			# test carry bit
	bgt.b		R_GT_Y

#..At this point carry = 0, R = (D1,D2), Y = (D4,D5)
	cmp.l		%d1,%d4			# compare hi(R) and hi(Y)
	bne.b		R_NE_Y
	cmp.l		%d2,%d5			# compare lo(R) and lo(Y)
	bne.b		R_NE_Y

#..At this point, R = Y
	bra.w		Rem_is_0

R_NE_Y:
#..use the borrow of the previous compare
	bcs.b		R_LT_Y			# borrow is set iff R < Y

R_GT_Y:
#..If Carry is set, then Y < (Carry,D1,D2) < 2Y. Otherwise, Carry = 0
#..and Y < (D1,D2) < 2Y. Either way, perform R - Y
	sub.l		%d5,%d2			# lo(R) - lo(Y)
	subx.l		%d4,%d1			# hi(R) - hi(Y)
	clr.l		%d6			# clear carry
	addq.l		&1,%d3			# Q := Q + 1

R_LT_Y:
#..At this point, Carry=0, R < Y. R = 2^(k-L)X - QY; k+j = L; j >= 0.
	tst.l		%d0			# see if j = 0.
	beq.b		PostLoop

	add.l		%d3,%d3			# Q := 2Q
	add.l		%d2,%d2			# lo(R) = 2lo(R)
	roxl.l		&1,%d1			# hi(R) = 2hi(R) + carry
	scs		%d6			# set Carry if 2(R) overflows
	addq.l		&1,%a1			# k := k+1
	subq.l		&1,%d0			# j := j - 1
#..At this point, R=(Carry,D1,D2) = 2^(k-L)X - QY, j+k=L, j >= 0, R < 2Y.

	bra.b		Mod_Loop

PostLoop:
#..k = L, j = 0, Carry = 0, R = (D1,D2) = X - QY, R < Y.

#..normalize R.
	mov.l		L_SCR1(%a6),%d0		# new biased expo of R
	tst.l		%d1
	bne.b		HiR_not0

HiR_0:
	mov.l		%d2,%d1
	clr.l		%d2
	sub.l		&32,%d0
	clr.l		%d6
	bfffo		%d1{&0:&32},%d6
	lsl.l		%d6,%d1
	sub.l		%d6,%d0			# (D0,D1,D2) is normalized
#						...with bias $7FFD
	bra.b		Get_Mod

HiR_not0:
	clr.l		%d6
	bfffo		%d1{&0:&32},%d6
	bmi.b		Get_Mod			# already normalized
	sub.l		%d6,%d0
	lsl.l		%d6,%d1
	mov.l		%d2,%d7			# a copy of D2
	lsl.l		%d6,%d2
	neg.l		%d6
	add.l		&32,%d6
	lsr.l		%d6,%d7
	or.l		%d7,%d1			# (D0,D1,D2) normalized

#
# (D0,D1,D2) is R with the internal exponent bias. If the exponent is
# below 0x41FE, R and Y are stored with the internal exponents as they
# are and Sc_Flag is set (the result is multiplied by Scale at Restore);
# otherwise 0x3FFE is subtracted from both exponents first.
Get_Mod:
	cmp.l		%d0,&0x000041FE
	bge.b		No_Scale
Do_Scale:
	mov.w		%d0,R(%a6)
	mov.l		%d1,R_Hi(%a6)
	mov.l		%d2,R_Lo(%a6)
	mov.l		L_SCR1(%a6),%d6
	mov.w		%d6,Y(%a6)
	mov.l		%d4,Y_Hi(%a6)
	mov.l		%d5,Y_Lo(%a6)
	fmov.x		R(%a6),%fp0		# no exception
	mov.b		&1,Sc_Flag(%a6)
	bra.b		ModOrRem
No_Scale:
	mov.l		%d1,R_Hi(%a6)
	mov.l		%d2,R_Lo(%a6)
	sub.l		&0x3FFE,%d0
	mov.w		%d0,R(%a6)
	mov.l		L_SCR1(%a6),%d6
	sub.l		&0x3FFE,%d6
	mov.l		%d6,L_SCR1(%a6)
	fmov.x		R(%a6),%fp0
	mov.w		%d6,Y(%a6)
	mov.l		%d4,Y_Hi(%a6)
	mov.l		%d5,Y_Lo(%a6)
	clr.b		Sc_Flag(%a6)

#
# For MOD (Mod_Flag = 0) go straight to the sign fix-up. For REM compare
# R with Y/2: exponents first, then mantissas.
ModOrRem:
	tst.b		Mod_Flag(%a6)
	beq.b		Fix_Sign

	mov.l		L_SCR1(%a6),%d6		# new biased expo(Y)
	subq.l		&1,%d6			# biased expo(Y/2)
	cmp.l		%d0,%d6
	blt.b		Fix_Sign
	bgt.b		Last_Sub

	cmp.l		%d1,%d4
	bne.b		Not_EQ
	cmp.l		%d2,%d5
	bne.b		Not_EQ
	bra.w		Tie_Case

Not_EQ:
	bcs.b		Fix_Sign		# borrow set: R < Y/2

Last_Sub:
#
	fsub.x		Y(%a6),%fp0		# no exceptions
	addq.l		&1,%d3			# Q := Q + 1

#
Fix_Sign:
#..Get sign of X
	mov.w		SignX(%a6),%d6
	bge.b		Get_Q
	fneg.x		%fp0

#..Get Q
#
Get_Q:
	clr.l		%d6
	mov.w		SignQ(%a6),%d6		# D6 is sign(Q)
	mov.l		&8,%d7
	lsr.l		%d7,%d6			# move sign from bit 15 to bit 7
	and.l		&0x0000007F,%d3		# 7 bits of Q
	or.l		%d6,%d3			# sign and bits of Q
#	swap		%d3
#	fmov.l		%fpsr,%d6
#	and.l		&0xFF00FFFF,%d6
#	or.l		%d3,%d6
#	fmov.l		%d6,%fpsr		# put Q in fpsr
	mov.b		%d3,FPSR_QBYTE(%a6)	# put Q in fpsr

#
Restore:
	movm.l		(%sp)+,&0xfc		# {%d2-%d7}
	mov.l		(%sp)+,%d0		# ctrl bits saved at entry
	fmov.l		%d0,%fpcr
	tst.b		Sc_Flag(%a6)
	beq.b		Finish
	mov.b		&FMUL_OP,%d1		# last inst is MUL
	fmul.x		Scale(%pc),%fp0		# may cause underflow
	bra		t_catch2
# the '040 package did this apparently to see if the dst operand for the
# preceding fmul was a denorm. but, it better not have been since the
# algorithm just got done playing with fp0 and expected no exceptions
# as a result. trust me...
#	bra		t_avoid_unsupp		# check for denorm as a
#						;result of the scaling

Finish:
	mov.b		&FMOV_OP,%d1		# last inst is MOVE
	fmov.x		%fp0,%fp0		# capture exceptions & round
	bra		t_catch2

Rem_is_0:
#..R = 2^(-j)X - Q Y = Y, thus R = 0 and quotient = 2^j (Q+1)
	addq.l		&1,%d3
	cmp.l		%d0,&8			# D0 is j
	bge.b		Q_Big

	lsl.l		%d0,%d3
	bra.b		Set_R_0

Q_Big:
	clr.l		%d3			# low 7 bits of 2^j (Q+1) are zero

Set_R_0:
	fmov.s		&0x00000000,%fp0
	clr.b		Sc_Flag(%a6)
	bra.w		Fix_Sign

Tie_Case:
#..Check parity of Q
	mov.l		%d3,%d6
	and.l		&0x00000001,%d6
	tst.l		%d6
	beq.w		Fix_Sign		# Q is even

#..Q is odd, Q := Q + 1, signX := -signX
	addq.l		&1,%d3
	mov.w		SignX(%a6),%d6
	eor.l		&0x00008000,%d6
	mov.w		%d6,SignX(%a6)
	bra.w		Fix_Sign

#########################################################################
# XDEF **************************************************************** #
#	tag(): return the optype of the input ext fp number		#
#									#
#	This routine is used by the 060FPLSP.				#
#									#
# XREF **************************************************************** #
#	None								#
#									#
# INPUT *************************************************************** #
#	a0 = pointer to extended precision operand			#
#									#
# OUTPUT ************************************************************** #
#	d0 = value of type tag						#
#		one of: NORM, INF, QNAN, SNAN, DENORM, ZERO		#
#									#
# ALGORITHM *********************************************************** #
#	Simply test the exponent, j-bit, and mantissa values to		#
# determine the type of operand.					#
#	If it's an unnormalized zero, alter the operand and force it	#
# to be a normal zero.
#
#									#
#########################################################################

# tag(): classify the extended-precision operand at (a0).
#   in:  a0 = pointer to the operand
#   out: d0.b = NORM, ZERO, DENORM, INF or QNAN, or whatever unnorm_fix
#	 leaves in d0 for an unnormalized operand. Only the low byte of
#	 d0 is written on the non-unnorm paths.
#   Every NaN is reported as QNAN here; signalling NaNs are not
#   distinguished by this routine.
	global		tag
tag:
	mov.w		FTEMP_EX(%a0), %d0	# sign + biased exponent
	andi.w		&0x7fff, %d0		# keep the exponent only
	cmpi.w		%d0, &0x7fff		# maximum exponent?
	bne.b		not_inf_or_nan_x	# no; finite encoding

# exponent is all ones: INF if the mantissa (ignoring its msb) is zero,
# otherwise a NaN.
inf_or_nan_x:
	tst.l		FTEMP_LO(%a0)
	bne.b		is_nan_x
	mov.l		FTEMP_HI(%a0), %d0
	and.l		&0x7fffffff, %d0	# msb is a don't care!
	bne.b		is_nan_x
is_inf_x:
	mov.b		&INF, %d0
	rts
is_nan_x:
	mov.b		&QNAN, %d0
	rts

# finite encodings: the msb of the mantissa (j-bit) decides NORM or not.
not_inf_or_nan_x:
	btst		&0x7,FTEMP_HI(%a0)
	bne.b		is_norm_x
not_norm_x:
	tst.w		%d0			# is exponent = 0?
	beq.b		not_unnorm_x		# yes; ZERO or DENORM
is_unnorm_x:
	bsr.l		unnorm_fix		# convert to norm,denorm,or zero
	rts
is_unnorm_reg_x:				# not referenced within tag()
	mov.b		&UNNORM, %d0
	rts
not_unnorm_x:
	tst.l		FTEMP_HI(%a0)
	bne.b		is_denorm_x
	tst.l		FTEMP_LO(%a0)
	bne.b		is_denorm_x
is_zero_x:
	mov.b		&ZERO, %d0
	rts
is_denorm_x:
	mov.b		&DENORM, %d0
	rts
is_norm_x:
	mov.b		&NORM, %d0
	rts

#############################################################

# default quiet NaN, loaded into fp0 by t_operr when OPERR is disabled
qnan:	long		0x7fff0000, 0xffffffff, 0xffffffff

#########################################################################
# XDEF **************************************************************** #
#	t_dz(): Handle 060FPLSP dz exception for "flogn" emulation.	#
#	t_dz2(): Handle 060FPLSP dz exception for "fatanh" emulation.	#
#									#
#	These routines are used by the 060FPLSP package.		#
#									#
# XREF **************************************************************** #
#	None								#
#									#
# INPUT *************************************************************** #
#	a0 = pointer to extended precision source operand.		#
#									#
# OUTPUT ************************************************************** #
#	fp0 = default DZ result.
#
#									#
# ALGORITHM *********************************************************** #
#	Transcendental emulation for the 060FPLSP has detected that	#
# a DZ exception should occur for the instruction. If DZ is disabled,	#
# return the default result.						#
#	If DZ is enabled, the dst operand should be returned unscathed	#
# in fp0 while fp1 is used to create a DZ exception so that the		#
# operating system can log that such an event occurred.			#
#									#
#########################################################################

# t_dz picks the sign of the result from the source operand at (a0);
# t_dz2 always takes the negative (-INF) path.
	global		t_dz
t_dz:
	tst.b		SRC_EX(%a0)		# check sign for neg or pos
	bpl.b		dz_pinf			# branch if pos sign

	global		t_dz2
t_dz2:
	ori.l		&dzinf_mask+neg_mask,USER_FPSR(%a6) # set N/I/DZ/ADZ

	btst		&dz_bit,FPCR_ENABLE(%a6)
	bne.b		dz_minf_ena

# dz is disabled. return a -INF.
	fmov.s		&0xff800000,%fp0	# return -INF
	rts

# dz is enabled. create a dz exception so the user can record it
# but use fp1 instead. return the dst operand unscathed in fp0.
dz_minf_ena:
	fmovm.x		EXC_FP0(%a6),&0x80	# return fp0 unscathed
	fmov.l		USER_FPCR(%a6),%fpcr
	fmov.s		&0xbf800000,%fp1	# load -1
	fdiv.s		&0x00000000,%fp1	# -1 / 0
	rts

dz_pinf:
	ori.l		&dzinf_mask,USER_FPSR(%a6) # set I/DZ/ADZ

	btst		&dz_bit,FPCR_ENABLE(%a6)
	bne.b		dz_pinf_ena

# dz is disabled. return a +INF.
	fmov.s		&0x7f800000,%fp0	# return +INF
	rts

# dz is enabled. create a dz exception so the user can record it
# but use fp1 instead. return the dst operand unscathed in fp0.
dz_pinf_ena:
	fmovm.x		EXC_FP0(%a6),&0x80	# return fp0 unscathed
	fmov.l		USER_FPCR(%a6),%fpcr
	fmov.s		&0x3f800000,%fp1	# load +1
	fdiv.s		&0x00000000,%fp1	# +1 / 0
	rts

#########################################################################
# XDEF **************************************************************** #
#	t_operr(): Handle 060FPLSP OPERR exception during emulation.	#
#									#
#	This routine is used by the 060FPLSP package.			#
#									#
# XREF **************************************************************** #
#	None.								#
#									#
# INPUT *************************************************************** #
#	fp1 = source operand						#
#									#
# OUTPUT ************************************************************** #
#	fp0 = default result						#
#	fp1 = unchanged							#
#									#
# ALGORITHM *********************************************************** #
#	An operand error should occur as the result of transcendental	#
# emulation in the 060FPLSP. If OPERR is disabled, just return a NAN	#
# in fp0. If OPERR is enabled, return the dst operand unscathed in fp0	#
# and the source operand in fp1. Use fp2 to create an OPERR exception	#
# so that the operating system can log the event.			#
#									#
#########################################################################

	global		t_operr
t_operr:
	ori.l		&opnan_mask,USER_FPSR(%a6) # set NAN/OPERR/AIOP

	btst		&operr_bit,FPCR_ENABLE(%a6)
	bne.b		operr_ena

# operr is disabled. return a QNAN in fp0
	fmovm.x		qnan(%pc),&0x80		# return QNAN
	rts

# operr is enabled. create an operr exception so the user can record it
# but use fp2 instead. return the dst operand unscathed in fp0.
# fp2 is saved and restored around the INF x 0 multiply, so it is
# unchanged on return.
operr_ena:
	fmovm.x		EXC_FP0(%a6),&0x80	# return fp0 unscathed
	fmov.l		USER_FPCR(%a6),%fpcr
	fmovm.x		&0x04,-(%sp)		# save fp2
	fmov.s		&0x7f800000,%fp2	# load +INF
	fmul.s		&0x00000000,%fp2	# +INF x 0
	fmovm.x		(%sp)+,&0x20		# restore fp2
	rts

# Extended-precision operands multiplied together by the overflow and
# underflow handlers to raise the corresponding exception.
pls_huge:
	long		0x7ffe0000,0xffffffff,0xffffffff
mns_huge:
	long		0xfffe0000,0xffffffff,0xffffffff
pls_tiny:
	long		0x00000000,0x80000000,0x00000000
mns_tiny:
	long		0x80000000,0x80000000,0x00000000

#########################################################################
# XDEF **************************************************************** #
#	t_unfl(): Handle 060FPLSP underflow exception during emulation.	#
#	t_unfl2(): Handle 060FPLSP underflow exception during		#
#		   emulation. result always positive.			#
#									#
#	This routine is used by the 060FPLSP package.			#
#									#
# XREF **************************************************************** #
#	None.								#
#									#
# INPUT *************************************************************** #
#	a0 = pointer to extended precision source operand		#
#									#
# OUTPUT ************************************************************** #
#	fp0 = default underflow result					#
#									#
# ALGORITHM *********************************************************** #
#	An underflow should occur as the result of transcendental	#
# emulation in the 060FPLSP. Create an underflow by using "fmul"	#
# and two very small numbers of appropriate sign so the operating	#
# system can log the event.
#
#									#
#########################################################################

# t_unfl():  underflow whose sign follows the source operand at (a0).
# t_unfl2(): underflow whose result is always positive. It is the exit
#	     used by the exponential routines for large negative
#	     arguments, whose true result is a tiny POSITIVE number.
#
# Both paths load the user's FPCR, multiply two tiny numbers so that the
# FPU itself raises the underflow, and copy the resulting condition-code
# byte of the FPSR into FPSR_CC. d0 is used as scratch.
#
# NOTE: t_unfl2 used to label the negative path below, which made the
# "always positive" entry return a negative result with N set. It now
# enters the positive path, matching its documented contract.
	global		t_unfl
t_unfl:
	tst.b		SRC_EX(%a0)		# is the source negative?
	bpl.b		unf_pos			# no; positive underflow

# negative underflow: (-tiny) x (+tiny)
	ori.l		&unfinx_mask+neg_mask,USER_FPSR(%a6) # set N/UNFL/INEX2/AUNFL/AINEX

	fmov.l		USER_FPCR(%a6),%fpcr
	fmovm.x		mns_tiny(%pc),&0x80
	fmul.x		pls_tiny(%pc),%fp0

	fmov.l		%fpsr,%d0
	rol.l		&0x8,%d0		# cc byte -> low byte of d0
	mov.b		%d0,FPSR_CC(%a6)
	rts

# positive underflow: (+tiny) x (+tiny)
	global		t_unfl2
t_unfl2:
unf_pos:
	ori.w		&unfinx_mask,FPSR_EXCEPT(%a6)	# set UNFL/INEX2/AUNFL/AINEX

	fmov.l		USER_FPCR(%a6),%fpcr
	fmovm.x		pls_tiny(%pc),&0x80
	fmul.x		%fp0,%fp0

	fmov.l		%fpsr,%d0
	rol.l		&0x8,%d0		# cc byte -> low byte of d0
	mov.b		%d0,FPSR_CC(%a6)
	rts

#########################################################################
# XDEF **************************************************************** #
#	t_ovfl(): Handle 060FPLSP overflow exception during emulation.	#
#		  (monadic)						#
#	t_ovfl2(): Handle 060FPLSP overflow exception during		#
#		   emulation. result always positive. (dyadic)		#
#	t_ovfl_sc(): Handle 060FPLSP overflow exception during		#
#		     emulation for "fscale".				#
#									#
#	This routine is used by the 060FPLSP package.			#
#									#
# XREF **************************************************************** #
#	None.								#
#									#
# INPUT *************************************************************** #
#	a0 = pointer to extended precision source operand		#
#									#
# OUTPUT ************************************************************** #
#	fp0 = default overflow result					#
#									#
# ALGORITHM *********************************************************** #
#	An overflow should occur as the result of transcendental	#
# emulation in the 060FPLSP. Create an overflow by using "fmul"		#
# and two very large numbers of appropriate sign so the operating	#
# system can log the event.
#
#	For t_ovfl_sc() we take special care not to lose the INEX2 bit.	#
#									#
#########################################################################

# t_ovfl_sc(): overflow exit for "fscale".
#   in:  a0 = pointer to the operand whose sign and mantissa are examined
#	 d0 = rounding precision/mode byte (precision in bits 7-6)
#   OVFL/AOVFL/AINEX are always set; INEX2 is added only if the operand's
#   normalized mantissa has bits below the selected sgl/dbl precision.
	global		t_ovfl_sc
t_ovfl_sc:
	ori.l		&ovfl_inx_mask,USER_FPSR(%a6) # set OVFL/AOVFL/AINEX

	mov.b		%d0,%d1			# fetch rnd prec,mode
	andi.b		&0xc0,%d1		# extract prec
	beq.w		ovfl_work		# prec field is zero: no INEX2 check

# the operand may be a DENORM. we have to normalize the mantissa to see
# if the result would be inexact for the given precision. make a copy of
# the dst so we don't screw up the version passed to us.
	mov.w		LOCAL_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		LOCAL_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		LOCAL_LO(%a0),FP_SCR0_LO(%a6)
	lea		FP_SCR0(%a6),%a0	# pass ptr to FP_SCR0
	movm.l		&0xc080,-(%sp)		# save d0-d1/a0
	bsr.l		norm			# normalize mantissa
	movm.l		(%sp)+,&0x0103		# restore d0-d1/a0

	cmpi.b		%d1,&0x40		# is precision sgl?
	bne.b		ovfl_sc_dbl		# no; dbl
ovfl_sc_sgl:
	tst.l		LOCAL_LO(%a0)		# is lo lw of sgl set?
	bne.b		ovfl_sc_inx		# yes
	tst.b		3+LOCAL_HI(%a0)		# is lo byte of hi lw set?
	bne.b		ovfl_sc_inx		# yes
	bra.w		ovfl_work		# don't set INEX2
ovfl_sc_dbl:
	mov.l		LOCAL_LO(%a0),%d1	# are any of lo 11 bits of
	andi.l		&0x7ff,%d1		# dbl mantissa set?
	beq.w		ovfl_work		# no; don't set INEX2
ovfl_sc_inx:
	ori.l		&inex2_mask,USER_FPSR(%a6) # set INEX2
	bra.b		ovfl_work		# continue

# t_ovfl(): overflow exit whose sign follows the operand at (a0).
# The user's FPCR is loaded and two huge numbers are multiplied so that
# the FPU raises the overflow; the FPSR condition-code byte is then
# copied into FPSR_CC. d0 is used as scratch.
	global		t_ovfl
t_ovfl:
	ori.w		&ovfinx_mask,FPSR_EXCEPT(%a6) # set OVFL/INEX2/AOVFL/AINEX
ovfl_work:
	tst.b		SRC_EX(%a0)		# is the operand negative?
	bpl.b		ovfl_p			# no
ovfl_m:
	fmov.l		USER_FPCR(%a6),%fpcr
	fmovm.x		mns_huge(%pc),&0x80
	fmul.x		pls_huge(%pc),%fp0

# neg_mask is a longword FPSR mask, so N has to be forced while the
# condition codes are still in the top byte of d0. The previous
# "ori.b &neg_mask,%d0" after the rotate applied that longword mask as a
# byte immediate and therefore OR'ed nothing into the cc byte.
	fmov.l		%fpsr,%d0
	ori.l		&neg_mask,%d0		# force N in the cc byte
	rol.l		&0x8,%d0		# cc byte -> low byte of d0
	mov.b		%d0,FPSR_CC(%a6)
	rts
ovfl_p:
	fmov.l		USER_FPCR(%a6),%fpcr
	fmovm.x		pls_huge(%pc),&0x80
	fmul.x		pls_huge(%pc),%fp0

	fmov.l		%fpsr,%d0
	rol.l		&0x8,%d0		# cc byte -> low byte of d0
	mov.b		%d0,FPSR_CC(%a6)
	rts

# t_ovfl2(): overflow exit whose result is always positive; (a0) is not
# examined.
	global		t_ovfl2
t_ovfl2:
	ori.w		&ovfinx_mask,FPSR_EXCEPT(%a6) # set OVFL/INEX2/AOVFL/AINEX
	fmov.l		USER_FPCR(%a6),%fpcr
	fmovm.x		pls_huge(%pc),&0x80
	fmul.x		pls_huge(%pc),%fp0

	fmov.l		%fpsr,%d0
	rol.l		&0x8,%d0		# cc byte -> low byte of d0
	mov.b		%d0,FPSR_CC(%a6)
	rts

#########################################################################
# XDEF **************************************************************** #
#	t_catch(): Handle 060FPLSP OVFL,UNFL,or INEX2 exception during	#
#		   emulation.						#
#	t_catch2(): Handle 060FPLSP OVFL,UNFL,or INEX2 exception during	#
#		    emulation.						#
#									#
#	These routines are used by the 060FPLSP package.		#
#									#
# XREF **************************************************************** #
#	None.
#									#
# INPUT ***************************************************************	#
#	fp0 = default underflow or overflow result			#
#									#
# OUTPUT **************************************************************	#
#	fp0 = default result						#
#									#
# ALGORITHM ***********************************************************	#
#	If an overflow or underflow occurred during the last		#
# instruction of transcendental 060FPLSP emulation, then it has already	#
# occurred and has been logged. Now we need to see if an inexact	#
# exception should occur.						#
#									#
#########################################################################

# t_catch2(): merge the current FPSR into USER_FPSR, then go straight to
# the inexact-enabled check at inx2_work.
	global		t_catch2
t_catch2:
	fmov.l		%fpsr,%d0		# fetch current FPSR
	or.l		%d0,USER_FPSR(%a6)	# accumulate it into USER_FPSR
	bra.b		inx2_work

# t_catch(): merge the current FPSR into USER_FPSR. There is no branch
# or return here; execution falls through into t_inx2, the next routine
# in the file.
	global		t_catch
t_catch:
	fmov.l		%fpsr,%d0		# fetch current FPSR
	or.l		%d0,USER_FPSR(%a6)	# accumulate it into USER_FPSR

#########################################################################
# XDEF ****************************************************************	#
#	t_inx2(): Handle inexact 060FPLSP exception during emulation.	#
#	t_pinx2(): Handle inexact 060FPLSP exception for "+" results.	#
#	t_minx2(): Handle inexact 060FPLSP exception for "-" results.	#
#									#
# XREF ****************************************************************	#
#	None.								#
#									#
# INPUT ***************************************************************	#
#	fp0 = default result						#
#									#
# OUTPUT **************************************************************	#
#	fp0 = default result						#
#									#
# ALGORITHM ***********************************************************	#
#	The last instruction of transcendental emulation for the	#
# 060FPLSP should be inexact. So, if inexact is enabled, then we create	#
# the event here by adding a large and very small number together	#
# so that the operating system can log the event.			#
# Must check, too, if the result was zero, in which case we just	#
# set the FPSR bits and return.						#
#									#
#########################################################################

# t_inx2(): branch on the FPU condition codes. "less than" goes to
# t_minx2, "equal" goes to inx2_zero, anything else falls into t_pinx2.
	global		t_inx2
t_inx2:
	fblt.w		t_minx2			# negative result
	fbeq.w		inx2_zero		# zero result

# t_pinx2(): positive result; set INEX2/AINEX only.
	global		t_pinx2
t_pinx2:
	ori.w		&inx2a_mask,FPSR_EXCEPT(%a6) # set INEX2/AINEX
	bra.b		inx2_work

# t_minx2(): negative result; set INEX2/AINEX and N, then fall into
# inx2_work.
	global		t_minx2
t_minx2:
	ori.l		&inx2a_mask+neg_mask,USER_FPSR(%a6) # set N/INEX2/AINEX

# inx2_work: if inexact is enabled in FPCR_ENABLE, add pls_tiny to +1
# in fp1 under USER_FPCR. fp0 is not written on this path.
inx2_work:
	btst		&inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
	bne.b		inx2_work_ena		# yes
	rts
inx2_work_ena:
	fmov.l		USER_FPCR(%a6),%fpcr	# insert user's exceptions
	fmov.s		&0x3f800000,%fp1	# load +1
	fadd.x		pls_tiny(%pc),%fp1	# cause exception
	rts

# inx2_zero: zero result. Only the status bits are written; no
# arithmetic is performed.
inx2_zero:
	mov.b		&z_bmask,FPSR_CC(%a6)	# ccode byte = 'Z' only
	ori.w		&inx2a_mask,2+USER_FPSR(%a6) # set INEX/AINEX
	rts

#########################################################################
# XDEF ****************************************************************	#
#	t_extdnrm(): Handle DENORM inputs in 060FPLSP.			#
#	t_resdnrm(): Handle DENORM inputs in 060FPLSP for "fscale".	#
#									#
#	This routine is used by the 060FPLSP package.			#
#									#
# XREF ****************************************************************	#
#	None.								#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision input operand		#
#									#
# OUTPUT **************************************************************	#
#	fp0 = default result						#
#									#
# ALGORITHM ***********************************************************	#
#	For all functions that have a denormalized input and that	#
# f(x)=x, this is the entry point.					#
#	DENORM value is moved using "fmove" which triggers an exception	#
# if enabled so the operating system can log the event.			#
#									#
#########################################################################

# t_extdnrm(): load the operand at (a0) into fp0 under USER_FPCR, then
# OR the resulting FPSR plus the UNFL/INEX2 group (unfinx_mask) into
# USER_FPSR.
	global		t_extdnrm
t_extdnrm:
	fmov.l		USER_FPCR(%a6),%fpcr	# move under USER_FPCR
	fmov.x		SRC_EX(%a0),%fp0	# fp0 = operand at (a0)
	fmov.l		%fpsr,%d0		# fetch FPSR left by the move
	ori.l		&unfinx_mask,%d0	# add the unfinx_mask bits
	or.l		%d0,USER_FPSR(%a6)	# accumulate into USER_FPSR
	rts

# t_resdnrm(): same as t_extdnrm, except that only the FPSR produced by
# the move is accumulated; no extra exception bits are forced.
	global		t_resdnrm
t_resdnrm:
	fmov.l		USER_FPCR(%a6),%fpcr	# move under USER_FPCR
	fmov.x		SRC_EX(%a0),%fp0	# fp0 = operand at (a0)
	fmov.l		%fpsr,%d0		# fetch FPSR left by the move
	or.l		%d0,USER_FPSR(%a6)	# accumulate into USER_FPSR
	rts

##########################################

#
# sto_cos:
#	This is used by fsincos library emulation. The correct
# values are already in fp0 and fp1 so we do nothing here.
#
	global		sto_cos
sto_cos:
	rts

##########################################

#
#	dst_qnan --- force result when destination is a NaN
#
# fp0 is loaded from DST(%a1). FPSR_CC is set to NAN, plus N when the
# sign byte at DST_EX(%a1) is negative.
#
	global		dst_qnan
dst_qnan:
	fmov.x		DST(%a1),%fp0		# fp0 = dst operand
	tst.b		DST_EX(%a1)		# test sign byte of dst
	bmi.b		dst_qnan_m		# negative
dst_qnan_p:
	mov.b		&nan_bmask,FPSR_CC(%a6)	# ccodes = NAN
	rts
dst_qnan_m:
	mov.b		&nan_bmask+neg_bmask,FPSR_CC(%a6) # ccodes = NAN,N
	rts

#
#	src_qnan --- force result when source is a NaN
#
# fp0 is loaded from SRC(%a0). FPSR_CC is set to NAN, plus N when the
# sign byte at SRC_EX(%a0) is negative.
#
	global		src_qnan
src_qnan:
	fmov.x		SRC(%a0),%fp0		# fp0 = src operand
	tst.b		SRC_EX(%a0)		# test sign byte of src
	bmi.b		src_qnan_m		# negative
src_qnan_p:
	mov.b		&nan_bmask,FPSR_CC(%a6)	# ccodes = NAN
	rts
src_qnan_m:
	mov.b		&nan_bmask+neg_bmask,FPSR_CC(%a6) # ccodes = NAN,N
	rts

##########################################

#
#	Native instruction support
#
#	Some systems may need entry points even for 68060 native
#	instructions.  These routines are provided for
#	convenience.
#
# Argument layout used by the wrappers below (offsets from %sp on
# entry; 0x0(%sp) is presumably the return address -- confirm against
# the caller):
#	monadic:	src at 0x4(%sp)
#	dyadic sgl:	dst at 0x4(%sp), src at 0x8(%sp)
#	dyadic dbl:	dst at 0x4(%sp), src at 0xc(%sp)
#	dyadic ext:	dst at 0x4(%sp), src at 0x10(%sp)
# The sgl/dbl dyadic wrappers load the dst with FPCR cleared. While the
# saved FPCR longword is on the stack every offset is 4 bytes larger,
# which is why the dst is read from 0x8(%sp). The result is left in fp0.
#
	global		_fadds_
_fadds_:
	fmov.l		%fpcr,-(%sp)		# save fpcr
	fmov.l		&0x00000000,%fpcr	# clear fpcr for load
	fmov.s		0x8(%sp),%fp0		# load sgl dst
	fmov.l		(%sp)+,%fpcr		# restore fpcr
	fadd.s		0x8(%sp),%fp0		# fadd w/ sgl src
	rts

	global		_faddd_
_faddd_:
	fmov.l		%fpcr,-(%sp)		# save fpcr
	fmov.l		&0x00000000,%fpcr	# clear fpcr for load
	fmov.d		0x8(%sp),%fp0		# load dbl dst
	fmov.l		(%sp)+,%fpcr		# restore fpcr
	fadd.d		0xc(%sp),%fp0		# fadd w/ dbl src
	rts

	global		_faddx_
_faddx_:
	fmovm.x		0x4(%sp),&0x80		# load ext dst
	fadd.x		0x10(%sp),%fp0		# fadd w/ ext src
	rts

	global		_fsubs_
_fsubs_:
	fmov.l		%fpcr,-(%sp)		# save fpcr
	fmov.l		&0x00000000,%fpcr	# clear fpcr for load
	fmov.s		0x8(%sp),%fp0		# load sgl dst
	fmov.l		(%sp)+,%fpcr		# restore fpcr
	fsub.s		0x8(%sp),%fp0		# fsub w/ sgl src
	rts

	global		_fsubd_
_fsubd_:
	fmov.l		%fpcr,-(%sp)		# save fpcr
	fmov.l		&0x00000000,%fpcr	# clear fpcr for load
	fmov.d		0x8(%sp),%fp0		# load dbl dst
	fmov.l		(%sp)+,%fpcr		# restore fpcr
	fsub.d		0xc(%sp),%fp0		# fsub w/ dbl src
	rts

	global		_fsubx_
_fsubx_:
	fmovm.x		0x4(%sp),&0x80		# load ext dst
	fsub.x		0x10(%sp),%fp0		# fsub w/ ext src
	rts

	global		_fmuls_
_fmuls_:
	fmov.l		%fpcr,-(%sp)		# save fpcr
	fmov.l		&0x00000000,%fpcr	# clear fpcr for load
	fmov.s		0x8(%sp),%fp0		# load sgl dst
	fmov.l		(%sp)+,%fpcr		# restore fpcr
	fmul.s		0x8(%sp),%fp0		# fmul w/ sgl src
	rts

	global		_fmuld_
_fmuld_:
	fmov.l		%fpcr,-(%sp)		# save fpcr
	fmov.l		&0x00000000,%fpcr	# clear fpcr for load
	fmov.d		0x8(%sp),%fp0		# load dbl dst
	fmov.l		(%sp)+,%fpcr		# restore fpcr
	fmul.d		0xc(%sp),%fp0		# fmul w/ dbl src
	rts

	global		_fmulx_
_fmulx_:
	fmovm.x		0x4(%sp),&0x80		# load ext dst
	fmul.x		0x10(%sp),%fp0		# fmul w/ ext src
	rts

	global		_fdivs_
_fdivs_:
	fmov.l		%fpcr,-(%sp)		# save fpcr
	fmov.l		&0x00000000,%fpcr	# clear fpcr for load
	fmov.s		0x8(%sp),%fp0		# load sgl dst
	fmov.l		(%sp)+,%fpcr		# restore fpcr
	fdiv.s		0x8(%sp),%fp0		# fdiv w/ sgl src
	rts

	global		_fdivd_
_fdivd_:
	fmov.l		%fpcr,-(%sp)		# save fpcr
	fmov.l		&0x00000000,%fpcr	# clear fpcr for load
	fmov.d		0x8(%sp),%fp0		# load dbl dst
	fmov.l		(%sp)+,%fpcr		# restore fpcr
	fdiv.d		0xc(%sp),%fp0		# fdiv w/ dbl src
	rts

	global		_fdivx_
_fdivx_:
	fmovm.x		0x4(%sp),&0x80		# load ext dst
	fdiv.x		0x10(%sp),%fp0		# fdiv w/ ext src
	rts

	global		_fabss_
_fabss_:
	fabs.s		0x4(%sp),%fp0		# fabs w/ sgl src
	rts

	global		_fabsd_
_fabsd_:
	fabs.d		0x4(%sp),%fp0		# fabs w/ dbl src
	rts

	global		_fabsx_
_fabsx_:
	fabs.x		0x4(%sp),%fp0		# fabs w/ ext src
	rts

	global		_fnegs_
_fnegs_:
	fneg.s		0x4(%sp),%fp0		# fneg w/ sgl src
	rts

	global		_fnegd_
_fnegd_:
	fneg.d		0x4(%sp),%fp0		# fneg w/ dbl src
	rts

	global		_fnegx_
_fnegx_:
	fneg.x		0x4(%sp),%fp0		# fneg w/ ext src
	rts

	global		_fsqrts_
_fsqrts_:
	fsqrt.s		0x4(%sp),%fp0		# fsqrt w/ sgl src
	rts

	global		_fsqrtd_
_fsqrtd_:
	fsqrt.d		0x4(%sp),%fp0		# fsqrt w/ dbl src
	rts

	global		_fsqrtx_
_fsqrtx_:
	fsqrt.x		0x4(%sp),%fp0		# fsqrt w/ ext src
	rts

	global		_fints_
_fints_:
	fint.s		0x4(%sp),%fp0		# fint w/ sgl src
	rts

	global		_fintd_
_fintd_:
	fint.d		0x4(%sp),%fp0		# fint w/ dbl src
	rts

	global		_fintx_
_fintx_:
	fint.x		0x4(%sp),%fp0		# fint w/ ext src
	rts

	global		_fintrzs_
_fintrzs_:
	fintrz.s	0x4(%sp),%fp0		# fintrz w/ sgl src
	rts
# _fintrzd_ / _fintrzx_: native "fintrz" entry points. The src operand
# is read from 0x4(%sp) and the result is left in fp0.

	global		_fintrzd_
_fintrzd_:
	fintrz.d	0x4(%sp),%fp0		# fintrz w/ dbl src
	rts

	global		_fintrzx_
_fintrzx_:
	fintrz.x	0x4(%sp),%fp0		# fintrz w/ ext src
	rts

########################################################################

#########################################################################
# src_zero(): Return signed zero according to sign of src operand.	#
#	      A non-negative operand falls through into ld_pzero().	#
#########################################################################
	global		src_zero
src_zero:
	tst.b		SRC_EX(%a0)		# get sign of src operand
	bmi.b		ld_mzero		# if neg, load neg zero

#
# ld_pzero(): return a positive zero.
#
	global		ld_pzero
ld_pzero:
	fmov.s		&0x00000000,%fp0	# load +0
	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
	rts

#
# ld_mzero(): return a negative zero.
#
	global		ld_mzero
ld_mzero:
	fmov.s		&0x80000000,%fp0	# load -0
	mov.b		&neg_bmask+z_bmask,FPSR_CC(%a6) # set 'N','Z' ccode bits
	rts

#########################################################################
# dst_zero(): Return signed zero according to sign of dst operand.	#
#########################################################################
	global		dst_zero
dst_zero:
	tst.b		DST_EX(%a1)		# get sign of dst operand
	bmi.b		ld_mzero		# if neg, load neg zero
	bra.b		ld_pzero		# load positive zero

#########################################################################
# src_inf(): Return signed inf according to sign of src operand.	#
#	     A non-negative operand falls through into ld_pinf().	#
#########################################################################
	global		src_inf
src_inf:
	tst.b		SRC_EX(%a0)		# get sign of src operand
	bmi.b		ld_minf			# if negative branch

#
# ld_pinf(): return a positive infinity.
#
# Writes FPSR_CC(%a6) = 'I' and loads fp0 with the sgl +INF pattern.
# The two stores are independent of each other.
#
	global		ld_pinf
ld_pinf:
	mov.b		&inf_bmask,FPSR_CC(%a6)	# ccode byte = 'I'
	fmov.s		&0x7f800000,%fp0	# fp0 = +INF
	rts

#
# ld_minf(): hand back -INF in fp0 with 'N' and 'I' in FPSR_CC.
#
	global		ld_minf
ld_minf:
	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # ccode byte = 'N','I'
	fmov.s		&0xff800000,%fp0	# fp0 = -INF
	rts

#########################################################################
# dst_inf(): infinity carrying the sign found at DST_EX(%a1).		#
#########################################################################
	global		dst_inf
dst_inf:
	tst.b		DST_EX(%a1)		# sign byte of dst operand
	bpl.b		ld_pinf			# sign clear -> +INF
	bra.b		ld_minf			# sign set   -> -INF

#################################################################
# szr_inf(): +INF when the src operand is positive, +ZERO when	#
#	     it is negative.					#
#	     Routine used for fetox, ftwotox, and ftentox.	#
#################################################################
	global		szr_inf
szr_inf:
	tst.b		SRC_EX(%a0)		# sign byte of src operand
	bpl.b		ld_pinf			# sign clear -> +INF
	bra.b		ld_pzero		# sign set   -> +0

#########################################################################
# sopr_inf(): +INF when the src operand is positive; a negative src	#
#	      operand is handed to the operand error routine.		#
#	      Routine used for flogn, flognp1, flog10, and flog2.	#
#########################################################################
	global		sopr_inf
sopr_inf:
	tst.b		SRC_EX(%a0)		# sign byte of src operand
	bpl.b		ld_pinf			# sign clear -> +INF
	bra.w		t_operr			# sign set   -> operand error

#################################################################
# setoxm1i(): Return minus one for a negative src operand or	#
#	      positive infinity for a positive src operand.	#
#	      Routine used for fetoxm1.				#
#################################################################
	global		setoxm1i
setoxm1i:
	tst.b		SRC_EX(%a0)		# check sign of source
	bmi.b		ld_mone			# negative; return -1
	bra.b		ld_pinf			# otherwise return +INF

#########################################################################
# src_one(): Return signed one according to sign of src operand.	#
#	     A non-negative operand falls through into ld_pone().	#
#########################################################################
	global		src_one
src_one:
	tst.b		SRC_EX(%a0)		# check sign of source
	bmi.b		ld_mone			# negative; return -1

#
# ld_pone(): return positive one.
#
	global		ld_pone
ld_pone:
	fmov.s		&0x3f800000,%fp0	# load +1
	clr.b		FPSR_CC(%a6)		# no ccode bits set
	rts

#
# ld_mone(): return negative one.
#
	global		ld_mone
ld_mone:
	fmov.s		&0xbf800000,%fp0	# load -1
	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
	rts

# Extended precision constants loaded by ld_ppi2()/ld_mpi2() below.
# The two differ only in the sign bit of the first longword.
ppiby2:	long		0x3fff0000, 0xc90fdaa2, 0x2168c235	# +pi/2
mpiby2:	long		0xbfff0000, 0xc90fdaa2, 0x2168c235	# -pi/2

#################################################################
# spi_2(): Return signed PI/2 according to sign of src operand.	#
#	   A non-negative operand falls through into ld_ppi2().	#
#################################################################
	global		spi_2
spi_2:
	tst.b		SRC_EX(%a0)		# check sign of source
	bmi.b		ld_mpi2			# negative; return -pi/2

#
# ld_ppi2(): return positive PI/2.
# d0 is written to FPCR before the load (presumably the caller's
# rounding precision/mode -- confirm against the callers).
#
	global		ld_ppi2
ld_ppi2:
	fmov.l		%d0,%fpcr		# FPCR = d0
	fmov.x		ppiby2(%pc),%fp0	# load +pi/2
	bra.w		t_pinx2			# set INEX2

#
# ld_mpi2(): return negative PI/2.
# d0 is written to FPCR before the load, as in ld_ppi2().
#
	global		ld_mpi2
ld_mpi2:
	fmov.l		%d0,%fpcr		# FPCR = d0
	fmov.x		mpiby2(%pc),%fp0	# load -pi/2
	bra.w		t_minx2			# set INEX2

####################################################
# The following routines give support for fsincos. #
####################################################

#
# ssincosz(): When the src operand is ZERO, store a one in the
#	      cosine register and return a ZERO in fp0 w/ the same sign
#	      as the src operand.
#
	global		ssincosz
ssincosz:
	fmov.s		&0x3f800000,%fp1	# cosine result: fp1 = +1
	tst.b		SRC_EX(%a0)		# test sign
	bpl.b		sincoszp		# non-negative src
	fmov.s		&0x80000000,%fp0	# return sin result in fp0
	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6) # ccodes = 'Z','N'
	rts
sincoszp:
	fmov.s		&0x00000000,%fp0	# return sin result in fp0
	mov.b		&z_bmask,FPSR_CC(%a6)	# ccodes = 'Z'
	rts

#
# ssincosi(): When the src operand is INF, store a QNAN in the cosine
#	      register and jump to the operand error routine. The jump
#	      is unconditional; the sign of the src is not examined here.
#
	global		ssincosi
ssincosi:
	fmov.x		qnan(%pc),%fp1		# load NAN
	bra.w		t_operr

#
# ssincosqnan(): When the src operand is a QNAN, store the QNAN in the cosine
#		 register and branch to the src QNAN routine.
#
	global		ssincosqnan
ssincosqnan:
	fmov.x		LOCAL_EX(%a0),%fp1	# fp1 = src operand (the QNAN)
	bra.w		src_qnan		# fp0/ccodes set by src_qnan

########################################################################

# The dispatchers below all follow one pattern: fetch the tag byte at
# DTAG(%a6) into d1 and branch on it. The "beq" directly after the load
# is taken for a tag value of zero (presumably the NORM tag -- confirm
# against the tag definitions). Any tag not matched explicitly ends up
# at the final unconditional branch.

# smod: src operand tagged NORM or DENORM (per the entry point names).
	global		smod_sdnrm
	global		smod_snorm
smod_sdnrm:
smod_snorm:
	mov.b		DTAG(%a6),%d1		# fetch dst tag
	beq.l		smod			# tag 0
	cmpi.b		%d1,&ZERO
	beq.w		smod_zro		# dst ZERO
	cmpi.b		%d1,&INF
	beq.l		t_operr			# dst INF
	cmpi.b		%d1,&DENORM
	beq.l		smod			# dst DENORM
	bra.l		dst_qnan		# anything else

# smod: src operand tagged ZERO. Every explicitly tested dst tag goes to
# the operand error routine.
	global		smod_szero
smod_szero:
	mov.b		DTAG(%a6),%d1		# fetch dst tag
	beq.l		t_operr			# tag 0
	cmpi.b		%d1,&ZERO
	beq.l		t_operr			# dst ZERO
	cmpi.b		%d1,&INF
	beq.l		t_operr			# dst INF
	cmpi.b		%d1,&DENORM
	beq.l		t_operr			# dst DENORM
	bra.l		dst_qnan		# anything else

# smod: src operand tagged INF.
	global		smod_sinf
smod_sinf:
	mov.b		DTAG(%a6),%d1		# fetch dst tag
	beq.l		smod_fpn		# tag 0
	cmpi.b		%d1,&ZERO
	beq.l		smod_zro		# dst ZERO
	cmpi.b		%d1,&INF
	beq.l		t_operr			# dst INF
	cmpi.b		%d1,&DENORM
	beq.l		smod_fpn		# dst DENORM
	bra.l		dst_qnan		# anything else

# smod_zro/srem_zro: dst is ZERO. The quotient byte gets the XOR of the
# src and dst sign bits; the result is a zero with the sign of the dst.
smod_zro:
srem_zro:
	mov.b		SRC_EX(%a0),%d1		# get src sign
	mov.b		DST_EX(%a1),%d0		# get dst sign
	eor.b		%d0,%d1			# get qbyte sign
	andi.b		&0x80,%d1		# keep only the sign bit
	mov.b		%d1,FPSR_QBYTE(%a6)	# store quotient byte
	tst.b		%d0			# dst negative?
	bpl.w		ld_pzero		# no; return +0
	bra.w		ld_mzero		# yes; return -0

# smod_fpn/srem_fpn: the result is the dst operand itself. d0 on entry
# is saved on the stack because it is reused for the sign computation.
# The NORM path pops it directly into FPCR; the DENORM path restores it
# into d0 before continuing at t_resdnrm.
smod_fpn:
srem_fpn:
	clr.b		FPSR_QBYTE(%a6)		# overwritten again below
	mov.l		%d0,-(%sp)		# save entry d0
	mov.b		SRC_EX(%a0),%d1		# get src sign
	mov.b		DST_EX(%a1),%d0		# get dst sign
	eor.b		%d0,%d1			# get qbyte sign
	andi.b		&0x80,%d1		# keep only the sign bit
	mov.b		%d1,FPSR_QBYTE(%a6)	# store quotient byte
	cmpi.b		DTAG(%a6),&DENORM	# is dst a DENORM?
	bne.b		smod_nrm		# no
	lea		DST(%a1),%a0		# a0 = ptr to dst for t_resdnrm
	mov.l		(%sp)+,%d0		# restore entry d0
	bra		t_resdnrm
smod_nrm:
	fmov.l		(%sp)+,%fpcr		# FPCR = saved entry d0
	fmov.x		DST(%a1),%fp0		# fp0 = dst operand
	tst.b		DST_EX(%a1)		# dst negative?
	bmi.b		smod_nrm_neg		# yes
	rts					# FPSR_CC is not written here

smod_nrm_neg:
	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' code
	rts

#########################################################################
# srem: src operand tagged NORM or DENORM (per the entry point names).
	global		srem_snorm
	global		srem_sdnrm
srem_sdnrm:
srem_snorm:
	mov.b		DTAG(%a6),%d1		# fetch dst tag
	beq.l		srem			# tag 0
	cmpi.b		%d1,&ZERO
	beq.w		srem_zro		# dst ZERO
	cmpi.b		%d1,&INF
	beq.l		t_operr			# dst INF
	cmpi.b		%d1,&DENORM
	beq.l		srem			# dst DENORM
	bra.l		dst_qnan		# anything else

# srem: src operand tagged ZERO.
	global		srem_szero
srem_szero:
	mov.b		DTAG(%a6),%d1		# fetch dst tag
	beq.l		t_operr			# tag 0
	cmpi.b		%d1,&ZERO
	beq.l		t_operr			# dst ZERO
	cmpi.b		%d1,&INF
	beq.l		t_operr			# dst INF
	cmpi.b		%d1,&DENORM
	beq.l		t_operr			# dst DENORM
	bra.l		dst_qnan		# anything else

# srem: src operand tagged INF.
	global		srem_sinf
srem_sinf:
	mov.b		DTAG(%a6),%d1		# fetch dst tag
	beq.w		srem_fpn		# tag 0
	cmpi.b		%d1,&ZERO
	beq.w		srem_zro		# dst ZERO
	cmpi.b		%d1,&INF
	beq.l		t_operr			# dst INF
	cmpi.b		%d1,&DENORM
	beq.l		srem_fpn		# dst DENORM
	bra.l		dst_qnan		# anything else

#########################################################################

# sscale: src operand tagged NORM or DENORM (per the entry point names).
	global		sscale_snorm
	global		sscale_sdnrm
sscale_snorm:
sscale_sdnrm:
	mov.b		DTAG(%a6),%d1		# fetch dst tag
	beq.l		sscale			# tag 0
	cmpi.b		%d1,&ZERO
	beq.l		dst_zero		# dst ZERO
	cmpi.b		%d1,&INF
	beq.l		dst_inf			# dst INF
	cmpi.b		%d1,&DENORM
	beq.l		sscale			# dst DENORM
	bra.l		dst_qnan		# anything else

# sscale: src operand tagged ZERO. Same dispatch as the entry above.
	global		sscale_szero
sscale_szero:
	mov.b		DTAG(%a6),%d1		# fetch dst tag
	beq.l		sscale			# tag 0
	cmpi.b		%d1,&ZERO
	beq.l		dst_zero		# dst ZERO
	cmpi.b		%d1,&INF
	beq.l		dst_inf			# dst INF
	cmpi.b		%d1,&DENORM
	beq.l		sscale			# dst DENORM
	bra.l		dst_qnan		# anything else

# sscale: src operand tagged INF. Only a QNAN dst avoids the operand
# error routine.
	global		sscale_sinf
sscale_sinf:
	mov.b		DTAG(%a6),%d1		# fetch dst tag
	beq.l		t_operr			# tag 0
	cmpi.b		%d1,&QNAN
	beq.l		dst_qnan		# dst QNAN
	bra.l		t_operr			# anything else

########################################################################

# sop_sqnan: a QNAN dst takes precedence; otherwise the src NaN is
# returned.
	global		sop_sqnan
sop_sqnan:
	mov.b		DTAG(%a6),%d1		# fetch dst tag
	cmpi.b		%d1,&QNAN
	beq.l		dst_qnan		# dst QNAN
	bra.l		src_qnan		# anything else

#########################################################################
# norm(): normalize the mantissa of an extended precision input. the	#
#	  input operand should not be normalized already.		#
#									#
# XDEF ****************************************************************	#
#	norm()								#
#									#
# XREF ****************************************************************	#
#	none								#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to fp extended precision operand to normalize	#
#									#
# OUTPUT **************************************************************	#
#	d0 = number of bit positions the mantissa was shifted		#
#	a0 = the input operand's mantissa is normalized; the exponent	#
#	     is unchanged.						#
#	d1 = overwritten (d2 and d3 are saved and restored)		#
#									#
#########################################################################
	global		norm
norm:
	mov.l		%d2, -(%sp)		# create some temp regs
	mov.l		%d3, -(%sp)

	mov.l		FTEMP_HI(%a0), %d0	# load hi(mantissa)
	mov.l		FTEMP_LO(%a0), %d1	# load lo(mantissa)

	bfffo		%d0{&0:&32}, %d2	# how many places to shift?
	beq.b		norm_lo			# hi(man) is all zeroes!

# hi(man) has at least one bit set: shift the 64-bit mantissa left by
# d2, moving the top d2 bits of lo(man) into the bottom of hi(man).
# NOTE(review): per the 68K bit field encoding a width of zero is taken
# as 32, so d2 = 0 (an already normalized input) would not be handled
# here -- hence the "should not be normalized already" requirement.
norm_hi:
	lsl.l		%d2, %d0		# left shift hi(man)
	bfextu		%d1{&0:%d2}, %d3	# extract lo bits

	or.l		%d3, %d0		# create hi(man)
	lsl.l		%d2, %d1		# create lo(man)

	mov.l		%d0, FTEMP_HI(%a0)	# store new hi(man)
	mov.l		%d1, FTEMP_LO(%a0)	# store new lo(man)

	mov.l		%d2, %d0		# return shift amount

	mov.l		(%sp)+, %d3		# restore temp regs
	mov.l		(%sp)+, %d2

	rts

# hi(man) is zero: the shifted lo(man) becomes the new hi(man) and the
# shift count is 32 larger. A zero lo(man) is not special-cased here.
norm_lo:
	bfffo		%d1{&0:&32}, %d2	# how many places to shift?
	lsl.l		%d2, %d1		# shift lo(man)
	add.l		&32, %d2		# add 32 to shft amount

	mov.l		%d1, FTEMP_HI(%a0)	# store hi(man)
	clr.l		FTEMP_LO(%a0)		# lo(man) is now zero

	mov.l		%d2, %d0		# return shift amount

	mov.l		(%sp)+, %d3		# restore temp regs
	mov.l		(%sp)+, %d2

	rts

#########################################################################
# unnorm_fix(): - changes an UNNORM to one of NORM, DENORM, or ZERO	#
#		- returns corresponding optype tag			#
#									#
# XDEF ****************************************************************	#
#	unnorm_fix()							#
#									#
# XREF ****************************************************************	#
#	norm() - normalize the mantissa					#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to unnormalized extended precision number		#
#									#
# OUTPUT **************************************************************	#
#	d0 = optype tag - is corrected to one of NORM, DENORM, or ZERO	#
#	a0 = input operand has been converted to a norm, denorm, or	#
#	     zero; both the exponent and mantissa are changed.		#
#	d1 = overwritten (used as scratch)				#
#									#
#########################################################################

	global		unnorm_fix
unnorm_fix:
	bfffo		FTEMP_HI(%a0){&0:&32}, %d0 # how many shifts are needed?
	bne.b		unnorm_shift		# hi(man) is not all zeroes

#
# hi(man) is all zeroes so see if any bits in lo(man) are set
#
unnorm_chk_lo:
	bfffo		FTEMP_LO(%a0){&0:&32}, %d0 # is operand really a zero?
	beq.w		unnorm_zero		# yes

	add.w		&32, %d0		# no; fix shift distance

#
# d0 = # shifts needed for complete normalization
#
unnorm_shift:
	clr.l		%d1			# clear top word
	mov.w		FTEMP_EX(%a0), %d1	# extract exponent
	and.w		&0x7fff, %d1		# strip off sgn

	cmp.w		%d0, %d1		# will denorm push exp < 0?
	bgt.b		unnorm_nrm_zero		# yes; denorm only until exp = 0

#
# exponent would not go < 0. therefore, number stays normalized
#
	sub.w		%d0, %d1		# shift exponent value
	mov.w		FTEMP_EX(%a0), %d0	# load old exponent
	and.w		&0x8000, %d0		# save old sign
	or.w		%d0, %d1		# {sgn,new exp}
	mov.w		%d1, FTEMP_EX(%a0)	# insert new exponent

	bsr.l		norm			# normalize UNNORM

	mov.b		&NORM, %d0		# return new optype tag
	rts

#
# exponent would go < 0, so only denormalize until exp = 0
# (d1 = exponent = number of bit positions to shift left)
#
unnorm_nrm_zero:
	cmp.b		%d1, &32		# is exp <= 32?
	bgt.b		unnorm_nrm_zero_lrg	# no; go handle large exponent

	bfextu		FTEMP_HI(%a0){%d1:&32}, %d0 # extract new hi(man)
	mov.l		%d0, FTEMP_HI(%a0)	# save new hi(man)

	mov.l		FTEMP_LO(%a0), %d0	# fetch old lo(man)
	lsl.l		%d1, %d0		# extract new lo(man)
	mov.l		%d0, FTEMP_LO(%a0)	# save new lo(man)

	and.w		&0x8000, FTEMP_EX(%a0)	# set exp = 0

	mov.b		&DENORM, %d0		# return new optype tag
	rts

#
# only mantissa bits set are in lo(man)
#
unnorm_nrm_zero_lrg:
	sub.w		&32, %d1		# adjust shft amt by 32

	mov.l		FTEMP_LO(%a0), %d0	# fetch old lo(man)
	lsl.l		%d1, %d0		# left shift lo(man)

	mov.l		%d0, FTEMP_HI(%a0)	# store new hi(man)
	clr.l		FTEMP_LO(%a0)		# lo(man) = 0

	and.w		&0x8000, FTEMP_EX(%a0)	# set exp = 0

	mov.b		&DENORM, %d0		# return new optype tag
	rts

#
# whole mantissa is zero so this UNNORM is actually a zero
#
unnorm_zero:
	and.w		&0x8000, FTEMP_EX(%a0)	# force exponent to zero

	mov.b		&ZERO, %d0		# fix optype tag
	rts