fpsp.S (761577B)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
M68000 Hi-Performance Microprocessor Division
M68060 Software Package
Production Release P1.00 -- October 10, 1994

M68060 Software Package Copyright © 1993, 1994 Motorola Inc.  All rights reserved.

THE SOFTWARE is provided on an "AS IS" basis and without warranty.
To the maximum extent permitted by applicable law,
MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE
and any warranty against infringement with regard to the SOFTWARE
(INCLUDING ANY MODIFIED VERSIONS THEREOF) and any accompanying written materials.

To the maximum extent permitted by applicable law,
IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
(INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS,
BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS)
ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE.
Motorola assumes no responsibility for the maintenance and support of the SOFTWARE.

You are hereby granted a copyright license to use, modify, and distribute the SOFTWARE
so long as this entire notice is retained without alteration in any modified and/or
redistributed versions, and that such modified versions are clearly identified as such.
No licenses are granted by implication, estoppel or otherwise under any patents
or trademarks of Motorola, Inc.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# freal.s:
#	This file is appended to the top of the 060FPSP package
# and contains the entry points into the package. The user, in
# effect, branches to one of the branch table entries located
# after _060FPSP_TABLE.
#	Also, subroutine stubs exist in this file (_fpsp_done for
# example) that are referenced by the FPSP package itself in order
# to call a given routine. The stub routine actually performs the
# callout. The FPSP code does a "bsr" to the stub routine. This
# extra layer of hierarchy adds a slight performance penalty but
# it makes the FPSP code easier to read and more maintainable.
#

set	_off_bsun,	0x00
set	_off_snan,	0x04
set	_off_operr,	0x08
set	_off_ovfl,	0x0c
set	_off_unfl,	0x10
set	_off_dz,	0x14
set	_off_inex,	0x18
set	_off_fline,	0x1c
set	_off_fpu_dis,	0x20
set	_off_trap,	0x24
set	_off_trace,	0x28
set	_off_access,	0x2c
set	_off_done,	0x30

set	_off_imr,	0x40
set	_off_dmr,	0x44
set	_off_dmw,	0x48
set	_off_irw,	0x4c
set	_off_irl,	0x50
set	_off_drb,	0x54
set	_off_drw,	0x58
set	_off_drl,	0x5c
set	_off_dwb,	0x60
set	_off_dww,	0x64
set	_off_dwl,	0x68

_060FPSP_TABLE:

###############################################################

# Here's the table of ENTRY POINTS for those linking the package.
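# A note on the "callout" mechanism (see the stubs that follow the
# table): the 0x80 bytes immediately preceding _060FPSP_TABLE hold one
# longword per "_off_" offset defined above. The operating system is
# expected to fill each longword with the displacement from the package
# base (_060FPSP_TABLE-0x80) to its own routine; each stub fetches that
# displacement, pushes the resulting absolute address with pea.l, and
# returns through it with rtd.
#
# Illustrative sketch only -- the handler names below are hypothetical
# and not part of the package:
#
#	long	_os_snan_hdlr - (_060FPSP_TABLE-0x80)	# at _off_snan
#	long	_os_operr_hdlr - (_060FPSP_TABLE-0x80)	# at _off_operr
#	...
#	long	_os_done_hdlr - (_060FPSP_TABLE-0x80)	# at _off_done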
	bra.l		_fpsp_snan
	short		0x0000
	bra.l		_fpsp_operr
	short		0x0000
	bra.l		_fpsp_ovfl
	short		0x0000
	bra.l		_fpsp_unfl
	short		0x0000
	bra.l		_fpsp_dz
	short		0x0000
	bra.l		_fpsp_inex
	short		0x0000
	bra.l		_fpsp_fline
	short		0x0000
	bra.l		_fpsp_unsupp
	short		0x0000
	bra.l		_fpsp_effadd
	short		0x0000

	space		56

###############################################################
	global		_fpsp_done
_fpsp_done:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_done,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_ovfl
_real_ovfl:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_ovfl,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_unfl
_real_unfl:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_unfl,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_inex
_real_inex:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_inex,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_bsun
_real_bsun:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_bsun,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_operr
_real_operr:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_operr,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_snan
_real_snan:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_snan,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_dz
_real_dz:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_dz,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_fline
_real_fline:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_fline,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_fpu_disabled
_real_fpu_disabled:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_fpu_dis,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_trap
_real_trap:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_trap,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_trace
_real_trace:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_trace,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_access
_real_access:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_access,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

#######################################

	global		_imem_read
_imem_read:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_imr,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_dmem_read
_dmem_read:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_dmr,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_dmem_write
_dmem_write:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_dmw,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_imem_read_word
_imem_read_word:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_irw,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_imem_read_long
_imem_read_long:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_irl,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_dmem_read_byte
_dmem_read_byte:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_drb,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_dmem_read_word
_dmem_read_word:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_drw,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_dmem_read_long
_dmem_read_long:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_drl,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_dmem_write_byte
_dmem_write_byte:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_dwb,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_dmem_write_word
_dmem_write_word:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_dww,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_dmem_write_long
_dmem_write_long:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_dwl,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

#
# This file contains a set of define statements for constants
# in order to promote readability within the corecode itself.
#

set	LOCAL_SIZE,	192		# stack frame size(bytes)
set	LV,		-LOCAL_SIZE	# stack offset

set	EXC_SR,		0x4		# stack status register
set	EXC_PC,		0x6		# stack pc
set	EXC_VOFF,	0xa		# stacked vector offset
set	EXC_EA,		0xc		# stacked <ea>

set	EXC_FP,		0x0		# frame pointer

set	EXC_AREGS,	-68		# offset of all address regs
set	EXC_DREGS,	-100		# offset of all data regs
set	EXC_FPREGS,	-36		# offset of all fp regs

set	EXC_A7,		EXC_AREGS+(7*4)	# offset of saved a7
set	OLD_A7,		EXC_AREGS+(6*4)	# extra copy of saved a7
set	EXC_A6,		EXC_AREGS+(6*4)	# offset of saved a6
set	EXC_A5,		EXC_AREGS+(5*4)
set	EXC_A4,		EXC_AREGS+(4*4)
set	EXC_A3,		EXC_AREGS+(3*4)
set	EXC_A2,		EXC_AREGS+(2*4)
set	EXC_A1,		EXC_AREGS+(1*4)
set	EXC_A0,		EXC_AREGS+(0*4)
set	EXC_D7,		EXC_DREGS+(7*4)
set	EXC_D6,		EXC_DREGS+(6*4)
set	EXC_D5,		EXC_DREGS+(5*4)
set	EXC_D4,		EXC_DREGS+(4*4)
set	EXC_D3,		EXC_DREGS+(3*4)
set	EXC_D2,		EXC_DREGS+(2*4)
set	EXC_D1,		EXC_DREGS+(1*4)
set	EXC_D0,		EXC_DREGS+(0*4)

set	EXC_FP0,	EXC_FPREGS+(0*12)	# offset of saved fp0
set	EXC_FP1,	EXC_FPREGS+(1*12)	# offset of saved fp1
set	EXC_FP2,	EXC_FPREGS+(2*12)	# offset of saved fp2 (not used)

set	FP_SCR1,	LV+80		# fp scratch 1
set	FP_SCR1_EX,	FP_SCR1+0
set	FP_SCR1_SGN,	FP_SCR1+2
set	FP_SCR1_HI,	FP_SCR1+4
set	FP_SCR1_LO,	FP_SCR1+8

set	FP_SCR0,	LV+68		# fp scratch 0
set	FP_SCR0_EX,	FP_SCR0+0
set	FP_SCR0_SGN,	FP_SCR0+2
set	FP_SCR0_HI,	FP_SCR0+4
set	FP_SCR0_LO,	FP_SCR0+8

set	FP_DST,		LV+56		# fp destination operand
set	FP_DST_EX,	FP_DST+0
set	FP_DST_SGN,	FP_DST+2
set	FP_DST_HI,	FP_DST+4
set	FP_DST_LO,	FP_DST+8

set	FP_SRC,		LV+44		# fp source operand
set	FP_SRC_EX,	FP_SRC+0
set	FP_SRC_SGN,	FP_SRC+2
set	FP_SRC_HI,	FP_SRC+4
set	FP_SRC_LO,	FP_SRC+8

set	USER_FPIAR,	LV+40		# FP instr address register

set	USER_FPSR,	LV+36		# FP status register
set	FPSR_CC,	USER_FPSR+0	# FPSR condition codes
set	FPSR_QBYTE,	USER_FPSR+1	# FPSR quotient byte
set	FPSR_EXCEPT,	USER_FPSR+2	# FPSR exception status byte
set	FPSR_AEXCEPT,	USER_FPSR+3	# FPSR accrued exception byte

set	USER_FPCR,	LV+32		# FP control register
set	FPCR_ENABLE,	USER_FPCR+2	# FPCR exception enable
set	FPCR_MODE,	USER_FPCR+3	# FPCR rounding mode control

set	L_SCR3,		LV+28		# integer scratch 3
set	L_SCR2,		LV+24		# integer scratch 2
set	L_SCR1,		LV+20		# integer scratch 1

set	STORE_FLG,	LV+19		# flag: operand store (ie. not fcmp/ftst)

set	EXC_TEMP2,	LV+24		# temporary space
set	EXC_TEMP,	LV+16		# temporary space

set	DTAG,		LV+15		# destination operand type
set	STAG,		LV+14		# source operand type

set	SPCOND_FLG,	LV+10		# flag: special case (see below)

set	EXC_CC,		LV+8		# saved condition codes
set	EXC_EXTWPTR,	LV+4		# saved current PC (active)
set	EXC_EXTWORD,	LV+2		# saved extension word
set	EXC_CMDREG,	LV+2		# saved extension word
set	EXC_OPWORD,	LV+0		# saved operation word

################################

# Helpful macros

set	FTEMP,		0		# offsets within an
set	FTEMP_EX,	0		# extended precision
set	FTEMP_SGN,	2		# value saved in memory.
set	FTEMP_HI,	4
set	FTEMP_LO,	8
set	FTEMP_GRS,	12

set	LOCAL,		0		# offsets within an
set	LOCAL_EX,	0		# extended precision
set	LOCAL_SGN,	2		# value saved in memory.
set	LOCAL_HI,	4
set	LOCAL_LO,	8
set	LOCAL_GRS,	12

set	DST,		0		# offsets within an
set	DST_EX,		0		# extended precision
set	DST_HI,		4		# value saved in memory.
set	DST_LO,		8

set	SRC,		0		# offsets within an
set	SRC_EX,		0		# extended precision
set	SRC_HI,		4		# value saved in memory.
set	SRC_LO,		8

set	SGL_LO,		0x3f81		# min sgl prec exponent
set	SGL_HI,		0x407e		# max sgl prec exponent
set	DBL_LO,		0x3c01		# min dbl prec exponent
set	DBL_HI,		0x43fe		# max dbl prec exponent
set	EXT_LO,		0x0		# min ext prec exponent
set	EXT_HI,		0x7ffe		# max ext prec exponent

set	EXT_BIAS,	0x3fff		# extended precision bias
set	SGL_BIAS,	0x007f		# single precision bias
set	DBL_BIAS,	0x03ff		# double precision bias

set	NORM,		0x00		# operand type for STAG/DTAG
set	ZERO,		0x01		# operand type for STAG/DTAG
set	INF,		0x02		# operand type for STAG/DTAG
set	QNAN,		0x03		# operand type for STAG/DTAG
set	DENORM,		0x04		# operand type for STAG/DTAG
set	SNAN,		0x05		# operand type for STAG/DTAG
set	UNNORM,		0x06		# operand type for STAG/DTAG

##################
# FPSR/FPCR bits #
##################
set	neg_bit,	0x3		# negative result
set	z_bit,		0x2		# zero result
set	inf_bit,	0x1		# infinite result
set	nan_bit,	0x0		# NAN result

set	q_sn_bit,	0x7		# sign bit of quotient byte

set	bsun_bit,	7		# branch on unordered
set	snan_bit,	6		# signalling NAN
set	operr_bit,	5		# operand error
set	ovfl_bit,	4		# overflow
set	unfl_bit,	3		# underflow
set	dz_bit,		2		# divide by zero
set	inex2_bit,	1		# inexact result 2
set	inex1_bit,	0		# inexact result 1

set	aiop_bit,	7		# accrued illegal operation bit
set	aovfl_bit,	6		# accrued overflow bit
set	aunfl_bit,	5		# accrued underflow bit
set	adz_bit,	4		# accrued dz bit
set	ainex_bit,	3		# accrued inexact bit

#############################
# FPSR individual bit masks #
#############################
set	neg_mask,	0x08000000	# negative bit mask (lw)
set	inf_mask,	0x02000000	# infinity bit mask (lw)
set	z_mask,		0x04000000	# zero bit mask (lw)
set	nan_mask,	0x01000000	# nan bit mask (lw)

set	neg_bmask,	0x08		# negative bit mask (byte)
set	inf_bmask,	0x02		# infinity bit mask (byte)
set	z_bmask,	0x04		# zero bit mask (byte)
set	nan_bmask,	0x01		# nan bit mask (byte)

set	bsun_mask,	0x00008000	# bsun exception mask
set	snan_mask,	0x00004000	# snan exception mask
set	operr_mask,	0x00002000	# operr exception mask
set	ovfl_mask,	0x00001000	# overflow exception mask
set	unfl_mask,	0x00000800	# underflow exception mask
set	dz_mask,	0x00000400	# dz exception mask
set	inex2_mask,	0x00000200	# inex2 exception mask
set	inex1_mask,	0x00000100	# inex1 exception mask

set	aiop_mask,	0x00000080	# accrued illegal operation
set	aovfl_mask,	0x00000040	# accrued overflow
set	aunfl_mask,	0x00000020	# accrued underflow
set	adz_mask,	0x00000010	# accrued divide by zero
set	ainex_mask,	0x00000008	# accrued inexact

######################################
# FPSR combinations used in the FPSP #
######################################
set	dzinf_mask,	inf_mask+dz_mask+adz_mask
set	opnan_mask,	nan_mask+operr_mask+aiop_mask
set	nzi_mask,	0x01ffffff	# clears N, Z, and I
set	unfinx_mask,	unfl_mask+inex2_mask+aunfl_mask+ainex_mask
set	unf2inx_mask,	unfl_mask+inex2_mask+ainex_mask
set	ovfinx_mask,	ovfl_mask+inex2_mask+aovfl_mask+ainex_mask
set	inx1a_mask,	inex1_mask+ainex_mask
set	inx2a_mask,	inex2_mask+ainex_mask
set	snaniop_mask,	nan_mask+snan_mask+aiop_mask
set	snaniop2_mask,	snan_mask+aiop_mask
set	naniop_mask,	nan_mask+aiop_mask
set	neginf_mask,	neg_mask+inf_mask
set	infaiop_mask,	inf_mask+aiop_mask
set	negz_mask,	neg_mask+z_mask
set	opaop_mask,	operr_mask+aiop_mask
set	unfl_inx_mask,	unfl_mask+aunfl_mask+ainex_mask
set	ovfl_inx_mask,	ovfl_mask+aovfl_mask+ainex_mask

#########
# misc. #
#########
set	rnd_stky_bit,	29		# stky bit pos in longword

set	sign_bit,	0x7		# sign bit
set	signan_bit,	0x6		# signalling nan bit

set	sgl_thresh,	0x3f81		# minimum sgl exponent
set	dbl_thresh,	0x3c01		# minimum dbl exponent

set	x_mode,		0x0		# extended precision
set	s_mode,		0x4		# single precision
set	d_mode,		0x8		# double precision

set	rn_mode,	0x0		# round-to-nearest
set	rz_mode,	0x1		# round-to-zero
set	rm_mode,	0x2		# round-to-minus-infinity
set	rp_mode,	0x3		# round-to-plus-infinity

set	mantissalen,	64		# length of mantissa in bits

set	BYTE,		1		# len(byte) == 1 byte
set	WORD,		2		# len(word) == 2 bytes
set	LONG,		4		# len(longword) == 4 bytes

set	BSUN_VEC,	0xc0		# bsun vector offset
set	INEX_VEC,	0xc4		# inexact vector offset
set	DZ_VEC,		0xc8		# dz vector offset
set	UNFL_VEC,	0xcc		# unfl vector offset
set	OPERR_VEC,	0xd0		# operr vector offset
set	OVFL_VEC,	0xd4		# ovfl vector offset
set	SNAN_VEC,	0xd8		# snan vector offset

###########################
# SPecial CONDition FLaGs #
###########################
set	ftrapcc_flg,	0x01		# flag bit: ftrapcc exception
set	fbsun_flg,	0x02		# flag bit: bsun exception
set	mia7_flg,	0x04		# flag bit: (a7)+ <ea>
set	mda7_flg,	0x08		# flag bit: -(a7) <ea>
set	fmovm_flg,	0x40		# flag bit: fmovm instruction
set	immed_flg,	0x80		# flag bit: &<data> <ea>

set	ftrapcc_bit,	0x0
set	fbsun_bit,	0x1
set	mia7_bit,	0x2
set	mda7_bit,	0x3
set	immed_bit,	0x7

##################################
# TRANSCENDENTAL "LAST-OP" FLAGS #
##################################
set	FMUL_OP,	0x0		# fmul instr performed last
set	FDIV_OP,	0x1		# fdiv performed last
set	FADD_OP,	0x2		# fadd performed last
set	FMOV_OP,	0x3		# fmov performed last

#############
# CONSTANTS #
#############
T1:	long		0x40C62D38,0xD3D64634	# 16381 LOG2 LEAD
T2:	long		0x3D6F90AE,0xB1E75CC7	# 16381 LOG2 TRAIL

PI:	long		0x40000000,0xC90FDAA2,0x2168C235,0x00000000
PIBY2:	long		0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000

TWOBYPI:
	long		0x3FE45F30,0x6DC9C883

#########################################################################
# XDEF **************************************************************** #
#	_fpsp_ovfl(): 060FPSP entry point for FP Overflow exception.	#
#									#
#	This handler should be the first code executed upon taking the	#
#	FP Overflow exception in an operating system.			#
#									#
# XREF **************************************************************** #
#	_imem_read_long() - read instruction longword			#
#	fix_skewed_ops() - adjust src operand in fsave frame		#
#	set_tag_x() - determine optype of src/dst operands		#
#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
#	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
#	load_fpn2() - load dst operand from FP regfile			#
#	fout() - emulate an opclass 3 instruction			#
#	tbl_unsupp - address of table of emulation routines for opclass 0,2 #
#	_fpsp_done() - "callout" for 060FPSP exit (all work done!)	#
#	_real_ovfl() - "callout" for Overflow exception enabled code	#
#	_real_inex() - "callout" for Inexact exception enabled code	#
#	_real_trace() - "callout" for Trace exception code		#
#									#
# INPUT *************************************************************** #
#	- The system stack contains the FP Ovfl exception stack frame	#
#	- The fsave frame contains the source operand			#
#									#
# OUTPUT ************************************************************** #
#	Overflow Exception enabled:					#
#	- The system stack is unchanged					#
#	- The fsave frame contains the adjusted src op for opclass 0,2	#
#	Overflow Exception disabled:					#
#	- The system stack is unchanged					#
#	- The "exception present" flag in the fsave frame is cleared	#
#									#
# ALGORITHM *********************************************************** #
#	On the 060, if an FP overflow is present as the result of any	#
# instruction, the 060 will take an overflow exception whether the	#
# exception is enabled or disabled in the FPCR. For the disabled case,	#
# this handler emulates the instruction to determine what the correct	#
# default result should be for the operation. This default result is	#
# then stored in either the FP regfile, data regfile, or memory.	#
# Finally, the handler exits through the "callout" _fpsp_done()	#
# denoting that no exceptional conditions exist within the machine.	#
#	If the exception is enabled, then this handler must create the	#
# exceptional operand and place it in the fsave state frame, and store	#
# the default result (only if the instruction is opclass 3). For	#
# exceptions enabled, this handler must exit through the "callout"	#
# _real_ovfl() so that the operating system enabled overflow handler	#
# can handle this case.						#
#	Two other conditions exist. First, if overflow was disabled	#
# but the inexact exception was enabled, this handler must exit	#
# through the "callout" _real_inex() regardless of whether the result	#
# was inexact.								#
#	Also, in the case of an opclass three instruction where	#
# overflow was disabled and the trace exception was enabled, this	#
# handler must exit through the "callout" _real_trace().		#
#									#
#########################################################################

	global		_fpsp_ovfl
_fpsp_ovfl:

#$#	sub.l		&24,%sp			# make room for src/dst

	link.w		%a6,&-LOCAL_SIZE	# init stack frame

	fsave		FP_SRC(%a6)		# grab the "busy" frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack

# the FPIAR holds the "current PC" of the faulting instruction
	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch the instruction words
	mov.l		%d0,EXC_OPWORD(%a6)

##############################################################################

	btst		&0x5,EXC_CMDREG(%a6)	# is instr an fmove out?
	bne.w		fovfl_out

	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		fix_skewed_ops		# fix src op

# since, I believe, only NORMs and DENORMs can come through here,
# maybe we can avoid the subroutine call.
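# (note: set_tag_x returns one of the operand-type codes defined above
#  -- NORM, ZERO, INF, QNAN, DENORM, SNAN, or UNNORM -- in %d0; the
#  handler simply records that tag in STAG below.)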
667 lea FP_SRC(%a6),%a0 # pass: ptr to src op 668 bsr.l set_tag_x # tag the operand type 669 mov.b %d0,STAG(%a6) # maybe NORM,DENORM 670 671# bit five of the fp extension word separates the monadic and dyadic operations 672# that can pass through fpsp_ovfl(). remember that fcmp, ftst, and fsincos 673# will never take this exception. 674 btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic? 675 beq.b fovfl_extract # monadic 676 677 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg 678 bsr.l load_fpn2 # load dst into FP_DST 679 680 lea FP_DST(%a6),%a0 # pass: ptr to dst op 681 bsr.l set_tag_x # tag the operand type 682 cmpi.b %d0,&UNNORM # is operand an UNNORM? 683 bne.b fovfl_op2_done # no 684 bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO 685fovfl_op2_done: 686 mov.b %d0,DTAG(%a6) # save dst optype tag 687 688fovfl_extract: 689 690#$# mov.l FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6) 691#$# mov.l FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6) 692#$# mov.l FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6) 693#$# mov.l FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6) 694#$# mov.l FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6) 695#$# mov.l FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6) 696 697 clr.l %d0 698 mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode 699 700 mov.b 1+EXC_CMDREG(%a6),%d1 701 andi.w &0x007f,%d1 # extract extension 702 703 andi.l &0x00ff01ff,USER_FPSR(%a6) # zero all but accured field 704 705 fmov.l &0x0,%fpcr # zero current control regs 706 fmov.l &0x0,%fpsr 707 708 lea FP_SRC(%a6),%a0 709 lea FP_DST(%a6),%a1 710 711# maybe we can make these entry points ONLY the OVFL entry points of each routine. 712 mov.l (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr 713 jsr (tbl_unsupp.l,%pc,%d1.l*1) 714 715# the operation has been emulated. the result is in fp0. 716# the EXOP, if an exception occurred, is in fp1. 717# we must save the default result regardless of whether 718# traps are enabled or disabled. 719 bfextu EXC_CMDREG(%a6){&6:&3},%d0 720 bsr.l store_fpreg 721 722# the exceptional possibilities we have left ourselves with are ONLY overflow 723# and inexact. and, the inexact is such that overflow occurred and was disabled 724# but inexact was enabled. 725 btst &ovfl_bit,FPCR_ENABLE(%a6) 726 bne.b fovfl_ovfl_on 727 728 btst &inex2_bit,FPCR_ENABLE(%a6) 729 bne.b fovfl_inex_on 730 731 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 732 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 733 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 734 735 unlk %a6 736#$# add.l &24,%sp 737 bra.l _fpsp_done 738 739# overflow is enabled AND overflow, of course, occurred. so, we have the EXOP 740# in fp1. now, simply jump to _real_ovfl()! 741fovfl_ovfl_on: 742 fmovm.x &0x40,FP_SRC(%a6) # save EXOP (fp1) to stack 743 744 mov.w &0xe005,2+FP_SRC(%a6) # save exc status 745 746 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 747 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 748 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 749 750 frestore FP_SRC(%a6) # do this after fmovm,other f<op>s! 751 752 unlk %a6 753 754 bra.l _real_ovfl 755 756# overflow occurred but is disabled. meanwhile, inexact is enabled. Therefore, 757# we must jump to real_inex(). 
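# (the sequence below rewrites the stacked vector offset to 0xc4 -- the
#  inexact vector -- and plants an 0xe001 status word in the fsave
#  image, so that the frestore leaves a pending inexact exception for
#  the operating system's _real_inex() handler to process.)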
758fovfl_inex_on: 759 760 fmovm.x &0x40,FP_SRC(%a6) # save EXOP (fp1) to stack 761 762 mov.b &0xc4,1+EXC_VOFF(%a6) # vector offset = 0xc4 763 mov.w &0xe001,2+FP_SRC(%a6) # save exc status 764 765 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 766 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 767 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 768 769 frestore FP_SRC(%a6) # do this after fmovm,other f<op>s! 770 771 unlk %a6 772 773 bra.l _real_inex 774 775######################################################################## 776fovfl_out: 777 778 779#$# mov.l FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6) 780#$# mov.l FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6) 781#$# mov.l FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6) 782 783# the src operand is definitely a NORM(!), so tag it as such 784 mov.b &NORM,STAG(%a6) # set src optype tag 785 786 clr.l %d0 787 mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode 788 789 and.l &0xffff00ff,USER_FPSR(%a6) # zero all but accured field 790 791 fmov.l &0x0,%fpcr # zero current control regs 792 fmov.l &0x0,%fpsr 793 794 lea FP_SRC(%a6),%a0 # pass ptr to src operand 795 796 bsr.l fout 797 798 btst &ovfl_bit,FPCR_ENABLE(%a6) 799 bne.w fovfl_ovfl_on 800 801 btst &inex2_bit,FPCR_ENABLE(%a6) 802 bne.w fovfl_inex_on 803 804 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 805 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 806 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 807 808 unlk %a6 809#$# add.l &24,%sp 810 811 btst &0x7,(%sp) # is trace on? 812 beq.l _fpsp_done # no 813 814 fmov.l %fpiar,0x8(%sp) # "Current PC" is in FPIAR 815 mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x024 816 bra.l _real_trace 817 818######################################################################### 819# XDEF **************************************************************** # 820# _fpsp_unfl(): 060FPSP entry point for FP Underflow exception. # 821# # 822# This handler should be the first code executed upon taking the # 823# FP Underflow exception in an operating system. # 824# # 825# XREF **************************************************************** # 826# _imem_read_long() - read instruction longword # 827# fix_skewed_ops() - adjust src operand in fsave frame # 828# set_tag_x() - determine optype of src/dst operands # 829# store_fpreg() - store opclass 0 or 2 result to FP regfile # 830# unnorm_fix() - change UNNORM operands to NORM or ZERO # 831# load_fpn2() - load dst operand from FP regfile # 832# fout() - emulate an opclass 3 instruction # 833# tbl_unsupp - add of table of emulation routines for opclass 0,2 # 834# _fpsp_done() - "callout" for 060FPSP exit (all work done!) 
# 835# _real_ovfl() - "callout" for Overflow exception enabled code # 836# _real_inex() - "callout" for Inexact exception enabled code # 837# _real_trace() - "callout" for Trace exception code # 838# # 839# INPUT *************************************************************** # 840# - The system stack contains the FP Unfl exception stack frame # 841# - The fsave frame contains the source operand # 842# # 843# OUTPUT ************************************************************** # 844# Underflow Exception enabled: # 845# - The system stack is unchanged # 846# - The fsave frame contains the adjusted src op for opclass 0,2 # 847# Underflow Exception disabled: # 848# - The system stack is unchanged # 849# - The "exception present" flag in the fsave frame is cleared # 850# # 851# ALGORITHM *********************************************************** # 852# On the 060, if an FP underflow is present as the result of any # 853# instruction, the 060 will take an underflow exception whether the # 854# exception is enabled or disabled in the FPCR. For the disabled case, # 855# This handler emulates the instruction to determine what the correct # 856# default result should be for the operation. This default result is # 857# then stored in either the FP regfile, data regfile, or memory. # 858# Finally, the handler exits through the "callout" _fpsp_done() # 859# denoting that no exceptional conditions exist within the machine. # 860# If the exception is enabled, then this handler must create the # 861# exceptional operand and plave it in the fsave state frame, and store # 862# the default result (only if the instruction is opclass 3). For # 863# exceptions enabled, this handler must exit through the "callout" # 864# _real_unfl() so that the operating system enabled overflow handler # 865# can handle this case. # 866# Two other conditions exist. First, if underflow was disabled # 867# but the inexact exception was enabled and the result was inexact, # 868# this handler must exit through the "callout" _real_inex(). # 869# was inexact. # 870# Also, in the case of an opclass three instruction where # 871# underflow was disabled and the trace exception was enabled, this # 872# handler must exit through the "callout" _real_trace(). # 873# # 874######################################################################### 875 876 global _fpsp_unfl 877_fpsp_unfl: 878 879#$# sub.l &24,%sp # make room for src/dst 880 881 link.w %a6,&-LOCAL_SIZE # init stack frame 882 883 fsave FP_SRC(%a6) # grab the "busy" frame 884 885 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 886 fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs 887 fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack 888 889# the FPIAR holds the "current PC" of the faulting instruction 890 mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6) 891 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 892 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr 893 bsr.l _imem_read_long # fetch the instruction words 894 mov.l %d0,EXC_OPWORD(%a6) 895 896############################################################################## 897 898 btst &0x5,EXC_CMDREG(%a6) # is instr an fmove out? 899 bne.w funfl_out 900 901 902 lea FP_SRC(%a6),%a0 # pass: ptr to src op 903 bsr.l fix_skewed_ops # fix src op 904 905 lea FP_SRC(%a6),%a0 # pass: ptr to src op 906 bsr.l set_tag_x # tag the operand type 907 mov.b %d0,STAG(%a6) # maybe NORM,DENORM 908 909# bit five of the fp ext word separates the monadic and dyadic operations 910# that can pass through fpsp_unfl(). 
remember that fcmp, and ftst 911# will never take this exception. 912 btst &0x5,1+EXC_CMDREG(%a6) # is op monadic or dyadic? 913 beq.b funfl_extract # monadic 914 915# now, what's left that's not dyadic is fsincos. we can distinguish it 916# from all dyadics by the '0110xxx pattern 917 btst &0x4,1+EXC_CMDREG(%a6) # is op an fsincos? 918 bne.b funfl_extract # yes 919 920 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg 921 bsr.l load_fpn2 # load dst into FP_DST 922 923 lea FP_DST(%a6),%a0 # pass: ptr to dst op 924 bsr.l set_tag_x # tag the operand type 925 cmpi.b %d0,&UNNORM # is operand an UNNORM? 926 bne.b funfl_op2_done # no 927 bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO 928funfl_op2_done: 929 mov.b %d0,DTAG(%a6) # save dst optype tag 930 931funfl_extract: 932 933#$# mov.l FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6) 934#$# mov.l FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6) 935#$# mov.l FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6) 936#$# mov.l FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6) 937#$# mov.l FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6) 938#$# mov.l FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6) 939 940 clr.l %d0 941 mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode 942 943 mov.b 1+EXC_CMDREG(%a6),%d1 944 andi.w &0x007f,%d1 # extract extension 945 946 andi.l &0x00ff01ff,USER_FPSR(%a6) 947 948 fmov.l &0x0,%fpcr # zero current control regs 949 fmov.l &0x0,%fpsr 950 951 lea FP_SRC(%a6),%a0 952 lea FP_DST(%a6),%a1 953 954# maybe we can make these entry points ONLY the OVFL entry points of each routine. 955 mov.l (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr 956 jsr (tbl_unsupp.l,%pc,%d1.l*1) 957 958 bfextu EXC_CMDREG(%a6){&6:&3},%d0 959 bsr.l store_fpreg 960 961# The `060 FPU multiplier hardware is such that if the result of a 962# multiply operation is the smallest possible normalized number 963# (0x00000000_80000000_00000000), then the machine will take an 964# underflow exception. Since this is incorrect, we need to check 965# if our emulation, after re-doing the operation, decided that 966# no underflow was called for. We do these checks only in 967# funfl_{unfl,inex}_on() because w/ both exceptions disabled, this 968# special case will simply exit gracefully with the correct result. 969 970# the exceptional possibilities we have left ourselves with are ONLY overflow 971# and inexact. and, the inexact is such that overflow occurred and was disabled 972# but inexact was enabled. 973 btst &unfl_bit,FPCR_ENABLE(%a6) 974 bne.b funfl_unfl_on 975 976funfl_chkinex: 977 btst &inex2_bit,FPCR_ENABLE(%a6) 978 bne.b funfl_inex_on 979 980funfl_exit: 981 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 982 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 983 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 984 985 unlk %a6 986#$# add.l &24,%sp 987 bra.l _fpsp_done 988 989# overflow is enabled AND overflow, of course, occurred. so, we have the EXOP 990# in fp1 (don't forget to save fp0). what to do now? 991# well, we simply have to get to go to _real_unfl()! 992funfl_unfl_on: 993 994# The `060 FPU multiplier hardware is such that if the result of a 995# multiply operation is the smallest possible normalized number 996# (0x00000000_80000000_00000000), then the machine will take an 997# underflow exception. Since this is incorrect, we check here to see 998# if our emulation, after re-doing the operation, decided that 999# no underflow was called for. 
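# (the btst below re-checks the FPSR produced by the emulation: if the
#  re-done operation shows that no underflow really occurred, control
#  drops back to funfl_chkinex and, from there, to the normal exit.)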
1000 btst &unfl_bit,FPSR_EXCEPT(%a6) 1001 beq.w funfl_chkinex 1002 1003funfl_unfl_on2: 1004 fmovm.x &0x40,FP_SRC(%a6) # save EXOP (fp1) to stack 1005 1006 mov.w &0xe003,2+FP_SRC(%a6) # save exc status 1007 1008 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 1009 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 1010 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 1011 1012 frestore FP_SRC(%a6) # do this after fmovm,other f<op>s! 1013 1014 unlk %a6 1015 1016 bra.l _real_unfl 1017 1018# underflow occurred but is disabled. meanwhile, inexact is enabled. Therefore, 1019# we must jump to real_inex(). 1020funfl_inex_on: 1021 1022# The `060 FPU multiplier hardware is such that if the result of a 1023# multiply operation is the smallest possible normalized number 1024# (0x00000000_80000000_00000000), then the machine will take an 1025# underflow exception. 1026# But, whether bogus or not, if inexact is enabled AND it occurred, 1027# then we have to branch to real_inex. 1028 1029 btst &inex2_bit,FPSR_EXCEPT(%a6) 1030 beq.w funfl_exit 1031 1032funfl_inex_on2: 1033 1034 fmovm.x &0x40,FP_SRC(%a6) # save EXOP to stack 1035 1036 mov.b &0xc4,1+EXC_VOFF(%a6) # vector offset = 0xc4 1037 mov.w &0xe001,2+FP_SRC(%a6) # save exc status 1038 1039 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 1040 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 1041 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 1042 1043 frestore FP_SRC(%a6) # do this after fmovm,other f<op>s! 1044 1045 unlk %a6 1046 1047 bra.l _real_inex 1048 1049####################################################################### 1050funfl_out: 1051 1052 1053#$# mov.l FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6) 1054#$# mov.l FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6) 1055#$# mov.l FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6) 1056 1057# the src operand is definitely a NORM(!), so tag it as such 1058 mov.b &NORM,STAG(%a6) # set src optype tag 1059 1060 clr.l %d0 1061 mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode 1062 1063 and.l &0xffff00ff,USER_FPSR(%a6) # zero all but accured field 1064 1065 fmov.l &0x0,%fpcr # zero current control regs 1066 fmov.l &0x0,%fpsr 1067 1068 lea FP_SRC(%a6),%a0 # pass ptr to src operand 1069 1070 bsr.l fout 1071 1072 btst &unfl_bit,FPCR_ENABLE(%a6) 1073 bne.w funfl_unfl_on2 1074 1075 btst &inex2_bit,FPCR_ENABLE(%a6) 1076 bne.w funfl_inex_on2 1077 1078 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 1079 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 1080 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 1081 1082 unlk %a6 1083#$# add.l &24,%sp 1084 1085 btst &0x7,(%sp) # is trace on? 1086 beq.l _fpsp_done # no 1087 1088 fmov.l %fpiar,0x8(%sp) # "Current PC" is in FPIAR 1089 mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x024 1090 bra.l _real_trace 1091 1092######################################################################### 1093# XDEF **************************************************************** # 1094# _fpsp_unsupp(): 060FPSP entry point for FP "Unimplemented # 1095# Data Type" exception. # 1096# # 1097# This handler should be the first code executed upon taking the # 1098# FP Unimplemented Data Type exception in an operating system. 
# 1099# # 1100# XREF **************************************************************** # 1101# _imem_read_{word,long}() - read instruction word/longword # 1102# fix_skewed_ops() - adjust src operand in fsave frame # 1103# set_tag_x() - determine optype of src/dst operands # 1104# store_fpreg() - store opclass 0 or 2 result to FP regfile # 1105# unnorm_fix() - change UNNORM operands to NORM or ZERO # 1106# load_fpn2() - load dst operand from FP regfile # 1107# load_fpn1() - load src operand from FP regfile # 1108# fout() - emulate an opclass 3 instruction # 1109# tbl_unsupp - add of table of emulation routines for opclass 0,2 # 1110# _real_inex() - "callout" to operating system inexact handler # 1111# _fpsp_done() - "callout" for exit; work all done # 1112# _real_trace() - "callout" for Trace enabled exception # 1113# funimp_skew() - adjust fsave src ops to "incorrect" value # 1114# _real_snan() - "callout" for SNAN exception # 1115# _real_operr() - "callout" for OPERR exception # 1116# _real_ovfl() - "callout" for OVFL exception # 1117# _real_unfl() - "callout" for UNFL exception # 1118# get_packed() - fetch packed operand from memory # 1119# # 1120# INPUT *************************************************************** # 1121# - The system stack contains the "Unimp Data Type" stk frame # 1122# - The fsave frame contains the ssrc op (for UNNORM/DENORM) # 1123# # 1124# OUTPUT ************************************************************** # 1125# If Inexact exception (opclass 3): # 1126# - The system stack is changed to an Inexact exception stk frame # 1127# If SNAN exception (opclass 3): # 1128# - The system stack is changed to an SNAN exception stk frame # 1129# If OPERR exception (opclass 3): # 1130# - The system stack is changed to an OPERR exception stk frame # 1131# If OVFL exception (opclass 3): # 1132# - The system stack is changed to an OVFL exception stk frame # 1133# If UNFL exception (opclass 3): # 1134# - The system stack is changed to an UNFL exception stack frame # 1135# If Trace exception enabled: # 1136# - The system stack is changed to a Trace exception stack frame # 1137# Else: (normal case) # 1138# - Correct result has been stored as appropriate # 1139# # 1140# ALGORITHM *********************************************************** # 1141# Two main instruction types can enter here: (1) DENORM or UNNORM # 1142# unimplemented data types. These can be either opclass 0,2 or 3 # 1143# instructions, and (2) PACKED unimplemented data format instructions # 1144# also of opclasses 0,2, or 3. # 1145# For UNNORM/DENORM opclass 0 and 2, the handler fetches the src # 1146# operand from the fsave state frame and the dst operand (if dyadic) # 1147# from the FP register file. The instruction is then emulated by # 1148# choosing an emulation routine from a table of routines indexed by # 1149# instruction type. Once the instruction has been emulated and result # 1150# saved, then we check to see if any enabled exceptions resulted from # 1151# instruction emulation. If none, then we exit through the "callout" # 1152# _fpsp_done(). If there is an enabled FP exception, then we insert # 1153# this exception into the FPU in the fsave state frame and then exit # 1154# through _fpsp_done(). # 1155# PACKED opclass 0 and 2 is similar in how the instruction is # 1156# emulated and exceptions handled. The differences occur in how the # 1157# handler loads the packed op (by calling get_packed() routine) and # 1158# by the fact that a Trace exception could be pending for PACKED ops. 
# 1159# If a Trace exception is pending, then the current exception stack # 1160# frame is changed to a Trace exception stack frame and an exit is # 1161# made through _real_trace(). # 1162# For UNNORM/DENORM opclass 3, the actual move out to memory is # 1163# performed by calling the routine fout(). If no exception should occur # 1164# as the result of emulation, then an exit either occurs through # 1165# _fpsp_done() or through _real_trace() if a Trace exception is pending # 1166# (a Trace stack frame must be created here, too). If an FP exception # 1167# should occur, then we must create an exception stack frame of that # 1168# type and jump to either _real_snan(), _real_operr(), _real_inex(), # 1169# _real_unfl(), or _real_ovfl() as appropriate. PACKED opclass 3 # 1170# emulation is performed in a similar manner. # 1171# # 1172######################################################################### 1173 1174# 1175# (1) DENORM and UNNORM (unimplemented) data types: 1176# 1177# post-instruction 1178# ***************** 1179# * EA * 1180# pre-instruction * * 1181# ***************** ***************** 1182# * 0x0 * 0x0dc * * 0x3 * 0x0dc * 1183# ***************** ***************** 1184# * Next * * Next * 1185# * PC * * PC * 1186# ***************** ***************** 1187# * SR * * SR * 1188# ***************** ***************** 1189# 1190# (2) PACKED format (unsupported) opclasses two and three: 1191# ***************** 1192# * EA * 1193# * * 1194# ***************** 1195# * 0x2 * 0x0dc * 1196# ***************** 1197# * Next * 1198# * PC * 1199# ***************** 1200# * SR * 1201# ***************** 1202# 1203 global _fpsp_unsupp 1204_fpsp_unsupp: 1205 1206 link.w %a6,&-LOCAL_SIZE # init stack frame 1207 1208 fsave FP_SRC(%a6) # save fp state 1209 1210 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 1211 fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs 1212 fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack 1213 1214 btst &0x5,EXC_SR(%a6) # user or supervisor mode? 1215 bne.b fu_s 1216fu_u: 1217 mov.l %usp,%a0 # fetch user stack pointer 1218 mov.l %a0,EXC_A7(%a6) # save on stack 1219 bra.b fu_cont 1220# if the exception is an opclass zero or two unimplemented data type 1221# exception, then the a7' calculated here is wrong since it doesn't 1222# stack an ea. however, we don't need an a7' for this case anyways. 1223fu_s: 1224 lea 0x4+EXC_EA(%a6),%a0 # load old a7' 1225 mov.l %a0,EXC_A7(%a6) # save on stack 1226 1227fu_cont: 1228 1229# the FPIAR holds the "current PC" of the faulting instruction 1230# the FPIAR should be set correctly for ALL exceptions passing through 1231# this point. 1232 mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6) 1233 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 1234 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr 1235 bsr.l _imem_read_long # fetch the instruction words 1236 mov.l %d0,EXC_OPWORD(%a6) # store OPWORD and EXTWORD 1237 1238############################ 1239 1240 clr.b SPCOND_FLG(%a6) # clear special condition flag 1241 1242# Separate opclass three (fpn-to-mem) ops since they have a different 1243# stack frame and protocol. 1244 btst &0x5,EXC_CMDREG(%a6) # is it an fmove out? 1245 bne.w fu_out # yes 1246 1247# Separate packed opclass two instructions. 1248 bfextu EXC_CMDREG(%a6){&0:&6},%d0 1249 cmpi.b %d0,&0x13 1250 beq.w fu_in_pack 1251 1252 1253# I'm not sure at this point what FPSR bits are valid for this instruction. 
1254# so, since the emulation routines re-create them anyways, zero exception field 1255 andi.l &0x00ff00ff,USER_FPSR(%a6) # zero exception field 1256 1257 fmov.l &0x0,%fpcr # zero current control regs 1258 fmov.l &0x0,%fpsr 1259 1260# Opclass two w/ memory-to-fpn operation will have an incorrect extended 1261# precision format if the src format was single or double and the 1262# source data type was an INF, NAN, DENORM, or UNNORM 1263 lea FP_SRC(%a6),%a0 # pass ptr to input 1264 bsr.l fix_skewed_ops 1265 1266# we don't know whether the src operand or the dst operand (or both) is the 1267# UNNORM or DENORM. call the function that tags the operand type. if the 1268# input is an UNNORM, then convert it to a NORM, DENORM, or ZERO. 1269 lea FP_SRC(%a6),%a0 # pass: ptr to src op 1270 bsr.l set_tag_x # tag the operand type 1271 cmpi.b %d0,&UNNORM # is operand an UNNORM? 1272 bne.b fu_op2 # no 1273 bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO 1274 1275fu_op2: 1276 mov.b %d0,STAG(%a6) # save src optype tag 1277 1278 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg 1279 1280# bit five of the fp extension word separates the monadic and dyadic operations 1281# at this point 1282 btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic? 1283 beq.b fu_extract # monadic 1284 cmpi.b 1+EXC_CMDREG(%a6),&0x3a # is operation an ftst? 1285 beq.b fu_extract # yes, so it's monadic, too 1286 1287 bsr.l load_fpn2 # load dst into FP_DST 1288 1289 lea FP_DST(%a6),%a0 # pass: ptr to dst op 1290 bsr.l set_tag_x # tag the operand type 1291 cmpi.b %d0,&UNNORM # is operand an UNNORM? 1292 bne.b fu_op2_done # no 1293 bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO 1294fu_op2_done: 1295 mov.b %d0,DTAG(%a6) # save dst optype tag 1296 1297fu_extract: 1298 clr.l %d0 1299 mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec 1300 1301 bfextu 1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension 1302 1303 lea FP_SRC(%a6),%a0 1304 lea FP_DST(%a6),%a1 1305 1306 mov.l (tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr 1307 jsr (tbl_unsupp.l,%pc,%d1.l*1) 1308 1309# 1310# Exceptions in order of precedence: 1311# BSUN : none 1312# SNAN : all dyadic ops 1313# OPERR : fsqrt(-NORM) 1314# OVFL : all except ftst,fcmp 1315# UNFL : all except ftst,fcmp 1316# DZ : fdiv 1317# INEX2 : all except ftst,fcmp 1318# INEX1 : none (packed doesn't go through here) 1319# 1320 1321# we determine the highest priority exception(if any) set by the 1322# emulation routine that has also been enabled by the user. 1323 mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions set 1324 bne.b fu_in_ena # some are enabled 1325 1326fu_in_cont: 1327# fcmp and ftst do not store any result. 1328 mov.b 1+EXC_CMDREG(%a6),%d0 # fetch extension 1329 andi.b &0x38,%d0 # extract bits 3-5 1330 cmpi.b %d0,&0x38 # is instr fcmp or ftst? 1331 beq.b fu_in_exit # yes 1332 1333 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg 1334 bsr.l store_fpreg # store the result 1335 1336fu_in_exit: 1337 1338 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 1339 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 1340 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 1341 1342 unlk %a6 1343 1344 bra.l _fpsp_done 1345 1346fu_in_ena: 1347 and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled 1348 bfffo %d0{&24:&8},%d0 # find highest priority exception 1349 bne.b fu_in_exc # there is at least one set 1350 1351# 1352# No exceptions occurred that were also enabled. 
Now: 1353# 1354# if (OVFL && ovfl_disabled && inexact_enabled) { 1355# branch to _real_inex() (even if the result was exact!); 1356# } else { 1357# save the result in the proper fp reg (unless the op is fcmp or ftst); 1358# return; 1359# } 1360# 1361 btst &ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set? 1362 beq.b fu_in_cont # no 1363 1364fu_in_ovflchk: 1365 btst &inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled? 1366 beq.b fu_in_cont # no 1367 bra.w fu_in_exc_ovfl # go insert overflow frame 1368 1369# 1370# An exception occurred and that exception was enabled: 1371# 1372# shift enabled exception field into lo byte of d0; 1373# if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) || 1374# ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) { 1375# /* 1376# * this is the case where we must call _real_inex() now or else 1377# * there will be no other way to pass it the exceptional operand 1378# */ 1379# call _real_inex(); 1380# } else { 1381# restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU; 1382# } 1383# 1384fu_in_exc: 1385 subi.l &24,%d0 # fix offset to be 0-8 1386 cmpi.b %d0,&0x6 # is exception INEX? (6) 1387 bne.b fu_in_exc_exit # no 1388 1389# the enabled exception was inexact 1390 btst &unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur? 1391 bne.w fu_in_exc_unfl # yes 1392 btst &ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur? 1393 bne.w fu_in_exc_ovfl # yes 1394 1395# here, we insert the correct fsave status value into the fsave frame for the 1396# corresponding exception. the operand in the fsave frame should be the original 1397# src operand. 1398fu_in_exc_exit: 1399 mov.l %d0,-(%sp) # save d0 1400 bsr.l funimp_skew # skew sgl or dbl inputs 1401 mov.l (%sp)+,%d0 # restore d0 1402 1403 mov.w (tbl_except.b,%pc,%d0.w*2),2+FP_SRC(%a6) # create exc status 1404 1405 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 1406 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 1407 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 1408 1409 frestore FP_SRC(%a6) # restore src op 1410 1411 unlk %a6 1412 1413 bra.l _fpsp_done 1414 1415tbl_except: 1416 short 0xe000,0xe006,0xe004,0xe005 1417 short 0xe003,0xe002,0xe001,0xe001 1418 1419fu_in_exc_unfl: 1420 mov.w &0x4,%d0 1421 bra.b fu_in_exc_exit 1422fu_in_exc_ovfl: 1423 mov.w &0x03,%d0 1424 bra.b fu_in_exc_exit 1425 1426# If the input operand to this operation was opclass two and a single 1427# or double precision denorm, inf, or nan, the operand needs to be 1428# "corrected" in order to have the proper equivalent extended precision 1429# number. 1430 global fix_skewed_ops 1431fix_skewed_ops: 1432 bfextu EXC_CMDREG(%a6){&0:&6},%d0 # extract opclass,src fmt 1433 cmpi.b %d0,&0x11 # is class = 2 & fmt = sgl? 1434 beq.b fso_sgl # yes 1435 cmpi.b %d0,&0x15 # is class = 2 & fmt = dbl? 1436 beq.b fso_dbl # yes 1437 rts # no 1438 1439fso_sgl: 1440 mov.w LOCAL_EX(%a0),%d0 # fetch src exponent 1441 andi.w &0x7fff,%d0 # strip sign 1442 cmpi.w %d0,&0x3f80 # is |exp| == $3f80? 1443 beq.b fso_sgl_dnrm_zero # yes 1444 cmpi.w %d0,&0x407f # no; is |exp| == $407f? 1445 beq.b fso_infnan # yes 1446 rts # no 1447 1448fso_sgl_dnrm_zero: 1449 andi.l &0x7fffffff,LOCAL_HI(%a0) # clear j-bit 1450 beq.b fso_zero # it's a skewed zero 1451fso_sgl_dnrm: 1452# here, we count on norm not to alter a0... 
1453 bsr.l norm # normalize mantissa 1454 neg.w %d0 # -shft amt 1455 addi.w &0x3f81,%d0 # adjust new exponent 1456 andi.w &0x8000,LOCAL_EX(%a0) # clear old exponent 1457 or.w %d0,LOCAL_EX(%a0) # insert new exponent 1458 rts 1459 1460fso_zero: 1461 andi.w &0x8000,LOCAL_EX(%a0) # clear bogus exponent 1462 rts 1463 1464fso_infnan: 1465 andi.b &0x7f,LOCAL_HI(%a0) # clear j-bit 1466 ori.w &0x7fff,LOCAL_EX(%a0) # make exponent = $7fff 1467 rts 1468 1469fso_dbl: 1470 mov.w LOCAL_EX(%a0),%d0 # fetch src exponent 1471 andi.w &0x7fff,%d0 # strip sign 1472 cmpi.w %d0,&0x3c00 # is |exp| == $3c00? 1473 beq.b fso_dbl_dnrm_zero # yes 1474 cmpi.w %d0,&0x43ff # no; is |exp| == $43ff? 1475 beq.b fso_infnan # yes 1476 rts # no 1477 1478fso_dbl_dnrm_zero: 1479 andi.l &0x7fffffff,LOCAL_HI(%a0) # clear j-bit 1480 bne.b fso_dbl_dnrm # it's a skewed denorm 1481 tst.l LOCAL_LO(%a0) # is it a zero? 1482 beq.b fso_zero # yes 1483fso_dbl_dnrm: 1484# here, we count on norm not to alter a0... 1485 bsr.l norm # normalize mantissa 1486 neg.w %d0 # -shft amt 1487 addi.w &0x3c01,%d0 # adjust new exponent 1488 andi.w &0x8000,LOCAL_EX(%a0) # clear old exponent 1489 or.w %d0,LOCAL_EX(%a0) # insert new exponent 1490 rts 1491 1492################################################################# 1493 1494# fmove out took an unimplemented data type exception. 1495# the src operand is in FP_SRC. Call _fout() to write out the result and 1496# to determine which exceptions, if any, to take. 1497fu_out: 1498 1499# Separate packed move outs from the UNNORM and DENORM move outs. 1500 bfextu EXC_CMDREG(%a6){&3:&3},%d0 1501 cmpi.b %d0,&0x3 1502 beq.w fu_out_pack 1503 cmpi.b %d0,&0x7 1504 beq.w fu_out_pack 1505 1506 1507# I'm not sure at this point what FPSR bits are valid for this instruction. 1508# so, since the emulation routines re-create them anyways, zero exception field. 1509# fmove out doesn't affect ccodes. 1510 and.l &0xffff00ff,USER_FPSR(%a6) # zero exception field 1511 1512 fmov.l &0x0,%fpcr # zero current control regs 1513 fmov.l &0x0,%fpsr 1514 1515# the src can ONLY be a DENORM or an UNNORM! so, don't make any big subroutine 1516# call here. just figure out what it is... 1517 mov.w FP_SRC_EX(%a6),%d0 # get exponent 1518 andi.w &0x7fff,%d0 # strip sign 1519 beq.b fu_out_denorm # it's a DENORM 1520 1521 lea FP_SRC(%a6),%a0 1522 bsr.l unnorm_fix # yes; fix it 1523 1524 mov.b %d0,STAG(%a6) 1525 1526 bra.b fu_out_cont 1527fu_out_denorm: 1528 mov.b &DENORM,STAG(%a6) 1529fu_out_cont: 1530 1531 clr.l %d0 1532 mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec 1533 1534 lea FP_SRC(%a6),%a0 # pass ptr to src operand 1535 1536 mov.l (%a6),EXC_A6(%a6) # in case a6 changes 1537 bsr.l fout # call fmove out routine 1538 1539# Exceptions in order of precedence: 1540# BSUN : none 1541# SNAN : none 1542# OPERR : fmove.{b,w,l} out of large UNNORM 1543# OVFL : fmove.{s,d} 1544# UNFL : fmove.{s,d,x} 1545# DZ : none 1546# INEX2 : all 1547# INEX1 : none (packed doesn't travel through here) 1548 1549# determine the highest priority exception(if any) set by the 1550# emulation routine that has also been enabled by the user. 1551 mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled 1552 bne.w fu_out_ena # some are enabled 1553 1554fu_out_done: 1555 1556 mov.l EXC_A6(%a6),(%a6) # in case a6 changed 1557 1558# on extended precision opclass three instructions using pre-decrement or 1559# post-increment addressing mode, the address register is not updated. 
is the 1560# address register was the stack pointer used from user mode, then let's update 1561# it here. if it was used from supervisor mode, then we have to handle this 1562# as a special case. 1563 btst &0x5,EXC_SR(%a6) 1564 bne.b fu_out_done_s 1565 1566 mov.l EXC_A7(%a6),%a0 # restore a7 1567 mov.l %a0,%usp 1568 1569fu_out_done_cont: 1570 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 1571 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 1572 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 1573 1574 unlk %a6 1575 1576 btst &0x7,(%sp) # is trace on? 1577 bne.b fu_out_trace # yes 1578 1579 bra.l _fpsp_done 1580 1581# is the ea mode pre-decrement of the stack pointer from supervisor mode? 1582# ("fmov.x fpm,-(a7)") if so, 1583fu_out_done_s: 1584 cmpi.b SPCOND_FLG(%a6),&mda7_flg 1585 bne.b fu_out_done_cont 1586 1587# the extended precision result is still in fp0. but, we need to save it 1588# somewhere on the stack until we can copy it to its final resting place. 1589# here, we're counting on the top of the stack to be the old place-holders 1590# for fp0/fp1 which have already been restored. that way, we can write 1591# over those destinations with the shifted stack frame. 1592 fmovm.x &0x80,FP_SRC(%a6) # put answer on stack 1593 1594 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 1595 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 1596 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 1597 1598 mov.l (%a6),%a6 # restore frame pointer 1599 1600 mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp) 1601 mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp) 1602 1603# now, copy the result to the proper place on the stack 1604 mov.l LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp) 1605 mov.l LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp) 1606 mov.l LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp) 1607 1608 add.l &LOCAL_SIZE-0x8,%sp 1609 1610 btst &0x7,(%sp) 1611 bne.b fu_out_trace 1612 1613 bra.l _fpsp_done 1614 1615fu_out_ena: 1616 and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled 1617 bfffo %d0{&24:&8},%d0 # find highest priority exception 1618 bne.b fu_out_exc # there is at least one set 1619 1620# no exceptions were set. 1621# if a disabled overflow occurred and inexact was enabled but the result 1622# was exact, then a branch to _real_inex() is made. 1623 btst &ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set? 1624 beq.w fu_out_done # no 1625 1626fu_out_ovflchk: 1627 btst &inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled? 1628 beq.w fu_out_done # no 1629 bra.w fu_inex # yes 1630 1631# 1632# The fp move out that took the "Unimplemented Data Type" exception was 1633# being traced. Since the stack frames are similar, get the "current" PC 1634# from FPIAR and put it in the trace stack frame then jump to _real_trace(). 1635# 1636# UNSUPP FRAME TRACE FRAME 1637# ***************** ***************** 1638# * EA * * Current * 1639# * * * PC * 1640# ***************** ***************** 1641# * 0x3 * 0x0dc * * 0x2 * 0x024 * 1642# ***************** ***************** 1643# * Next * * Next * 1644# * PC * * PC * 1645# ***************** ***************** 1646# * SR * * SR * 1647# ***************** ***************** 1648# 1649fu_out_trace: 1650 mov.w &0x2024,0x6(%sp) 1651 fmov.l %fpiar,0x8(%sp) 1652 bra.l _real_trace 1653 1654# an exception occurred and that exception was enabled. 1655fu_out_exc: 1656 subi.l &24,%d0 # fix offset to be 0-8 1657 1658# we don't mess with the existing fsave frame. 
just re-insert it and 1659# jump to the "_real_{}()" handler... 1660 mov.w (tbl_fu_out.b,%pc,%d0.w*2),%d0 1661 jmp (tbl_fu_out.b,%pc,%d0.w*1) 1662 1663 swbeg &0x8 1664tbl_fu_out: 1665 short tbl_fu_out - tbl_fu_out # BSUN can't happen 1666 short tbl_fu_out - tbl_fu_out # SNAN can't happen 1667 short fu_operr - tbl_fu_out # OPERR 1668 short fu_ovfl - tbl_fu_out # OVFL 1669 short fu_unfl - tbl_fu_out # UNFL 1670 short tbl_fu_out - tbl_fu_out # DZ can't happen 1671 short fu_inex - tbl_fu_out # INEX2 1672 short tbl_fu_out - tbl_fu_out # INEX1 won't make it here 1673 1674# for snan,operr,ovfl,unfl, src op is still in FP_SRC so just 1675# frestore it. 1676fu_snan: 1677 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 1678 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 1679 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 1680 1681 mov.w &0x30d8,EXC_VOFF(%a6) # vector offset = 0xd8 1682 mov.w &0xe006,2+FP_SRC(%a6) 1683 1684 frestore FP_SRC(%a6) 1685 1686 unlk %a6 1687 1688 1689 bra.l _real_snan 1690 1691fu_operr: 1692 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 1693 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 1694 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 1695 1696 mov.w &0x30d0,EXC_VOFF(%a6) # vector offset = 0xd0 1697 mov.w &0xe004,2+FP_SRC(%a6) 1698 1699 frestore FP_SRC(%a6) 1700 1701 unlk %a6 1702 1703 1704 bra.l _real_operr 1705 1706fu_ovfl: 1707 fmovm.x &0x40,FP_SRC(%a6) # save EXOP to the stack 1708 1709 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 1710 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 1711 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 1712 1713 mov.w &0x30d4,EXC_VOFF(%a6) # vector offset = 0xd4 1714 mov.w &0xe005,2+FP_SRC(%a6) 1715 1716 frestore FP_SRC(%a6) # restore EXOP 1717 1718 unlk %a6 1719 1720 bra.l _real_ovfl 1721 1722# underflow can happen for extended precision. extended precision opclass 1723# three instruction exceptions don't update the stack pointer. so, if the 1724# exception occurred from user mode, then simply update a7 and exit normally. 1725# if the exception occurred from supervisor mode, check if 1726fu_unfl: 1727 mov.l EXC_A6(%a6),(%a6) # restore a6 1728 1729 btst &0x5,EXC_SR(%a6) 1730 bne.w fu_unfl_s 1731 1732 mov.l EXC_A7(%a6),%a0 # restore a7 whether we need 1733 mov.l %a0,%usp # to or not... 1734 1735fu_unfl_cont: 1736 fmovm.x &0x40,FP_SRC(%a6) # save EXOP to the stack 1737 1738 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 1739 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 1740 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 1741 1742 mov.w &0x30cc,EXC_VOFF(%a6) # vector offset = 0xcc 1743 mov.w &0xe003,2+FP_SRC(%a6) 1744 1745 frestore FP_SRC(%a6) # restore EXOP 1746 1747 unlk %a6 1748 1749 bra.l _real_unfl 1750 1751fu_unfl_s: 1752 cmpi.b SPCOND_FLG(%a6),&mda7_flg # was the <ea> mode -(sp)? 1753 bne.b fu_unfl_cont 1754 1755# the extended precision result is still in fp0. but, we need to save it 1756# somewhere on the stack until we can copy it to its final resting place 1757# (where the exc frame is currently). make sure it's not at the top of the 1758# frame or it will get overwritten when the exc stack frame is shifted "down". 
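#
# note (illustrative, offsets inferred from the code that follows): the
# format-$3 frame here is SR, PC, format/vector and <ea>, 12 bytes total.
# the code below slides that frame 0xc bytes toward lower addresses, then
# copies the 12-byte extended-precision default result into the space the
# old frame occupied -- which is exactly where "fmov.x fpN,-(a7)" would
# have stored it had the pre-decremented write completed. the final
# "add.l &LOCAL_SIZE-0x8,%sp" leaves sp pointing at the shifted frame.
#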
1759 fmovm.x &0x80,FP_SRC(%a6) # put answer on stack 1760 fmovm.x &0x40,FP_DST(%a6) # put EXOP on stack 1761 1762 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 1763 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 1764 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 1765 1766 mov.w &0x30cc,EXC_VOFF(%a6) # vector offset = 0xcc 1767 mov.w &0xe003,2+FP_DST(%a6) 1768 1769 frestore FP_DST(%a6) # restore EXOP 1770 1771 mov.l (%a6),%a6 # restore frame pointer 1772 1773 mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp) 1774 mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp) 1775 mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp) 1776 1777# now, copy the result to the proper place on the stack 1778 mov.l LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp) 1779 mov.l LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp) 1780 mov.l LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp) 1781 1782 add.l &LOCAL_SIZE-0x8,%sp 1783 1784 bra.l _real_unfl 1785 1786# fmove in and out enter here. 1787fu_inex: 1788 fmovm.x &0x40,FP_SRC(%a6) # save EXOP to the stack 1789 1790 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 1791 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 1792 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 1793 1794 mov.w &0x30c4,EXC_VOFF(%a6) # vector offset = 0xc4 1795 mov.w &0xe001,2+FP_SRC(%a6) 1796 1797 frestore FP_SRC(%a6) # restore EXOP 1798 1799 unlk %a6 1800 1801 1802 bra.l _real_inex 1803 1804######################################################################### 1805######################################################################### 1806fu_in_pack: 1807 1808 1809# I'm not sure at this point what FPSR bits are valid for this instruction. 1810# so, since the emulation routines re-create them anyways, zero exception field 1811 andi.l &0x0ff00ff,USER_FPSR(%a6) # zero exception field 1812 1813 fmov.l &0x0,%fpcr # zero current control regs 1814 fmov.l &0x0,%fpsr 1815 1816 bsr.l get_packed # fetch packed src operand 1817 1818 lea FP_SRC(%a6),%a0 # pass ptr to src 1819 bsr.l set_tag_x # set src optype tag 1820 1821 mov.b %d0,STAG(%a6) # save src optype tag 1822 1823 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg 1824 1825# bit five of the fp extension word separates the monadic and dyadic operations 1826# at this point 1827 btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic? 1828 beq.b fu_extract_p # monadic 1829 cmpi.b 1+EXC_CMDREG(%a6),&0x3a # is operation an ftst? 1830 beq.b fu_extract_p # yes, so it's monadic, too 1831 1832 bsr.l load_fpn2 # load dst into FP_DST 1833 1834 lea FP_DST(%a6),%a0 # pass: ptr to dst op 1835 bsr.l set_tag_x # tag the operand type 1836 cmpi.b %d0,&UNNORM # is operand an UNNORM? 
1837 bne.b fu_op2_done_p # no 1838 bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO 1839fu_op2_done_p: 1840 mov.b %d0,DTAG(%a6) # save dst optype tag 1841 1842fu_extract_p: 1843 clr.l %d0 1844 mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec 1845 1846 bfextu 1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension 1847 1848 lea FP_SRC(%a6),%a0 1849 lea FP_DST(%a6),%a1 1850 1851 mov.l (tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr 1852 jsr (tbl_unsupp.l,%pc,%d1.l*1) 1853 1854# 1855# Exceptions in order of precedence: 1856# BSUN : none 1857# SNAN : all dyadic ops 1858# OPERR : fsqrt(-NORM) 1859# OVFL : all except ftst,fcmp 1860# UNFL : all except ftst,fcmp 1861# DZ : fdiv 1862# INEX2 : all except ftst,fcmp 1863# INEX1 : all 1864# 1865 1866# we determine the highest priority exception(if any) set by the 1867# emulation routine that has also been enabled by the user. 1868 mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled 1869 bne.w fu_in_ena_p # some are enabled 1870 1871fu_in_cont_p: 1872# fcmp and ftst do not store any result. 1873 mov.b 1+EXC_CMDREG(%a6),%d0 # fetch extension 1874 andi.b &0x38,%d0 # extract bits 3-5 1875 cmpi.b %d0,&0x38 # is instr fcmp or ftst? 1876 beq.b fu_in_exit_p # yes 1877 1878 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg 1879 bsr.l store_fpreg # store the result 1880 1881fu_in_exit_p: 1882 1883 btst &0x5,EXC_SR(%a6) # user or supervisor? 1884 bne.w fu_in_exit_s_p # supervisor 1885 1886 mov.l EXC_A7(%a6),%a0 # update user a7 1887 mov.l %a0,%usp 1888 1889fu_in_exit_cont_p: 1890 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 1891 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 1892 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 1893 1894 unlk %a6 # unravel stack frame 1895 1896 btst &0x7,(%sp) # is trace on? 1897 bne.w fu_trace_p # yes 1898 1899 bra.l _fpsp_done # exit to os 1900 1901# the exception occurred in supervisor mode. check to see if the 1902# addressing mode was (a7)+. if so, we'll need to shift the 1903# stack frame "up". 1904fu_in_exit_s_p: 1905 btst &mia7_bit,SPCOND_FLG(%a6) # was ea mode (a7)+ 1906 beq.b fu_in_exit_cont_p # no 1907 1908 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 1909 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 1910 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 1911 1912 unlk %a6 # unravel stack frame 1913 1914# shift the stack frame "up". we don't really care about the <ea> field. 1915 mov.l 0x4(%sp),0x10(%sp) 1916 mov.l 0x0(%sp),0xc(%sp) 1917 add.l &0xc,%sp 1918 1919 btst &0x7,(%sp) # is trace on? 1920 bne.w fu_trace_p # yes 1921 1922 bra.l _fpsp_done # exit to os 1923 1924fu_in_ena_p: 1925 and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled & set 1926 bfffo %d0{&24:&8},%d0 # find highest priority exception 1927 bne.b fu_in_exc_p # at least one was set 1928 1929# 1930# No exceptions occurred that were also enabled. Now: 1931# 1932# if (OVFL && ovfl_disabled && inexact_enabled) { 1933# branch to _real_inex() (even if the result was exact!); 1934# } else { 1935# save the result in the proper fp reg (unless the op is fcmp or ftst); 1936# return; 1937# } 1938# 1939 btst &ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set? 1940 beq.w fu_in_cont_p # no 1941 1942fu_in_ovflchk_p: 1943 btst &inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled? 
1944 beq.w fu_in_cont_p # no 1945 bra.w fu_in_exc_ovfl_p # do _real_inex() now 1946 1947# 1948# An exception occurred and that exception was enabled: 1949# 1950# shift enabled exception field into lo byte of d0; 1951# if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) || 1952# ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) { 1953# /* 1954# * this is the case where we must call _real_inex() now or else 1955# * there will be no other way to pass it the exceptional operand 1956# */ 1957# call _real_inex(); 1958# } else { 1959# restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU; 1960# } 1961# 1962fu_in_exc_p: 1963 subi.l &24,%d0 # fix offset to be 0-8 1964 cmpi.b %d0,&0x6 # is exception INEX? (6 or 7) 1965 blt.b fu_in_exc_exit_p # no 1966 1967# the enabled exception was inexact 1968 btst &unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur? 1969 bne.w fu_in_exc_unfl_p # yes 1970 btst &ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur? 1971 bne.w fu_in_exc_ovfl_p # yes 1972 1973# here, we insert the correct fsave status value into the fsave frame for the 1974# corresponding exception. the operand in the fsave frame should be the original 1975# src operand. 1976# as a reminder for future predicted pain and agony, we are passing in fsave the 1977# "non-skewed" operand for cases of sgl and dbl src INFs,NANs, and DENORMs. 1978# this is INCORRECT for enabled SNAN which would give to the user the skewed SNAN!!! 1979fu_in_exc_exit_p: 1980 btst &0x5,EXC_SR(%a6) # user or supervisor? 1981 bne.w fu_in_exc_exit_s_p # supervisor 1982 1983 mov.l EXC_A7(%a6),%a0 # update user a7 1984 mov.l %a0,%usp 1985 1986fu_in_exc_exit_cont_p: 1987 mov.w (tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6) 1988 1989 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 1990 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 1991 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 1992 1993 frestore FP_SRC(%a6) # restore src op 1994 1995 unlk %a6 1996 1997 btst &0x7,(%sp) # is trace enabled? 1998 bne.w fu_trace_p # yes 1999 2000 bra.l _fpsp_done 2001 2002tbl_except_p: 2003 short 0xe000,0xe006,0xe004,0xe005 2004 short 0xe003,0xe002,0xe001,0xe001 2005 2006fu_in_exc_ovfl_p: 2007 mov.w &0x3,%d0 2008 bra.w fu_in_exc_exit_p 2009 2010fu_in_exc_unfl_p: 2011 mov.w &0x4,%d0 2012 bra.w fu_in_exc_exit_p 2013 2014fu_in_exc_exit_s_p: 2015 btst &mia7_bit,SPCOND_FLG(%a6) 2016 beq.b fu_in_exc_exit_cont_p 2017 2018 mov.w (tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6) 2019 2020 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 2021 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 2022 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 2023 2024 frestore FP_SRC(%a6) # restore src op 2025 2026 unlk %a6 # unravel stack frame 2027 2028# shift stack frame "up". who cares about <ea> field. 2029 mov.l 0x4(%sp),0x10(%sp) 2030 mov.l 0x0(%sp),0xc(%sp) 2031 add.l &0xc,%sp 2032 2033 btst &0x7,(%sp) # is trace on? 2034 bne.b fu_trace_p # yes 2035 2036 bra.l _fpsp_done # exit to os 2037 2038# 2039# The opclass two PACKED instruction that took an "Unimplemented Data Type" 2040# exception was being traced. Make the "current" PC the FPIAR and put it in the 2041# trace stack frame then jump to _real_trace(). 
2042# 2043# UNSUPP FRAME TRACE FRAME 2044# ***************** ***************** 2045# * EA * * Current * 2046# * * * PC * 2047# ***************** ***************** 2048# * 0x2 * 0x0dc * * 0x2 * 0x024 * 2049# ***************** ***************** 2050# * Next * * Next * 2051# * PC * * PC * 2052# ***************** ***************** 2053# * SR * * SR * 2054# ***************** ***************** 2055fu_trace_p: 2056 mov.w &0x2024,0x6(%sp) 2057 fmov.l %fpiar,0x8(%sp) 2058 2059 bra.l _real_trace 2060 2061######################################################### 2062######################################################### 2063fu_out_pack: 2064 2065 2066# I'm not sure at this point what FPSR bits are valid for this instruction. 2067# so, since the emulation routines re-create them anyways, zero exception field. 2068# fmove out doesn't affect ccodes. 2069 and.l &0xffff00ff,USER_FPSR(%a6) # zero exception field 2070 2071 fmov.l &0x0,%fpcr # zero current control regs 2072 fmov.l &0x0,%fpsr 2073 2074 bfextu EXC_CMDREG(%a6){&6:&3},%d0 2075 bsr.l load_fpn1 2076 2077# unlike other opclass 3, unimplemented data type exceptions, packed must be 2078# able to detect all operand types. 2079 lea FP_SRC(%a6),%a0 2080 bsr.l set_tag_x # tag the operand type 2081 cmpi.b %d0,&UNNORM # is operand an UNNORM? 2082 bne.b fu_op2_p # no 2083 bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO 2084 2085fu_op2_p: 2086 mov.b %d0,STAG(%a6) # save src optype tag 2087 2088 clr.l %d0 2089 mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec 2090 2091 lea FP_SRC(%a6),%a0 # pass ptr to src operand 2092 2093 mov.l (%a6),EXC_A6(%a6) # in case a6 changes 2094 bsr.l fout # call fmove out routine 2095 2096# Exceptions in order of precedence: 2097# BSUN : no 2098# SNAN : yes 2099# OPERR : if ((k_factor > +17) || (dec. exp exceeds 3 digits)) 2100# OVFL : no 2101# UNFL : no 2102# DZ : no 2103# INEX2 : yes 2104# INEX1 : no 2105 2106# determine the highest priority exception(if any) set by the 2107# emulation routine that has also been enabled by the user. 2108 mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled 2109 bne.w fu_out_ena_p # some are enabled 2110 2111fu_out_exit_p: 2112 mov.l EXC_A6(%a6),(%a6) # restore a6 2113 2114 btst &0x5,EXC_SR(%a6) # user or supervisor? 2115 bne.b fu_out_exit_s_p # supervisor 2116 2117 mov.l EXC_A7(%a6),%a0 # update user a7 2118 mov.l %a0,%usp 2119 2120fu_out_exit_cont_p: 2121 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 2122 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 2123 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 2124 2125 unlk %a6 # unravel stack frame 2126 2127 btst &0x7,(%sp) # is trace on? 2128 bne.w fu_trace_p # yes 2129 2130 bra.l _fpsp_done # exit to os 2131 2132# the exception occurred in supervisor mode. check to see if the 2133# addressing mode was -(a7). if so, we'll need to shift the 2134# stack frame "down". 
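#
# for reference (summary derived from the handlers in this section): each
# enabled-exception exit stuffs a status word into the frestore frame and a
# format/vector word into EXC_VOFF before branching to the matching
# "_real_xxxx()" callout:
#
#       exception       fsave status    EXC_VOFF        vector offset
#         SNAN             0xe006         0x30d8            0xd8
#         OPERR            0xe004         0x30d0            0xd0
#         OVFL             0xe005         0x30d4            0xd4
#         UNFL             0xe003         0x30cc            0xcc
#         INEX             0xe001         0x30c4            0xc4
#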
fu_out_exit_s_p:
        btst            &mda7_bit,SPCOND_FLG(%a6) # was ea mode -(a7)
        beq.b           fu_out_exit_cont_p      # no

        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1

        mov.l           (%a6),%a6               # restore frame pointer

        mov.l           LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
        mov.l           LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)

# now, copy the result to the proper place on the stack
        mov.l           LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
        mov.l           LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
        mov.l           LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)

        add.l           &LOCAL_SIZE-0x8,%sp

        btst            &0x7,(%sp)
        bne.w           fu_trace_p

        bra.l           _fpsp_done

fu_out_ena_p:
        and.b           FPSR_EXCEPT(%a6),%d0    # keep only ones enabled
        bfffo           %d0{&24:&8},%d0         # find highest priority exception
        beq.w           fu_out_exit_p

        mov.l           EXC_A6(%a6),(%a6)       # restore a6

# an exception occurred and that exception was enabled.
# the only exceptions possible on a packed move out are INEX, OPERR, and SNAN.
fu_out_exc_p:
        cmpi.b          %d0,&0x1a
        bgt.w           fu_inex_p2
        beq.w           fu_operr_p

fu_snan_p:
        btst            &0x5,EXC_SR(%a6)
        bne.b           fu_snan_s_p

        mov.l           EXC_A7(%a6),%a0
        mov.l           %a0,%usp
        bra.w           fu_snan

fu_snan_s_p:
        cmpi.b          SPCOND_FLG(%a6),&mda7_flg
        bne.w           fu_snan

# the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
# the strategy is to move the exception frame "down" 12 bytes. then, we
# can store the default result where the exception frame was.
        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1

        mov.w           &0x30d8,EXC_VOFF(%a6)   # vector offset = 0xd8
        mov.w           &0xe006,2+FP_SRC(%a6)   # set fsave status

        frestore        FP_SRC(%a6)             # restore src operand

        mov.l           (%a6),%a6               # restore frame pointer

        mov.l           LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
        mov.l           LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
        mov.l           LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)

# now, we copy the default result to its proper location
        mov.l           LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
        mov.l           LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
        mov.l           LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)

        add.l           &LOCAL_SIZE-0x8,%sp


        bra.l           _real_snan

fu_operr_p:
        btst            &0x5,EXC_SR(%a6)
        bne.w           fu_operr_p_s

        mov.l           EXC_A7(%a6),%a0
        mov.l           %a0,%usp
        bra.w           fu_operr

fu_operr_p_s:
        cmpi.b          SPCOND_FLG(%a6),&mda7_flg
        bne.w           fu_operr

# the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
# the strategy is to move the exception frame "down" 12 bytes. then, we
# can store the default result where the exception frame was.
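#
# note (illustrative): this path and the fu_inex_s_p2 path below repeat the
# same 12-byte shift used for SNAN above -- the frame slides down 0xc bytes
# and the default packed result in FP_DST is copied into the vacated slot,
# which is where "fmove.p fpN,-(a7)" would have written it.
#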
2229 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 2230 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 2231 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 2232 2233 mov.w &0x30d0,EXC_VOFF(%a6) # vector offset = 0xd0 2234 mov.w &0xe004,2+FP_SRC(%a6) # set fsave status 2235 2236 frestore FP_SRC(%a6) # restore src operand 2237 2238 mov.l (%a6),%a6 # restore frame pointer 2239 2240 mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp) 2241 mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp) 2242 mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp) 2243 2244# now, we copy the default result to its proper location 2245 mov.l LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp) 2246 mov.l LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp) 2247 mov.l LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp) 2248 2249 add.l &LOCAL_SIZE-0x8,%sp 2250 2251 2252 bra.l _real_operr 2253 2254fu_inex_p2: 2255 btst &0x5,EXC_SR(%a6) 2256 bne.w fu_inex_s_p2 2257 2258 mov.l EXC_A7(%a6),%a0 2259 mov.l %a0,%usp 2260 bra.w fu_inex 2261 2262fu_inex_s_p2: 2263 cmpi.b SPCOND_FLG(%a6),&mda7_flg 2264 bne.w fu_inex 2265 2266# the instruction was "fmove.p fpn,-(a7)" from supervisor mode. 2267# the strategy is to move the exception frame "down" 12 bytes. then, we 2268# can store the default result where the exception frame was. 2269 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 2270 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 2271 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 2272 2273 mov.w &0x30c4,EXC_VOFF(%a6) # vector offset = 0xc4 2274 mov.w &0xe001,2+FP_SRC(%a6) # set fsave status 2275 2276 frestore FP_SRC(%a6) # restore src operand 2277 2278 mov.l (%a6),%a6 # restore frame pointer 2279 2280 mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp) 2281 mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp) 2282 mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp) 2283 2284# now, we copy the default result to its proper location 2285 mov.l LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp) 2286 mov.l LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp) 2287 mov.l LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp) 2288 2289 add.l &LOCAL_SIZE-0x8,%sp 2290 2291 2292 bra.l _real_inex 2293 2294######################################################################### 2295 2296# 2297# if we're stuffing a source operand back into an fsave frame then we 2298# have to make sure that for single or double source operands that the 2299# format stuffed is as weird as the hardware usually makes it. 2300# 2301 global funimp_skew 2302funimp_skew: 2303 bfextu EXC_EXTWORD(%a6){&3:&3},%d0 # extract src specifier 2304 cmpi.b %d0,&0x1 # was src sgl? 2305 beq.b funimp_skew_sgl # yes 2306 cmpi.b %d0,&0x5 # was src dbl? 
2307 beq.b funimp_skew_dbl # yes 2308 rts 2309 2310funimp_skew_sgl: 2311 mov.w FP_SRC_EX(%a6),%d0 # fetch DENORM exponent 2312 andi.w &0x7fff,%d0 # strip sign 2313 beq.b funimp_skew_sgl_not 2314 cmpi.w %d0,&0x3f80 2315 bgt.b funimp_skew_sgl_not 2316 neg.w %d0 # make exponent negative 2317 addi.w &0x3f81,%d0 # find amt to shift 2318 mov.l FP_SRC_HI(%a6),%d1 # fetch DENORM hi(man) 2319 lsr.l %d0,%d1 # shift it 2320 bset &31,%d1 # set j-bit 2321 mov.l %d1,FP_SRC_HI(%a6) # insert new hi(man) 2322 andi.w &0x8000,FP_SRC_EX(%a6) # clear old exponent 2323 ori.w &0x3f80,FP_SRC_EX(%a6) # insert new "skewed" exponent 2324funimp_skew_sgl_not: 2325 rts 2326 2327funimp_skew_dbl: 2328 mov.w FP_SRC_EX(%a6),%d0 # fetch DENORM exponent 2329 andi.w &0x7fff,%d0 # strip sign 2330 beq.b funimp_skew_dbl_not 2331 cmpi.w %d0,&0x3c00 2332 bgt.b funimp_skew_dbl_not 2333 2334 tst.b FP_SRC_EX(%a6) # make "internal format" 2335 smi.b 0x2+FP_SRC(%a6) 2336 mov.w %d0,FP_SRC_EX(%a6) # insert exponent with cleared sign 2337 clr.l %d0 # clear g,r,s 2338 lea FP_SRC(%a6),%a0 # pass ptr to src op 2339 mov.w &0x3c01,%d1 # pass denorm threshold 2340 bsr.l dnrm_lp # denorm it 2341 mov.w &0x3c00,%d0 # new exponent 2342 tst.b 0x2+FP_SRC(%a6) # is sign set? 2343 beq.b fss_dbl_denorm_done # no 2344 bset &15,%d0 # set sign 2345fss_dbl_denorm_done: 2346 bset &0x7,FP_SRC_HI(%a6) # set j-bit 2347 mov.w %d0,FP_SRC_EX(%a6) # insert new exponent 2348funimp_skew_dbl_not: 2349 rts 2350 2351######################################################################### 2352 global _mem_write2 2353_mem_write2: 2354 btst &0x5,EXC_SR(%a6) 2355 beq.l _dmem_write 2356 mov.l 0x0(%a0),FP_DST_EX(%a6) 2357 mov.l 0x4(%a0),FP_DST_HI(%a6) 2358 mov.l 0x8(%a0),FP_DST_LO(%a6) 2359 clr.l %d1 2360 rts 2361 2362######################################################################### 2363# XDEF **************************************************************** # 2364# _fpsp_effadd(): 060FPSP entry point for FP "Unimplemented # 2365# effective address" exception. # 2366# # 2367# This handler should be the first code executed upon taking the # 2368# FP Unimplemented Effective Address exception in an operating # 2369# system. 
#
#                                                                       #
# XREF **************************************************************** #
#       _imem_read_long() - read instruction longword                  #
#       fix_skewed_ops() - adjust src operand in fsave frame           #
#       set_tag_x() - determine optype of src/dst operands             #
#       store_fpreg() - store opclass 0 or 2 result to FP regfile      #
#       unnorm_fix() - change UNNORM operands to NORM or ZERO          #
#       load_fpn2() - load dst operand from FP regfile                 #
#       tbl_unsupp - address of table of emulation routines for opclass 0,2 #
#       decbin() - convert packed data to FP binary data               #
#       _real_fpu_disabled() - "callout" for "FPU disabled" exception  #
#       _real_access() - "callout" for access error exception          #
#       _mem_read() - read extended immediate operand from memory      #
#       _fpsp_done() - "callout" for exit; work all done               #
#       _real_trace() - "callout" for Trace enabled exception          #
#       fmovm_dynamic() - emulate dynamic fmovm instruction            #
#       fmovm_ctrl() - emulate fmovm control instruction               #
#                                                                       #
# INPUT *************************************************************** #
#       - The system stack contains the "Unimplemented <ea>" stk frame #
#                                                                       #
# OUTPUT ************************************************************** #
#       If access error:                                               #
#       - The system stack is changed to an access error stack frame   #
#       If FPU disabled:                                               #
#       - The system stack is changed to an FPU disabled stack frame   #
#       If Trace exception enabled:                                    #
#       - The system stack is changed to a Trace exception stack frame #
#       Else: (normal case)                                            #
#       - None (correct result has been stored as appropriate)         #
#                                                                       #
# ALGORITHM *********************************************************** #
#       This exception handles 3 types of operations:                  #
#       (1) FP Instructions using extended precision or packed immediate #
#           addressing mode.                                           #
#       (2) The "fmovm.x" instruction w/ dynamic register specification. #
#       (3) The "fmovm.l" instruction w/ 2 or 3 control registers.     #
#                                                                       #
#       For immediate data operations, the data is read in w/ a        #
# _mem_read() "callout", converted to FP binary (if packed), and used  #
# as the source operand to the instruction specified by the instruction #
# word. If no FP exception should be reported as a result of the       #
# emulation, then the result is stored to the destination register and #
# the handler exits through _fpsp_done(). If an enabled exc has been   #
# signalled as a result of emulation, then an fsave state frame        #
# corresponding to the FP exception type must be entered into the 060  #
# FPU before exiting. In either the enabled or disabled cases, we      #
# must also check if a Trace exception is pending, in which case, we   #
# must create a Trace exception stack frame from the current exception #
# stack frame. If no Trace is pending, we simply exit through          #
# _fpsp_done().                                                         #
#       For "fmovm.x", call the routine fmovm_dynamic() which will     #
# decode and emulate the instruction. No FP exceptions can be pending  #
# as a result of this operation emulation. A Trace exception can be    #
# pending, though, which means the current stack frame must be changed #
# to a Trace stack frame and an exit made through _real_trace().       #
#       For the case of "fmovm.x Dn,-(a7)", where the offending instruction #
# was executed from supervisor mode, this handler must store the FP    #
# register file values to the system stack by itself since             #
# fmovm_dynamic() can't handle this.
A normal exit is made through # 2430# fpsp_done(). # 2431# For "fmovm.l", fmovm_ctrl() is used to emulate the instruction. # 2432# Again, a Trace exception may be pending and an exit made through # 2433# _real_trace(). Else, a normal exit is made through _fpsp_done(). # 2434# # 2435# Before any of the above is attempted, it must be checked to # 2436# see if the FPU is disabled. Since the "Unimp <ea>" exception is taken # 2437# before the "FPU disabled" exception, but the "FPU disabled" exception # 2438# has higher priority, we check the disabled bit in the PCR. If set, # 2439# then we must create an 8 word "FPU disabled" exception stack frame # 2440# from the current 4 word exception stack frame. This includes # 2441# reproducing the effective address of the instruction to put on the # 2442# new stack frame. # 2443# # 2444# In the process of all emulation work, if a _mem_read() # 2445# "callout" returns a failing result indicating an access error, then # 2446# we must create an access error stack frame from the current stack # 2447# frame. This information includes a faulting address and a fault- # 2448# status-longword. These are created within this handler. # 2449# # 2450######################################################################### 2451 2452 global _fpsp_effadd 2453_fpsp_effadd: 2454 2455# This exception type takes priority over the "Line F Emulator" 2456# exception. Therefore, the FPU could be disabled when entering here. 2457# So, we must check to see if it's disabled and handle that case separately. 2458 mov.l %d0,-(%sp) # save d0 2459 movc %pcr,%d0 # load proc cr 2460 btst &0x1,%d0 # is FPU disabled? 2461 bne.w iea_disabled # yes 2462 mov.l (%sp)+,%d0 # restore d0 2463 2464 link %a6,&-LOCAL_SIZE # init stack frame 2465 2466 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 2467 fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs 2468 fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack 2469 2470# PC of instruction that took the exception is the PC in the frame 2471 mov.l EXC_PC(%a6),EXC_EXTWPTR(%a6) 2472 2473 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 2474 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr 2475 bsr.l _imem_read_long # fetch the instruction words 2476 mov.l %d0,EXC_OPWORD(%a6) # store OPWORD and EXTWORD 2477 2478######################################################################### 2479 2480 tst.w %d0 # is operation fmovem? 2481 bmi.w iea_fmovm # yes 2482 2483# 2484# here, we will have: 2485# fabs fdabs fsabs facos fmod 2486# fadd fdadd fsadd fasin frem 2487# fcmp fatan fscale 2488# fdiv fddiv fsdiv fatanh fsin 2489# fint fcos fsincos 2490# fintrz fcosh fsinh 2491# fmove fdmove fsmove fetox ftan 2492# fmul fdmul fsmul fetoxm1 ftanh 2493# fneg fdneg fsneg fgetexp ftentox 2494# fsgldiv fgetman ftwotox 2495# fsglmul flog10 2496# fsqrt flog2 2497# fsub fdsub fssub flogn 2498# ftst flognp1 2499# which can all use f<op>.{x,p} 2500# so, now it's immediate data extended precision AND PACKED FORMAT! 2501# 2502iea_op: 2503 andi.l &0x00ff00ff,USER_FPSR(%a6) 2504 2505 btst &0xa,%d0 # is src fmt x or p? 2506 bne.b iea_op_pack # packed 2507 2508 2509 mov.l EXC_EXTWPTR(%a6),%a0 # pass: ptr to #<data> 2510 lea FP_SRC(%a6),%a1 # pass: ptr to super addr 2511 mov.l &0xc,%d0 # pass: 12 bytes 2512 bsr.l _imem_read # read extended immediate 2513 2514 tst.l %d1 # did ifetch fail? 
2515 bne.w iea_iacc # yes 2516 2517 bra.b iea_op_setsrc 2518 2519iea_op_pack: 2520 2521 mov.l EXC_EXTWPTR(%a6),%a0 # pass: ptr to #<data> 2522 lea FP_SRC(%a6),%a1 # pass: ptr to super dst 2523 mov.l &0xc,%d0 # pass: 12 bytes 2524 bsr.l _imem_read # read packed operand 2525 2526 tst.l %d1 # did ifetch fail? 2527 bne.w iea_iacc # yes 2528 2529# The packed operand is an INF or a NAN if the exponent field is all ones. 2530 bfextu FP_SRC(%a6){&1:&15},%d0 # get exp 2531 cmpi.w %d0,&0x7fff # INF or NAN? 2532 beq.b iea_op_setsrc # operand is an INF or NAN 2533 2534# The packed operand is a zero if the mantissa is all zero, else it's 2535# a normal packed op. 2536 mov.b 3+FP_SRC(%a6),%d0 # get byte 4 2537 andi.b &0x0f,%d0 # clear all but last nybble 2538 bne.b iea_op_gp_not_spec # not a zero 2539 tst.l FP_SRC_HI(%a6) # is lw 2 zero? 2540 bne.b iea_op_gp_not_spec # not a zero 2541 tst.l FP_SRC_LO(%a6) # is lw 3 zero? 2542 beq.b iea_op_setsrc # operand is a ZERO 2543iea_op_gp_not_spec: 2544 lea FP_SRC(%a6),%a0 # pass: ptr to packed op 2545 bsr.l decbin # convert to extended 2546 fmovm.x &0x80,FP_SRC(%a6) # make this the srcop 2547 2548iea_op_setsrc: 2549 addi.l &0xc,EXC_EXTWPTR(%a6) # update extension word pointer 2550 2551# FP_SRC now holds the src operand. 2552 lea FP_SRC(%a6),%a0 # pass: ptr to src op 2553 bsr.l set_tag_x # tag the operand type 2554 mov.b %d0,STAG(%a6) # could be ANYTHING!!! 2555 cmpi.b %d0,&UNNORM # is operand an UNNORM? 2556 bne.b iea_op_getdst # no 2557 bsr.l unnorm_fix # yes; convert to NORM/DENORM/ZERO 2558 mov.b %d0,STAG(%a6) # set new optype tag 2559iea_op_getdst: 2560 clr.b STORE_FLG(%a6) # clear "store result" boolean 2561 2562 btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic? 2563 beq.b iea_op_extract # monadic 2564 btst &0x4,1+EXC_CMDREG(%a6) # is operation fsincos,ftst,fcmp? 2565 bne.b iea_op_spec # yes 2566 2567iea_op_loaddst: 2568 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno 2569 bsr.l load_fpn2 # load dst operand 2570 2571 lea FP_DST(%a6),%a0 # pass: ptr to dst op 2572 bsr.l set_tag_x # tag the operand type 2573 mov.b %d0,DTAG(%a6) # could be ANYTHING!!! 2574 cmpi.b %d0,&UNNORM # is operand an UNNORM? 2575 bne.b iea_op_extract # no 2576 bsr.l unnorm_fix # yes; convert to NORM/DENORM/ZERO 2577 mov.b %d0,DTAG(%a6) # set new optype tag 2578 bra.b iea_op_extract 2579 2580# the operation is fsincos, ftst, or fcmp. only fcmp is dyadic 2581iea_op_spec: 2582 btst &0x3,1+EXC_CMDREG(%a6) # is operation fsincos? 2583 beq.b iea_op_extract # yes 2584# now, we're left with ftst and fcmp. so, first let's tag them so that they don't 2585# store a result. then, only fcmp will branch back and pick up a dst operand. 2586 st STORE_FLG(%a6) # don't store a final result 2587 btst &0x1,1+EXC_CMDREG(%a6) # is operation fcmp? 
2588 beq.b iea_op_loaddst # yes 2589 2590iea_op_extract: 2591 clr.l %d0 2592 mov.b FPCR_MODE(%a6),%d0 # pass: rnd mode,prec 2593 2594 mov.b 1+EXC_CMDREG(%a6),%d1 2595 andi.w &0x007f,%d1 # extract extension 2596 2597 fmov.l &0x0,%fpcr 2598 fmov.l &0x0,%fpsr 2599 2600 lea FP_SRC(%a6),%a0 2601 lea FP_DST(%a6),%a1 2602 2603 mov.l (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr 2604 jsr (tbl_unsupp.l,%pc,%d1.l*1) 2605 2606# 2607# Exceptions in order of precedence: 2608# BSUN : none 2609# SNAN : all operations 2610# OPERR : all reg-reg or mem-reg operations that can normally operr 2611# OVFL : same as OPERR 2612# UNFL : same as OPERR 2613# DZ : same as OPERR 2614# INEX2 : same as OPERR 2615# INEX1 : all packed immediate operations 2616# 2617 2618# we determine the highest priority exception(if any) set by the 2619# emulation routine that has also been enabled by the user. 2620 mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled 2621 bne.b iea_op_ena # some are enabled 2622 2623# now, we save the result, unless, of course, the operation was ftst or fcmp. 2624# these don't save results. 2625iea_op_save: 2626 tst.b STORE_FLG(%a6) # does this op store a result? 2627 bne.b iea_op_exit1 # exit with no frestore 2628 2629iea_op_store: 2630 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno 2631 bsr.l store_fpreg # store the result 2632 2633iea_op_exit1: 2634 mov.l EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC" 2635 mov.l EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame 2636 2637 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 2638 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 2639 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 2640 2641 unlk %a6 # unravel the frame 2642 2643 btst &0x7,(%sp) # is trace on? 2644 bne.w iea_op_trace # yes 2645 2646 bra.l _fpsp_done # exit to os 2647 2648iea_op_ena: 2649 and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enable and set 2650 bfffo %d0{&24:&8},%d0 # find highest priority exception 2651 bne.b iea_op_exc # at least one was set 2652 2653# no exception occurred. now, did a disabled, exact overflow occur with inexact 2654# enabled? if so, then we have to stuff an overflow frame into the FPU. 2655 btst &ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur? 2656 beq.b iea_op_save 2657 2658iea_op_ovfl: 2659 btst &inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled? 2660 beq.b iea_op_store # no 2661 bra.b iea_op_exc_ovfl # yes 2662 2663# an enabled exception occurred. we have to insert the exception type back into 2664# the machine. 2665iea_op_exc: 2666 subi.l &24,%d0 # fix offset to be 0-8 2667 cmpi.b %d0,&0x6 # is exception INEX? 2668 bne.b iea_op_exc_force # no 2669 2670# the enabled exception was inexact. so, if it occurs with an overflow 2671# or underflow that was disabled, then we have to force an overflow or 2672# underflow frame. 2673 btst &ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur? 2674 bne.b iea_op_exc_ovfl # yes 2675 btst &unfl_bit,FPSR_EXCEPT(%a6) # did underflow occur? 
2676 bne.b iea_op_exc_unfl # yes 2677 2678iea_op_exc_force: 2679 mov.w (tbl_iea_except.b,%pc,%d0.w*2),2+FP_SRC(%a6) 2680 bra.b iea_op_exit2 # exit with frestore 2681 2682tbl_iea_except: 2683 short 0xe002, 0xe006, 0xe004, 0xe005 2684 short 0xe003, 0xe002, 0xe001, 0xe001 2685 2686iea_op_exc_ovfl: 2687 mov.w &0xe005,2+FP_SRC(%a6) 2688 bra.b iea_op_exit2 2689 2690iea_op_exc_unfl: 2691 mov.w &0xe003,2+FP_SRC(%a6) 2692 2693iea_op_exit2: 2694 mov.l EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC" 2695 mov.l EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame 2696 2697 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 2698 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 2699 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 2700 2701 frestore FP_SRC(%a6) # restore exceptional state 2702 2703 unlk %a6 # unravel the frame 2704 2705 btst &0x7,(%sp) # is trace on? 2706 bne.b iea_op_trace # yes 2707 2708 bra.l _fpsp_done # exit to os 2709 2710# 2711# The opclass two instruction that took an "Unimplemented Effective Address" 2712# exception was being traced. Make the "current" PC the FPIAR and put it in 2713# the trace stack frame then jump to _real_trace(). 2714# 2715# UNIMP EA FRAME TRACE FRAME 2716# ***************** ***************** 2717# * 0x0 * 0x0f0 * * Current * 2718# ***************** * PC * 2719# * Current * ***************** 2720# * PC * * 0x2 * 0x024 * 2721# ***************** ***************** 2722# * SR * * Next * 2723# ***************** * PC * 2724# ***************** 2725# * SR * 2726# ***************** 2727iea_op_trace: 2728 mov.l (%sp),-(%sp) # shift stack frame "down" 2729 mov.w 0x8(%sp),0x4(%sp) 2730 mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x024 2731 fmov.l %fpiar,0x8(%sp) # "Current PC" is in FPIAR 2732 2733 bra.l _real_trace 2734 2735######################################################################### 2736iea_fmovm: 2737 btst &14,%d0 # ctrl or data reg 2738 beq.w iea_fmovm_ctrl 2739 2740iea_fmovm_data: 2741 2742 btst &0x5,EXC_SR(%a6) # user or supervisor mode 2743 bne.b iea_fmovm_data_s 2744 2745iea_fmovm_data_u: 2746 mov.l %usp,%a0 2747 mov.l %a0,EXC_A7(%a6) # store current a7 2748 bsr.l fmovm_dynamic # do dynamic fmovm 2749 mov.l EXC_A7(%a6),%a0 # load possibly new a7 2750 mov.l %a0,%usp # update usp 2751 bra.w iea_fmovm_exit 2752 2753iea_fmovm_data_s: 2754 clr.b SPCOND_FLG(%a6) 2755 lea 0x2+EXC_VOFF(%a6),%a0 2756 mov.l %a0,EXC_A7(%a6) 2757 bsr.l fmovm_dynamic # do dynamic fmovm 2758 2759 cmpi.b SPCOND_FLG(%a6),&mda7_flg 2760 beq.w iea_fmovm_data_predec 2761 cmpi.b SPCOND_FLG(%a6),&mia7_flg 2762 bne.w iea_fmovm_exit 2763 2764# right now, d0 = the size. 2765# the data has been fetched from the supervisor stack, but we have not 2766# incremented the stack pointer by the appropriate number of bytes. 2767# do it here. 
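#
# illustrative example (assuming, per the comment above, that d0 holds the
# transfer size in bytes as returned by fmovm_dynamic()): a dynamic
# "fmovm.x (a7)+,..." whose register list names three registers moves
# 3*12 = 36 = 0x24 bytes, so the exception frame below is rebuilt 0x24
# bytes higher on the stack before exiting.
#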
2768iea_fmovm_data_postinc: 2769 btst &0x7,EXC_SR(%a6) 2770 bne.b iea_fmovm_data_pi_trace 2771 2772 mov.w EXC_SR(%a6),(EXC_SR,%a6,%d0) 2773 mov.l EXC_EXTWPTR(%a6),(EXC_PC,%a6,%d0) 2774 mov.w &0x00f0,(EXC_VOFF,%a6,%d0) 2775 2776 lea (EXC_SR,%a6,%d0),%a0 2777 mov.l %a0,EXC_SR(%a6) 2778 2779 fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1 2780 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 2781 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 2782 2783 unlk %a6 2784 mov.l (%sp)+,%sp 2785 bra.l _fpsp_done 2786 2787iea_fmovm_data_pi_trace: 2788 mov.w EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0) 2789 mov.l EXC_EXTWPTR(%a6),(EXC_PC-0x4,%a6,%d0) 2790 mov.w &0x2024,(EXC_VOFF-0x4,%a6,%d0) 2791 mov.l EXC_PC(%a6),(EXC_VOFF+0x2-0x4,%a6,%d0) 2792 2793 lea (EXC_SR-0x4,%a6,%d0),%a0 2794 mov.l %a0,EXC_SR(%a6) 2795 2796 fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1 2797 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 2798 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 2799 2800 unlk %a6 2801 mov.l (%sp)+,%sp 2802 bra.l _real_trace 2803 2804# right now, d1 = size and d0 = the strg. 2805iea_fmovm_data_predec: 2806 mov.b %d1,EXC_VOFF(%a6) # store strg 2807 mov.b %d0,0x1+EXC_VOFF(%a6) # store size 2808 2809 fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1 2810 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 2811 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 2812 2813 mov.l (%a6),-(%sp) # make a copy of a6 2814 mov.l %d0,-(%sp) # save d0 2815 mov.l %d1,-(%sp) # save d1 2816 mov.l EXC_EXTWPTR(%a6),-(%sp) # make a copy of Next PC 2817 2818 clr.l %d0 2819 mov.b 0x1+EXC_VOFF(%a6),%d0 # fetch size 2820 neg.l %d0 # get negative of size 2821 2822 btst &0x7,EXC_SR(%a6) # is trace enabled? 2823 beq.b iea_fmovm_data_p2 2824 2825 mov.w EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0) 2826 mov.l EXC_PC(%a6),(EXC_VOFF-0x2,%a6,%d0) 2827 mov.l (%sp)+,(EXC_PC-0x4,%a6,%d0) 2828 mov.w &0x2024,(EXC_VOFF-0x4,%a6,%d0) 2829 2830 pea (%a6,%d0) # create final sp 2831 bra.b iea_fmovm_data_p3 2832 2833iea_fmovm_data_p2: 2834 mov.w EXC_SR(%a6),(EXC_SR,%a6,%d0) 2835 mov.l (%sp)+,(EXC_PC,%a6,%d0) 2836 mov.w &0x00f0,(EXC_VOFF,%a6,%d0) 2837 2838 pea (0x4,%a6,%d0) # create final sp 2839 2840iea_fmovm_data_p3: 2841 clr.l %d1 2842 mov.b EXC_VOFF(%a6),%d1 # fetch strg 2843 2844 tst.b %d1 2845 bpl.b fm_1 2846 fmovm.x &0x80,(0x4+0x8,%a6,%d0) 2847 addi.l &0xc,%d0 2848fm_1: 2849 lsl.b &0x1,%d1 2850 bpl.b fm_2 2851 fmovm.x &0x40,(0x4+0x8,%a6,%d0) 2852 addi.l &0xc,%d0 2853fm_2: 2854 lsl.b &0x1,%d1 2855 bpl.b fm_3 2856 fmovm.x &0x20,(0x4+0x8,%a6,%d0) 2857 addi.l &0xc,%d0 2858fm_3: 2859 lsl.b &0x1,%d1 2860 bpl.b fm_4 2861 fmovm.x &0x10,(0x4+0x8,%a6,%d0) 2862 addi.l &0xc,%d0 2863fm_4: 2864 lsl.b &0x1,%d1 2865 bpl.b fm_5 2866 fmovm.x &0x08,(0x4+0x8,%a6,%d0) 2867 addi.l &0xc,%d0 2868fm_5: 2869 lsl.b &0x1,%d1 2870 bpl.b fm_6 2871 fmovm.x &0x04,(0x4+0x8,%a6,%d0) 2872 addi.l &0xc,%d0 2873fm_6: 2874 lsl.b &0x1,%d1 2875 bpl.b fm_7 2876 fmovm.x &0x02,(0x4+0x8,%a6,%d0) 2877 addi.l &0xc,%d0 2878fm_7: 2879 lsl.b &0x1,%d1 2880 bpl.b fm_end 2881 fmovm.x &0x01,(0x4+0x8,%a6,%d0) 2882fm_end: 2883 mov.l 0x4(%sp),%d1 2884 mov.l 0x8(%sp),%d0 2885 mov.l 0xc(%sp),%a6 2886 mov.l (%sp)+,%sp 2887 2888 btst &0x7,(%sp) # is trace enabled? 
2889 beq.l _fpsp_done 2890 bra.l _real_trace 2891 2892######################################################################### 2893iea_fmovm_ctrl: 2894 2895 bsr.l fmovm_ctrl # load ctrl regs 2896 2897iea_fmovm_exit: 2898 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 2899 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 2900 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 2901 2902 btst &0x7,EXC_SR(%a6) # is trace on? 2903 bne.b iea_fmovm_trace # yes 2904 2905 mov.l EXC_EXTWPTR(%a6),EXC_PC(%a6) # set Next PC 2906 2907 unlk %a6 # unravel the frame 2908 2909 bra.l _fpsp_done # exit to os 2910 2911# 2912# The control reg instruction that took an "Unimplemented Effective Address" 2913# exception was being traced. The "Current PC" for the trace frame is the 2914# PC stacked for Unimp EA. The "Next PC" is in EXC_EXTWPTR. 2915# After fixing the stack frame, jump to _real_trace(). 2916# 2917# UNIMP EA FRAME TRACE FRAME 2918# ***************** ***************** 2919# * 0x0 * 0x0f0 * * Current * 2920# ***************** * PC * 2921# * Current * ***************** 2922# * PC * * 0x2 * 0x024 * 2923# ***************** ***************** 2924# * SR * * Next * 2925# ***************** * PC * 2926# ***************** 2927# * SR * 2928# ***************** 2929# this ain't a pretty solution, but it works: 2930# -restore a6 (not with unlk) 2931# -shift stack frame down over where old a6 used to be 2932# -add LOCAL_SIZE to stack pointer 2933iea_fmovm_trace: 2934 mov.l (%a6),%a6 # restore frame pointer 2935 mov.w EXC_SR+LOCAL_SIZE(%sp),0x0+LOCAL_SIZE(%sp) 2936 mov.l EXC_PC+LOCAL_SIZE(%sp),0x8+LOCAL_SIZE(%sp) 2937 mov.l EXC_EXTWPTR+LOCAL_SIZE(%sp),0x2+LOCAL_SIZE(%sp) 2938 mov.w &0x2024,0x6+LOCAL_SIZE(%sp) # stk fmt = 0x2; voff = 0x024 2939 add.l &LOCAL_SIZE,%sp # clear stack frame 2940 2941 bra.l _real_trace 2942 2943######################################################################### 2944# The FPU is disabled and so we should really have taken the "Line 2945# F Emulator" exception. So, here we create an 8-word stack frame 2946# from our 4-word stack frame. This means we must calculate the length 2947# the faulting instruction to get the "next PC". This is trivial for 2948# immediate operands but requires some extra work for fmovm dynamic 2949# which can use most addressing modes. 2950iea_disabled: 2951 mov.l (%sp)+,%d0 # restore d0 2952 2953 link %a6,&-LOCAL_SIZE # init stack frame 2954 2955 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 2956 2957# PC of instruction that took the exception is the PC in the frame 2958 mov.l EXC_PC(%a6),EXC_EXTWPTR(%a6) 2959 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 2960 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr 2961 bsr.l _imem_read_long # fetch the instruction words 2962 mov.l %d0,EXC_OPWORD(%a6) # store OPWORD and EXTWORD 2963 2964 tst.w %d0 # is instr fmovm? 2965 bmi.b iea_dis_fmovm # yes 2966# instruction is using an extended precision immediate operand. Therefore, 2967# the total instruction length is 16 bytes. 2968iea_dis_immed: 2969 mov.l &0x10,%d0 # 16 bytes of instruction 2970 bra.b iea_dis_cont 2971iea_dis_fmovm: 2972 btst &0xe,%d0 # is instr fmovm ctrl 2973 bne.b iea_dis_fmovm_data # no 2974# the instruction is a fmovm.l with 2 or 3 registers. 2975 bfextu %d0{&19:&3},%d1 2976 mov.l &0xc,%d0 2977 cmpi.b %d1,&0x7 # move all regs? 
2978 bne.b iea_dis_cont 2979 addq.l &0x4,%d0 2980 bra.b iea_dis_cont 2981# the instruction is an fmovm.x dynamic which can use many addressing 2982# modes and thus can have several different total instruction lengths. 2983# call fmovm_calc_ea which will go through the ea calc process and, 2984# as a by-product, will tell us how long the instruction is. 2985iea_dis_fmovm_data: 2986 clr.l %d0 2987 bsr.l fmovm_calc_ea 2988 mov.l EXC_EXTWPTR(%a6),%d0 2989 sub.l EXC_PC(%a6),%d0 2990iea_dis_cont: 2991 mov.w %d0,EXC_VOFF(%a6) # store stack shift value 2992 2993 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 2994 2995 unlk %a6 2996 2997# here, we actually create the 8-word frame from the 4-word frame, 2998# with the "next PC" as additional info. 2999# the <ea> field is let as undefined. 3000 subq.l &0x8,%sp # make room for new stack 3001 mov.l %d0,-(%sp) # save d0 3002 mov.w 0xc(%sp),0x4(%sp) # move SR 3003 mov.l 0xe(%sp),0x6(%sp) # move Current PC 3004 clr.l %d0 3005 mov.w 0x12(%sp),%d0 3006 mov.l 0x6(%sp),0x10(%sp) # move Current PC 3007 add.l %d0,0x6(%sp) # make Next PC 3008 mov.w &0x402c,0xa(%sp) # insert offset,frame format 3009 mov.l (%sp)+,%d0 # restore d0 3010 3011 bra.l _real_fpu_disabled 3012 3013########## 3014 3015iea_iacc: 3016 movc %pcr,%d0 3017 btst &0x1,%d0 3018 bne.b iea_iacc_cont 3019 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 3020 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 on stack 3021iea_iacc_cont: 3022 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 3023 3024 unlk %a6 3025 3026 subq.w &0x8,%sp # make stack frame bigger 3027 mov.l 0x8(%sp),(%sp) # store SR,hi(PC) 3028 mov.w 0xc(%sp),0x4(%sp) # store lo(PC) 3029 mov.w &0x4008,0x6(%sp) # store voff 3030 mov.l 0x2(%sp),0x8(%sp) # store ea 3031 mov.l &0x09428001,0xc(%sp) # store fslw 3032 3033iea_acc_done: 3034 btst &0x5,(%sp) # user or supervisor mode? 3035 beq.b iea_acc_done2 # user 3036 bset &0x2,0xd(%sp) # set supervisor TM bit 3037 3038iea_acc_done2: 3039 bra.l _real_access 3040 3041iea_dacc: 3042 lea -LOCAL_SIZE(%a6),%sp 3043 3044 movc %pcr,%d1 3045 btst &0x1,%d1 3046 bne.b iea_dacc_cont 3047 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 on stack 3048 fmovm.l LOCAL_SIZE+USER_FPCR(%sp),%fpcr,%fpsr,%fpiar # restore ctrl regs 3049iea_dacc_cont: 3050 mov.l (%a6),%a6 3051 3052 mov.l 0x4+LOCAL_SIZE(%sp),-0x8+0x4+LOCAL_SIZE(%sp) 3053 mov.w 0x8+LOCAL_SIZE(%sp),-0x8+0x8+LOCAL_SIZE(%sp) 3054 mov.w &0x4008,-0x8+0xa+LOCAL_SIZE(%sp) 3055 mov.l %a0,-0x8+0xc+LOCAL_SIZE(%sp) 3056 mov.w %d0,-0x8+0x10+LOCAL_SIZE(%sp) 3057 mov.w &0x0001,-0x8+0x12+LOCAL_SIZE(%sp) 3058 3059 movm.l LOCAL_SIZE+EXC_DREGS(%sp),&0x0303 # restore d0-d1/a0-a1 3060 add.w &LOCAL_SIZE-0x4,%sp 3061 3062 bra.b iea_acc_done 3063 3064######################################################################### 3065# XDEF **************************************************************** # 3066# _fpsp_operr(): 060FPSP entry point for FP Operr exception. # 3067# # 3068# This handler should be the first code executed upon taking the # 3069# FP Operand Error exception in an operating system. 
# 3070# # 3071# XREF **************************************************************** # 3072# _imem_read_long() - read instruction longword # 3073# fix_skewed_ops() - adjust src operand in fsave frame # 3074# _real_operr() - "callout" to operating system operr handler # 3075# _dmem_write_{byte,word,long}() - store data to mem (opclass 3) # 3076# store_dreg_{b,w,l}() - store data to data regfile (opclass 3) # 3077# facc_out_{b,w,l}() - store to memory took access error (opcl 3) # 3078# # 3079# INPUT *************************************************************** # 3080# - The system stack contains the FP Operr exception frame # 3081# - The fsave frame contains the source operand # 3082# # 3083# OUTPUT ************************************************************** # 3084# No access error: # 3085# - The system stack is unchanged # 3086# - The fsave frame contains the adjusted src op for opclass 0,2 # 3087# # 3088# ALGORITHM *********************************************************** # 3089# In a system where the FP Operr exception is enabled, the goal # 3090# is to get to the handler specified at _real_operr(). But, on the 060, # 3091# for opclass zero and two instruction taking this exception, the # 3092# input operand in the fsave frame may be incorrect for some cases # 3093# and needs to be corrected. This handler calls fix_skewed_ops() to # 3094# do just this and then exits through _real_operr(). # 3095# For opclass 3 instructions, the 060 doesn't store the default # 3096# operr result out to memory or data register file as it should. # 3097# This code must emulate the move out before finally exiting through # 3098# _real_inex(). The move out, if to memory, is performed using # 3099# _mem_write() "callout" routines that may return a failing result. # 3100# In this special case, the handler must exit through facc_out() # 3101# which creates an access error stack frame from the current operr # 3102# stack frame. # 3103# # 3104######################################################################### 3105 3106 global _fpsp_operr 3107_fpsp_operr: 3108 3109 link.w %a6,&-LOCAL_SIZE # init stack frame 3110 3111 fsave FP_SRC(%a6) # grab the "busy" frame 3112 3113 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 3114 fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs 3115 fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack 3116 3117# the FPIAR holds the "current PC" of the faulting instruction 3118 mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6) 3119 3120 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 3121 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr 3122 bsr.l _imem_read_long # fetch the instruction words 3123 mov.l %d0,EXC_OPWORD(%a6) 3124 3125############################################################################## 3126 3127 btst &13,%d0 # is instr an fmove out? 3128 bne.b foperr_out # fmove out 3129 3130 3131# here, we simply see if the operand in the fsave frame needs to be "unskewed". 3132# this would be the case for opclass two operations with a source infinity or 3133# denorm operand in the sgl or dbl format. NANs also become skewed, but can't 3134# cause an operr so we don't need to check for them here. 
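#
# note (illustrative): the "skew" is an artifact of how the 060 hands off
# sgl/dbl sources: a single-precision denorm, for example, shows up with a
# biased exponent of $3f80 instead of in normalized extended form. its true
# exponent is -126, which re-biased for extended precision is
# -126 + $3fff = $3f81; a double denorm (-1022) likewise gives $3c01. these
# are the constants the unskewing code earlier in this module adds back in
# after normalizing the mantissa.
#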
3135 lea FP_SRC(%a6),%a0 # pass: ptr to src op 3136 bsr.l fix_skewed_ops # fix src op 3137 3138foperr_exit: 3139 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 3140 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 3141 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 3142 3143 frestore FP_SRC(%a6) 3144 3145 unlk %a6 3146 bra.l _real_operr 3147 3148######################################################################## 3149 3150# 3151# the hardware does not save the default result to memory on enabled 3152# operand error exceptions. we do this here before passing control to 3153# the user operand error handler. 3154# 3155# byte, word, and long destination format operations can pass 3156# through here. we simply need to test the sign of the src 3157# operand and save the appropriate minimum or maximum integer value 3158# to the effective address as pointed to by the stacked effective address. 3159# 3160# although packed opclass three operations can take operand error 3161# exceptions, they won't pass through here since they are caught 3162# first by the unsupported data format exception handler. that handler 3163# sends them directly to _real_operr() if necessary. 3164# 3165foperr_out: 3166 3167 mov.w FP_SRC_EX(%a6),%d1 # fetch exponent 3168 andi.w &0x7fff,%d1 3169 cmpi.w %d1,&0x7fff 3170 bne.b foperr_out_not_qnan 3171# the operand is either an infinity or a QNAN. 3172 tst.l FP_SRC_LO(%a6) 3173 bne.b foperr_out_qnan 3174 mov.l FP_SRC_HI(%a6),%d1 3175 andi.l &0x7fffffff,%d1 3176 beq.b foperr_out_not_qnan 3177foperr_out_qnan: 3178 mov.l FP_SRC_HI(%a6),L_SCR1(%a6) 3179 bra.b foperr_out_jmp 3180 3181foperr_out_not_qnan: 3182 mov.l &0x7fffffff,%d1 3183 tst.b FP_SRC_EX(%a6) 3184 bpl.b foperr_out_not_qnan2 3185 addq.l &0x1,%d1 3186foperr_out_not_qnan2: 3187 mov.l %d1,L_SCR1(%a6) 3188 3189foperr_out_jmp: 3190 bfextu %d0{&19:&3},%d0 # extract dst format field 3191 mov.b 1+EXC_OPWORD(%a6),%d1 # extract <ea> mode,reg 3192 mov.w (tbl_operr.b,%pc,%d0.w*2),%a0 3193 jmp (tbl_operr.b,%pc,%a0) 3194 3195tbl_operr: 3196 short foperr_out_l - tbl_operr # long word integer 3197 short tbl_operr - tbl_operr # sgl prec shouldn't happen 3198 short tbl_operr - tbl_operr # ext prec shouldn't happen 3199 short foperr_exit - tbl_operr # packed won't enter here 3200 short foperr_out_w - tbl_operr # word integer 3201 short tbl_operr - tbl_operr # dbl prec shouldn't happen 3202 short foperr_out_b - tbl_operr # byte integer 3203 short tbl_operr - tbl_operr # packed won't enter here 3204 3205foperr_out_b: 3206 mov.b L_SCR1(%a6),%d0 # load positive default result 3207 cmpi.b %d1,&0x7 # is <ea> mode a data reg? 3208 ble.b foperr_out_b_save_dn # yes 3209 mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result 3210 bsr.l _dmem_write_byte # write the default result 3211 3212 tst.l %d1 # did dstore fail? 3213 bne.l facc_out_b # yes 3214 3215 bra.w foperr_exit 3216foperr_out_b_save_dn: 3217 andi.w &0x0007,%d1 3218 bsr.l store_dreg_b # store result to regfile 3219 bra.w foperr_exit 3220 3221foperr_out_w: 3222 mov.w L_SCR1(%a6),%d0 # load positive default result 3223 cmpi.b %d1,&0x7 # is <ea> mode a data reg? 3224 ble.b foperr_out_w_save_dn # yes 3225 mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result 3226 bsr.l _dmem_write_word # write the default result 3227 3228 tst.l %d1 # did dstore fail? 
3229 bne.l facc_out_w # yes 3230 3231 bra.w foperr_exit 3232foperr_out_w_save_dn: 3233 andi.w &0x0007,%d1 3234 bsr.l store_dreg_w # store result to regfile 3235 bra.w foperr_exit 3236 3237foperr_out_l: 3238 mov.l L_SCR1(%a6),%d0 # load positive default result 3239 cmpi.b %d1,&0x7 # is <ea> mode a data reg? 3240 ble.b foperr_out_l_save_dn # yes 3241 mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result 3242 bsr.l _dmem_write_long # write the default result 3243 3244 tst.l %d1 # did dstore fail? 3245 bne.l facc_out_l # yes 3246 3247 bra.w foperr_exit 3248foperr_out_l_save_dn: 3249 andi.w &0x0007,%d1 3250 bsr.l store_dreg_l # store result to regfile 3251 bra.w foperr_exit 3252 3253######################################################################### 3254# XDEF **************************************************************** # 3255# _fpsp_snan(): 060FPSP entry point for FP SNAN exception. # 3256# # 3257# This handler should be the first code executed upon taking the # 3258# FP Signalling NAN exception in an operating system. # 3259# # 3260# XREF **************************************************************** # 3261# _imem_read_long() - read instruction longword # 3262# fix_skewed_ops() - adjust src operand in fsave frame # 3263# _real_snan() - "callout" to operating system SNAN handler # 3264# _dmem_write_{byte,word,long}() - store data to mem (opclass 3) # 3265# store_dreg_{b,w,l}() - store data to data regfile (opclass 3) # 3266# facc_out_{b,w,l,d,x}() - store to mem took acc error (opcl 3) # 3267# _calc_ea_fout() - fix An if <ea> is -() or ()+; also get <ea> # 3268# # 3269# INPUT *************************************************************** # 3270# - The system stack contains the FP SNAN exception frame # 3271# - The fsave frame contains the source operand # 3272# # 3273# OUTPUT ************************************************************** # 3274# No access error: # 3275# - The system stack is unchanged # 3276# - The fsave frame contains the adjusted src op for opclass 0,2 # 3277# # 3278# ALGORITHM *********************************************************** # 3279# In a system where the FP SNAN exception is enabled, the goal # 3280# is to get to the handler specified at _real_snan(). But, on the 060, # 3281# for opclass zero and two instructions taking this exception, the # 3282# input operand in the fsave frame may be incorrect for some cases # 3283# and needs to be corrected. This handler calls fix_skewed_ops() to # 3284# do just this and then exits through _real_snan(). # 3285# For opclass 3 instructions, the 060 doesn't store the default # 3286# SNAN result out to memory or data register file as it should. # 3287# This code must emulate the move out before finally exiting through # 3288# _real_snan(). The move out, if to memory, is performed using # 3289# _mem_write() "callout" routines that may return a failing result. # 3290# In this special case, the handler must exit through facc_out() # 3291# which creates an access error stack frame from the current SNAN # 3292# stack frame. # 3293# For the case of an extended precision opclass 3 instruction, # 3294# if the effective addressing mode was -() or ()+, then the address # 3295# register must get updated by calling _calc_ea_fout(). If the <ea> # 3296# was -(a7) from supervisor mode, then the exception frame currently # 3297# on the system stack must be carefully moved "down" to make room # 3298# for the operand being moved. 
# 3299# # 3300######################################################################### 3301 3302 global _fpsp_snan 3303_fpsp_snan: 3304 3305 link.w %a6,&-LOCAL_SIZE # init stack frame 3306 3307 fsave FP_SRC(%a6) # grab the "busy" frame 3308 3309 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 3310 fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs 3311 fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack 3312 3313# the FPIAR holds the "current PC" of the faulting instruction 3314 mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6) 3315 3316 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 3317 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr 3318 bsr.l _imem_read_long # fetch the instruction words 3319 mov.l %d0,EXC_OPWORD(%a6) 3320 3321############################################################################## 3322 3323 btst &13,%d0 # is instr an fmove out? 3324 bne.w fsnan_out # fmove out 3325 3326 3327# here, we simply see if the operand in the fsave frame needs to be "unskewed". 3328# this would be the case for opclass two operations with a source infinity or 3329# denorm operand in the sgl or dbl format. NANs also become skewed and must be 3330# fixed here. 3331 lea FP_SRC(%a6),%a0 # pass: ptr to src op 3332 bsr.l fix_skewed_ops # fix src op 3333 3334fsnan_exit: 3335 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 3336 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 3337 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 3338 3339 frestore FP_SRC(%a6) 3340 3341 unlk %a6 3342 bra.l _real_snan 3343 3344######################################################################## 3345 3346# 3347# the hardware does not save the default result to memory on enabled 3348# snan exceptions. we do this here before passing control to 3349# the user snan handler. 3350# 3351# byte, word, long, and packed destination format operations can pass 3352# through here. since packed format operations already were handled by 3353# fpsp_unsupp(), then we need to do nothing else for them here. 3354# for byte, word, and long, we simply need to test the sign of the src 3355# operand and save the appropriate minimum or maximum integer value 3356# to the effective address as pointed to by the stacked effective address. 3357# 3358fsnan_out: 3359 3360 bfextu %d0{&19:&3},%d0 # extract dst format field 3361 mov.b 1+EXC_OPWORD(%a6),%d1 # extract <ea> mode,reg 3362 mov.w (tbl_snan.b,%pc,%d0.w*2),%a0 3363 jmp (tbl_snan.b,%pc,%a0) 3364 3365tbl_snan: 3366 short fsnan_out_l - tbl_snan # long word integer 3367 short fsnan_out_s - tbl_snan # sgl prec shouldn't happen 3368 short fsnan_out_x - tbl_snan # ext prec shouldn't happen 3369 short tbl_snan - tbl_snan # packed needs no help 3370 short fsnan_out_w - tbl_snan # word integer 3371 short fsnan_out_d - tbl_snan # dbl prec shouldn't happen 3372 short fsnan_out_b - tbl_snan # byte integer 3373 short tbl_snan - tbl_snan # packed needs no help 3374 3375fsnan_out_b: 3376 mov.b FP_SRC_HI(%a6),%d0 # load upper byte of SNAN 3377 bset &6,%d0 # set SNAN bit 3378 cmpi.b %d1,&0x7 # is <ea> mode a data reg? 3379 ble.b fsnan_out_b_dn # yes 3380 mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result 3381 bsr.l _dmem_write_byte # write the default result 3382 3383 tst.l %d1 # did dstore fail? 
3384 bne.l facc_out_b # yes 3385 3386 bra.w fsnan_exit 3387fsnan_out_b_dn: 3388 andi.w &0x0007,%d1 3389 bsr.l store_dreg_b # store result to regfile 3390 bra.w fsnan_exit 3391 3392fsnan_out_w: 3393 mov.w FP_SRC_HI(%a6),%d0 # load upper word of SNAN 3394 bset &14,%d0 # set SNAN bit 3395 cmpi.b %d1,&0x7 # is <ea> mode a data reg? 3396 ble.b fsnan_out_w_dn # yes 3397 mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result 3398 bsr.l _dmem_write_word # write the default result 3399 3400 tst.l %d1 # did dstore fail? 3401 bne.l facc_out_w # yes 3402 3403 bra.w fsnan_exit 3404fsnan_out_w_dn: 3405 andi.w &0x0007,%d1 3406 bsr.l store_dreg_w # store result to regfile 3407 bra.w fsnan_exit 3408 3409fsnan_out_l: 3410 mov.l FP_SRC_HI(%a6),%d0 # load upper longword of SNAN 3411 bset &30,%d0 # set SNAN bit 3412 cmpi.b %d1,&0x7 # is <ea> mode a data reg? 3413 ble.b fsnan_out_l_dn # yes 3414 mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result 3415 bsr.l _dmem_write_long # write the default result 3416 3417 tst.l %d1 # did dstore fail? 3418 bne.l facc_out_l # yes 3419 3420 bra.w fsnan_exit 3421fsnan_out_l_dn: 3422 andi.w &0x0007,%d1 3423 bsr.l store_dreg_l # store result to regfile 3424 bra.w fsnan_exit 3425 3426fsnan_out_s: 3427 cmpi.b %d1,&0x7 # is <ea> mode a data reg? 3428 ble.b fsnan_out_d_dn # yes 3429 mov.l FP_SRC_EX(%a6),%d0 # fetch SNAN sign 3430 andi.l &0x80000000,%d0 # keep sign 3431 ori.l &0x7fc00000,%d0 # insert new exponent,SNAN bit 3432 mov.l FP_SRC_HI(%a6),%d1 # load mantissa 3433 lsr.l &0x8,%d1 # shift mantissa for sgl 3434 or.l %d1,%d0 # create sgl SNAN 3435 mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result 3436 bsr.l _dmem_write_long # write the default result 3437 3438 tst.l %d1 # did dstore fail? 3439 bne.l facc_out_l # yes 3440 3441 bra.w fsnan_exit 3442fsnan_out_d_dn: 3443 mov.l FP_SRC_EX(%a6),%d0 # fetch SNAN sign 3444 andi.l &0x80000000,%d0 # keep sign 3445 ori.l &0x7fc00000,%d0 # insert new exponent,SNAN bit 3446 mov.l %d1,-(%sp) 3447 mov.l FP_SRC_HI(%a6),%d1 # load mantissa 3448 lsr.l &0x8,%d1 # shift mantissa for sgl 3449 or.l %d1,%d0 # create sgl SNAN 3450 mov.l (%sp)+,%d1 3451 andi.w &0x0007,%d1 3452 bsr.l store_dreg_l # store result to regfile 3453 bra.w fsnan_exit 3454 3455fsnan_out_d: 3456 mov.l FP_SRC_EX(%a6),%d0 # fetch SNAN sign 3457 andi.l &0x80000000,%d0 # keep sign 3458 ori.l &0x7ff80000,%d0 # insert new exponent,SNAN bit 3459 mov.l FP_SRC_HI(%a6),%d1 # load hi mantissa 3460 mov.l %d0,FP_SCR0_EX(%a6) # store to temp space 3461 mov.l &11,%d0 # load shift amt 3462 lsr.l %d0,%d1 3463 or.l %d1,FP_SCR0_EX(%a6) # create dbl hi 3464 mov.l FP_SRC_HI(%a6),%d1 # load hi mantissa 3465 andi.l &0x000007ff,%d1 3466 ror.l %d0,%d1 3467 mov.l %d1,FP_SCR0_HI(%a6) # store to temp space 3468 mov.l FP_SRC_LO(%a6),%d1 # load lo mantissa 3469 lsr.l %d0,%d1 3470 or.l %d1,FP_SCR0_HI(%a6) # create dbl lo 3471 lea FP_SCR0(%a6),%a0 # pass: ptr to operand 3472 mov.l EXC_EA(%a6),%a1 # pass: dst addr 3473 movq.l &0x8,%d0 # pass: size of 8 bytes 3474 bsr.l _dmem_write # write the default result 3475 3476 tst.l %d1 # did dstore fail? 3477 bne.l facc_out_d # yes 3478 3479 bra.w fsnan_exit 3480 3481# for extended precision, if the addressing mode is pre-decrement or 3482# post-increment, then the address register did not get updated. 3483# in addition, for pre-decrement, the stacked <ea> is incorrect. 
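# the default result written here is the source SNAN itself with the most
# significant fraction bit forced on (bit 30 of the high mantissa longword,
# which quiets the NAN), just as the byte, word, and long routines above do
# for their sizes. the extended image is built in FP_SCR0 and stored through
# the _dmem_write() callout. _calc_ea_fout() returns the true <ea> and
# updates An for the -() and ()+ modes; -(a7) from supervisor mode is the
# special case handled at fsnan_out_x_s below.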
3484fsnan_out_x: 3485 clr.b SPCOND_FLG(%a6) # clear special case flag 3486 3487 mov.w FP_SRC_EX(%a6),FP_SCR0_EX(%a6) 3488 clr.w 2+FP_SCR0(%a6) 3489 mov.l FP_SRC_HI(%a6),%d0 3490 bset &30,%d0 3491 mov.l %d0,FP_SCR0_HI(%a6) 3492 mov.l FP_SRC_LO(%a6),FP_SCR0_LO(%a6) 3493 3494 btst &0x5,EXC_SR(%a6) # supervisor mode exception? 3495 bne.b fsnan_out_x_s # yes 3496 3497 mov.l %usp,%a0 # fetch user stack pointer 3498 mov.l %a0,EXC_A7(%a6) # save on stack for calc_ea() 3499 mov.l (%a6),EXC_A6(%a6) 3500 3501 bsr.l _calc_ea_fout # find the correct ea,update An 3502 mov.l %a0,%a1 3503 mov.l %a0,EXC_EA(%a6) # stack correct <ea> 3504 3505 mov.l EXC_A7(%a6),%a0 3506 mov.l %a0,%usp # restore user stack pointer 3507 mov.l EXC_A6(%a6),(%a6) 3508 3509fsnan_out_x_save: 3510 lea FP_SCR0(%a6),%a0 # pass: ptr to operand 3511 movq.l &0xc,%d0 # pass: size of extended 3512 bsr.l _dmem_write # write the default result 3513 3514 tst.l %d1 # did dstore fail? 3515 bne.l facc_out_x # yes 3516 3517 bra.w fsnan_exit 3518 3519fsnan_out_x_s: 3520 mov.l (%a6),EXC_A6(%a6) 3521 3522 bsr.l _calc_ea_fout # find the correct ea,update An 3523 mov.l %a0,%a1 3524 mov.l %a0,EXC_EA(%a6) # stack correct <ea> 3525 3526 mov.l EXC_A6(%a6),(%a6) 3527 3528 cmpi.b SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)? 3529 bne.b fsnan_out_x_save # no 3530 3531# the operation was "fmove.x SNAN,-(a7)" from supervisor mode. 3532 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 3533 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 3534 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 3535 3536 frestore FP_SRC(%a6) 3537 3538 mov.l EXC_A6(%a6),%a6 # restore frame pointer 3539 3540 mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp) 3541 mov.l LOCAL_SIZE+EXC_PC+0x2(%sp),LOCAL_SIZE+EXC_PC+0x2-0xc(%sp) 3542 mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp) 3543 3544 mov.l LOCAL_SIZE+FP_SCR0_EX(%sp),LOCAL_SIZE+EXC_SR(%sp) 3545 mov.l LOCAL_SIZE+FP_SCR0_HI(%sp),LOCAL_SIZE+EXC_PC+0x2(%sp) 3546 mov.l LOCAL_SIZE+FP_SCR0_LO(%sp),LOCAL_SIZE+EXC_EA(%sp) 3547 3548 add.l &LOCAL_SIZE-0x8,%sp 3549 3550 bra.l _real_snan 3551 3552######################################################################### 3553# XDEF **************************************************************** # 3554# _fpsp_inex(): 060FPSP entry point for FP Inexact exception. # 3555# # 3556# This handler should be the first code executed upon taking the # 3557# FP Inexact exception in an operating system. 
# 3558# # 3559# XREF **************************************************************** # 3560# _imem_read_long() - read instruction longword # 3561# fix_skewed_ops() - adjust src operand in fsave frame # 3562# set_tag_x() - determine optype of src/dst operands # 3563# store_fpreg() - store opclass 0 or 2 result to FP regfile # 3564# unnorm_fix() - change UNNORM operands to NORM or ZERO # 3565# load_fpn2() - load dst operand from FP regfile # 3566# smovcr() - emulate an "fmovcr" instruction # 3567# fout() - emulate an opclass 3 instruction # 3568# tbl_unsupp - add of table of emulation routines for opclass 0,2 # 3569# _real_inex() - "callout" to operating system inexact handler # 3570# # 3571# INPUT *************************************************************** # 3572# - The system stack contains the FP Inexact exception frame # 3573# - The fsave frame contains the source operand # 3574# # 3575# OUTPUT ************************************************************** # 3576# - The system stack is unchanged # 3577# - The fsave frame contains the adjusted src op for opclass 0,2 # 3578# # 3579# ALGORITHM *********************************************************** # 3580# In a system where the FP Inexact exception is enabled, the goal # 3581# is to get to the handler specified at _real_inex(). But, on the 060, # 3582# for opclass zero and two instruction taking this exception, the # 3583# hardware doesn't store the correct result to the destination FP # 3584# register as did the '040 and '881/2. This handler must emulate the # 3585# instruction in order to get this value and then store it to the # 3586# correct register before calling _real_inex(). # 3587# For opclass 3 instructions, the 060 doesn't store the default # 3588# inexact result out to memory or data register file as it should. # 3589# This code must emulate the move out by calling fout() before finally # 3590# exiting through _real_inex(). # 3591# # 3592######################################################################### 3593 3594 global _fpsp_inex 3595_fpsp_inex: 3596 3597 link.w %a6,&-LOCAL_SIZE # init stack frame 3598 3599 fsave FP_SRC(%a6) # grab the "busy" frame 3600 3601 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 3602 fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs 3603 fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack 3604 3605# the FPIAR holds the "current PC" of the faulting instruction 3606 mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6) 3607 3608 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 3609 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr 3610 bsr.l _imem_read_long # fetch the instruction words 3611 mov.l %d0,EXC_OPWORD(%a6) 3612 3613############################################################################## 3614 3615 btst &13,%d0 # is instr an fmove out? 3616 bne.w finex_out # fmove out 3617 3618 3619# the hardware, for "fabs" and "fneg" w/ a long source format, puts the 3620# longword integer directly into the upper longword of the mantissa along 3621# w/ an exponent value of 0x401e. we convert this to extended precision here. 3622 bfextu %d0{&19:&3},%d0 # fetch instr size 3623 bne.b finex_cont # instr size is not long 3624 cmpi.w FP_SRC_EX(%a6),&0x401e # is exponent 0x401e? 
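# 0x401e is the extended-precision bias (0x3fff) plus 31, the exponent the
# hardware leaves when it drops the longword source straight into the upper
# mantissa as described above. if the exponent matches, the fmov.l below
# re-converts the longword to a properly normalized extended value before
# the operand is "unskewed".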
3625 bne.b finex_cont # no 3626 fmov.l &0x0,%fpcr 3627 fmov.l FP_SRC_HI(%a6),%fp0 # load integer src 3628 fmov.x %fp0,FP_SRC(%a6) # store integer as extended precision 3629 mov.w &0xe001,0x2+FP_SRC(%a6) 3630 3631finex_cont: 3632 lea FP_SRC(%a6),%a0 # pass: ptr to src op 3633 bsr.l fix_skewed_ops # fix src op 3634 3635# Here, we zero the ccode and exception byte field since we're going to 3636# emulate the whole instruction. Notice, though, that we don't kill the 3637# INEX1 bit. This is because a packed op has long since been converted 3638# to extended before arriving here. Therefore, we need to retain the 3639# INEX1 bit from when the operand was first converted. 3640 andi.l &0x00ff01ff,USER_FPSR(%a6) # zero all but accured field 3641 3642 fmov.l &0x0,%fpcr # zero current control regs 3643 fmov.l &0x0,%fpsr 3644 3645 bfextu EXC_EXTWORD(%a6){&0:&6},%d1 # extract upper 6 of cmdreg 3646 cmpi.b %d1,&0x17 # is op an fmovecr? 3647 beq.w finex_fmovcr # yes 3648 3649 lea FP_SRC(%a6),%a0 # pass: ptr to src op 3650 bsr.l set_tag_x # tag the operand type 3651 mov.b %d0,STAG(%a6) # maybe NORM,DENORM 3652 3653# bits four and five of the fp extension word separate the monadic and dyadic 3654# operations that can pass through fpsp_inex(). remember that fcmp and ftst 3655# will never take this exception, but fsincos will. 3656 btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic? 3657 beq.b finex_extract # monadic 3658 3659 btst &0x4,1+EXC_CMDREG(%a6) # is operation an fsincos? 3660 bne.b finex_extract # yes 3661 3662 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg 3663 bsr.l load_fpn2 # load dst into FP_DST 3664 3665 lea FP_DST(%a6),%a0 # pass: ptr to dst op 3666 bsr.l set_tag_x # tag the operand type 3667 cmpi.b %d0,&UNNORM # is operand an UNNORM? 3668 bne.b finex_op2_done # no 3669 bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO 3670finex_op2_done: 3671 mov.b %d0,DTAG(%a6) # save dst optype tag 3672 3673finex_extract: 3674 clr.l %d0 3675 mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode 3676 3677 mov.b 1+EXC_CMDREG(%a6),%d1 3678 andi.w &0x007f,%d1 # extract extension 3679 3680 lea FP_SRC(%a6),%a0 3681 lea FP_DST(%a6),%a1 3682 3683 mov.l (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr 3684 jsr (tbl_unsupp.l,%pc,%d1.l*1) 3685 3686# the operation has been emulated. the result is in fp0. 3687finex_save: 3688 bfextu EXC_CMDREG(%a6){&6:&3},%d0 3689 bsr.l store_fpreg 3690 3691finex_exit: 3692 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 3693 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 3694 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 3695 3696 frestore FP_SRC(%a6) 3697 3698 unlk %a6 3699 bra.l _real_inex 3700 3701finex_fmovcr: 3702 clr.l %d0 3703 mov.b FPCR_MODE(%a6),%d0 # pass rnd prec,mode 3704 mov.b 1+EXC_CMDREG(%a6),%d1 3705 andi.l &0x0000007f,%d1 # pass rom offset 3706 bsr.l smovcr 3707 bra.b finex_save 3708 3709######################################################################## 3710 3711# 3712# the hardware does not save the default result to memory on enabled 3713# inexact exceptions. we do this here before passing control to 3714# the user inexact handler. 3715# 3716# byte, word, and long destination format operations can pass 3717# through here. so can double and single precision. 3718# although packed opclass three operations can take inexact 3719# exceptions, they won't pass through here since they are caught 3720# first by the unsupported data format exception handler. that handler 3721# sends them directly to _real_inex() if necessary. 
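# the actual store is performed by fout(), the opclass three emulation
# routine, using the rounding precision and mode passed in from the user
# FPCR. so, the value that lands at the destination is the rounded (and
# inexact) default result the hardware should have written itself.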
3722# 3723finex_out: 3724 3725 mov.b &NORM,STAG(%a6) # src is a NORM 3726 3727 clr.l %d0 3728 mov.b FPCR_MODE(%a6),%d0 # pass rnd prec,mode 3729 3730 andi.l &0xffff00ff,USER_FPSR(%a6) # zero exception field 3731 3732 lea FP_SRC(%a6),%a0 # pass ptr to src operand 3733 3734 bsr.l fout # store the default result 3735 3736 bra.b finex_exit 3737 3738######################################################################### 3739# XDEF **************************************************************** # 3740# _fpsp_dz(): 060FPSP entry point for FP DZ exception. # 3741# # 3742# This handler should be the first code executed upon taking # 3743# the FP DZ exception in an operating system. # 3744# # 3745# XREF **************************************************************** # 3746# _imem_read_long() - read instruction longword from memory # 3747# fix_skewed_ops() - adjust fsave operand # 3748# _real_dz() - "callout" exit point from FP DZ handler # 3749# # 3750# INPUT *************************************************************** # 3751# - The system stack contains the FP DZ exception stack. # 3752# - The fsave frame contains the source operand. # 3753# # 3754# OUTPUT ************************************************************** # 3755# - The system stack contains the FP DZ exception stack. # 3756# - The fsave frame contains the adjusted source operand. # 3757# # 3758# ALGORITHM *********************************************************** # 3759# In a system where the DZ exception is enabled, the goal is to # 3760# get to the handler specified at _real_dz(). But, on the 060, when the # 3761# exception is taken, the input operand in the fsave state frame may # 3762# be incorrect for some cases and need to be adjusted. So, this package # 3763# adjusts the operand using fix_skewed_ops() and then branches to # 3764# _real_dz(). # 3765# # 3766######################################################################### 3767 3768 global _fpsp_dz 3769_fpsp_dz: 3770 3771 link.w %a6,&-LOCAL_SIZE # init stack frame 3772 3773 fsave FP_SRC(%a6) # grab the "busy" frame 3774 3775 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 3776 fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs 3777 fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack 3778 3779# the FPIAR holds the "current PC" of the faulting instruction 3780 mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6) 3781 3782 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 3783 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr 3784 bsr.l _imem_read_long # fetch the instruction words 3785 mov.l %d0,EXC_OPWORD(%a6) 3786 3787############################################################################## 3788 3789 3790# here, we simply see if the operand in the fsave frame needs to be "unskewed". 3791# this would be the case for opclass two operations with a source zero 3792# in the sgl or dbl format. 3793 lea FP_SRC(%a6),%a0 # pass: ptr to src op 3794 bsr.l fix_skewed_ops # fix src op 3795 3796fdz_exit: 3797 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 3798 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 3799 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 3800 3801 frestore FP_SRC(%a6) 3802 3803 unlk %a6 3804 bra.l _real_dz 3805 3806######################################################################### 3807# XDEF **************************************************************** # 3808# _fpsp_fline(): 060FPSP entry point for "Line F emulator" exc. 
# 3809# # 3810# This handler should be the first code executed upon taking the # 3811# "Line F Emulator" exception in an operating system. # 3812# # 3813# XREF **************************************************************** # 3814# _fpsp_unimp() - handle "FP Unimplemented" exceptions # 3815# _real_fpu_disabled() - handle "FPU disabled" exceptions # 3816# _real_fline() - handle "FLINE" exceptions # 3817# _imem_read_long() - read instruction longword # 3818# # 3819# INPUT *************************************************************** # 3820# - The system stack contains a "Line F Emulator" exception # 3821# stack frame. # 3822# # 3823# OUTPUT ************************************************************** # 3824# - The system stack is unchanged # 3825# # 3826# ALGORITHM *********************************************************** # 3827# When a "Line F Emulator" exception occurs, there are 3 possible # 3828# exception types, denoted by the exception stack frame format number: # 3829# (1) FPU unimplemented instruction (6 word stack frame) # 3830# (2) FPU disabled (8 word stack frame) # 3831# (3) Line F (4 word stack frame) # 3832# # 3833# This module determines which and forks the flow off to the # 3834# appropriate "callout" (for "disabled" and "Line F") or to the # 3835# correct emulation code (for "FPU unimplemented"). # 3836# This code also must check for "fmovecr" instructions w/ a # 3837# non-zero <ea> field. These may get flagged as "Line F" but should # 3838# really be flagged as "FPU Unimplemented". (This is a "feature" on # 3839# the '060. # 3840# # 3841######################################################################### 3842 3843 global _fpsp_fline 3844_fpsp_fline: 3845 3846# check to see if this exception is a "FP Unimplemented Instruction" 3847# exception. if so, branch directly to that handler's entry point. 3848 cmpi.w 0x6(%sp),&0x202c 3849 beq.l _fpsp_unimp 3850 3851# check to see if the FPU is disabled. if so, jump to the OS entry 3852# point for that condition. 3853 cmpi.w 0x6(%sp),&0x402c 3854 beq.l _real_fpu_disabled 3855 3856# the exception was an "F-Line Illegal" exception. we check to see 3857# if the F-Line instruction is an "fmovecr" w/ a non-zero <ea>. if 3858# so, convert the F-Line exception stack frame to an FP Unimplemented 3859# Instruction exception stack frame else branch to the OS entry 3860# point for the F-Line exception handler. 3861 link.w %a6,&-LOCAL_SIZE # init stack frame 3862 3863 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 3864 3865 mov.l EXC_PC(%a6),EXC_EXTWPTR(%a6) 3866 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 3867 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr 3868 bsr.l _imem_read_long # fetch instruction words 3869 3870 bfextu %d0{&0:&10},%d1 # is it an fmovecr? 3871 cmpi.w %d1,&0x03c8 3872 bne.b fline_fline # no 3873 3874 bfextu %d0{&16:&6},%d1 # is it an fmovecr? 3875 cmpi.b %d1,&0x17 3876 bne.b fline_fline # no 3877 3878# it's an fmovecr w/ a non-zero <ea> that has entered through 3879# the F-Line Illegal exception. 3880# so, we need to convert the F-Line exception stack frame into an 3881# FP Unimplemented Instruction stack frame and jump to that entry 3882# point. 3883# 3884# but, if the FPU is disabled, then we need to jump to the FPU disabled 3885# entry point. 
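# the check below reads the PCR and tests bit 1 (the FPU disable bit), which
# is set while the on-chip FPU is disabled. if it is set, the four word
# F-Line frame is rebuilt as the eight word format 0x4 "FPU disabled" frame
# (vector offset 0x02c) before exiting through _real_fpu_disabled();
# otherwise we fall into fline_fmovcr and convert the frame into an FP
# unimplemented instruction frame.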
3886 movc %pcr,%d0 3887 btst &0x1,%d0 3888 beq.b fline_fmovcr 3889 3890 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 3891 3892 unlk %a6 3893 3894 sub.l &0x8,%sp # make room for "Next PC", <ea> 3895 mov.w 0x8(%sp),(%sp) 3896 mov.l 0xa(%sp),0x2(%sp) # move "Current PC" 3897 mov.w &0x402c,0x6(%sp) 3898 mov.l 0x2(%sp),0xc(%sp) 3899 addq.l &0x4,0x2(%sp) # set "Next PC" 3900 3901 bra.l _real_fpu_disabled 3902 3903fline_fmovcr: 3904 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 3905 3906 unlk %a6 3907 3908 fmov.l 0x2(%sp),%fpiar # set current PC 3909 addq.l &0x4,0x2(%sp) # set Next PC 3910 3911 mov.l (%sp),-(%sp) 3912 mov.l 0x8(%sp),0x4(%sp) 3913 mov.b &0x20,0x6(%sp) 3914 3915 bra.l _fpsp_unimp 3916 3917fline_fline: 3918 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 3919 3920 unlk %a6 3921 3922 bra.l _real_fline 3923 3924######################################################################### 3925# XDEF **************************************************************** # 3926# _fpsp_unimp(): 060FPSP entry point for FP "Unimplemented # 3927# Instruction" exception. # 3928# # 3929# This handler should be the first code executed upon taking the # 3930# FP Unimplemented Instruction exception in an operating system. # 3931# # 3932# XREF **************************************************************** # 3933# _imem_read_{word,long}() - read instruction word/longword # 3934# load_fop() - load src/dst ops from memory and/or FP regfile # 3935# store_fpreg() - store opclass 0 or 2 result to FP regfile # 3936# tbl_trans - addr of table of emulation routines for trnscndls # 3937# _real_access() - "callout" for access error exception # 3938# _fpsp_done() - "callout" for exit; work all done # 3939# _real_trace() - "callout" for Trace enabled exception # 3940# smovcr() - emulate "fmovecr" instruction # 3941# funimp_skew() - adjust fsave src ops to "incorrect" value # 3942# _ftrapcc() - emulate an "ftrapcc" instruction # 3943# _fdbcc() - emulate an "fdbcc" instruction # 3944# _fscc() - emulate an "fscc" instruction # 3945# _real_trap() - "callout" for Trap exception # 3946# _real_bsun() - "callout" for enabled Bsun exception # 3947# # 3948# INPUT *************************************************************** # 3949# - The system stack contains the "Unimplemented Instr" stk frame # 3950# # 3951# OUTPUT ************************************************************** # 3952# If access error: # 3953# - The system stack is changed to an access error stack frame # 3954# If Trace exception enabled: # 3955# - The system stack is changed to a Trace exception stack frame # 3956# Else: (normal case) # 3957# - Correct result has been stored as appropriate # 3958# # 3959# ALGORITHM *********************************************************** # 3960# There are two main cases of instructions that may enter here to # 3961# be emulated: (1) the FPgen instructions, most of which were also # 3962# unimplemented on the 040, and (2) "ftrapcc", "fscc", and "fdbcc". # 3963# For the first set, this handler calls the routine load_fop() # 3964# to load the source and destination (for dyadic) operands to be used # 3965# for instruction emulation. The correct emulation routine is then # 3966# chosen by decoding the instruction type and indexing into an # 3967# emulation subroutine index table. After emulation returns, this # 3968# handler checks to see if an exception should occur as a result of the # 3969# FP instruction emulation. 
If so, then an FP exception of the correct # 3970# type is inserted into the FPU state frame using the "frestore" # 3971# instruction before exiting through _fpsp_done(). In either the # 3972# exceptional or non-exceptional cases, we must check to see if the # 3973# Trace exception is enabled. If so, then we must create a Trace # 3974# exception frame from the current exception frame and exit through # 3975# _real_trace(). # 3976# For "fdbcc", "ftrapcc", and "fscc", the emulation subroutines # 3977# _fdbcc(), _ftrapcc(), and _fscc() respectively are used. All three # 3978# may flag that a BSUN exception should be taken. If so, then the # 3979# current exception stack frame is converted into a BSUN exception # 3980# stack frame and an exit is made through _real_bsun(). If the # 3981# instruction was "ftrapcc" and a Trap exception should result, a Trap # 3982# exception stack frame is created from the current frame and an exit # 3983# is made through _real_trap(). If a Trace exception is pending, then # 3984# a Trace exception frame is created from the current frame and a jump # 3985# is made to _real_trace(). Finally, if none of these conditions exist, # 3986# then the handler exits though the callout _fpsp_done(). # 3987# # 3988# In any of the above scenarios, if a _mem_read() or _mem_write() # 3989# "callout" returns a failing value, then an access error stack frame # 3990# is created from the current stack frame and an exit is made through # 3991# _real_access(). # 3992# # 3993######################################################################### 3994 3995# 3996# FP UNIMPLEMENTED INSTRUCTION STACK FRAME: 3997# 3998# ***************** 3999# * * => <ea> of fp unimp instr. 4000# - EA - 4001# * * 4002# ***************** 4003# * 0x2 * 0x02c * => frame format and vector offset(vector #11) 4004# ***************** 4005# * * 4006# - Next PC - => PC of instr to execute after exc handling 4007# * * 4008# ***************** 4009# * SR * => SR at the time the exception was taken 4010# ***************** 4011# 4012# Note: the !NULL bit does not get set in the fsave frame when the 4013# machine encounters an fp unimp exception. Therefore, it must be set 4014# before leaving this handler. 4015# 4016 global _fpsp_unimp 4017_fpsp_unimp: 4018 4019 link.w %a6,&-LOCAL_SIZE # init stack frame 4020 4021 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 4022 fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs 4023 fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 4024 4025 btst &0x5,EXC_SR(%a6) # user mode exception? 4026 bne.b funimp_s # no; supervisor mode 4027 4028# save the value of the user stack pointer onto the stack frame 4029funimp_u: 4030 mov.l %usp,%a0 # fetch user stack pointer 4031 mov.l %a0,EXC_A7(%a6) # store in stack frame 4032 bra.b funimp_cont 4033 4034# store the value of the supervisor stack pointer BEFORE the exc occurred. 4035# old_sp is address just above stacked effective address. 4036funimp_s: 4037 lea 4+EXC_EA(%a6),%a0 # load old a7' 4038 mov.l %a0,EXC_A7(%a6) # store a7' 4039 mov.l %a0,OLD_A7(%a6) # make a copy 4040 4041funimp_cont: 4042 4043# the FPIAR holds the "current PC" of the faulting instruction. 
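# the stacked PC in the fp unimp frame is the "Next PC" (see the frame
# picture above), so the opword and extension words must be fetched relative
# to the FPIAR instead. EXC_EXTWPTR is then left pointing just past the
# first two instruction words for any further extension word fetches.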
4044 mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6) 4045 4046 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 4047 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr 4048 bsr.l _imem_read_long # fetch the instruction words 4049 mov.l %d0,EXC_OPWORD(%a6) 4050 4051############################################################################ 4052 4053 fmov.l &0x0,%fpcr # clear FPCR 4054 fmov.l &0x0,%fpsr # clear FPSR 4055 4056 clr.b SPCOND_FLG(%a6) # clear "special case" flag 4057 4058# Divide the fp instructions into 8 types based on the TYPE field in 4059# bits 6-8 of the opword(classes 6,7 are undefined). 4060# (for the '060, only two types can take this exception) 4061# bftst %d0{&7:&3} # test TYPE 4062 btst &22,%d0 # type 0 or 1 ? 4063 bne.w funimp_misc # type 1 4064 4065######################################### 4066# TYPE == 0: General instructions # 4067######################################### 4068funimp_gen: 4069 4070 clr.b STORE_FLG(%a6) # clear "store result" flag 4071 4072# clear the ccode byte and exception status byte 4073 andi.l &0x00ff00ff,USER_FPSR(%a6) 4074 4075 bfextu %d0{&16:&6},%d1 # extract upper 6 of cmdreg 4076 cmpi.b %d1,&0x17 # is op an fmovecr? 4077 beq.w funimp_fmovcr # yes 4078 4079funimp_gen_op: 4080 bsr.l _load_fop # load 4081 4082 clr.l %d0 4083 mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode 4084 4085 mov.b 1+EXC_CMDREG(%a6),%d1 4086 andi.w &0x003f,%d1 # extract extension bits 4087 lsl.w &0x3,%d1 # shift right 3 bits 4088 or.b STAG(%a6),%d1 # insert src optag bits 4089 4090 lea FP_DST(%a6),%a1 # pass dst ptr in a1 4091 lea FP_SRC(%a6),%a0 # pass src ptr in a0 4092 4093 mov.w (tbl_trans.w,%pc,%d1.w*2),%d1 4094 jsr (tbl_trans.w,%pc,%d1.w*1) # emulate 4095 4096funimp_fsave: 4097 mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled 4098 bne.w funimp_ena # some are enabled 4099 4100funimp_store: 4101 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # fetch Dn 4102 bsr.l store_fpreg # store result to fp regfile 4103 4104funimp_gen_exit: 4105 fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1 4106 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 4107 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 4108 4109funimp_gen_exit_cmp: 4110 cmpi.b SPCOND_FLG(%a6),&mia7_flg # was the ea mode (sp)+ ? 4111 beq.b funimp_gen_exit_a7 # yes 4112 4113 cmpi.b SPCOND_FLG(%a6),&mda7_flg # was the ea mode -(sp) ? 4114 beq.b funimp_gen_exit_a7 # yes 4115 4116funimp_gen_exit_cont: 4117 unlk %a6 4118 4119funimp_gen_exit_cont2: 4120 btst &0x7,(%sp) # is trace on? 4121 beq.l _fpsp_done # no 4122 4123# this catches a problem with the case where an exception will be re-inserted 4124# into the machine. the frestore has already been executed...so, the fmov.l 4125# alone of the control register would trigger an unwanted exception. 4126# until I feel like fixing this, we'll sidestep the exception. 4127 fsave -(%sp) 4128 fmov.l %fpiar,0x14(%sp) # "Current PC" is in FPIAR 4129 frestore (%sp)+ 4130 mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x24 4131 bra.l _real_trace 4132 4133funimp_gen_exit_a7: 4134 btst &0x5,EXC_SR(%a6) # supervisor or user mode? 4135 bne.b funimp_gen_exit_a7_s # supervisor 4136 4137 mov.l %a0,-(%sp) 4138 mov.l EXC_A7(%a6),%a0 4139 mov.l %a0,%usp 4140 mov.l (%sp)+,%a0 4141 bra.b funimp_gen_exit_cont 4142 4143# if the instruction was executed from supervisor mode and the addressing 4144# mode was (a7)+, then the stack frame for the rte must be shifted "up" 4145# "n" bytes where "n" is the size of the src operand type. 
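# for example, "f<op>.x (a7)+" gives "n" = 12: d0 below becomes the new a7'
# minus the old a7' (12), the SR/PC/format words are copied 12 bytes higher,
# and the shift amount is parked in the old EXC_SR slot so that the
# "add.w (%sp),%sp" after the unlk lands the stack pointer on the relocated
# frame.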
4146# f<op>.{b,w,l,s,d,x,p} 4147funimp_gen_exit_a7_s: 4148 mov.l %d0,-(%sp) # save d0 4149 mov.l EXC_A7(%a6),%d0 # load new a7' 4150 sub.l OLD_A7(%a6),%d0 # subtract old a7' 4151 mov.l 0x2+EXC_PC(%a6),(0x2+EXC_PC,%a6,%d0) # shift stack frame 4152 mov.l EXC_SR(%a6),(EXC_SR,%a6,%d0) # shift stack frame 4153 mov.w %d0,EXC_SR(%a6) # store incr number 4154 mov.l (%sp)+,%d0 # restore d0 4155 4156 unlk %a6 4157 4158 add.w (%sp),%sp # stack frame shifted 4159 bra.b funimp_gen_exit_cont2 4160 4161###################### 4162# fmovecr.x #ccc,fpn # 4163###################### 4164funimp_fmovcr: 4165 clr.l %d0 4166 mov.b FPCR_MODE(%a6),%d0 4167 mov.b 1+EXC_CMDREG(%a6),%d1 4168 andi.l &0x0000007f,%d1 # pass rom offset in d1 4169 bsr.l smovcr 4170 bra.w funimp_fsave 4171 4172######################################################################### 4173 4174# 4175# the user has enabled some exceptions. we figure not to see this too 4176# often so that's why it gets lower priority. 4177# 4178funimp_ena: 4179 4180# was an exception set that was also enabled? 4181 and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled and set 4182 bfffo %d0{&24:&8},%d0 # find highest priority exception 4183 bne.b funimp_exc # at least one was set 4184 4185# no exception that was enabled was set BUT if we got an exact overflow 4186# and overflow wasn't enabled but inexact was (yech!) then this is 4187# an inexact exception; otherwise, return to normal non-exception flow. 4188 btst &ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur? 4189 beq.w funimp_store # no; return to normal flow 4190 4191# the overflow w/ exact result happened but was inexact set in the FPCR? 4192funimp_ovfl: 4193 btst &inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled? 4194 beq.w funimp_store # no; return to normal flow 4195 bra.b funimp_exc_ovfl # yes 4196 4197# some exception happened that was actually enabled. 4198# we'll insert this new exception into the FPU and then return. 4199funimp_exc: 4200 subi.l &24,%d0 # fix offset to be 0-8 4201 cmpi.b %d0,&0x6 # is exception INEX? 4202 bne.b funimp_exc_force # no 4203 4204# the enabled exception was inexact. so, if it occurs with an overflow 4205# or underflow that was disabled, then we have to force an overflow or 4206# underflow frame. the eventual overflow or underflow handler will see that 4207# it's actually an inexact and act appropriately. this is the only easy 4208# way to have the EXOP available for the enabled inexact handler when 4209# a disabled overflow or underflow has also happened. 4210 btst &ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur? 4211 bne.b funimp_exc_ovfl # yes 4212 btst &unfl_bit,FPSR_EXCEPT(%a6) # did underflow occur? 4213 bne.b funimp_exc_unfl # yes 4214 4215# force the fsave exception status bits to signal an exception of the 4216# appropriate type. don't forget to "skew" the source operand in case we 4217# "unskewed" the one the hardware initially gave us. 
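# the word fetched from tbl_funimp_except below (indexed by the exception
# number found by the bfffo above) is written into the word at 2+FP_SRC,
# the exception status word of the fsave image. the frestore at
# funimp_gen_exit2 then re-inserts an exception of that type into the
# machine, just as the overflow and underflow cases do with the hard-coded
# 0xe005 and 0xe003 values.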
4218funimp_exc_force: 4219 mov.l %d0,-(%sp) # save d0 4220 bsr.l funimp_skew # check for special case 4221 mov.l (%sp)+,%d0 # restore d0 4222 mov.w (tbl_funimp_except.b,%pc,%d0.w*2),2+FP_SRC(%a6) 4223 bra.b funimp_gen_exit2 # exit with frestore 4224 4225tbl_funimp_except: 4226 short 0xe002, 0xe006, 0xe004, 0xe005 4227 short 0xe003, 0xe002, 0xe001, 0xe001 4228 4229# insert an overflow frame 4230funimp_exc_ovfl: 4231 bsr.l funimp_skew # check for special case 4232 mov.w &0xe005,2+FP_SRC(%a6) 4233 bra.b funimp_gen_exit2 4234 4235# insert an underflow frame 4236funimp_exc_unfl: 4237 bsr.l funimp_skew # check for special case 4238 mov.w &0xe003,2+FP_SRC(%a6) 4239 4240# this is the general exit point for an enabled exception that will be 4241# restored into the machine for the instruction just emulated. 4242funimp_gen_exit2: 4243 fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1 4244 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 4245 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 4246 4247 frestore FP_SRC(%a6) # insert exceptional status 4248 4249 bra.w funimp_gen_exit_cmp 4250 4251############################################################################ 4252 4253# 4254# TYPE == 1: FDB<cc>, FS<cc>, FTRAP<cc> 4255# 4256# These instructions were implemented on the '881/2 and '040 in hardware but 4257# are emulated in software on the '060. 4258# 4259funimp_misc: 4260 bfextu %d0{&10:&3},%d1 # extract mode field 4261 cmpi.b %d1,&0x1 # is it an fdb<cc>? 4262 beq.w funimp_fdbcc # yes 4263 cmpi.b %d1,&0x7 # is it an fs<cc>? 4264 bne.w funimp_fscc # yes 4265 bfextu %d0{&13:&3},%d1 4266 cmpi.b %d1,&0x2 # is it an fs<cc>? 4267 blt.w funimp_fscc # yes 4268 4269######################### 4270# ftrap<cc> # 4271# ftrap<cc>.w #<data> # 4272# ftrap<cc>.l #<data> # 4273######################### 4274funimp_ftrapcc: 4275 4276 bsr.l _ftrapcc # FTRAP<cc>() 4277 4278 cmpi.b SPCOND_FLG(%a6),&fbsun_flg # is enabled bsun occurring? 4279 beq.w funimp_bsun # yes 4280 4281 cmpi.b SPCOND_FLG(%a6),&ftrapcc_flg # should a trap occur? 4282 bne.w funimp_done # no 4283 4284# FP UNIMP FRAME TRAP FRAME 4285# ***************** ***************** 4286# ** <EA> ** ** Current PC ** 4287# ***************** ***************** 4288# * 0x2 * 0x02c * * 0x2 * 0x01c * 4289# ***************** ***************** 4290# ** Next PC ** ** Next PC ** 4291# ***************** ***************** 4292# * SR * * SR * 4293# ***************** ***************** 4294# (6 words) (6 words) 4295# 4296# the ftrapcc instruction should take a trap. so, here we must create a 4297# trap stack frame from an unimplemented fp instruction stack frame and 4298# jump to the user supplied entry point for the trap exception 4299funimp_ftrapcc_tp: 4300 mov.l USER_FPIAR(%a6),EXC_EA(%a6) # Address = Current PC 4301 mov.w &0x201c,EXC_VOFF(%a6) # Vector Offset = 0x01c 4302 4303 fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1 4304 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 4305 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 4306 4307 unlk %a6 4308 bra.l _real_trap 4309 4310######################### 4311# fdb<cc> Dn,<label> # 4312######################### 4313funimp_fdbcc: 4314 4315 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 4316 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr 4317 bsr.l _imem_read_word # read displacement 4318 4319 tst.l %d1 # did ifetch fail? 
4320 bne.w funimp_iacc # yes 4321 4322 ext.l %d0 # sign extend displacement 4323 4324 bsr.l _fdbcc # FDB<cc>() 4325 4326 cmpi.b SPCOND_FLG(%a6),&fbsun_flg # is enabled bsun occurring? 4327 beq.w funimp_bsun 4328 4329 bra.w funimp_done # branch to finish 4330 4331################# 4332# fs<cc>.b <ea> # 4333################# 4334funimp_fscc: 4335 4336 bsr.l _fscc # FS<cc>() 4337 4338# I am assuming here that an "fs<cc>.b -(An)" or "fs<cc>.b (An)+" instruction 4339# does not need to update "An" before taking a bsun exception. 4340 cmpi.b SPCOND_FLG(%a6),&fbsun_flg # is enabled bsun occurring? 4341 beq.w funimp_bsun 4342 4343 btst &0x5,EXC_SR(%a6) # yes; is it a user mode exception? 4344 bne.b funimp_fscc_s # no 4345 4346funimp_fscc_u: 4347 mov.l EXC_A7(%a6),%a0 # yes; set new USP 4348 mov.l %a0,%usp 4349 bra.w funimp_done # branch to finish 4350 4351# remember, I'm assuming that post-increment is bogus...(it IS!!!) 4352# so, the least significant WORD of the stacked effective address got 4353# overwritten by the "fs<cc> -(An)". We must shift the stack frame "down" 4354# so that the rte will work correctly without destroying the result. 4355# even though the operation size is byte, the stack ptr is decr by 2. 4356# 4357# remember, also, this instruction may be traced. 4358funimp_fscc_s: 4359 cmpi.b SPCOND_FLG(%a6),&mda7_flg # was a7 modified? 4360 bne.w funimp_done # no 4361 4362 fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1 4363 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 4364 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 4365 4366 unlk %a6 4367 4368 btst &0x7,(%sp) # is trace enabled? 4369 bne.b funimp_fscc_s_trace # yes 4370 4371 subq.l &0x2,%sp 4372 mov.l 0x2(%sp),(%sp) # shift SR,hi(PC) "down" 4373 mov.l 0x6(%sp),0x4(%sp) # shift lo(PC),voff "down" 4374 bra.l _fpsp_done 4375 4376funimp_fscc_s_trace: 4377 subq.l &0x2,%sp 4378 mov.l 0x2(%sp),(%sp) # shift SR,hi(PC) "down" 4379 mov.w 0x6(%sp),0x4(%sp) # shift lo(PC) 4380 mov.w &0x2024,0x6(%sp) # fmt/voff = $2024 4381 fmov.l %fpiar,0x8(%sp) # insert "current PC" 4382 4383 bra.l _real_trace 4384 4385# 4386# The ftrap<cc>, fs<cc>, or fdb<cc> is to take an enabled bsun. we must convert 4387# the fp unimplemented instruction exception stack frame into a bsun stack frame, 4388# restore a bsun exception into the machine, and branch to the user 4389# supplied bsun hook. 4390# 4391# FP UNIMP FRAME BSUN FRAME 4392# ***************** ***************** 4393# ** <EA> ** * 0x0 * 0x0c0 * 4394# ***************** ***************** 4395# * 0x2 * 0x02c * ** Current PC ** 4396# ***************** ***************** 4397# ** Next PC ** * SR * 4398# ***************** ***************** 4399# * SR * (4 words) 4400# ***************** 4401# (6 words) 4402# 4403funimp_bsun: 4404 mov.w &0x00c0,2+EXC_EA(%a6) # Fmt = 0x0; Vector Offset = 0x0c0 4405 mov.l USER_FPIAR(%a6),EXC_VOFF(%a6) # PC = Current PC 4406 mov.w EXC_SR(%a6),2+EXC_PC(%a6) # shift SR "up" 4407 4408 mov.w &0xe000,2+FP_SRC(%a6) # bsun exception enabled 4409 4410 fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1 4411 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 4412 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 4413 4414 frestore FP_SRC(%a6) # restore bsun exception 4415 4416 unlk %a6 4417 4418 addq.l &0x4,%sp # erase sludge 4419 4420 bra.l _real_bsun # branch to user bsun hook 4421 4422# 4423# all ftrapcc/fscc/fdbcc processing has been completed. unwind the stack frame 4424# and return. 4425# 4426# as usual, we have to check for trace mode being on here. 
since instructions 4427# modifying the supervisor stack frame don't pass through here, this is a 4428# relatively easy task. 4429# 4430funimp_done: 4431 fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1 4432 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 4433 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 4434 4435 unlk %a6 4436 4437 btst &0x7,(%sp) # is trace enabled? 4438 bne.b funimp_trace # yes 4439 4440 bra.l _fpsp_done 4441 4442# FP UNIMP FRAME TRACE FRAME 4443# ***************** ***************** 4444# ** <EA> ** ** Current PC ** 4445# ***************** ***************** 4446# * 0x2 * 0x02c * * 0x2 * 0x024 * 4447# ***************** ***************** 4448# ** Next PC ** ** Next PC ** 4449# ***************** ***************** 4450# * SR * * SR * 4451# ***************** ***************** 4452# (6 words) (6 words) 4453# 4454# the fscc instruction should take a trace trap. so, here we must create a 4455# trace stack frame from an unimplemented fp instruction stack frame and 4456# jump to the user supplied entry point for the trace exception 4457funimp_trace: 4458 fmov.l %fpiar,0x8(%sp) # current PC is in fpiar 4459 mov.b &0x24,0x7(%sp) # vector offset = 0x024 4460 4461 bra.l _real_trace 4462 4463################################################################ 4464 4465 global tbl_trans 4466 swbeg &0x1c0 4467tbl_trans: 4468 short tbl_trans - tbl_trans # $00-0 fmovecr all 4469 short tbl_trans - tbl_trans # $00-1 fmovecr all 4470 short tbl_trans - tbl_trans # $00-2 fmovecr all 4471 short tbl_trans - tbl_trans # $00-3 fmovecr all 4472 short tbl_trans - tbl_trans # $00-4 fmovecr all 4473 short tbl_trans - tbl_trans # $00-5 fmovecr all 4474 short tbl_trans - tbl_trans # $00-6 fmovecr all 4475 short tbl_trans - tbl_trans # $00-7 fmovecr all 4476 4477 short tbl_trans - tbl_trans # $01-0 fint norm 4478 short tbl_trans - tbl_trans # $01-1 fint zero 4479 short tbl_trans - tbl_trans # $01-2 fint inf 4480 short tbl_trans - tbl_trans # $01-3 fint qnan 4481 short tbl_trans - tbl_trans # $01-5 fint denorm 4482 short tbl_trans - tbl_trans # $01-4 fint snan 4483 short tbl_trans - tbl_trans # $01-6 fint unnorm 4484 short tbl_trans - tbl_trans # $01-7 ERROR 4485 4486 short ssinh - tbl_trans # $02-0 fsinh norm 4487 short src_zero - tbl_trans # $02-1 fsinh zero 4488 short src_inf - tbl_trans # $02-2 fsinh inf 4489 short src_qnan - tbl_trans # $02-3 fsinh qnan 4490 short ssinhd - tbl_trans # $02-5 fsinh denorm 4491 short src_snan - tbl_trans # $02-4 fsinh snan 4492 short tbl_trans - tbl_trans # $02-6 fsinh unnorm 4493 short tbl_trans - tbl_trans # $02-7 ERROR 4494 4495 short tbl_trans - tbl_trans # $03-0 fintrz norm 4496 short tbl_trans - tbl_trans # $03-1 fintrz zero 4497 short tbl_trans - tbl_trans # $03-2 fintrz inf 4498 short tbl_trans - tbl_trans # $03-3 fintrz qnan 4499 short tbl_trans - tbl_trans # $03-5 fintrz denorm 4500 short tbl_trans - tbl_trans # $03-4 fintrz snan 4501 short tbl_trans - tbl_trans # $03-6 fintrz unnorm 4502 short tbl_trans - tbl_trans # $03-7 ERROR 4503 4504 short tbl_trans - tbl_trans # $04-0 fsqrt norm 4505 short tbl_trans - tbl_trans # $04-1 fsqrt zero 4506 short tbl_trans - tbl_trans # $04-2 fsqrt inf 4507 short tbl_trans - tbl_trans # $04-3 fsqrt qnan 4508 short tbl_trans - tbl_trans # $04-5 fsqrt denorm 4509 short tbl_trans - tbl_trans # $04-4 fsqrt snan 4510 short tbl_trans - tbl_trans # $04-6 fsqrt unnorm 4511 short tbl_trans - tbl_trans # $04-7 ERROR 4512 4513 short tbl_trans - tbl_trans # $05-0 ERROR 4514 short tbl_trans - tbl_trans # $05-1 ERROR 4515 short 
tbl_trans - tbl_trans # $05-2 ERROR 4516 short tbl_trans - tbl_trans # $05-3 ERROR 4517 short tbl_trans - tbl_trans # $05-4 ERROR 4518 short tbl_trans - tbl_trans # $05-5 ERROR 4519 short tbl_trans - tbl_trans # $05-6 ERROR 4520 short tbl_trans - tbl_trans # $05-7 ERROR 4521 4522 short slognp1 - tbl_trans # $06-0 flognp1 norm 4523 short src_zero - tbl_trans # $06-1 flognp1 zero 4524 short sopr_inf - tbl_trans # $06-2 flognp1 inf 4525 short src_qnan - tbl_trans # $06-3 flognp1 qnan 4526 short slognp1d - tbl_trans # $06-5 flognp1 denorm 4527 short src_snan - tbl_trans # $06-4 flognp1 snan 4528 short tbl_trans - tbl_trans # $06-6 flognp1 unnorm 4529 short tbl_trans - tbl_trans # $06-7 ERROR 4530 4531 short tbl_trans - tbl_trans # $07-0 ERROR 4532 short tbl_trans - tbl_trans # $07-1 ERROR 4533 short tbl_trans - tbl_trans # $07-2 ERROR 4534 short tbl_trans - tbl_trans # $07-3 ERROR 4535 short tbl_trans - tbl_trans # $07-4 ERROR 4536 short tbl_trans - tbl_trans # $07-5 ERROR 4537 short tbl_trans - tbl_trans # $07-6 ERROR 4538 short tbl_trans - tbl_trans # $07-7 ERROR 4539 4540 short setoxm1 - tbl_trans # $08-0 fetoxm1 norm 4541 short src_zero - tbl_trans # $08-1 fetoxm1 zero 4542 short setoxm1i - tbl_trans # $08-2 fetoxm1 inf 4543 short src_qnan - tbl_trans # $08-3 fetoxm1 qnan 4544 short setoxm1d - tbl_trans # $08-5 fetoxm1 denorm 4545 short src_snan - tbl_trans # $08-4 fetoxm1 snan 4546 short tbl_trans - tbl_trans # $08-6 fetoxm1 unnorm 4547 short tbl_trans - tbl_trans # $08-7 ERROR 4548 4549 short stanh - tbl_trans # $09-0 ftanh norm 4550 short src_zero - tbl_trans # $09-1 ftanh zero 4551 short src_one - tbl_trans # $09-2 ftanh inf 4552 short src_qnan - tbl_trans # $09-3 ftanh qnan 4553 short stanhd - tbl_trans # $09-5 ftanh denorm 4554 short src_snan - tbl_trans # $09-4 ftanh snan 4555 short tbl_trans - tbl_trans # $09-6 ftanh unnorm 4556 short tbl_trans - tbl_trans # $09-7 ERROR 4557 4558 short satan - tbl_trans # $0a-0 fatan norm 4559 short src_zero - tbl_trans # $0a-1 fatan zero 4560 short spi_2 - tbl_trans # $0a-2 fatan inf 4561 short src_qnan - tbl_trans # $0a-3 fatan qnan 4562 short satand - tbl_trans # $0a-5 fatan denorm 4563 short src_snan - tbl_trans # $0a-4 fatan snan 4564 short tbl_trans - tbl_trans # $0a-6 fatan unnorm 4565 short tbl_trans - tbl_trans # $0a-7 ERROR 4566 4567 short tbl_trans - tbl_trans # $0b-0 ERROR 4568 short tbl_trans - tbl_trans # $0b-1 ERROR 4569 short tbl_trans - tbl_trans # $0b-2 ERROR 4570 short tbl_trans - tbl_trans # $0b-3 ERROR 4571 short tbl_trans - tbl_trans # $0b-4 ERROR 4572 short tbl_trans - tbl_trans # $0b-5 ERROR 4573 short tbl_trans - tbl_trans # $0b-6 ERROR 4574 short tbl_trans - tbl_trans # $0b-7 ERROR 4575 4576 short sasin - tbl_trans # $0c-0 fasin norm 4577 short src_zero - tbl_trans # $0c-1 fasin zero 4578 short t_operr - tbl_trans # $0c-2 fasin inf 4579 short src_qnan - tbl_trans # $0c-3 fasin qnan 4580 short sasind - tbl_trans # $0c-5 fasin denorm 4581 short src_snan - tbl_trans # $0c-4 fasin snan 4582 short tbl_trans - tbl_trans # $0c-6 fasin unnorm 4583 short tbl_trans - tbl_trans # $0c-7 ERROR 4584 4585 short satanh - tbl_trans # $0d-0 fatanh norm 4586 short src_zero - tbl_trans # $0d-1 fatanh zero 4587 short t_operr - tbl_trans # $0d-2 fatanh inf 4588 short src_qnan - tbl_trans # $0d-3 fatanh qnan 4589 short satanhd - tbl_trans # $0d-5 fatanh denorm 4590 short src_snan - tbl_trans # $0d-4 fatanh snan 4591 short tbl_trans - tbl_trans # $0d-6 fatanh unnorm 4592 short tbl_trans - tbl_trans # $0d-7 ERROR 4593 4594 short ssin - tbl_trans # 
$0e-0 fsin norm 4595 short src_zero - tbl_trans # $0e-1 fsin zero 4596 short t_operr - tbl_trans # $0e-2 fsin inf 4597 short src_qnan - tbl_trans # $0e-3 fsin qnan 4598 short ssind - tbl_trans # $0e-5 fsin denorm 4599 short src_snan - tbl_trans # $0e-4 fsin snan 4600 short tbl_trans - tbl_trans # $0e-6 fsin unnorm 4601 short tbl_trans - tbl_trans # $0e-7 ERROR 4602 4603 short stan - tbl_trans # $0f-0 ftan norm 4604 short src_zero - tbl_trans # $0f-1 ftan zero 4605 short t_operr - tbl_trans # $0f-2 ftan inf 4606 short src_qnan - tbl_trans # $0f-3 ftan qnan 4607 short stand - tbl_trans # $0f-5 ftan denorm 4608 short src_snan - tbl_trans # $0f-4 ftan snan 4609 short tbl_trans - tbl_trans # $0f-6 ftan unnorm 4610 short tbl_trans - tbl_trans # $0f-7 ERROR 4611 4612 short setox - tbl_trans # $10-0 fetox norm 4613 short ld_pone - tbl_trans # $10-1 fetox zero 4614 short szr_inf - tbl_trans # $10-2 fetox inf 4615 short src_qnan - tbl_trans # $10-3 fetox qnan 4616 short setoxd - tbl_trans # $10-5 fetox denorm 4617 short src_snan - tbl_trans # $10-4 fetox snan 4618 short tbl_trans - tbl_trans # $10-6 fetox unnorm 4619 short tbl_trans - tbl_trans # $10-7 ERROR 4620 4621 short stwotox - tbl_trans # $11-0 ftwotox norm 4622 short ld_pone - tbl_trans # $11-1 ftwotox zero 4623 short szr_inf - tbl_trans # $11-2 ftwotox inf 4624 short src_qnan - tbl_trans # $11-3 ftwotox qnan 4625 short stwotoxd - tbl_trans # $11-5 ftwotox denorm 4626 short src_snan - tbl_trans # $11-4 ftwotox snan 4627 short tbl_trans - tbl_trans # $11-6 ftwotox unnorm 4628 short tbl_trans - tbl_trans # $11-7 ERROR 4629 4630 short stentox - tbl_trans # $12-0 ftentox norm 4631 short ld_pone - tbl_trans # $12-1 ftentox zero 4632 short szr_inf - tbl_trans # $12-2 ftentox inf 4633 short src_qnan - tbl_trans # $12-3 ftentox qnan 4634 short stentoxd - tbl_trans # $12-5 ftentox denorm 4635 short src_snan - tbl_trans # $12-4 ftentox snan 4636 short tbl_trans - tbl_trans # $12-6 ftentox unnorm 4637 short tbl_trans - tbl_trans # $12-7 ERROR 4638 4639 short tbl_trans - tbl_trans # $13-0 ERROR 4640 short tbl_trans - tbl_trans # $13-1 ERROR 4641 short tbl_trans - tbl_trans # $13-2 ERROR 4642 short tbl_trans - tbl_trans # $13-3 ERROR 4643 short tbl_trans - tbl_trans # $13-4 ERROR 4644 short tbl_trans - tbl_trans # $13-5 ERROR 4645 short tbl_trans - tbl_trans # $13-6 ERROR 4646 short tbl_trans - tbl_trans # $13-7 ERROR 4647 4648 short slogn - tbl_trans # $14-0 flogn norm 4649 short t_dz2 - tbl_trans # $14-1 flogn zero 4650 short sopr_inf - tbl_trans # $14-2 flogn inf 4651 short src_qnan - tbl_trans # $14-3 flogn qnan 4652 short slognd - tbl_trans # $14-5 flogn denorm 4653 short src_snan - tbl_trans # $14-4 flogn snan 4654 short tbl_trans - tbl_trans # $14-6 flogn unnorm 4655 short tbl_trans - tbl_trans # $14-7 ERROR 4656 4657 short slog10 - tbl_trans # $15-0 flog10 norm 4658 short t_dz2 - tbl_trans # $15-1 flog10 zero 4659 short sopr_inf - tbl_trans # $15-2 flog10 inf 4660 short src_qnan - tbl_trans # $15-3 flog10 qnan 4661 short slog10d - tbl_trans # $15-5 flog10 denorm 4662 short src_snan - tbl_trans # $15-4 flog10 snan 4663 short tbl_trans - tbl_trans # $15-6 flog10 unnorm 4664 short tbl_trans - tbl_trans # $15-7 ERROR 4665 4666 short slog2 - tbl_trans # $16-0 flog2 norm 4667 short t_dz2 - tbl_trans # $16-1 flog2 zero 4668 short sopr_inf - tbl_trans # $16-2 flog2 inf 4669 short src_qnan - tbl_trans # $16-3 flog2 qnan 4670 short slog2d - tbl_trans # $16-5 flog2 denorm 4671 short src_snan - tbl_trans # $16-4 flog2 snan 4672 short tbl_trans - tbl_trans # 
$16-6 flog2 unnorm 4673 short tbl_trans - tbl_trans # $16-7 ERROR 4674 4675 short tbl_trans - tbl_trans # $17-0 ERROR 4676 short tbl_trans - tbl_trans # $17-1 ERROR 4677 short tbl_trans - tbl_trans # $17-2 ERROR 4678 short tbl_trans - tbl_trans # $17-3 ERROR 4679 short tbl_trans - tbl_trans # $17-4 ERROR 4680 short tbl_trans - tbl_trans # $17-5 ERROR 4681 short tbl_trans - tbl_trans # $17-6 ERROR 4682 short tbl_trans - tbl_trans # $17-7 ERROR 4683 4684 short tbl_trans - tbl_trans # $18-0 fabs norm 4685 short tbl_trans - tbl_trans # $18-1 fabs zero 4686 short tbl_trans - tbl_trans # $18-2 fabs inf 4687 short tbl_trans - tbl_trans # $18-3 fabs qnan 4688 short tbl_trans - tbl_trans # $18-5 fabs denorm 4689 short tbl_trans - tbl_trans # $18-4 fabs snan 4690 short tbl_trans - tbl_trans # $18-6 fabs unnorm 4691 short tbl_trans - tbl_trans # $18-7 ERROR 4692 4693 short scosh - tbl_trans # $19-0 fcosh norm 4694 short ld_pone - tbl_trans # $19-1 fcosh zero 4695 short ld_pinf - tbl_trans # $19-2 fcosh inf 4696 short src_qnan - tbl_trans # $19-3 fcosh qnan 4697 short scoshd - tbl_trans # $19-5 fcosh denorm 4698 short src_snan - tbl_trans # $19-4 fcosh snan 4699 short tbl_trans - tbl_trans # $19-6 fcosh unnorm 4700 short tbl_trans - tbl_trans # $19-7 ERROR 4701 4702 short tbl_trans - tbl_trans # $1a-0 fneg norm 4703 short tbl_trans - tbl_trans # $1a-1 fneg zero 4704 short tbl_trans - tbl_trans # $1a-2 fneg inf 4705 short tbl_trans - tbl_trans # $1a-3 fneg qnan 4706 short tbl_trans - tbl_trans # $1a-5 fneg denorm 4707 short tbl_trans - tbl_trans # $1a-4 fneg snan 4708 short tbl_trans - tbl_trans # $1a-6 fneg unnorm 4709 short tbl_trans - tbl_trans # $1a-7 ERROR 4710 4711 short tbl_trans - tbl_trans # $1b-0 ERROR 4712 short tbl_trans - tbl_trans # $1b-1 ERROR 4713 short tbl_trans - tbl_trans # $1b-2 ERROR 4714 short tbl_trans - tbl_trans # $1b-3 ERROR 4715 short tbl_trans - tbl_trans # $1b-4 ERROR 4716 short tbl_trans - tbl_trans # $1b-5 ERROR 4717 short tbl_trans - tbl_trans # $1b-6 ERROR 4718 short tbl_trans - tbl_trans # $1b-7 ERROR 4719 4720 short sacos - tbl_trans # $1c-0 facos norm 4721 short ld_ppi2 - tbl_trans # $1c-1 facos zero 4722 short t_operr - tbl_trans # $1c-2 facos inf 4723 short src_qnan - tbl_trans # $1c-3 facos qnan 4724 short sacosd - tbl_trans # $1c-5 facos denorm 4725 short src_snan - tbl_trans # $1c-4 facos snan 4726 short tbl_trans - tbl_trans # $1c-6 facos unnorm 4727 short tbl_trans - tbl_trans # $1c-7 ERROR 4728 4729 short scos - tbl_trans # $1d-0 fcos norm 4730 short ld_pone - tbl_trans # $1d-1 fcos zero 4731 short t_operr - tbl_trans # $1d-2 fcos inf 4732 short src_qnan - tbl_trans # $1d-3 fcos qnan 4733 short scosd - tbl_trans # $1d-5 fcos denorm 4734 short src_snan - tbl_trans # $1d-4 fcos snan 4735 short tbl_trans - tbl_trans # $1d-6 fcos unnorm 4736 short tbl_trans - tbl_trans # $1d-7 ERROR 4737 4738 short sgetexp - tbl_trans # $1e-0 fgetexp norm 4739 short src_zero - tbl_trans # $1e-1 fgetexp zero 4740 short t_operr - tbl_trans # $1e-2 fgetexp inf 4741 short src_qnan - tbl_trans # $1e-3 fgetexp qnan 4742 short sgetexpd - tbl_trans # $1e-5 fgetexp denorm 4743 short src_snan - tbl_trans # $1e-4 fgetexp snan 4744 short tbl_trans - tbl_trans # $1e-6 fgetexp unnorm 4745 short tbl_trans - tbl_trans # $1e-7 ERROR 4746 4747 short sgetman - tbl_trans # $1f-0 fgetman norm 4748 short src_zero - tbl_trans # $1f-1 fgetman zero 4749 short t_operr - tbl_trans # $1f-2 fgetman inf 4750 short src_qnan - tbl_trans # $1f-3 fgetman qnan 4751 short sgetmand - tbl_trans # $1f-5 fgetman denorm 
4752 short src_snan - tbl_trans # $1f-4 fgetman snan 4753 short tbl_trans - tbl_trans # $1f-6 fgetman unnorm 4754 short tbl_trans - tbl_trans # $1f-7 ERROR 4755 4756 short tbl_trans - tbl_trans # $20-0 fdiv norm 4757 short tbl_trans - tbl_trans # $20-1 fdiv zero 4758 short tbl_trans - tbl_trans # $20-2 fdiv inf 4759 short tbl_trans - tbl_trans # $20-3 fdiv qnan 4760 short tbl_trans - tbl_trans # $20-5 fdiv denorm 4761 short tbl_trans - tbl_trans # $20-4 fdiv snan 4762 short tbl_trans - tbl_trans # $20-6 fdiv unnorm 4763 short tbl_trans - tbl_trans # $20-7 ERROR 4764 4765 short smod_snorm - tbl_trans # $21-0 fmod norm 4766 short smod_szero - tbl_trans # $21-1 fmod zero 4767 short smod_sinf - tbl_trans # $21-2 fmod inf 4768 short sop_sqnan - tbl_trans # $21-3 fmod qnan 4769 short smod_sdnrm - tbl_trans # $21-5 fmod denorm 4770 short sop_ssnan - tbl_trans # $21-4 fmod snan 4771 short tbl_trans - tbl_trans # $21-6 fmod unnorm 4772 short tbl_trans - tbl_trans # $21-7 ERROR 4773 4774 short tbl_trans - tbl_trans # $22-0 fadd norm 4775 short tbl_trans - tbl_trans # $22-1 fadd zero 4776 short tbl_trans - tbl_trans # $22-2 fadd inf 4777 short tbl_trans - tbl_trans # $22-3 fadd qnan 4778 short tbl_trans - tbl_trans # $22-5 fadd denorm 4779 short tbl_trans - tbl_trans # $22-4 fadd snan 4780 short tbl_trans - tbl_trans # $22-6 fadd unnorm 4781 short tbl_trans - tbl_trans # $22-7 ERROR 4782 4783 short tbl_trans - tbl_trans # $23-0 fmul norm 4784 short tbl_trans - tbl_trans # $23-1 fmul zero 4785 short tbl_trans - tbl_trans # $23-2 fmul inf 4786 short tbl_trans - tbl_trans # $23-3 fmul qnan 4787 short tbl_trans - tbl_trans # $23-5 fmul denorm 4788 short tbl_trans - tbl_trans # $23-4 fmul snan 4789 short tbl_trans - tbl_trans # $23-6 fmul unnorm 4790 short tbl_trans - tbl_trans # $23-7 ERROR 4791 4792 short tbl_trans - tbl_trans # $24-0 fsgldiv norm 4793 short tbl_trans - tbl_trans # $24-1 fsgldiv zero 4794 short tbl_trans - tbl_trans # $24-2 fsgldiv inf 4795 short tbl_trans - tbl_trans # $24-3 fsgldiv qnan 4796 short tbl_trans - tbl_trans # $24-5 fsgldiv denorm 4797 short tbl_trans - tbl_trans # $24-4 fsgldiv snan 4798 short tbl_trans - tbl_trans # $24-6 fsgldiv unnorm 4799 short tbl_trans - tbl_trans # $24-7 ERROR 4800 4801 short srem_snorm - tbl_trans # $25-0 frem norm 4802 short srem_szero - tbl_trans # $25-1 frem zero 4803 short srem_sinf - tbl_trans # $25-2 frem inf 4804 short sop_sqnan - tbl_trans # $25-3 frem qnan 4805 short srem_sdnrm - tbl_trans # $25-5 frem denorm 4806 short sop_ssnan - tbl_trans # $25-4 frem snan 4807 short tbl_trans - tbl_trans # $25-6 frem unnorm 4808 short tbl_trans - tbl_trans # $25-7 ERROR 4809 4810 short sscale_snorm - tbl_trans # $26-0 fscale norm 4811 short sscale_szero - tbl_trans # $26-1 fscale zero 4812 short sscale_sinf - tbl_trans # $26-2 fscale inf 4813 short sop_sqnan - tbl_trans # $26-3 fscale qnan 4814 short sscale_sdnrm - tbl_trans # $26-5 fscale denorm 4815 short sop_ssnan - tbl_trans # $26-4 fscale snan 4816 short tbl_trans - tbl_trans # $26-6 fscale unnorm 4817 short tbl_trans - tbl_trans # $26-7 ERROR 4818 4819 short tbl_trans - tbl_trans # $27-0 fsglmul norm 4820 short tbl_trans - tbl_trans # $27-1 fsglmul zero 4821 short tbl_trans - tbl_trans # $27-2 fsglmul inf 4822 short tbl_trans - tbl_trans # $27-3 fsglmul qnan 4823 short tbl_trans - tbl_trans # $27-5 fsglmul denorm 4824 short tbl_trans - tbl_trans # $27-4 fsglmul snan 4825 short tbl_trans - tbl_trans # $27-6 fsglmul unnorm 4826 short tbl_trans - tbl_trans # $27-7 ERROR 4827 4828 short tbl_trans - 
tbl_trans # $28-0 fsub norm 4829 short tbl_trans - tbl_trans # $28-1 fsub zero 4830 short tbl_trans - tbl_trans # $28-2 fsub inf 4831 short tbl_trans - tbl_trans # $28-3 fsub qnan 4832 short tbl_trans - tbl_trans # $28-5 fsub denorm 4833 short tbl_trans - tbl_trans # $28-4 fsub snan 4834 short tbl_trans - tbl_trans # $28-6 fsub unnorm 4835 short tbl_trans - tbl_trans # $28-7 ERROR 4836 4837 short tbl_trans - tbl_trans # $29-0 ERROR 4838 short tbl_trans - tbl_trans # $29-1 ERROR 4839 short tbl_trans - tbl_trans # $29-2 ERROR 4840 short tbl_trans - tbl_trans # $29-3 ERROR 4841 short tbl_trans - tbl_trans # $29-4 ERROR 4842 short tbl_trans - tbl_trans # $29-5 ERROR 4843 short tbl_trans - tbl_trans # $29-6 ERROR 4844 short tbl_trans - tbl_trans # $29-7 ERROR 4845 4846 short tbl_trans - tbl_trans # $2a-0 ERROR 4847 short tbl_trans - tbl_trans # $2a-1 ERROR 4848 short tbl_trans - tbl_trans # $2a-2 ERROR 4849 short tbl_trans - tbl_trans # $2a-3 ERROR 4850 short tbl_trans - tbl_trans # $2a-4 ERROR 4851 short tbl_trans - tbl_trans # $2a-5 ERROR 4852 short tbl_trans - tbl_trans # $2a-6 ERROR 4853 short tbl_trans - tbl_trans # $2a-7 ERROR 4854 4855 short tbl_trans - tbl_trans # $2b-0 ERROR 4856 short tbl_trans - tbl_trans # $2b-1 ERROR 4857 short tbl_trans - tbl_trans # $2b-2 ERROR 4858 short tbl_trans - tbl_trans # $2b-3 ERROR 4859 short tbl_trans - tbl_trans # $2b-4 ERROR 4860 short tbl_trans - tbl_trans # $2b-5 ERROR 4861 short tbl_trans - tbl_trans # $2b-6 ERROR 4862 short tbl_trans - tbl_trans # $2b-7 ERROR 4863 4864 short tbl_trans - tbl_trans # $2c-0 ERROR 4865 short tbl_trans - tbl_trans # $2c-1 ERROR 4866 short tbl_trans - tbl_trans # $2c-2 ERROR 4867 short tbl_trans - tbl_trans # $2c-3 ERROR 4868 short tbl_trans - tbl_trans # $2c-4 ERROR 4869 short tbl_trans - tbl_trans # $2c-5 ERROR 4870 short tbl_trans - tbl_trans # $2c-6 ERROR 4871 short tbl_trans - tbl_trans # $2c-7 ERROR 4872 4873 short tbl_trans - tbl_trans # $2d-0 ERROR 4874 short tbl_trans - tbl_trans # $2d-1 ERROR 4875 short tbl_trans - tbl_trans # $2d-2 ERROR 4876 short tbl_trans - tbl_trans # $2d-3 ERROR 4877 short tbl_trans - tbl_trans # $2d-4 ERROR 4878 short tbl_trans - tbl_trans # $2d-5 ERROR 4879 short tbl_trans - tbl_trans # $2d-6 ERROR 4880 short tbl_trans - tbl_trans # $2d-7 ERROR 4881 4882 short tbl_trans - tbl_trans # $2e-0 ERROR 4883 short tbl_trans - tbl_trans # $2e-1 ERROR 4884 short tbl_trans - tbl_trans # $2e-2 ERROR 4885 short tbl_trans - tbl_trans # $2e-3 ERROR 4886 short tbl_trans - tbl_trans # $2e-4 ERROR 4887 short tbl_trans - tbl_trans # $2e-5 ERROR 4888 short tbl_trans - tbl_trans # $2e-6 ERROR 4889 short tbl_trans - tbl_trans # $2e-7 ERROR 4890 4891 short tbl_trans - tbl_trans # $2f-0 ERROR 4892 short tbl_trans - tbl_trans # $2f-1 ERROR 4893 short tbl_trans - tbl_trans # $2f-2 ERROR 4894 short tbl_trans - tbl_trans # $2f-3 ERROR 4895 short tbl_trans - tbl_trans # $2f-4 ERROR 4896 short tbl_trans - tbl_trans # $2f-5 ERROR 4897 short tbl_trans - tbl_trans # $2f-6 ERROR 4898 short tbl_trans - tbl_trans # $2f-7 ERROR 4899 4900 short ssincos - tbl_trans # $30-0 fsincos norm 4901 short ssincosz - tbl_trans # $30-1 fsincos zero 4902 short ssincosi - tbl_trans # $30-2 fsincos inf 4903 short ssincosqnan - tbl_trans # $30-3 fsincos qnan 4904 short ssincosd - tbl_trans # $30-5 fsincos denorm 4905 short ssincossnan - tbl_trans # $30-4 fsincos snan 4906 short tbl_trans - tbl_trans # $30-6 fsincos unnorm 4907 short tbl_trans - tbl_trans # $30-7 ERROR 4908 4909 short ssincos - tbl_trans # $31-0 fsincos norm 4910 short 
ssincosz - tbl_trans # $31-1 fsincos zero 4911 short ssincosi - tbl_trans # $31-2 fsincos inf 4912 short ssincosqnan - tbl_trans # $31-3 fsincos qnan 4913 short ssincosd - tbl_trans # $31-5 fsincos denorm 4914 short ssincossnan - tbl_trans # $31-4 fsincos snan 4915 short tbl_trans - tbl_trans # $31-6 fsincos unnorm 4916 short tbl_trans - tbl_trans # $31-7 ERROR 4917 4918 short ssincos - tbl_trans # $32-0 fsincos norm 4919 short ssincosz - tbl_trans # $32-1 fsincos zero 4920 short ssincosi - tbl_trans # $32-2 fsincos inf 4921 short ssincosqnan - tbl_trans # $32-3 fsincos qnan 4922 short ssincosd - tbl_trans # $32-5 fsincos denorm 4923 short ssincossnan - tbl_trans # $32-4 fsincos snan 4924 short tbl_trans - tbl_trans # $32-6 fsincos unnorm 4925 short tbl_trans - tbl_trans # $32-7 ERROR 4926 4927 short ssincos - tbl_trans # $33-0 fsincos norm 4928 short ssincosz - tbl_trans # $33-1 fsincos zero 4929 short ssincosi - tbl_trans # $33-2 fsincos inf 4930 short ssincosqnan - tbl_trans # $33-3 fsincos qnan 4931 short ssincosd - tbl_trans # $33-5 fsincos denorm 4932 short ssincossnan - tbl_trans # $33-4 fsincos snan 4933 short tbl_trans - tbl_trans # $33-6 fsincos unnorm 4934 short tbl_trans - tbl_trans # $33-7 ERROR 4935 4936 short ssincos - tbl_trans # $34-0 fsincos norm 4937 short ssincosz - tbl_trans # $34-1 fsincos zero 4938 short ssincosi - tbl_trans # $34-2 fsincos inf 4939 short ssincosqnan - tbl_trans # $34-3 fsincos qnan 4940 short ssincosd - tbl_trans # $34-5 fsincos denorm 4941 short ssincossnan - tbl_trans # $34-4 fsincos snan 4942 short tbl_trans - tbl_trans # $34-6 fsincos unnorm 4943 short tbl_trans - tbl_trans # $34-7 ERROR 4944 4945 short ssincos - tbl_trans # $35-0 fsincos norm 4946 short ssincosz - tbl_trans # $35-1 fsincos zero 4947 short ssincosi - tbl_trans # $35-2 fsincos inf 4948 short ssincosqnan - tbl_trans # $35-3 fsincos qnan 4949 short ssincosd - tbl_trans # $35-5 fsincos denorm 4950 short ssincossnan - tbl_trans # $35-4 fsincos snan 4951 short tbl_trans - tbl_trans # $35-6 fsincos unnorm 4952 short tbl_trans - tbl_trans # $35-7 ERROR 4953 4954 short ssincos - tbl_trans # $36-0 fsincos norm 4955 short ssincosz - tbl_trans # $36-1 fsincos zero 4956 short ssincosi - tbl_trans # $36-2 fsincos inf 4957 short ssincosqnan - tbl_trans # $36-3 fsincos qnan 4958 short ssincosd - tbl_trans # $36-5 fsincos denorm 4959 short ssincossnan - tbl_trans # $36-4 fsincos snan 4960 short tbl_trans - tbl_trans # $36-6 fsincos unnorm 4961 short tbl_trans - tbl_trans # $36-7 ERROR 4962 4963 short ssincos - tbl_trans # $37-0 fsincos norm 4964 short ssincosz - tbl_trans # $37-1 fsincos zero 4965 short ssincosi - tbl_trans # $37-2 fsincos inf 4966 short ssincosqnan - tbl_trans # $37-3 fsincos qnan 4967 short ssincosd - tbl_trans # $37-5 fsincos denorm 4968 short ssincossnan - tbl_trans # $37-4 fsincos snan 4969 short tbl_trans - tbl_trans # $37-6 fsincos unnorm 4970 short tbl_trans - tbl_trans # $37-7 ERROR 4971 4972########## 4973 4974# the instruction fetch access for the displacement word for the 4975# fdbcc emulation failed. here, we create an access error frame 4976# from the current frame and branch to _real_access(). 
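# the frame built below is laid out as the 68060 access error
# ("format $4") frame: SR and PC are copied from the current frame,
# the word $4008 is the format/vector word (stack frame format $4,
# vector offset $008 = access error), followed by the faulting
# effective address and a fault status longword (FSLW) describing
# the failed instruction fetch.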
4977funimp_iacc: 4978 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 4979 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 4980 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 4981 4982 mov.l USER_FPIAR(%a6),EXC_PC(%a6) # store current PC 4983 4984 unlk %a6 4985 4986 mov.l (%sp),-(%sp) # store SR,hi(PC) 4987 mov.w 0x8(%sp),0x4(%sp) # store lo(PC) 4988 mov.w &0x4008,0x6(%sp) # store voff 4989 mov.l 0x2(%sp),0x8(%sp) # store EA 4990 mov.l &0x09428001,0xc(%sp) # store FSLW 4991 4992 btst &0x5,(%sp) # user or supervisor mode? 4993 beq.b funimp_iacc_end # user 4994 bset &0x2,0xd(%sp) # set supervisor TM bit 4995 4996funimp_iacc_end: 4997 bra.l _real_access 4998 4999######################################################################### 5000# ssin(): computes the sine of a normalized input # 5001# ssind(): computes the sine of a denormalized input # 5002# scos(): computes the cosine of a normalized input # 5003# scosd(): computes the cosine of a denormalized input # 5004# ssincos(): computes the sine and cosine of a normalized input # 5005# ssincosd(): computes the sine and cosine of a denormalized input # 5006# # 5007# INPUT *************************************************************** # 5008# a0 = pointer to extended precision input # 5009# d0 = round precision,mode # 5010# # 5011# OUTPUT ************************************************************** # 5012# fp0 = sin(X) or cos(X) # 5013# # 5014# For ssincos(X): # 5015# fp0 = sin(X) # 5016# fp1 = cos(X) # 5017# # 5018# ACCURACY and MONOTONICITY ******************************************* # 5019# The returned result is within 1 ulp in 64 significant bit, i.e. # 5020# within 0.5001 ulp to 53 bits if the result is subsequently # 5021# rounded to double precision. The result is provably monotonic # 5022# in double precision. # 5023# # 5024# ALGORITHM *********************************************************** # 5025# # 5026# SIN and COS: # 5027# 1. If SIN is invoked, set AdjN := 0; otherwise, set AdjN := 1. # 5028# # 5029# 2. If |X| >= 15Pi or |X| < 2**(-40), go to 7. # 5030# # 5031# 3. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let # 5032# k = N mod 4, so in particular, k = 0,1,2,or 3. # 5033# Overwrite k by k := k + AdjN. # 5034# # 5035# 4. If k is even, go to 6. # 5036# # 5037# 5. (k is odd) Set j := (k-1)/2, sgn := (-1)**j. # 5038# Return sgn*cos(r) where cos(r) is approximated by an # 5039# even polynomial in r, 1 + r*r*(B1+s*(B2+ ... + s*B8)), # 5040# s = r*r. # 5041# Exit. # 5042# # 5043# 6. (k is even) Set j := k/2, sgn := (-1)**j. Return sgn*sin(r) # 5044# where sin(r) is approximated by an odd polynomial in r # 5045# r + r*s*(A1+s*(A2+ ... + s*A7)), s = r*r. # 5046# Exit. # 5047# # 5048# 7. If |X| > 1, go to 9. # 5049# # 5050# 8. (|X|<2**(-40)) If SIN is invoked, return X; # 5051# otherwise return 1. # 5052# # 5053# 9. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, # 5054# go back to 3. # 5055# # 5056# SINCOS: # 5057# 1. If |X| >= 15Pi or |X| < 2**(-40), go to 6. # 5058# # 5059# 2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let # 5060# k = N mod 4, so in particular, k = 0,1,2,or 3. # 5061# # 5062# 3. If k is even, go to 5. # 5063# # 5064# 4. (k is odd) Set j1 := (k-1)/2, j2 := j1 (EOR) (k mod 2), ie. # 5065# j1 exclusive or with the l.s.b. of k. # 5066# sgn1 := (-1)**j1, sgn2 := (-1)**j2. # 5067# SIN(X) = sgn1 * cos(r) and COS(X) = sgn2*sin(r) where # 5068# sin(r) and cos(r) are computed as odd and even # 5069# polynomials in r, respectively. Exit # 5070# # 5071# 5. 
(k is even) Set j1 := k/2, sgn1 := (-1)**j1. # 5072# SIN(X) = sgn1 * sin(r) and COS(X) = sgn1*cos(r) where # 5073# sin(r) and cos(r) are computed as odd and even # 5074# polynomials in r, respectively. Exit # 5075# # 5076# 6. If |X| > 1, go to 8. # 5077# # 5078# 7. (|X|<2**(-40)) SIN(X) = X and COS(X) = 1. Exit. # 5079# # 5080# 8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, # 5081# go back to 2. # 5082# # 5083######################################################################### 5084 5085SINA7: long 0xBD6AAA77,0xCCC994F5 5086SINA6: long 0x3DE61209,0x7AAE8DA1 5087SINA5: long 0xBE5AE645,0x2A118AE4 5088SINA4: long 0x3EC71DE3,0xA5341531 5089SINA3: long 0xBF2A01A0,0x1A018B59,0x00000000,0x00000000 5090SINA2: long 0x3FF80000,0x88888888,0x888859AF,0x00000000 5091SINA1: long 0xBFFC0000,0xAAAAAAAA,0xAAAAAA99,0x00000000 5092 5093COSB8: long 0x3D2AC4D0,0xD6011EE3 5094COSB7: long 0xBDA9396F,0x9F45AC19 5095COSB6: long 0x3E21EED9,0x0612C972 5096COSB5: long 0xBE927E4F,0xB79D9FCF 5097COSB4: long 0x3EFA01A0,0x1A01D423,0x00000000,0x00000000 5098COSB3: long 0xBFF50000,0xB60B60B6,0x0B61D438,0x00000000 5099COSB2: long 0x3FFA0000,0xAAAAAAAA,0xAAAAAB5E 5100COSB1: long 0xBF000000 5101 5102 set INARG,FP_SCR0 5103 5104 set X,FP_SCR0 5105# set XDCARE,X+2 5106 set XFRAC,X+4 5107 5108 set RPRIME,FP_SCR0 5109 set SPRIME,FP_SCR1 5110 5111 set POSNEG1,L_SCR1 5112 set TWOTO63,L_SCR1 5113 5114 set ENDFLAG,L_SCR2 5115 set INT,L_SCR2 5116 5117 set ADJN,L_SCR3 5118 5119############################################ 5120 global ssin 5121ssin: 5122 mov.l &0,ADJN(%a6) # yes; SET ADJN TO 0 5123 bra.b SINBGN 5124 5125############################################ 5126 global scos 5127scos: 5128 mov.l &1,ADJN(%a6) # yes; SET ADJN TO 1 5129 5130############################################ 5131SINBGN: 5132#--SAVE FPCR, FP1. CHECK IF |X| IS TOO SMALL OR LARGE 5133 5134 fmov.x (%a0),%fp0 # LOAD INPUT 5135 fmov.x %fp0,X(%a6) # save input at X 5136 5137# "COMPACTIFY" X 5138 mov.l (%a0),%d1 # put exp in hi word 5139 mov.w 4(%a0),%d1 # fetch hi(man) 5140 and.l &0x7FFFFFFF,%d1 # strip sign 5141 5142 cmpi.l %d1,&0x3FD78000 # is |X| >= 2**(-40)? 5143 bge.b SOK1 # no 5144 bra.w SINSM # yes; input is very small 5145 5146SOK1: 5147 cmp.l %d1,&0x4004BC7E # is |X| < 15 PI? 5148 blt.b SINMAIN # no 5149 bra.w SREDUCEX # yes; input is very large 5150 5151#--THIS IS THE USUAL CASE, |X| <= 15 PI. 5152#--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP. 5153SINMAIN: 5154 fmov.x %fp0,%fp1 5155 fmul.d TWOBYPI(%pc),%fp1 # X*2/PI 5156 5157 lea PITBL+0x200(%pc),%a1 # TABLE OF N*PI/2, N = -32,...,32 5158 5159 fmov.l %fp1,INT(%a6) # CONVERT TO INTEGER 5160 5161 mov.l INT(%a6),%d1 # make a copy of N 5162 asl.l &4,%d1 # N *= 16 5163 add.l %d1,%a1 # tbl_addr = a1 + (N*16) 5164 5165# A1 IS THE ADDRESS OF N*PIBY2 5166# ...WHICH IS IN TWO PIECES Y1 & Y2 5167 fsub.x (%a1)+,%fp0 # X-Y1 5168 fsub.s (%a1),%fp0 # fp0 = R = (X-Y1)-Y2 5169 5170SINCONT: 5171#--continuation from REDUCEX 5172 5173#--GET N+ADJN AND SEE IF SIN(R) OR COS(R) IS NEEDED 5174 mov.l INT(%a6),%d1 5175 add.l ADJN(%a6),%d1 # SEE IF D0 IS ODD OR EVEN 5176 ror.l &1,%d1 # D0 WAS ODD IFF D0 IS NEGATIVE 5177 cmp.l %d1,&0 5178 blt.w COSPOLY 5179 5180#--LET J BE THE LEAST SIG. BIT OF D0, LET SGN := (-1)**J. 5181#--THEN WE RETURN SGN*SIN(R). SGN*SIN(R) IS COMPUTED BY 5182#--R' + R'*S*(A1 + S(A2 + S(A3 + S(A4 + ... + SA7)))), WHERE 5183#--R' = SGN*R, S=R*R. THIS CAN BE REWRITTEN AS 5184#--R' + R'*S*( [A1+T(A3+T(A5+TA7))] + [S(A2+T(A4+TA6))]) 5185#--WHERE T=S*S. 
5186#--NOTE THAT A3 THROUGH A7 ARE STORED IN DOUBLE PRECISION 5187#--WHILE A1 AND A2 ARE IN DOUBLE-EXTENDED FORMAT. 5188SINPOLY: 5189 fmovm.x &0x0c,-(%sp) # save fp2/fp3 5190 5191 fmov.x %fp0,X(%a6) # X IS R 5192 fmul.x %fp0,%fp0 # FP0 IS S 5193 5194 fmov.d SINA7(%pc),%fp3 5195 fmov.d SINA6(%pc),%fp2 5196 5197 fmov.x %fp0,%fp1 5198 fmul.x %fp1,%fp1 # FP1 IS T 5199 5200 ror.l &1,%d1 5201 and.l &0x80000000,%d1 5202# ...LEAST SIG. BIT OF D0 IN SIGN POSITION 5203 eor.l %d1,X(%a6) # X IS NOW R'= SGN*R 5204 5205 fmul.x %fp1,%fp3 # TA7 5206 fmul.x %fp1,%fp2 # TA6 5207 5208 fadd.d SINA5(%pc),%fp3 # A5+TA7 5209 fadd.d SINA4(%pc),%fp2 # A4+TA6 5210 5211 fmul.x %fp1,%fp3 # T(A5+TA7) 5212 fmul.x %fp1,%fp2 # T(A4+TA6) 5213 5214 fadd.d SINA3(%pc),%fp3 # A3+T(A5+TA7) 5215 fadd.x SINA2(%pc),%fp2 # A2+T(A4+TA6) 5216 5217 fmul.x %fp3,%fp1 # T(A3+T(A5+TA7)) 5218 5219 fmul.x %fp0,%fp2 # S(A2+T(A4+TA6)) 5220 fadd.x SINA1(%pc),%fp1 # A1+T(A3+T(A5+TA7)) 5221 fmul.x X(%a6),%fp0 # R'*S 5222 5223 fadd.x %fp2,%fp1 # [A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))] 5224 5225 fmul.x %fp1,%fp0 # SIN(R')-R' 5226 5227 fmovm.x (%sp)+,&0x30 # restore fp2/fp3 5228 5229 fmov.l %d0,%fpcr # restore users round mode,prec 5230 fadd.x X(%a6),%fp0 # last inst - possible exception set 5231 bra t_inx2 5232 5233#--LET J BE THE LEAST SIG. BIT OF D0, LET SGN := (-1)**J. 5234#--THEN WE RETURN SGN*COS(R). SGN*COS(R) IS COMPUTED BY 5235#--SGN + S'*(B1 + S(B2 + S(B3 + S(B4 + ... + SB8)))), WHERE 5236#--S=R*R AND S'=SGN*S. THIS CAN BE REWRITTEN AS 5237#--SGN + S'*([B1+T(B3+T(B5+TB7))] + [S(B2+T(B4+T(B6+TB8)))]) 5238#--WHERE T=S*S. 5239#--NOTE THAT B4 THROUGH B8 ARE STORED IN DOUBLE PRECISION 5240#--WHILE B2 AND B3 ARE IN DOUBLE-EXTENDED FORMAT, B1 IS -1/2 5241#--AND IS THEREFORE STORED AS SINGLE PRECISION. 5242COSPOLY: 5243 fmovm.x &0x0c,-(%sp) # save fp2/fp3 5244 5245 fmul.x %fp0,%fp0 # FP0 IS S 5246 5247 fmov.d COSB8(%pc),%fp2 5248 fmov.d COSB7(%pc),%fp3 5249 5250 fmov.x %fp0,%fp1 5251 fmul.x %fp1,%fp1 # FP1 IS T 5252 5253 fmov.x %fp0,X(%a6) # X IS S 5254 ror.l &1,%d1 5255 and.l &0x80000000,%d1 5256# ...LEAST SIG. BIT OF D0 IN SIGN POSITION 5257 5258 fmul.x %fp1,%fp2 # TB8 5259 5260 eor.l %d1,X(%a6) # X IS NOW S'= SGN*S 5261 and.l &0x80000000,%d1 5262 5263 fmul.x %fp1,%fp3 # TB7 5264 5265 or.l &0x3F800000,%d1 # D0 IS SGN IN SINGLE 5266 mov.l %d1,POSNEG1(%a6) 5267 5268 fadd.d COSB6(%pc),%fp2 # B6+TB8 5269 fadd.d COSB5(%pc),%fp3 # B5+TB7 5270 5271 fmul.x %fp1,%fp2 # T(B6+TB8) 5272 fmul.x %fp1,%fp3 # T(B5+TB7) 5273 5274 fadd.d COSB4(%pc),%fp2 # B4+T(B6+TB8) 5275 fadd.x COSB3(%pc),%fp3 # B3+T(B5+TB7) 5276 5277 fmul.x %fp1,%fp2 # T(B4+T(B6+TB8)) 5278 fmul.x %fp3,%fp1 # T(B3+T(B5+TB7)) 5279 5280 fadd.x COSB2(%pc),%fp2 # B2+T(B4+T(B6+TB8)) 5281 fadd.s COSB1(%pc),%fp1 # B1+T(B3+T(B5+TB7)) 5282 5283 fmul.x %fp2,%fp0 # S(B2+T(B4+T(B6+TB8))) 5284 5285 fadd.x %fp1,%fp0 5286 5287 fmul.x X(%a6),%fp0 5288 5289 fmovm.x (%sp)+,&0x30 # restore fp2/fp3 5290 5291 fmov.l %d0,%fpcr # restore users round mode,prec 5292 fadd.s POSNEG1(%a6),%fp0 # last inst - possible exception set 5293 bra t_inx2 5294 5295############################################## 5296 5297# SINe: Big OR Small? 5298#--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION. 5299#--IF |X| < 2**(-40), RETURN X OR 1. 5300SINBORS: 5301 cmp.l %d1,&0x3FFF8000 5302 bgt.l SREDUCEX 5303 5304SINSM: 5305 mov.l ADJN(%a6),%d1 5306 cmp.l %d1,&0 5307 bgt.b COSTINY 5308 5309# here, the operation may underflow iff the precision is sgl or dbl. 5310# extended denorms are handled through another entry point. 
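# sin(X) is returned as X itself here; with single or double rounding
# precision selected, that move can underflow when X lies below the
# chosen format's normalized range (e.g. below 2**(-126) for single)
# and can also raise inexact, so the result is delivered through
# t_catch to post any such exception.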
5311SINTINY: 5312# mov.w &0x0000,XDCARE(%a6) # JUST IN CASE 5313 5314 fmov.l %d0,%fpcr # restore users round mode,prec 5315 mov.b &FMOV_OP,%d1 # last inst is MOVE 5316 fmov.x X(%a6),%fp0 # last inst - possible exception set 5317 bra t_catch 5318 5319COSTINY: 5320 fmov.s &0x3F800000,%fp0 # fp0 = 1.0 5321 fmov.l %d0,%fpcr # restore users round mode,prec 5322 fadd.s &0x80800000,%fp0 # last inst - possible exception set 5323 bra t_pinx2 5324 5325################################################ 5326 global ssind 5327#--SIN(X) = X FOR DENORMALIZED X 5328ssind: 5329 bra t_extdnrm 5330 5331############################################ 5332 global scosd 5333#--COS(X) = 1 FOR DENORMALIZED X 5334scosd: 5335 fmov.s &0x3F800000,%fp0 # fp0 = 1.0 5336 bra t_pinx2 5337 5338################################################## 5339 5340 global ssincos 5341ssincos: 5342#--SET ADJN TO 4 5343 mov.l &4,ADJN(%a6) 5344 5345 fmov.x (%a0),%fp0 # LOAD INPUT 5346 fmov.x %fp0,X(%a6) 5347 5348 mov.l (%a0),%d1 5349 mov.w 4(%a0),%d1 5350 and.l &0x7FFFFFFF,%d1 # COMPACTIFY X 5351 5352 cmp.l %d1,&0x3FD78000 # |X| >= 2**(-40)? 5353 bge.b SCOK1 5354 bra.w SCSM 5355 5356SCOK1: 5357 cmp.l %d1,&0x4004BC7E # |X| < 15 PI? 5358 blt.b SCMAIN 5359 bra.w SREDUCEX 5360 5361 5362#--THIS IS THE USUAL CASE, |X| <= 15 PI. 5363#--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP. 5364SCMAIN: 5365 fmov.x %fp0,%fp1 5366 5367 fmul.d TWOBYPI(%pc),%fp1 # X*2/PI 5368 5369 lea PITBL+0x200(%pc),%a1 # TABLE OF N*PI/2, N = -32,...,32 5370 5371 fmov.l %fp1,INT(%a6) # CONVERT TO INTEGER 5372 5373 mov.l INT(%a6),%d1 5374 asl.l &4,%d1 5375 add.l %d1,%a1 # ADDRESS OF N*PIBY2, IN Y1, Y2 5376 5377 fsub.x (%a1)+,%fp0 # X-Y1 5378 fsub.s (%a1),%fp0 # FP0 IS R = (X-Y1)-Y2 5379 5380SCCONT: 5381#--continuation point from REDUCEX 5382 5383 mov.l INT(%a6),%d1 5384 ror.l &1,%d1 5385 cmp.l %d1,&0 # D0 < 0 IFF N IS ODD 5386 bge.w NEVEN 5387 5388SNODD: 5389#--REGISTERS SAVED SO FAR: D0, A0, FP2. 5390 fmovm.x &0x04,-(%sp) # save fp2 5391 5392 fmov.x %fp0,RPRIME(%a6) 5393 fmul.x %fp0,%fp0 # FP0 IS S = R*R 5394 fmov.d SINA7(%pc),%fp1 # A7 5395 fmov.d COSB8(%pc),%fp2 # B8 5396 fmul.x %fp0,%fp1 # SA7 5397 fmul.x %fp0,%fp2 # SB8 5398 5399 mov.l %d2,-(%sp) 5400 mov.l %d1,%d2 5401 ror.l &1,%d2 5402 and.l &0x80000000,%d2 5403 eor.l %d1,%d2 5404 and.l &0x80000000,%d2 5405 5406 fadd.d SINA6(%pc),%fp1 # A6+SA7 5407 fadd.d COSB7(%pc),%fp2 # B7+SB8 5408 5409 fmul.x %fp0,%fp1 # S(A6+SA7) 5410 eor.l %d2,RPRIME(%a6) 5411 mov.l (%sp)+,%d2 5412 fmul.x %fp0,%fp2 # S(B7+SB8) 5413 ror.l &1,%d1 5414 and.l &0x80000000,%d1 5415 mov.l &0x3F800000,POSNEG1(%a6) 5416 eor.l %d1,POSNEG1(%a6) 5417 5418 fadd.d SINA5(%pc),%fp1 # A5+S(A6+SA7) 5419 fadd.d COSB6(%pc),%fp2 # B6+S(B7+SB8) 5420 5421 fmul.x %fp0,%fp1 # S(A5+S(A6+SA7)) 5422 fmul.x %fp0,%fp2 # S(B6+S(B7+SB8)) 5423 fmov.x %fp0,SPRIME(%a6) 5424 5425 fadd.d SINA4(%pc),%fp1 # A4+S(A5+S(A6+SA7)) 5426 eor.l %d1,SPRIME(%a6) 5427 fadd.d COSB5(%pc),%fp2 # B5+S(B6+S(B7+SB8)) 5428 5429 fmul.x %fp0,%fp1 # S(A4+...) 5430 fmul.x %fp0,%fp2 # S(B5+...) 5431 5432 fadd.d SINA3(%pc),%fp1 # A3+S(A4+...) 5433 fadd.d COSB4(%pc),%fp2 # B4+S(B5+...) 5434 5435 fmul.x %fp0,%fp1 # S(A3+...) 5436 fmul.x %fp0,%fp2 # S(B4+...) 5437 5438 fadd.x SINA2(%pc),%fp1 # A2+S(A3+...) 5439 fadd.x COSB3(%pc),%fp2 # B3+S(B4+...) 5440 5441 fmul.x %fp0,%fp1 # S(A2+...) 5442 fmul.x %fp0,%fp2 # S(B3+...) 5443 5444 fadd.x SINA1(%pc),%fp1 # A1+S(A2+...) 5445 fadd.x COSB2(%pc),%fp2 # B2+S(B3+...) 5446 5447 fmul.x %fp0,%fp1 # S(A1+...) 5448 fmul.x %fp2,%fp0 # S(B2+...) 
5449 5450 fmul.x RPRIME(%a6),%fp1 # R'S(A1+...) 5451 fadd.s COSB1(%pc),%fp0 # B1+S(B2...) 5452 fmul.x SPRIME(%a6),%fp0 # S'(B1+S(B2+...)) 5453 5454 fmovm.x (%sp)+,&0x20 # restore fp2 5455 5456 fmov.l %d0,%fpcr 5457 fadd.x RPRIME(%a6),%fp1 # COS(X) 5458 bsr sto_cos # store cosine result 5459 fadd.s POSNEG1(%a6),%fp0 # SIN(X) 5460 bra t_inx2 5461 5462NEVEN: 5463#--REGISTERS SAVED SO FAR: FP2. 5464 fmovm.x &0x04,-(%sp) # save fp2 5465 5466 fmov.x %fp0,RPRIME(%a6) 5467 fmul.x %fp0,%fp0 # FP0 IS S = R*R 5468 5469 fmov.d COSB8(%pc),%fp1 # B8 5470 fmov.d SINA7(%pc),%fp2 # A7 5471 5472 fmul.x %fp0,%fp1 # SB8 5473 fmov.x %fp0,SPRIME(%a6) 5474 fmul.x %fp0,%fp2 # SA7 5475 5476 ror.l &1,%d1 5477 and.l &0x80000000,%d1 5478 5479 fadd.d COSB7(%pc),%fp1 # B7+SB8 5480 fadd.d SINA6(%pc),%fp2 # A6+SA7 5481 5482 eor.l %d1,RPRIME(%a6) 5483 eor.l %d1,SPRIME(%a6) 5484 5485 fmul.x %fp0,%fp1 # S(B7+SB8) 5486 5487 or.l &0x3F800000,%d1 5488 mov.l %d1,POSNEG1(%a6) 5489 5490 fmul.x %fp0,%fp2 # S(A6+SA7) 5491 5492 fadd.d COSB6(%pc),%fp1 # B6+S(B7+SB8) 5493 fadd.d SINA5(%pc),%fp2 # A5+S(A6+SA7) 5494 5495 fmul.x %fp0,%fp1 # S(B6+S(B7+SB8)) 5496 fmul.x %fp0,%fp2 # S(A5+S(A6+SA7)) 5497 5498 fadd.d COSB5(%pc),%fp1 # B5+S(B6+S(B7+SB8)) 5499 fadd.d SINA4(%pc),%fp2 # A4+S(A5+S(A6+SA7)) 5500 5501 fmul.x %fp0,%fp1 # S(B5+...) 5502 fmul.x %fp0,%fp2 # S(A4+...) 5503 5504 fadd.d COSB4(%pc),%fp1 # B4+S(B5+...) 5505 fadd.d SINA3(%pc),%fp2 # A3+S(A4+...) 5506 5507 fmul.x %fp0,%fp1 # S(B4+...) 5508 fmul.x %fp0,%fp2 # S(A3+...) 5509 5510 fadd.x COSB3(%pc),%fp1 # B3+S(B4+...) 5511 fadd.x SINA2(%pc),%fp2 # A2+S(A3+...) 5512 5513 fmul.x %fp0,%fp1 # S(B3+...) 5514 fmul.x %fp0,%fp2 # S(A2+...) 5515 5516 fadd.x COSB2(%pc),%fp1 # B2+S(B3+...) 5517 fadd.x SINA1(%pc),%fp2 # A1+S(A2+...) 5518 5519 fmul.x %fp0,%fp1 # S(B2+...) 5520 fmul.x %fp2,%fp0 # s(a1+...) 5521 5522 5523 fadd.s COSB1(%pc),%fp1 # B1+S(B2...) 5524 fmul.x RPRIME(%a6),%fp0 # R'S(A1+...) 5525 fmul.x SPRIME(%a6),%fp1 # S'(B1+S(B2+...)) 5526 5527 fmovm.x (%sp)+,&0x20 # restore fp2 5528 5529 fmov.l %d0,%fpcr 5530 fadd.s POSNEG1(%a6),%fp1 # COS(X) 5531 bsr sto_cos # store cosine result 5532 fadd.x RPRIME(%a6),%fp0 # SIN(X) 5533 bra t_inx2 5534 5535################################################ 5536 5537SCBORS: 5538 cmp.l %d1,&0x3FFF8000 5539 bgt.w SREDUCEX 5540 5541################################################ 5542 5543SCSM: 5544# mov.w &0x0000,XDCARE(%a6) 5545 fmov.s &0x3F800000,%fp1 5546 5547 fmov.l %d0,%fpcr 5548 fsub.s &0x00800000,%fp1 5549 bsr sto_cos # store cosine result 5550 fmov.l %fpcr,%d0 # d0 must have fpcr,too 5551 mov.b &FMOV_OP,%d1 # last inst is MOVE 5552 fmov.x X(%a6),%fp0 5553 bra t_catch 5554 5555############################################## 5556 5557 global ssincosd 5558#--SIN AND COS OF X FOR DENORMALIZED X 5559ssincosd: 5560 mov.l %d0,-(%sp) # save d0 5561 fmov.s &0x3F800000,%fp1 5562 bsr sto_cos # store cosine result 5563 mov.l (%sp)+,%d0 # restore d0 5564 bra t_extdnrm 5565 5566############################################ 5567 5568#--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW. 5569#--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING 5570#--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE. 5571SREDUCEX: 5572 fmovm.x &0x3c,-(%sp) # save {fp2-fp5} 5573 mov.l %d2,-(%sp) # save d2 5574 fmov.s &0x00000000,%fp1 # fp1 = 0 5575 5576#--If compact form of abs(arg) in d0=$7ffeffff, argument is so large that 5577#--there is a danger of unwanted overflow in first LOOP iteration. 
In this 5578#--case, reduce argument by one remainder step to make subsequent reduction 5579#--safe. 5580 cmp.l %d1,&0x7ffeffff # is arg dangerously large? 5581 bne.b SLOOP # no 5582 5583# yes; create 2**16383*PI/2 5584 mov.w &0x7ffe,FP_SCR0_EX(%a6) 5585 mov.l &0xc90fdaa2,FP_SCR0_HI(%a6) 5586 clr.l FP_SCR0_LO(%a6) 5587 5588# create low half of 2**16383*PI/2 at FP_SCR1 5589 mov.w &0x7fdc,FP_SCR1_EX(%a6) 5590 mov.l &0x85a308d3,FP_SCR1_HI(%a6) 5591 clr.l FP_SCR1_LO(%a6) 5592 5593 ftest.x %fp0 # test sign of argument 5594 fblt.w sred_neg 5595 5596 or.b &0x80,FP_SCR0_EX(%a6) # positive arg 5597 or.b &0x80,FP_SCR1_EX(%a6) 5598sred_neg: 5599 fadd.x FP_SCR0(%a6),%fp0 # high part of reduction is exact 5600 fmov.x %fp0,%fp1 # save high result in fp1 5601 fadd.x FP_SCR1(%a6),%fp0 # low part of reduction 5602 fsub.x %fp0,%fp1 # determine low component of result 5603 fadd.x FP_SCR1(%a6),%fp1 # fp0/fp1 are reduced argument. 5604 5605#--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4. 5606#--integer quotient will be stored in N 5607#--Intermeditate remainder is 66-bit long; (R,r) in (FP0,FP1) 5608SLOOP: 5609 fmov.x %fp0,INARG(%a6) # +-2**K * F, 1 <= F < 2 5610 mov.w INARG(%a6),%d1 5611 mov.l %d1,%a1 # save a copy of D0 5612 and.l &0x00007FFF,%d1 5613 sub.l &0x00003FFF,%d1 # d0 = K 5614 cmp.l %d1,&28 5615 ble.b SLASTLOOP 5616SCONTLOOP: 5617 sub.l &27,%d1 # d0 = L := K-27 5618 mov.b &0,ENDFLAG(%a6) 5619 bra.b SWORK 5620SLASTLOOP: 5621 clr.l %d1 # d0 = L := 0 5622 mov.b &1,ENDFLAG(%a6) 5623 5624SWORK: 5625#--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN 5626#--THAT INT( X * (2/PI) / 2**(L) ) < 2**29. 5627 5628#--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63), 5629#--2**L * (PIby2_1), 2**L * (PIby2_2) 5630 5631 mov.l &0x00003FFE,%d2 # BIASED EXP OF 2/PI 5632 sub.l %d1,%d2 # BIASED EXP OF 2**(-L)*(2/PI) 5633 5634 mov.l &0xA2F9836E,FP_SCR0_HI(%a6) 5635 mov.l &0x4E44152A,FP_SCR0_LO(%a6) 5636 mov.w %d2,FP_SCR0_EX(%a6) # FP_SCR0 = 2**(-L)*(2/PI) 5637 5638 fmov.x %fp0,%fp2 5639 fmul.x FP_SCR0(%a6),%fp2 # fp2 = X * 2**(-L)*(2/PI) 5640 5641#--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN 5642#--FLOATING POINT FORMAT, THE TWO FMOVE'S FMOVE.L FP <--> N 5643#--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT 5644#--(SIGN(INARG)*2**63 + FP2) - SIGN(INARG)*2**63 WILL GIVE 5645#--US THE DESIRED VALUE IN FLOATING POINT. 
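#--THIS WORKS BECAUSE |FP2| < 2**29 BY THE CHOICE OF L WHILE THE
#--EXTENDED MANTISSA HOLDS 64 BITS: ONCE SIGN(INARG)*2**63 IS ADDED THE
#--ULP OF THE SUM IS 1.0, SO THE ADD ROUNDS THE FRACTION AWAY AND
#--SUBTRACTING THE SAME CONSTANT LEAVES THE INTEGER PART AS A FLOAT.
#--MATCHING THE SIGN OF THE ARGUMENT KEEPS THE MAGNITUDE OF THE SUM AT
#--OR ABOVE 2**63 SO THE ULP REALLY IS 1.0; WITH ROUND-TO-NEAREST IN
#--EFFECT, E.G. FP2 = 5.7 BECOMES (2**63 + 6.0) - 2**63 = 6.0.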
5646 mov.l %a1,%d2 5647 swap %d2 5648 and.l &0x80000000,%d2 5649 or.l &0x5F000000,%d2 # d2 = SIGN(INARG)*2**63 IN SGL 5650 mov.l %d2,TWOTO63(%a6) 5651 fadd.s TWOTO63(%a6),%fp2 # THE FRACTIONAL PART OF FP1 IS ROUNDED 5652 fsub.s TWOTO63(%a6),%fp2 # fp2 = N 5653# fint.x %fp2 5654 5655#--CREATING 2**(L)*Piby2_1 and 2**(L)*Piby2_2 5656 mov.l %d1,%d2 # d2 = L 5657 5658 add.l &0x00003FFF,%d2 # BIASED EXP OF 2**L * (PI/2) 5659 mov.w %d2,FP_SCR0_EX(%a6) 5660 mov.l &0xC90FDAA2,FP_SCR0_HI(%a6) 5661 clr.l FP_SCR0_LO(%a6) # FP_SCR0 = 2**(L) * Piby2_1 5662 5663 add.l &0x00003FDD,%d1 5664 mov.w %d1,FP_SCR1_EX(%a6) 5665 mov.l &0x85A308D3,FP_SCR1_HI(%a6) 5666 clr.l FP_SCR1_LO(%a6) # FP_SCR1 = 2**(L) * Piby2_2 5667 5668 mov.b ENDFLAG(%a6),%d1 5669 5670#--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and 5671#--P2 = 2**(L) * Piby2_2 5672 fmov.x %fp2,%fp4 # fp4 = N 5673 fmul.x FP_SCR0(%a6),%fp4 # fp4 = W = N*P1 5674 fmov.x %fp2,%fp5 # fp5 = N 5675 fmul.x FP_SCR1(%a6),%fp5 # fp5 = w = N*P2 5676 fmov.x %fp4,%fp3 # fp3 = W = N*P1 5677 5678#--we want P+p = W+w but |p| <= half ulp of P 5679#--Then, we need to compute A := R-P and a := r-p 5680 fadd.x %fp5,%fp3 # fp3 = P 5681 fsub.x %fp3,%fp4 # fp4 = W-P 5682 5683 fsub.x %fp3,%fp0 # fp0 = A := R - P 5684 fadd.x %fp5,%fp4 # fp4 = p = (W-P)+w 5685 5686 fmov.x %fp0,%fp3 # fp3 = A 5687 fsub.x %fp4,%fp1 # fp1 = a := r - p 5688 5689#--Now we need to normalize (A,a) to "new (R,r)" where R+r = A+a but 5690#--|r| <= half ulp of R. 5691 fadd.x %fp1,%fp0 # fp0 = R := A+a 5692#--No need to calculate r if this is the last loop 5693 cmp.b %d1,&0 5694 bgt.w SRESTORE 5695 5696#--Need to calculate r 5697 fsub.x %fp0,%fp3 # fp3 = A-R 5698 fadd.x %fp3,%fp1 # fp1 = r := (A-R)+a 5699 bra.w SLOOP 5700 5701SRESTORE: 5702 fmov.l %fp2,INT(%a6) 5703 mov.l (%sp)+,%d2 # restore d2 5704 fmovm.x (%sp)+,&0x3c # restore {fp2-fp5} 5705 5706 mov.l ADJN(%a6),%d1 5707 cmp.l %d1,&4 5708 5709 blt.w SINCONT 5710 bra.w SCCONT 5711 5712######################################################################### 5713# stan(): computes the tangent of a normalized input # 5714# stand(): computes the tangent of a denormalized input # 5715# # 5716# INPUT *************************************************************** # 5717# a0 = pointer to extended precision input # 5718# d0 = round precision,mode # 5719# # 5720# OUTPUT ************************************************************** # 5721# fp0 = tan(X) # 5722# # 5723# ACCURACY and MONOTONICITY ******************************************* # 5724# The returned result is within 3 ulp in 64 significant bit, i.e. # 5725# within 0.5001 ulp to 53 bits if the result is subsequently # 5726# rounded to double precision. The result is provably monotonic # 5727# in double precision. # 5728# # 5729# ALGORITHM *********************************************************** # 5730# # 5731# 1. If |X| >= 15Pi or |X| < 2**(-40), go to 6. # 5732# # 5733# 2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let # 5734# k = N mod 2, so in particular, k = 0 or 1. # 5735# # 5736# 3. If k is odd, go to 5. # 5737# # 5738# 4. (k is even) Tan(X) = tan(r) and tan(r) is approximated by a # 5739# rational function U/V where # 5740# U = r + r*s*(P1 + s*(P2 + s*P3)), and # 5741# V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r. # 5742# Exit. # 5743# # 5744# 4. (k is odd) Tan(X) = -cot(r). 
Since tan(r) is approximated by # 5745# a rational function U/V where # 5746# U = r + r*s*(P1 + s*(P2 + s*P3)), and # 5747# V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r, # 5748# -Cot(r) = -V/U. Exit. # 5749# # 5750# 6. If |X| > 1, go to 8. # 5751# # 5752# 7. (|X|<2**(-40)) Tan(X) = X. Exit. # 5753# # 5754# 8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back # 5755# to 2. # 5756# # 5757######################################################################### 5758 5759TANQ4: 5760 long 0x3EA0B759,0xF50F8688 5761TANP3: 5762 long 0xBEF2BAA5,0xA8924F04 5763 5764TANQ3: 5765 long 0xBF346F59,0xB39BA65F,0x00000000,0x00000000 5766 5767TANP2: 5768 long 0x3FF60000,0xE073D3FC,0x199C4A00,0x00000000 5769 5770TANQ2: 5771 long 0x3FF90000,0xD23CD684,0x15D95FA1,0x00000000 5772 5773TANP1: 5774 long 0xBFFC0000,0x8895A6C5,0xFB423BCA,0x00000000 5775 5776TANQ1: 5777 long 0xBFFD0000,0xEEF57E0D,0xA84BC8CE,0x00000000 5778 5779INVTWOPI: 5780 long 0x3FFC0000,0xA2F9836E,0x4E44152A,0x00000000 5781 5782TWOPI1: 5783 long 0x40010000,0xC90FDAA2,0x00000000,0x00000000 5784TWOPI2: 5785 long 0x3FDF0000,0x85A308D4,0x00000000,0x00000000 5786 5787#--N*PI/2, -32 <= N <= 32, IN A LEADING TERM IN EXT. AND TRAILING 5788#--TERM IN SGL. NOTE THAT PI IS 64-BIT LONG, THUS N*PI/2 IS AT 5789#--MOST 69 BITS LONG. 5790# global PITBL 5791PITBL: 5792 long 0xC0040000,0xC90FDAA2,0x2168C235,0x21800000 5793 long 0xC0040000,0xC2C75BCD,0x105D7C23,0xA0D00000 5794 long 0xC0040000,0xBC7EDCF7,0xFF523611,0xA1E80000 5795 long 0xC0040000,0xB6365E22,0xEE46F000,0x21480000 5796 long 0xC0040000,0xAFEDDF4D,0xDD3BA9EE,0xA1200000 5797 long 0xC0040000,0xA9A56078,0xCC3063DD,0x21FC0000 5798 long 0xC0040000,0xA35CE1A3,0xBB251DCB,0x21100000 5799 long 0xC0040000,0x9D1462CE,0xAA19D7B9,0xA1580000 5800 long 0xC0040000,0x96CBE3F9,0x990E91A8,0x21E00000 5801 long 0xC0040000,0x90836524,0x88034B96,0x20B00000 5802 long 0xC0040000,0x8A3AE64F,0x76F80584,0xA1880000 5803 long 0xC0040000,0x83F2677A,0x65ECBF73,0x21C40000 5804 long 0xC0030000,0xFB53D14A,0xA9C2F2C2,0x20000000 5805 long 0xC0030000,0xEEC2D3A0,0x87AC669F,0x21380000 5806 long 0xC0030000,0xE231D5F6,0x6595DA7B,0xA1300000 5807 long 0xC0030000,0xD5A0D84C,0x437F4E58,0x9FC00000 5808 long 0xC0030000,0xC90FDAA2,0x2168C235,0x21000000 5809 long 0xC0030000,0xBC7EDCF7,0xFF523611,0xA1680000 5810 long 0xC0030000,0xAFEDDF4D,0xDD3BA9EE,0xA0A00000 5811 long 0xC0030000,0xA35CE1A3,0xBB251DCB,0x20900000 5812 long 0xC0030000,0x96CBE3F9,0x990E91A8,0x21600000 5813 long 0xC0030000,0x8A3AE64F,0x76F80584,0xA1080000 5814 long 0xC0020000,0xFB53D14A,0xA9C2F2C2,0x1F800000 5815 long 0xC0020000,0xE231D5F6,0x6595DA7B,0xA0B00000 5816 long 0xC0020000,0xC90FDAA2,0x2168C235,0x20800000 5817 long 0xC0020000,0xAFEDDF4D,0xDD3BA9EE,0xA0200000 5818 long 0xC0020000,0x96CBE3F9,0x990E91A8,0x20E00000 5819 long 0xC0010000,0xFB53D14A,0xA9C2F2C2,0x1F000000 5820 long 0xC0010000,0xC90FDAA2,0x2168C235,0x20000000 5821 long 0xC0010000,0x96CBE3F9,0x990E91A8,0x20600000 5822 long 0xC0000000,0xC90FDAA2,0x2168C235,0x1F800000 5823 long 0xBFFF0000,0xC90FDAA2,0x2168C235,0x1F000000 5824 long 0x00000000,0x00000000,0x00000000,0x00000000 5825 long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x9F000000 5826 long 0x40000000,0xC90FDAA2,0x2168C235,0x9F800000 5827 long 0x40010000,0x96CBE3F9,0x990E91A8,0xA0600000 5828 long 0x40010000,0xC90FDAA2,0x2168C235,0xA0000000 5829 long 0x40010000,0xFB53D14A,0xA9C2F2C2,0x9F000000 5830 long 0x40020000,0x96CBE3F9,0x990E91A8,0xA0E00000 5831 long 0x40020000,0xAFEDDF4D,0xDD3BA9EE,0x20200000 5832 long 0x40020000,0xC90FDAA2,0x2168C235,0xA0800000 5833 long 
0x40020000,0xE231D5F6,0x6595DA7B,0x20B00000 5834 long 0x40020000,0xFB53D14A,0xA9C2F2C2,0x9F800000 5835 long 0x40030000,0x8A3AE64F,0x76F80584,0x21080000 5836 long 0x40030000,0x96CBE3F9,0x990E91A8,0xA1600000 5837 long 0x40030000,0xA35CE1A3,0xBB251DCB,0xA0900000 5838 long 0x40030000,0xAFEDDF4D,0xDD3BA9EE,0x20A00000 5839 long 0x40030000,0xBC7EDCF7,0xFF523611,0x21680000 5840 long 0x40030000,0xC90FDAA2,0x2168C235,0xA1000000 5841 long 0x40030000,0xD5A0D84C,0x437F4E58,0x1FC00000 5842 long 0x40030000,0xE231D5F6,0x6595DA7B,0x21300000 5843 long 0x40030000,0xEEC2D3A0,0x87AC669F,0xA1380000 5844 long 0x40030000,0xFB53D14A,0xA9C2F2C2,0xA0000000 5845 long 0x40040000,0x83F2677A,0x65ECBF73,0xA1C40000 5846 long 0x40040000,0x8A3AE64F,0x76F80584,0x21880000 5847 long 0x40040000,0x90836524,0x88034B96,0xA0B00000 5848 long 0x40040000,0x96CBE3F9,0x990E91A8,0xA1E00000 5849 long 0x40040000,0x9D1462CE,0xAA19D7B9,0x21580000 5850 long 0x40040000,0xA35CE1A3,0xBB251DCB,0xA1100000 5851 long 0x40040000,0xA9A56078,0xCC3063DD,0xA1FC0000 5852 long 0x40040000,0xAFEDDF4D,0xDD3BA9EE,0x21200000 5853 long 0x40040000,0xB6365E22,0xEE46F000,0xA1480000 5854 long 0x40040000,0xBC7EDCF7,0xFF523611,0x21E80000 5855 long 0x40040000,0xC2C75BCD,0x105D7C23,0x20D00000 5856 long 0x40040000,0xC90FDAA2,0x2168C235,0xA1800000 5857 5858 set INARG,FP_SCR0 5859 5860 set TWOTO63,L_SCR1 5861 set INT,L_SCR1 5862 set ENDFLAG,L_SCR2 5863 5864 global stan 5865stan: 5866 fmov.x (%a0),%fp0 # LOAD INPUT 5867 5868 mov.l (%a0),%d1 5869 mov.w 4(%a0),%d1 5870 and.l &0x7FFFFFFF,%d1 5871 5872 cmp.l %d1,&0x3FD78000 # |X| >= 2**(-40)? 5873 bge.b TANOK1 5874 bra.w TANSM 5875TANOK1: 5876 cmp.l %d1,&0x4004BC7E # |X| < 15 PI? 5877 blt.b TANMAIN 5878 bra.w REDUCEX 5879 5880TANMAIN: 5881#--THIS IS THE USUAL CASE, |X| <= 15 PI. 5882#--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP. 5883 fmov.x %fp0,%fp1 5884 fmul.d TWOBYPI(%pc),%fp1 # X*2/PI 5885 5886 lea.l PITBL+0x200(%pc),%a1 # TABLE OF N*PI/2, N = -32,...,32 5887 5888 fmov.l %fp1,%d1 # CONVERT TO INTEGER 5889 5890 asl.l &4,%d1 5891 add.l %d1,%a1 # ADDRESS N*PIBY2 IN Y1, Y2 5892 5893 fsub.x (%a1)+,%fp0 # X-Y1 5894 5895 fsub.s (%a1),%fp0 # FP0 IS R = (X-Y1)-Y2 5896 5897 ror.l &5,%d1 5898 and.l &0x80000000,%d1 # D0 WAS ODD IFF D0 < 0 5899 5900TANCONT: 5901 fmovm.x &0x0c,-(%sp) # save fp2,fp3 5902 5903 cmp.l %d1,&0 5904 blt.w NODD 5905 5906 fmov.x %fp0,%fp1 5907 fmul.x %fp1,%fp1 # S = R*R 5908 5909 fmov.d TANQ4(%pc),%fp3 5910 fmov.d TANP3(%pc),%fp2 5911 5912 fmul.x %fp1,%fp3 # SQ4 5913 fmul.x %fp1,%fp2 # SP3 5914 5915 fadd.d TANQ3(%pc),%fp3 # Q3+SQ4 5916 fadd.x TANP2(%pc),%fp2 # P2+SP3 5917 5918 fmul.x %fp1,%fp3 # S(Q3+SQ4) 5919 fmul.x %fp1,%fp2 # S(P2+SP3) 5920 5921 fadd.x TANQ2(%pc),%fp3 # Q2+S(Q3+SQ4) 5922 fadd.x TANP1(%pc),%fp2 # P1+S(P2+SP3) 5923 5924 fmul.x %fp1,%fp3 # S(Q2+S(Q3+SQ4)) 5925 fmul.x %fp1,%fp2 # S(P1+S(P2+SP3)) 5926 5927 fadd.x TANQ1(%pc),%fp3 # Q1+S(Q2+S(Q3+SQ4)) 5928 fmul.x %fp0,%fp2 # RS(P1+S(P2+SP3)) 5929 5930 fmul.x %fp3,%fp1 # S(Q1+S(Q2+S(Q3+SQ4))) 5931 5932 fadd.x %fp2,%fp0 # R+RS(P1+S(P2+SP3)) 5933 5934 fadd.s &0x3F800000,%fp1 # 1+S(Q1+...) 
5935 5936 fmovm.x (%sp)+,&0x30 # restore fp2,fp3 5937 5938 fmov.l %d0,%fpcr # restore users round mode,prec 5939 fdiv.x %fp1,%fp0 # last inst - possible exception set 5940 bra t_inx2 5941 5942NODD: 5943 fmov.x %fp0,%fp1 5944 fmul.x %fp0,%fp0 # S = R*R 5945 5946 fmov.d TANQ4(%pc),%fp3 5947 fmov.d TANP3(%pc),%fp2 5948 5949 fmul.x %fp0,%fp3 # SQ4 5950 fmul.x %fp0,%fp2 # SP3 5951 5952 fadd.d TANQ3(%pc),%fp3 # Q3+SQ4 5953 fadd.x TANP2(%pc),%fp2 # P2+SP3 5954 5955 fmul.x %fp0,%fp3 # S(Q3+SQ4) 5956 fmul.x %fp0,%fp2 # S(P2+SP3) 5957 5958 fadd.x TANQ2(%pc),%fp3 # Q2+S(Q3+SQ4) 5959 fadd.x TANP1(%pc),%fp2 # P1+S(P2+SP3) 5960 5961 fmul.x %fp0,%fp3 # S(Q2+S(Q3+SQ4)) 5962 fmul.x %fp0,%fp2 # S(P1+S(P2+SP3)) 5963 5964 fadd.x TANQ1(%pc),%fp3 # Q1+S(Q2+S(Q3+SQ4)) 5965 fmul.x %fp1,%fp2 # RS(P1+S(P2+SP3)) 5966 5967 fmul.x %fp3,%fp0 # S(Q1+S(Q2+S(Q3+SQ4))) 5968 5969 fadd.x %fp2,%fp1 # R+RS(P1+S(P2+SP3)) 5970 fadd.s &0x3F800000,%fp0 # 1+S(Q1+...) 5971 5972 fmovm.x (%sp)+,&0x30 # restore fp2,fp3 5973 5974 fmov.x %fp1,-(%sp) 5975 eor.l &0x80000000,(%sp) 5976 5977 fmov.l %d0,%fpcr # restore users round mode,prec 5978 fdiv.x (%sp)+,%fp0 # last inst - possible exception set 5979 bra t_inx2 5980 5981TANBORS: 5982#--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION. 5983#--IF |X| < 2**(-40), RETURN X OR 1. 5984 cmp.l %d1,&0x3FFF8000 5985 bgt.b REDUCEX 5986 5987TANSM: 5988 fmov.x %fp0,-(%sp) 5989 fmov.l %d0,%fpcr # restore users round mode,prec 5990 mov.b &FMOV_OP,%d1 # last inst is MOVE 5991 fmov.x (%sp)+,%fp0 # last inst - posibble exception set 5992 bra t_catch 5993 5994 global stand 5995#--TAN(X) = X FOR DENORMALIZED X 5996stand: 5997 bra t_extdnrm 5998 5999#--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW. 6000#--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING 6001#--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE. 6002REDUCEX: 6003 fmovm.x &0x3c,-(%sp) # save {fp2-fp5} 6004 mov.l %d2,-(%sp) # save d2 6005 fmov.s &0x00000000,%fp1 # fp1 = 0 6006 6007#--If compact form of abs(arg) in d0=$7ffeffff, argument is so large that 6008#--there is a danger of unwanted overflow in first LOOP iteration. In this 6009#--case, reduce argument by one remainder step to make subsequent reduction 6010#--safe. 6011 cmp.l %d1,&0x7ffeffff # is arg dangerously large? 6012 bne.b LOOP # no 6013 6014# yes; create 2**16383*PI/2 6015 mov.w &0x7ffe,FP_SCR0_EX(%a6) 6016 mov.l &0xc90fdaa2,FP_SCR0_HI(%a6) 6017 clr.l FP_SCR0_LO(%a6) 6018 6019# create low half of 2**16383*PI/2 at FP_SCR1 6020 mov.w &0x7fdc,FP_SCR1_EX(%a6) 6021 mov.l &0x85a308d3,FP_SCR1_HI(%a6) 6022 clr.l FP_SCR1_LO(%a6) 6023 6024 ftest.x %fp0 # test sign of argument 6025 fblt.w red_neg 6026 6027 or.b &0x80,FP_SCR0_EX(%a6) # positive arg 6028 or.b &0x80,FP_SCR1_EX(%a6) 6029red_neg: 6030 fadd.x FP_SCR0(%a6),%fp0 # high part of reduction is exact 6031 fmov.x %fp0,%fp1 # save high result in fp1 6032 fadd.x FP_SCR1(%a6),%fp0 # low part of reduction 6033 fsub.x %fp0,%fp1 # determine low component of result 6034 fadd.x FP_SCR1(%a6),%fp1 # fp0/fp1 are reduced argument. 6035 6036#--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4. 
6037#--integer quotient will be stored in N 6038#--Intermeditate remainder is 66-bit long; (R,r) in (FP0,FP1) 6039LOOP: 6040 fmov.x %fp0,INARG(%a6) # +-2**K * F, 1 <= F < 2 6041 mov.w INARG(%a6),%d1 6042 mov.l %d1,%a1 # save a copy of D0 6043 and.l &0x00007FFF,%d1 6044 sub.l &0x00003FFF,%d1 # d0 = K 6045 cmp.l %d1,&28 6046 ble.b LASTLOOP 6047CONTLOOP: 6048 sub.l &27,%d1 # d0 = L := K-27 6049 mov.b &0,ENDFLAG(%a6) 6050 bra.b WORK 6051LASTLOOP: 6052 clr.l %d1 # d0 = L := 0 6053 mov.b &1,ENDFLAG(%a6) 6054 6055WORK: 6056#--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN 6057#--THAT INT( X * (2/PI) / 2**(L) ) < 2**29. 6058 6059#--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63), 6060#--2**L * (PIby2_1), 2**L * (PIby2_2) 6061 6062 mov.l &0x00003FFE,%d2 # BIASED EXP OF 2/PI 6063 sub.l %d1,%d2 # BIASED EXP OF 2**(-L)*(2/PI) 6064 6065 mov.l &0xA2F9836E,FP_SCR0_HI(%a6) 6066 mov.l &0x4E44152A,FP_SCR0_LO(%a6) 6067 mov.w %d2,FP_SCR0_EX(%a6) # FP_SCR0 = 2**(-L)*(2/PI) 6068 6069 fmov.x %fp0,%fp2 6070 fmul.x FP_SCR0(%a6),%fp2 # fp2 = X * 2**(-L)*(2/PI) 6071 6072#--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN 6073#--FLOATING POINT FORMAT, THE TWO FMOVE'S FMOVE.L FP <--> N 6074#--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT 6075#--(SIGN(INARG)*2**63 + FP2) - SIGN(INARG)*2**63 WILL GIVE 6076#--US THE DESIRED VALUE IN FLOATING POINT. 6077 mov.l %a1,%d2 6078 swap %d2 6079 and.l &0x80000000,%d2 6080 or.l &0x5F000000,%d2 # d2 = SIGN(INARG)*2**63 IN SGL 6081 mov.l %d2,TWOTO63(%a6) 6082 fadd.s TWOTO63(%a6),%fp2 # THE FRACTIONAL PART OF FP1 IS ROUNDED 6083 fsub.s TWOTO63(%a6),%fp2 # fp2 = N 6084# fintrz.x %fp2,%fp2 6085 6086#--CREATING 2**(L)*Piby2_1 and 2**(L)*Piby2_2 6087 mov.l %d1,%d2 # d2 = L 6088 6089 add.l &0x00003FFF,%d2 # BIASED EXP OF 2**L * (PI/2) 6090 mov.w %d2,FP_SCR0_EX(%a6) 6091 mov.l &0xC90FDAA2,FP_SCR0_HI(%a6) 6092 clr.l FP_SCR0_LO(%a6) # FP_SCR0 = 2**(L) * Piby2_1 6093 6094 add.l &0x00003FDD,%d1 6095 mov.w %d1,FP_SCR1_EX(%a6) 6096 mov.l &0x85A308D3,FP_SCR1_HI(%a6) 6097 clr.l FP_SCR1_LO(%a6) # FP_SCR1 = 2**(L) * Piby2_2 6098 6099 mov.b ENDFLAG(%a6),%d1 6100 6101#--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and 6102#--P2 = 2**(L) * Piby2_2 6103 fmov.x %fp2,%fp4 # fp4 = N 6104 fmul.x FP_SCR0(%a6),%fp4 # fp4 = W = N*P1 6105 fmov.x %fp2,%fp5 # fp5 = N 6106 fmul.x FP_SCR1(%a6),%fp5 # fp5 = w = N*P2 6107 fmov.x %fp4,%fp3 # fp3 = W = N*P1 6108 6109#--we want P+p = W+w but |p| <= half ulp of P 6110#--Then, we need to compute A := R-P and a := r-p 6111 fadd.x %fp5,%fp3 # fp3 = P 6112 fsub.x %fp3,%fp4 # fp4 = W-P 6113 6114 fsub.x %fp3,%fp0 # fp0 = A := R - P 6115 fadd.x %fp5,%fp4 # fp4 = p = (W-P)+w 6116 6117 fmov.x %fp0,%fp3 # fp3 = A 6118 fsub.x %fp4,%fp1 # fp1 = a := r - p 6119 6120#--Now we need to normalize (A,a) to "new (R,r)" where R+r = A+a but 6121#--|r| <= half ulp of R. 
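#--This is the usual two-term renormalization: since |a| is much
#--smaller than |A|, R := A+a followed by r := (A-R)+a recovers the
#--rounding error of that addition, so the pair (R,r) still represents
#--A+a to well beyond the 64-bit working precision.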
6122 fadd.x %fp1,%fp0 # fp0 = R := A+a 6123#--No need to calculate r if this is the last loop 6124 cmp.b %d1,&0 6125 bgt.w RESTORE 6126 6127#--Need to calculate r 6128 fsub.x %fp0,%fp3 # fp3 = A-R 6129 fadd.x %fp3,%fp1 # fp1 = r := (A-R)+a 6130 bra.w LOOP 6131 6132RESTORE: 6133 fmov.l %fp2,INT(%a6) 6134 mov.l (%sp)+,%d2 # restore d2 6135 fmovm.x (%sp)+,&0x3c # restore {fp2-fp5} 6136 6137 mov.l INT(%a6),%d1 6138 ror.l &1,%d1 6139 6140 bra.w TANCONT 6141 6142######################################################################### 6143# satan(): computes the arctangent of a normalized number # 6144# satand(): computes the arctangent of a denormalized number # 6145# # 6146# INPUT *************************************************************** # 6147# a0 = pointer to extended precision input # 6148# d0 = round precision,mode # 6149# # 6150# OUTPUT ************************************************************** # 6151# fp0 = arctan(X) # 6152# # 6153# ACCURACY and MONOTONICITY ******************************************* # 6154# The returned result is within 2 ulps in 64 significant bit, # 6155# i.e. within 0.5001 ulp to 53 bits if the result is subsequently # 6156# rounded to double precision. The result is provably monotonic # 6157# in double precision. # 6158# # 6159# ALGORITHM *********************************************************** # 6160# Step 1. If |X| >= 16 or |X| < 1/16, go to Step 5. # 6161# # 6162# Step 2. Let X = sgn * 2**k * 1.xxxxxxxx...x. # 6163# Note that k = -4, -3,..., or 3. # 6164# Define F = sgn * 2**k * 1.xxxx1, i.e. the first 5 # 6165# significant bits of X with a bit-1 attached at the 6-th # 6166# bit position. Define u to be u = (X-F) / (1 + X*F). # 6167# # 6168# Step 3. Approximate arctan(u) by a polynomial poly. # 6169# # 6170# Step 4. Return arctan(F) + poly, arctan(F) is fetched from a # 6171# table of values calculated beforehand. Exit. # 6172# # 6173# Step 5. If |X| >= 16, go to Step 7. # 6174# # 6175# Step 6. Approximate arctan(X) by an odd polynomial in X. Exit. # 6176# # 6177# Step 7. Define X' = -1/X. Approximate arctan(X') by an odd # 6178# polynomial in X'. # 6179# Arctan(X) = sign(X)*Pi/2 + arctan(X'). Exit. 
# 6180# # 6181######################################################################### 6182 6183ATANA3: long 0xBFF6687E,0x314987D8 6184ATANA2: long 0x4002AC69,0x34A26DB3 6185ATANA1: long 0xBFC2476F,0x4E1DA28E 6186 6187ATANB6: long 0x3FB34444,0x7F876989 6188ATANB5: long 0xBFB744EE,0x7FAF45DB 6189ATANB4: long 0x3FBC71C6,0x46940220 6190ATANB3: long 0xBFC24924,0x921872F9 6191ATANB2: long 0x3FC99999,0x99998FA9 6192ATANB1: long 0xBFD55555,0x55555555 6193 6194ATANC5: long 0xBFB70BF3,0x98539E6A 6195ATANC4: long 0x3FBC7187,0x962D1D7D 6196ATANC3: long 0xBFC24924,0x827107B8 6197ATANC2: long 0x3FC99999,0x9996263E 6198ATANC1: long 0xBFD55555,0x55555536 6199 6200PPIBY2: long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000 6201NPIBY2: long 0xBFFF0000,0xC90FDAA2,0x2168C235,0x00000000 6202 6203PTINY: long 0x00010000,0x80000000,0x00000000,0x00000000 6204NTINY: long 0x80010000,0x80000000,0x00000000,0x00000000 6205 6206ATANTBL: 6207 long 0x3FFB0000,0x83D152C5,0x060B7A51,0x00000000 6208 long 0x3FFB0000,0x8BC85445,0x65498B8B,0x00000000 6209 long 0x3FFB0000,0x93BE4060,0x17626B0D,0x00000000 6210 long 0x3FFB0000,0x9BB3078D,0x35AEC202,0x00000000 6211 long 0x3FFB0000,0xA3A69A52,0x5DDCE7DE,0x00000000 6212 long 0x3FFB0000,0xAB98E943,0x62765619,0x00000000 6213 long 0x3FFB0000,0xB389E502,0xF9C59862,0x00000000 6214 long 0x3FFB0000,0xBB797E43,0x6B09E6FB,0x00000000 6215 long 0x3FFB0000,0xC367A5C7,0x39E5F446,0x00000000 6216 long 0x3FFB0000,0xCB544C61,0xCFF7D5C6,0x00000000 6217 long 0x3FFB0000,0xD33F62F8,0x2488533E,0x00000000 6218 long 0x3FFB0000,0xDB28DA81,0x62404C77,0x00000000 6219 long 0x3FFB0000,0xE310A407,0x8AD34F18,0x00000000 6220 long 0x3FFB0000,0xEAF6B0A8,0x188EE1EB,0x00000000 6221 long 0x3FFB0000,0xF2DAF194,0x9DBE79D5,0x00000000 6222 long 0x3FFB0000,0xFABD5813,0x61D47E3E,0x00000000 6223 long 0x3FFC0000,0x8346AC21,0x0959ECC4,0x00000000 6224 long 0x3FFC0000,0x8B232A08,0x304282D8,0x00000000 6225 long 0x3FFC0000,0x92FB70B8,0xD29AE2F9,0x00000000 6226 long 0x3FFC0000,0x9ACF476F,0x5CCD1CB4,0x00000000 6227 long 0x3FFC0000,0xA29E7630,0x4954F23F,0x00000000 6228 long 0x3FFC0000,0xAA68C5D0,0x8AB85230,0x00000000 6229 long 0x3FFC0000,0xB22DFFFD,0x9D539F83,0x00000000 6230 long 0x3FFC0000,0xB9EDEF45,0x3E900EA5,0x00000000 6231 long 0x3FFC0000,0xC1A85F1C,0xC75E3EA5,0x00000000 6232 long 0x3FFC0000,0xC95D1BE8,0x28138DE6,0x00000000 6233 long 0x3FFC0000,0xD10BF300,0x840D2DE4,0x00000000 6234 long 0x3FFC0000,0xD8B4B2BA,0x6BC05E7A,0x00000000 6235 long 0x3FFC0000,0xE0572A6B,0xB42335F6,0x00000000 6236 long 0x3FFC0000,0xE7F32A70,0xEA9CAA8F,0x00000000 6237 long 0x3FFC0000,0xEF888432,0x64ECEFAA,0x00000000 6238 long 0x3FFC0000,0xF7170A28,0xECC06666,0x00000000 6239 long 0x3FFD0000,0x812FD288,0x332DAD32,0x00000000 6240 long 0x3FFD0000,0x88A8D1B1,0x218E4D64,0x00000000 6241 long 0x3FFD0000,0x9012AB3F,0x23E4AEE8,0x00000000 6242 long 0x3FFD0000,0x976CC3D4,0x11E7F1B9,0x00000000 6243 long 0x3FFD0000,0x9EB68949,0x3889A227,0x00000000 6244 long 0x3FFD0000,0xA5EF72C3,0x4487361B,0x00000000 6245 long 0x3FFD0000,0xAD1700BA,0xF07A7227,0x00000000 6246 long 0x3FFD0000,0xB42CBCFA,0xFD37EFB7,0x00000000 6247 long 0x3FFD0000,0xBB303A94,0x0BA80F89,0x00000000 6248 long 0x3FFD0000,0xC22115C6,0xFCAEBBAF,0x00000000 6249 long 0x3FFD0000,0xC8FEF3E6,0x86331221,0x00000000 6250 long 0x3FFD0000,0xCFC98330,0xB4000C70,0x00000000 6251 long 0x3FFD0000,0xD6807AA1,0x102C5BF9,0x00000000 6252 long 0x3FFD0000,0xDD2399BC,0x31252AA3,0x00000000 6253 long 0x3FFD0000,0xE3B2A855,0x6B8FC517,0x00000000 6254 long 0x3FFD0000,0xEA2D764F,0x64315989,0x00000000 6255 long 
0x3FFD0000,0xF3BF5BF8,0xBAD1A21D,0x00000000 6256 long 0x3FFE0000,0x801CE39E,0x0D205C9A,0x00000000 6257 long 0x3FFE0000,0x8630A2DA,0xDA1ED066,0x00000000 6258 long 0x3FFE0000,0x8C1AD445,0xF3E09B8C,0x00000000 6259 long 0x3FFE0000,0x91DB8F16,0x64F350E2,0x00000000 6260 long 0x3FFE0000,0x97731420,0x365E538C,0x00000000 6261 long 0x3FFE0000,0x9CE1C8E6,0xA0B8CDBA,0x00000000 6262 long 0x3FFE0000,0xA22832DB,0xCADAAE09,0x00000000 6263 long 0x3FFE0000,0xA746F2DD,0xB7602294,0x00000000 6264 long 0x3FFE0000,0xAC3EC0FB,0x997DD6A2,0x00000000 6265 long 0x3FFE0000,0xB110688A,0xEBDC6F6A,0x00000000 6266 long 0x3FFE0000,0xB5BCC490,0x59ECC4B0,0x00000000 6267 long 0x3FFE0000,0xBA44BC7D,0xD470782F,0x00000000 6268 long 0x3FFE0000,0xBEA94144,0xFD049AAC,0x00000000 6269 long 0x3FFE0000,0xC2EB4ABB,0x661628B6,0x00000000 6270 long 0x3FFE0000,0xC70BD54C,0xE602EE14,0x00000000 6271 long 0x3FFE0000,0xCD000549,0xADEC7159,0x00000000 6272 long 0x3FFE0000,0xD48457D2,0xD8EA4EA3,0x00000000 6273 long 0x3FFE0000,0xDB948DA7,0x12DECE3B,0x00000000 6274 long 0x3FFE0000,0xE23855F9,0x69E8096A,0x00000000 6275 long 0x3FFE0000,0xE8771129,0xC4353259,0x00000000 6276 long 0x3FFE0000,0xEE57C16E,0x0D379C0D,0x00000000 6277 long 0x3FFE0000,0xF3E10211,0xA87C3779,0x00000000 6278 long 0x3FFE0000,0xF919039D,0x758B8D41,0x00000000 6279 long 0x3FFE0000,0xFE058B8F,0x64935FB3,0x00000000 6280 long 0x3FFF0000,0x8155FB49,0x7B685D04,0x00000000 6281 long 0x3FFF0000,0x83889E35,0x49D108E1,0x00000000 6282 long 0x3FFF0000,0x859CFA76,0x511D724B,0x00000000 6283 long 0x3FFF0000,0x87952ECF,0xFF8131E7,0x00000000 6284 long 0x3FFF0000,0x89732FD1,0x9557641B,0x00000000 6285 long 0x3FFF0000,0x8B38CAD1,0x01932A35,0x00000000 6286 long 0x3FFF0000,0x8CE7A8D8,0x301EE6B5,0x00000000 6287 long 0x3FFF0000,0x8F46A39E,0x2EAE5281,0x00000000 6288 long 0x3FFF0000,0x922DA7D7,0x91888487,0x00000000 6289 long 0x3FFF0000,0x94D19FCB,0xDEDF5241,0x00000000 6290 long 0x3FFF0000,0x973AB944,0x19D2A08B,0x00000000 6291 long 0x3FFF0000,0x996FF00E,0x08E10B96,0x00000000 6292 long 0x3FFF0000,0x9B773F95,0x12321DA7,0x00000000 6293 long 0x3FFF0000,0x9D55CC32,0x0F935624,0x00000000 6294 long 0x3FFF0000,0x9F100575,0x006CC571,0x00000000 6295 long 0x3FFF0000,0xA0A9C290,0xD97CC06C,0x00000000 6296 long 0x3FFF0000,0xA22659EB,0xEBC0630A,0x00000000 6297 long 0x3FFF0000,0xA388B4AF,0xF6EF0EC9,0x00000000 6298 long 0x3FFF0000,0xA4D35F10,0x61D292C4,0x00000000 6299 long 0x3FFF0000,0xA60895DC,0xFBE3187E,0x00000000 6300 long 0x3FFF0000,0xA72A51DC,0x7367BEAC,0x00000000 6301 long 0x3FFF0000,0xA83A5153,0x0956168F,0x00000000 6302 long 0x3FFF0000,0xA93A2007,0x7539546E,0x00000000 6303 long 0x3FFF0000,0xAA9E7245,0x023B2605,0x00000000 6304 long 0x3FFF0000,0xAC4C84BA,0x6FE4D58F,0x00000000 6305 long 0x3FFF0000,0xADCE4A4A,0x606B9712,0x00000000 6306 long 0x3FFF0000,0xAF2A2DCD,0x8D263C9C,0x00000000 6307 long 0x3FFF0000,0xB0656F81,0xF22265C7,0x00000000 6308 long 0x3FFF0000,0xB1846515,0x0F71496A,0x00000000 6309 long 0x3FFF0000,0xB28AAA15,0x6F9ADA35,0x00000000 6310 long 0x3FFF0000,0xB37B44FF,0x3766B895,0x00000000 6311 long 0x3FFF0000,0xB458C3DC,0xE9630433,0x00000000 6312 long 0x3FFF0000,0xB525529D,0x562246BD,0x00000000 6313 long 0x3FFF0000,0xB5E2CCA9,0x5F9D88CC,0x00000000 6314 long 0x3FFF0000,0xB692CADA,0x7ACA1ADA,0x00000000 6315 long 0x3FFF0000,0xB736AEA7,0xA6925838,0x00000000 6316 long 0x3FFF0000,0xB7CFAB28,0x7E9F7B36,0x00000000 6317 long 0x3FFF0000,0xB85ECC66,0xCB219835,0x00000000 6318 long 0x3FFF0000,0xB8E4FD5A,0x20A593DA,0x00000000 6319 long 0x3FFF0000,0xB99F41F6,0x4AFF9BB5,0x00000000 6320 long 0x3FFF0000,0xBA7F1E17,0x842BBE7B,0x00000000 
6321 long 0x3FFF0000,0xBB471285,0x7637E17D,0x00000000 6322 long 0x3FFF0000,0xBBFABE8A,0x4788DF6F,0x00000000 6323 long 0x3FFF0000,0xBC9D0FAD,0x2B689D79,0x00000000 6324 long 0x3FFF0000,0xBD306A39,0x471ECD86,0x00000000 6325 long 0x3FFF0000,0xBDB6C731,0x856AF18A,0x00000000 6326 long 0x3FFF0000,0xBE31CAC5,0x02E80D70,0x00000000 6327 long 0x3FFF0000,0xBEA2D55C,0xE33194E2,0x00000000 6328 long 0x3FFF0000,0xBF0B10B7,0xC03128F0,0x00000000 6329 long 0x3FFF0000,0xBF6B7A18,0xDACB778D,0x00000000 6330 long 0x3FFF0000,0xBFC4EA46,0x63FA18F6,0x00000000 6331 long 0x3FFF0000,0xC0181BDE,0x8B89A454,0x00000000 6332 long 0x3FFF0000,0xC065B066,0xCFBF6439,0x00000000 6333 long 0x3FFF0000,0xC0AE345F,0x56340AE6,0x00000000 6334 long 0x3FFF0000,0xC0F22291,0x9CB9E6A7,0x00000000 6335 6336 set X,FP_SCR0 6337 set XDCARE,X+2 6338 set XFRAC,X+4 6339 set XFRACLO,X+8 6340 6341 set ATANF,FP_SCR1 6342 set ATANFHI,ATANF+4 6343 set ATANFLO,ATANF+8 6344 6345 global satan 6346#--ENTRY POINT FOR ATAN(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S 6347satan: 6348 fmov.x (%a0),%fp0 # LOAD INPUT 6349 6350 mov.l (%a0),%d1 6351 mov.w 4(%a0),%d1 6352 fmov.x %fp0,X(%a6) 6353 and.l &0x7FFFFFFF,%d1 6354 6355 cmp.l %d1,&0x3FFB8000 # |X| >= 1/16? 6356 bge.b ATANOK1 6357 bra.w ATANSM 6358 6359ATANOK1: 6360 cmp.l %d1,&0x4002FFFF # |X| < 16 ? 6361 ble.b ATANMAIN 6362 bra.w ATANBIG 6363 6364#--THE MOST LIKELY CASE, |X| IN [1/16, 16). WE USE TABLE TECHNIQUE 6365#--THE IDEA IS ATAN(X) = ATAN(F) + ATAN( [X-F] / [1+XF] ). 6366#--SO IF F IS CHOSEN TO BE CLOSE TO X AND ATAN(F) IS STORED IN 6367#--A TABLE, ALL WE NEED IS TO APPROXIMATE ATAN(U) WHERE 6368#--U = (X-F)/(1+XF) IS SMALL (REMEMBER F IS CLOSE TO X). IT IS 6369#--TRUE THAT A DIVIDE IS NOW NEEDED, BUT THE APPROXIMATION FOR 6370#--ATAN(U) IS A VERY SHORT POLYNOMIAL AND THE INDEXING TO 6371#--FETCH F AND SAVING OF REGISTERS CAN BE ALL HIDED UNDER THE 6372#--DIVIDE. IN THE END THIS METHOD IS MUCH FASTER THAN A TRADITIONAL 6373#--ONE. NOTE ALSO THAT THE TRADITIONAL SCHEME THAT APPROXIMATE 6374#--ATAN(X) DIRECTLY WILL NEED TO USE A RATIONAL APPROXIMATION 6375#--(DIVISION NEEDED) ANYWAY BECAUSE A POLYNOMIAL APPROXIMATION 6376#--WILL INVOLVE A VERY LONG POLYNOMIAL. 6377 6378#--NOW WE SEE X AS +-2^K * 1.BBBBBBB....B <- 1. + 63 BITS 6379#--WE CHOSE F TO BE +-2^K * 1.BBBB1 6380#--THAT IS IT MATCHES THE EXPONENT AND FIRST 5 BITS OF X, THE 6381#--SIXTH BITS IS SET TO BE 1. SINCE K = -4, -3, ..., 3, THERE 6382#--ARE ONLY 8 TIMES 16 = 2^7 = 128 |F|'S. SINCE ATAN(-|F|) IS 6383#-- -ATAN(|F|), WE NEED TO STORE ONLY ATAN(|F|). 6384 6385ATANMAIN: 6386 6387 and.l &0xF8000000,XFRAC(%a6) # FIRST 5 BITS 6388 or.l &0x04000000,XFRAC(%a6) # SET 6-TH BIT TO 1 6389 mov.l &0x00000000,XFRACLO(%a6) # LOCATION OF X IS NOW F 6390 6391 fmov.x %fp0,%fp1 # FP1 IS X 6392 fmul.x X(%a6),%fp1 # FP1 IS X*F, NOTE THAT X*F > 0 6393 fsub.x X(%a6),%fp0 # FP0 IS X-F 6394 fadd.s &0x3F800000,%fp1 # FP1 IS 1 + X*F 6395 fdiv.x %fp1,%fp0 # FP0 IS U = (X-F)/(1+X*F) 6396 6397#--WHILE THE DIVISION IS TAKING ITS TIME, WE FETCH ATAN(|F|) 6398#--CREATE ATAN(F) AND STORE IT IN ATANF, AND 6399#--SAVE REGISTERS FP2. 
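#--THE BYTE OFFSET COMPUTED BELOW IS 16*( (K+4)*16 + BBBB ), WHERE BBBB
#--HOLDS THE FIRST FOUR FRACTION BITS OF X AND EACH ATANTBL ENTRY IS 16
#--BYTES LONG. FOR EXAMPLE, X = 1.0101...B * 2**0 GIVES K = 0 AND
#--BBBB = 5, SO ATAN(|F|) IS FETCHED FROM ENTRY 4*16+5 = 69, I.E. BYTE
#--OFFSET $450.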
6400 6401 mov.l %d2,-(%sp) # SAVE d2 TEMPORARILY 6402 mov.l %d1,%d2 # THE EXP AND 16 BITS OF X 6403 and.l &0x00007800,%d1 # 4 VARYING BITS OF F'S FRACTION 6404 and.l &0x7FFF0000,%d2 # EXPONENT OF F 6405 sub.l &0x3FFB0000,%d2 # K+4 6406 asr.l &1,%d2 6407 add.l %d2,%d1 # THE 7 BITS IDENTIFYING F 6408 asr.l &7,%d1 # INDEX INTO TBL OF ATAN(|F|) 6409 lea ATANTBL(%pc),%a1 6410 add.l %d1,%a1 # ADDRESS OF ATAN(|F|) 6411 mov.l (%a1)+,ATANF(%a6) 6412 mov.l (%a1)+,ATANFHI(%a6) 6413 mov.l (%a1)+,ATANFLO(%a6) # ATANF IS NOW ATAN(|F|) 6414 mov.l X(%a6),%d1 # LOAD SIGN AND EXPO. AGAIN 6415 and.l &0x80000000,%d1 # SIGN(F) 6416 or.l %d1,ATANF(%a6) # ATANF IS NOW SIGN(F)*ATAN(|F|) 6417 mov.l (%sp)+,%d2 # RESTORE d2 6418 6419#--THAT'S ALL I HAVE TO DO FOR NOW, 6420#--BUT ALAS, THE DIVIDE IS STILL CRANKING! 6421 6422#--U IN FP0, WE ARE NOW READY TO COMPUTE ATAN(U) AS 6423#--U + A1*U*V*(A2 + V*(A3 + V)), V = U*U 6424#--THE POLYNOMIAL MAY LOOK STRANGE, BUT IS NEVERTHELESS CORRECT. 6425#--THE NATURAL FORM IS U + U*V*(A1 + V*(A2 + V*A3)) 6426#--WHAT WE HAVE HERE IS MERELY A1 = A3, A2 = A1/A3, A3 = A2/A3. 6427#--THE REASON FOR THIS REARRANGEMENT IS TO MAKE THE INDEPENDENT 6428#--PARTS A1*U*V AND (A2 + ... STUFF) MORE LOAD-BALANCED 6429 6430 fmovm.x &0x04,-(%sp) # save fp2 6431 6432 fmov.x %fp0,%fp1 6433 fmul.x %fp1,%fp1 6434 fmov.d ATANA3(%pc),%fp2 6435 fadd.x %fp1,%fp2 # A3+V 6436 fmul.x %fp1,%fp2 # V*(A3+V) 6437 fmul.x %fp0,%fp1 # U*V 6438 fadd.d ATANA2(%pc),%fp2 # A2+V*(A3+V) 6439 fmul.d ATANA1(%pc),%fp1 # A1*U*V 6440 fmul.x %fp2,%fp1 # A1*U*V*(A2+V*(A3+V)) 6441 fadd.x %fp1,%fp0 # ATAN(U), FP1 RELEASED 6442 6443 fmovm.x (%sp)+,&0x20 # restore fp2 6444 6445 fmov.l %d0,%fpcr # restore users rnd mode,prec 6446 fadd.x ATANF(%a6),%fp0 # ATAN(X) 6447 bra t_inx2 6448 6449ATANBORS: 6450#--|X| IS IN d0 IN COMPACT FORM. FP1, d0 SAVED. 6451#--FP0 IS X AND |X| <= 1/16 OR |X| >= 16. 6452 cmp.l %d1,&0x3FFF8000 6453 bgt.w ATANBIG # I.E. |X| >= 16 6454 6455ATANSM: 6456#--|X| <= 1/16 6457#--IF |X| < 2^(-40), RETURN X AS ANSWER. OTHERWISE, APPROXIMATE 6458#--ATAN(X) BY X + X*Y*(B1+Y*(B2+Y*(B3+Y*(B4+Y*(B5+Y*B6))))) 6459#--WHICH IS X + X*Y*( [B1+Z*(B3+Z*B5)] + [Y*(B2+Z*(B4+Z*B6)] ) 6460#--WHERE Y = X*X, AND Z = Y*Y. 6461 6462 cmp.l %d1,&0x3FD78000 6463 blt.w ATANTINY 6464 6465#--COMPUTE POLYNOMIAL 6466 fmovm.x &0x0c,-(%sp) # save fp2/fp3 6467 6468 fmul.x %fp0,%fp0 # FPO IS Y = X*X 6469 6470 fmov.x %fp0,%fp1 6471 fmul.x %fp1,%fp1 # FP1 IS Z = Y*Y 6472 6473 fmov.d ATANB6(%pc),%fp2 6474 fmov.d ATANB5(%pc),%fp3 6475 6476 fmul.x %fp1,%fp2 # Z*B6 6477 fmul.x %fp1,%fp3 # Z*B5 6478 6479 fadd.d ATANB4(%pc),%fp2 # B4+Z*B6 6480 fadd.d ATANB3(%pc),%fp3 # B3+Z*B5 6481 6482 fmul.x %fp1,%fp2 # Z*(B4+Z*B6) 6483 fmul.x %fp3,%fp1 # Z*(B3+Z*B5) 6484 6485 fadd.d ATANB2(%pc),%fp2 # B2+Z*(B4+Z*B6) 6486 fadd.d ATANB1(%pc),%fp1 # B1+Z*(B3+Z*B5) 6487 6488 fmul.x %fp0,%fp2 # Y*(B2+Z*(B4+Z*B6)) 6489 fmul.x X(%a6),%fp0 # X*Y 6490 6491 fadd.x %fp2,%fp1 # [B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))] 6492 6493 fmul.x %fp1,%fp0 # X*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]) 6494 6495 fmovm.x (%sp)+,&0x30 # restore fp2/fp3 6496 6497 fmov.l %d0,%fpcr # restore users rnd mode,prec 6498 fadd.x X(%a6),%fp0 6499 bra t_inx2 6500 6501ATANTINY: 6502#--|X| < 2^(-40), ATAN(X) = X 6503 6504 fmov.l %d0,%fpcr # restore users rnd mode,prec 6505 mov.b &FMOV_OP,%d1 # last inst is MOVE 6506 fmov.x X(%a6),%fp0 # last inst - possible exception set 6507 6508 bra t_catch 6509 6510ATANBIG: 6511#--IF |X| > 2^(100), RETURN SIGN(X)*(PI/2 - TINY). 
OTHERWISE, 6512#--RETURN SIGN(X)*PI/2 + ATAN(-1/X). 6513 cmp.l %d1,&0x40638000 6514 bgt.w ATANHUGE 6515 6516#--APPROXIMATE ATAN(-1/X) BY 6517#--X'+X'*Y*(C1+Y*(C2+Y*(C3+Y*(C4+Y*C5)))), X' = -1/X, Y = X'*X' 6518#--THIS CAN BE RE-WRITTEN AS 6519#--X'+X'*Y*( [C1+Z*(C3+Z*C5)] + [Y*(C2+Z*C4)] ), Z = Y*Y. 6520 6521 fmovm.x &0x0c,-(%sp) # save fp2/fp3 6522 6523 fmov.s &0xBF800000,%fp1 # LOAD -1 6524 fdiv.x %fp0,%fp1 # FP1 IS -1/X 6525 6526#--DIVIDE IS STILL CRANKING 6527 6528 fmov.x %fp1,%fp0 # FP0 IS X' 6529 fmul.x %fp0,%fp0 # FP0 IS Y = X'*X' 6530 fmov.x %fp1,X(%a6) # X IS REALLY X' 6531 6532 fmov.x %fp0,%fp1 6533 fmul.x %fp1,%fp1 # FP1 IS Z = Y*Y 6534 6535 fmov.d ATANC5(%pc),%fp3 6536 fmov.d ATANC4(%pc),%fp2 6537 6538 fmul.x %fp1,%fp3 # Z*C5 6539 fmul.x %fp1,%fp2 # Z*B4 6540 6541 fadd.d ATANC3(%pc),%fp3 # C3+Z*C5 6542 fadd.d ATANC2(%pc),%fp2 # C2+Z*C4 6543 6544 fmul.x %fp3,%fp1 # Z*(C3+Z*C5), FP3 RELEASED 6545 fmul.x %fp0,%fp2 # Y*(C2+Z*C4) 6546 6547 fadd.d ATANC1(%pc),%fp1 # C1+Z*(C3+Z*C5) 6548 fmul.x X(%a6),%fp0 # X'*Y 6549 6550 fadd.x %fp2,%fp1 # [Y*(C2+Z*C4)]+[C1+Z*(C3+Z*C5)] 6551 6552 fmul.x %fp1,%fp0 # X'*Y*([B1+Z*(B3+Z*B5)] 6553# ... +[Y*(B2+Z*(B4+Z*B6))]) 6554 fadd.x X(%a6),%fp0 6555 6556 fmovm.x (%sp)+,&0x30 # restore fp2/fp3 6557 6558 fmov.l %d0,%fpcr # restore users rnd mode,prec 6559 tst.b (%a0) 6560 bpl.b pos_big 6561 6562neg_big: 6563 fadd.x NPIBY2(%pc),%fp0 6564 bra t_minx2 6565 6566pos_big: 6567 fadd.x PPIBY2(%pc),%fp0 6568 bra t_pinx2 6569 6570ATANHUGE: 6571#--RETURN SIGN(X)*(PIBY2 - TINY) = SIGN(X)*PIBY2 - SIGN(X)*TINY 6572 tst.b (%a0) 6573 bpl.b pos_huge 6574 6575neg_huge: 6576 fmov.x NPIBY2(%pc),%fp0 6577 fmov.l %d0,%fpcr 6578 fadd.x PTINY(%pc),%fp0 6579 bra t_minx2 6580 6581pos_huge: 6582 fmov.x PPIBY2(%pc),%fp0 6583 fmov.l %d0,%fpcr 6584 fadd.x NTINY(%pc),%fp0 6585 bra t_pinx2 6586 6587 global satand 6588#--ENTRY POINT FOR ATAN(X) FOR DENORMALIZED ARGUMENT 6589satand: 6590 bra t_extdnrm 6591 6592######################################################################### 6593# sasin(): computes the inverse sine of a normalized input # 6594# sasind(): computes the inverse sine of a denormalized input # 6595# # 6596# INPUT *************************************************************** # 6597# a0 = pointer to extended precision input # 6598# d0 = round precision,mode # 6599# # 6600# OUTPUT ************************************************************** # 6601# fp0 = arcsin(X) # 6602# # 6603# ACCURACY and MONOTONICITY ******************************************* # 6604# The returned result is within 3 ulps in 64 significant bit, # 6605# i.e. within 0.5001 ulp to 53 bits if the result is subsequently # 6606# rounded to double precision. The result is provably monotonic # 6607# in double precision. # 6608# # 6609# ALGORITHM *********************************************************** # 6610# # 6611# ASIN # 6612# 1. If |X| >= 1, go to 3. # 6613# # 6614# 2. (|X| < 1) Calculate asin(X) by # 6615# z := sqrt( [1-X][1+X] ) # 6616# asin(X) = atan( x / z ). # 6617# Exit. # 6618# # 6619# 3. If |X| > 1, go to 5. # 6620# # 6621# 4. (|X| = 1) sgn := sign(X), return asin(X) := sgn * Pi/2. Exit.# 6622# # 6623# 5. (|X| > 1) Generate an invalid operation by 0 * infinity. # 6624# Exit. 
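#
# Illustrative note (not assembled): a minimal C sketch, assuming a C99
# <math.h>, of step 2 above; the factored form (1-X)(1+X) is the one the
# code below actually computes.
#
#    #include <math.h>
#
#    /* asin(x) = atan( x / sqrt((1-x)*(1+x)) ),  |x| < 1 */
#    double asin_via_atan(double x)
#    {
#        return atan(x / sqrt((1.0 - x) * (1.0 + x)));
#    }
#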
# 6625# # 6626######################################################################### 6627 6628 global sasin 6629sasin: 6630 fmov.x (%a0),%fp0 # LOAD INPUT 6631 6632 mov.l (%a0),%d1 6633 mov.w 4(%a0),%d1 6634 and.l &0x7FFFFFFF,%d1 6635 cmp.l %d1,&0x3FFF8000 6636 bge.b ASINBIG 6637 6638# This catch is added here for the '060 QSP. Originally, the call to 6639# satan() would handle this case by causing the exception which would 6640# not be caught until gen_except(). Now, with the exceptions being 6641# detected inside of satan(), the exception would have been handled there 6642# instead of inside sasin() as expected. 6643 cmp.l %d1,&0x3FD78000 6644 blt.w ASINTINY 6645 6646#--THIS IS THE USUAL CASE, |X| < 1 6647#--ASIN(X) = ATAN( X / SQRT( (1-X)(1+X) ) ) 6648 6649ASINMAIN: 6650 fmov.s &0x3F800000,%fp1 6651 fsub.x %fp0,%fp1 # 1-X 6652 fmovm.x &0x4,-(%sp) # {fp2} 6653 fmov.s &0x3F800000,%fp2 6654 fadd.x %fp0,%fp2 # 1+X 6655 fmul.x %fp2,%fp1 # (1+X)(1-X) 6656 fmovm.x (%sp)+,&0x20 # {fp2} 6657 fsqrt.x %fp1 # SQRT([1-X][1+X]) 6658 fdiv.x %fp1,%fp0 # X/SQRT([1-X][1+X]) 6659 fmovm.x &0x01,-(%sp) # save X/SQRT(...) 6660 lea (%sp),%a0 # pass ptr to X/SQRT(...) 6661 bsr satan 6662 add.l &0xc,%sp # clear X/SQRT(...) from stack 6663 bra t_inx2 6664 6665ASINBIG: 6666 fabs.x %fp0 # |X| 6667 fcmp.s %fp0,&0x3F800000 6668 fbgt t_operr # cause an operr exception 6669 6670#--|X| = 1, ASIN(X) = +- PI/2. 6671ASINONE: 6672 fmov.x PIBY2(%pc),%fp0 6673 mov.l (%a0),%d1 6674 and.l &0x80000000,%d1 # SIGN BIT OF X 6675 or.l &0x3F800000,%d1 # +-1 IN SGL FORMAT 6676 mov.l %d1,-(%sp) # push SIGN(X) IN SGL-FMT 6677 fmov.l %d0,%fpcr 6678 fmul.s (%sp)+,%fp0 6679 bra t_inx2 6680 6681#--|X| < 2^(-40), ATAN(X) = X 6682ASINTINY: 6683 fmov.l %d0,%fpcr # restore users rnd mode,prec 6684 mov.b &FMOV_OP,%d1 # last inst is MOVE 6685 fmov.x (%a0),%fp0 # last inst - possible exception 6686 bra t_catch 6687 6688 global sasind 6689#--ASIN(X) = X FOR DENORMALIZED X 6690sasind: 6691 bra t_extdnrm 6692 6693######################################################################### 6694# sacos(): computes the inverse cosine of a normalized input # 6695# sacosd(): computes the inverse cosine of a denormalized input # 6696# # 6697# INPUT *************************************************************** # 6698# a0 = pointer to extended precision input # 6699# d0 = round precision,mode # 6700# # 6701# OUTPUT ************************************************************** # 6702# fp0 = arccos(X) # 6703# # 6704# ACCURACY and MONOTONICITY ******************************************* # 6705# The returned result is within 3 ulps in 64 significant bit, # 6706# i.e. within 0.5001 ulp to 53 bits if the result is subsequently # 6707# rounded to double precision. The result is provably monotonic # 6708# in double precision. # 6709# # 6710# ALGORITHM *********************************************************** # 6711# # 6712# ACOS # 6713# 1. If |X| >= 1, go to 3. # 6714# # 6715# 2. (|X| < 1) Calculate acos(X) by # 6716# z := (1-X) / (1+X) # 6717# acos(X) = 2 * atan( sqrt(z) ). # 6718# Exit. # 6719# # 6720# 3. If |X| > 1, go to 5. # 6721# # 6722# 4. (|X| = 1) If X > 0, return 0. Otherwise, return Pi. Exit. # 6723# # 6724# 5. (|X| > 1) Generate an invalid operation by 0 * infinity. # 6725# Exit. 
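#
# Illustrative note (not assembled): a minimal C sketch, assuming a C99
# <math.h>, of step 2 above.
#
#    #include <math.h>
#
#    /* acos(x) = 2 * atan( sqrt((1-x)/(1+x)) ),  |x| < 1 */
#    double acos_via_atan(double x)
#    {
#        return 2.0 * atan(sqrt((1.0 - x) / (1.0 + x)));
#    }
#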
# 6726# # 6727######################################################################### 6728 6729 global sacos 6730sacos: 6731 fmov.x (%a0),%fp0 # LOAD INPUT 6732 6733 mov.l (%a0),%d1 # pack exp w/ upper 16 fraction 6734 mov.w 4(%a0),%d1 6735 and.l &0x7FFFFFFF,%d1 6736 cmp.l %d1,&0x3FFF8000 6737 bge.b ACOSBIG 6738 6739#--THIS IS THE USUAL CASE, |X| < 1 6740#--ACOS(X) = 2 * ATAN( SQRT( (1-X)/(1+X) ) ) 6741 6742ACOSMAIN: 6743 fmov.s &0x3F800000,%fp1 6744 fadd.x %fp0,%fp1 # 1+X 6745 fneg.x %fp0 # -X 6746 fadd.s &0x3F800000,%fp0 # 1-X 6747 fdiv.x %fp1,%fp0 # (1-X)/(1+X) 6748 fsqrt.x %fp0 # SQRT((1-X)/(1+X)) 6749 mov.l %d0,-(%sp) # save original users fpcr 6750 clr.l %d0 6751 fmovm.x &0x01,-(%sp) # save SQRT(...) to stack 6752 lea (%sp),%a0 # pass ptr to sqrt 6753 bsr satan # ATAN(SQRT([1-X]/[1+X])) 6754 add.l &0xc,%sp # clear SQRT(...) from stack 6755 6756 fmov.l (%sp)+,%fpcr # restore users round prec,mode 6757 fadd.x %fp0,%fp0 # 2 * ATAN( STUFF ) 6758 bra t_pinx2 6759 6760ACOSBIG: 6761 fabs.x %fp0 6762 fcmp.s %fp0,&0x3F800000 6763 fbgt t_operr # cause an operr exception 6764 6765#--|X| = 1, ACOS(X) = 0 OR PI 6766 tst.b (%a0) # is X positive or negative? 6767 bpl.b ACOSP1 6768 6769#--X = -1 6770#Returns PI and inexact exception 6771ACOSM1: 6772 fmov.x PI(%pc),%fp0 # load PI 6773 fmov.l %d0,%fpcr # load round mode,prec 6774 fadd.s &0x00800000,%fp0 # add a small value 6775 bra t_pinx2 6776 6777ACOSP1: 6778 bra ld_pzero # answer is positive zero 6779 6780 global sacosd 6781#--ACOS(X) = PI/2 FOR DENORMALIZED X 6782sacosd: 6783 fmov.l %d0,%fpcr # load user's rnd mode/prec 6784 fmov.x PIBY2(%pc),%fp0 6785 bra t_pinx2 6786 6787######################################################################### 6788# setox(): computes the exponential for a normalized input # 6789# setoxd(): computes the exponential for a denormalized input # 6790# setoxm1(): computes the exponential minus 1 for a normalized input # 6791# setoxm1d(): computes the exponential minus 1 for a denormalized input # 6792# # 6793# INPUT *************************************************************** # 6794# a0 = pointer to extended precision input # 6795# d0 = round precision,mode # 6796# # 6797# OUTPUT ************************************************************** # 6798# fp0 = exp(X) or exp(X)-1 # 6799# # 6800# ACCURACY and MONOTONICITY ******************************************* # 6801# The returned result is within 0.85 ulps in 64 significant bit, # 6802# i.e. within 0.5001 ulp to 53 bits if the result is subsequently # 6803# rounded to double precision. The result is provably monotonic # 6804# in double precision. # 6805# # 6806# ALGORITHM and IMPLEMENTATION **************************************** # 6807# # 6808# setoxd # 6809# ------ # 6810# Step 1. Set ans := 1.0 # 6811# # 6812# Step 2. Return ans := ans + sign(X)*2^(-126). Exit. # 6813# Notes: This will always generate one exception -- inexact. # 6814# # 6815# # 6816# setox # 6817# ----- # 6818# # 6819# Step 1. Filter out extreme cases of input argument. # 6820# 1.1 If |X| >= 2^(-65), go to Step 1.3. # 6821# 1.2 Go to Step 7. # 6822# 1.3 If |X| < 16380 log(2), go to Step 2. # 6823# 1.4 Go to Step 8. # 6824# Notes: The usual case should take the branches 1.1 -> 1.3 -> 2.# 6825# To avoid the use of floating-point comparisons, a # 6826# compact representation of |X| is used. 
This format is a # 6827# 32-bit integer, the upper (more significant) 16 bits # 6828# are the sign and biased exponent field of |X|; the # 6829# lower 16 bits are the 16 most significant fraction # 6830# (including the explicit bit) bits of |X|. Consequently, # 6831# the comparisons in Steps 1.1 and 1.3 can be performed # 6832# by integer comparison. Note also that the constant # 6833# 16380 log(2) used in Step 1.3 is also in the compact # 6834# form. Thus taking the branch to Step 2 guarantees # 6835# |X| < 16380 log(2). There is no harm to have a small # 6836# number of cases where |X| is less than, but close to, # 6837# 16380 log(2) and the branch to Step 9 is taken. # 6838# # 6839# Step 2. Calculate N = round-to-nearest-int( X * 64/log2 ). # 6840# 2.1 Set AdjFlag := 0 (indicates the branch 1.3 -> 2 # 6841# was taken) # 6842# 2.2 N := round-to-nearest-integer( X * 64/log2 ). # 6843# 2.3 Calculate J = N mod 64; so J = 0,1,2,..., # 6844# or 63. # 6845# 2.4 Calculate M = (N - J)/64; so N = 64M + J. # 6846# 2.5 Calculate the address of the stored value of # 6847# 2^(J/64). # 6848# 2.6 Create the value Scale = 2^M. # 6849# Notes: The calculation in 2.2 is really performed by # 6850# Z := X * constant # 6851# N := round-to-nearest-integer(Z) # 6852# where # 6853# constant := single-precision( 64/log 2 ). # 6854# # 6855# Using a single-precision constant avoids memory # 6856# access. Another effect of using a single-precision # 6857# "constant" is that the calculated value Z is # 6858# # 6859# Z = X*(64/log2)*(1+eps), |eps| <= 2^(-24). # 6860# # 6861# This error has to be considered later in Steps 3 and 4. # 6862# # 6863# Step 3. Calculate X - N*log2/64. # 6864# 3.1 R := X + N*L1, # 6865# where L1 := single-precision(-log2/64). # 6866# 3.2 R := R + N*L2, # 6867# L2 := extended-precision(-log2/64 - L1).# 6868# Notes: a) The way L1 and L2 are chosen ensures L1+L2 # 6869# approximate the value -log2/64 to 88 bits of accuracy. # 6870# b) N*L1 is exact because N is no longer than 22 bits # 6871# and L1 is no longer than 24 bits. # 6872# c) The calculation X+N*L1 is also exact due to # 6873# cancellation. Thus, R is practically X+N(L1+L2) to full # 6874# 64 bits. # 6875# d) It is important to estimate how large can |R| be # 6876# after Step 3.2. # 6877# # 6878# N = rnd-to-int( X*64/log2 (1+eps) ), |eps|<=2^(-24) # 6879# X*64/log2 (1+eps) = N + f, |f| <= 0.5 # 6880# X*64/log2 - N = f - eps*X 64/log2 # 6881# X - N*log2/64 = f*log2/64 - eps*X # 6882# # 6883# # 6884# Now |X| <= 16446 log2, thus # 6885# # 6886# |X - N*log2/64| <= (0.5 + 16446/2^(18))*log2/64 # 6887# <= 0.57 log2/64. # 6888# This bound will be used in Step 4. # 6889# # 6890# Step 4. Approximate exp(R)-1 by a polynomial # 6891# p = R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5)))) # 6892# Notes: a) In order to reduce memory access, the coefficients # 6893# are made as "short" as possible: A1 (which is 1/2), A4 # 6894# and A5 are single precision; A2 and A3 are double # 6895# precision. # 6896# b) Even with the restrictions above, # 6897# |p - (exp(R)-1)| < 2^(-68.8) for all |R| <= 0.0062. # 6898# Note that 0.0062 is slightly bigger than 0.57 log2/64. # 6899# c) To fully utilize the pipeline, p is separated into # 6900# two independent pieces of roughly equal complexities # 6901# p = [ R + R*S*(A2 + S*A4) ] + # 6902# [ S*(A1 + S*(A3 + S*A5)) ] # 6903# where S = R*R. # 6904# # 6905# Step 5. Compute 2^(J/64)*exp(R) = 2^(J/64)*(1+p) by # 6906# ans := T + ( T*p + t) # 6907# where T and t are the stored values for 2^(J/64). 
# 6908# Notes: 2^(J/64) is stored as T and t where T+t approximates # 6909# 2^(J/64) to roughly 85 bits; T is in extended precision # 6910# and t is in single precision. Note also that T is # 6911# rounded to 62 bits so that the last two bits of T are # 6912# zero. The reason for such a special form is that T-1, # 6913# T-2, and T-8 will all be exact --- a property that will # 6914# give much more accurate computation of the function # 6915# EXPM1. # 6916# # 6917# Step 6. Reconstruction of exp(X) # 6918# exp(X) = 2^M * 2^(J/64) * exp(R). # 6919# 6.1 If AdjFlag = 0, go to 6.3 # 6920# 6.2 ans := ans * AdjScale # 6921# 6.3 Restore the user FPCR # 6922# 6.4 Return ans := ans * Scale. Exit. # 6923# Notes: If AdjFlag = 0, we have X = Mlog2 + Jlog2/64 + R, # 6924# |M| <= 16380, and Scale = 2^M. Moreover, exp(X) will # 6925# neither overflow nor underflow. If AdjFlag = 1, that # 6926# means that # 6927# X = (M1+M)log2 + Jlog2/64 + R, |M1+M| >= 16380. # 6928# Hence, exp(X) may overflow or underflow or neither. # 6929# When that is the case, AdjScale = 2^(M1) where M1 is # 6930# approximately M. Thus 6.2 will never cause # 6931# over/underflow. Possible exception in 6.4 is overflow # 6932# or underflow. The inexact exception is not generated in # 6933# 6.4. Although one can argue that the inexact flag # 6934# should always be raised, to simulate that exception # 6935# cost to much than the flag is worth in practical uses. # 6936# # 6937# Step 7. Return 1 + X. # 6938# 7.1 ans := X # 6939# 7.2 Restore user FPCR. # 6940# 7.3 Return ans := 1 + ans. Exit # 6941# Notes: For non-zero X, the inexact exception will always be # 6942# raised by 7.3. That is the only exception raised by 7.3.# 6943# Note also that we use the FMOVEM instruction to move X # 6944# in Step 7.1 to avoid unnecessary trapping. (Although # 6945# the FMOVEM may not seem relevant since X is normalized, # 6946# the precaution will be useful in the library version of # 6947# this code where the separate entry for denormalized # 6948# inputs will be done away with.) # 6949# # 6950# Step 8. Handle exp(X) where |X| >= 16380log2. # 6951# 8.1 If |X| > 16480 log2, go to Step 9. # 6952# (mimic 2.2 - 2.6) # 6953# 8.2 N := round-to-integer( X * 64/log2 ) # 6954# 8.3 Calculate J = N mod 64, J = 0,1,...,63 # 6955# 8.4 K := (N-J)/64, M1 := truncate(K/2), M = K-M1, # 6956# AdjFlag := 1. # 6957# 8.5 Calculate the address of the stored value # 6958# 2^(J/64). # 6959# 8.6 Create the values Scale = 2^M, AdjScale = 2^M1. # 6960# 8.7 Go to Step 3. # 6961# Notes: Refer to notes for 2.2 - 2.6. # 6962# # 6963# Step 9. Handle exp(X), |X| > 16480 log2. # 6964# 9.1 If X < 0, go to 9.3 # 6965# 9.2 ans := Huge, go to 9.4 # 6966# 9.3 ans := Tiny. # 6967# 9.4 Restore user FPCR. # 6968# 9.5 Return ans := ans * ans. Exit. # 6969# Notes: Exp(X) will surely overflow or underflow, depending on # 6970# X's sign. "Huge" and "Tiny" are respectively large/tiny # 6971# extended-precision numbers whose square over/underflow # 6972# with an inexact result. Thus, 9.5 always raises the # 6973# inexact together with either overflow or underflow. # 6974# # 6975# setoxm1d # 6976# -------- # 6977# # 6978# Step 1. Set ans := 0 # 6979# # 6980# Step 2. Return ans := X + ans. Exit. # 6981# Notes: This will return X with the appropriate rounding # 6982# precision prescribed by the user FPCR. # 6983# # 6984# setoxm1 # 6985# ------- # 6986# # 6987# Step 1. Check |X| # 6988# 1.1 If |X| >= 1/4, go to Step 1.3. # 6989# 1.2 Go to Step 7. # 6990# 1.3 If |X| < 70 log(2), go to Step 2. 
# 6991# 1.4 Go to Step 10. # 6992# Notes: The usual case should take the branches 1.1 -> 1.3 -> 2.# 6993# However, it is conceivable |X| can be small very often # 6994# because EXPM1 is intended to evaluate exp(X)-1 # 6995# accurately when |X| is small. For further details on # 6996# the comparisons, see the notes on Step 1 of setox. # 6997# # 6998# Step 2. Calculate N = round-to-nearest-int( X * 64/log2 ). # 6999# 2.1 N := round-to-nearest-integer( X * 64/log2 ). # 7000# 2.2 Calculate J = N mod 64; so J = 0,1,2,..., # 7001# or 63. # 7002# 2.3 Calculate M = (N - J)/64; so N = 64M + J. # 7003# 2.4 Calculate the address of the stored value of # 7004# 2^(J/64). # 7005# 2.5 Create the values Sc = 2^M and # 7006# OnebySc := -2^(-M). # 7007# Notes: See the notes on Step 2 of setox. # 7008# # 7009# Step 3. Calculate X - N*log2/64. # 7010# 3.1 R := X + N*L1, # 7011# where L1 := single-precision(-log2/64). # 7012# 3.2 R := R + N*L2, # 7013# L2 := extended-precision(-log2/64 - L1).# 7014# Notes: Applying the analysis of Step 3 of setox in this case # 7015# shows that |R| <= 0.0055 (note that |X| <= 70 log2 in # 7016# this case). # 7017# # 7018# Step 4. Approximate exp(R)-1 by a polynomial # 7019# p = R+R*R*(A1+R*(A2+R*(A3+R*(A4+R*(A5+R*A6))))) # 7020# Notes: a) In order to reduce memory access, the coefficients # 7021# are made as "short" as possible: A1 (which is 1/2), A5 # 7022# and A6 are single precision; A2, A3 and A4 are double # 7023# precision. # 7024# b) Even with the restriction above, # 7025# |p - (exp(R)-1)| < |R| * 2^(-72.7) # 7026# for all |R| <= 0.0055. # 7027# c) To fully utilize the pipeline, p is separated into # 7028# two independent pieces of roughly equal complexity # 7029# p = [ R*S*(A2 + S*(A4 + S*A6)) ] + # 7030# [ R + S*(A1 + S*(A3 + S*A5)) ] # 7031# where S = R*R. # 7032# # 7033# Step 5. Compute 2^(J/64)*p by # 7034# p := T*p # 7035# where T and t are the stored values for 2^(J/64). # 7036# Notes: 2^(J/64) is stored as T and t where T+t approximates # 7037# 2^(J/64) to roughly 85 bits; T is in extended precision # 7038# and t is in single precision. Note also that T is # 7039# rounded to 62 bits so that the last two bits of T are # 7040# zero. The reason for such a special form is that T-1, # 7041# T-2, and T-8 will all be exact --- a property that will # 7042# be exploited in Step 6 below. The total relative error # 7043# in p is no bigger than 2^(-67.7) compared to the final # 7044# result. # 7045# # 7046# Step 6. Reconstruction of exp(X)-1 # 7047# exp(X)-1 = 2^M * ( 2^(J/64) + p - 2^(-M) ). # 7048# 6.1 If M <= 63, go to Step 6.3. # 7049# 6.2 ans := T + (p + (t + OnebySc)). Go to 6.6 # 7050# 6.3 If M >= -3, go to 6.5. # 7051# 6.4 ans := (T + (p + t)) + OnebySc. Go to 6.6 # 7052# 6.5 ans := (T + OnebySc) + (p + t). # 7053# 6.6 Restore user FPCR. # 7054# 6.7 Return ans := Sc * ans. Exit. # 7055# Notes: The various arrangements of the expressions give # 7056# accurate evaluations. # 7057# # 7058# Step 7. exp(X)-1 for |X| < 1/4. # 7059# 7.1 If |X| >= 2^(-65), go to Step 9. # 7060# 7.2 Go to Step 8. # 7061# # 7062# Step 8. Calculate exp(X)-1, |X| < 2^(-65). # 7063# 8.1 If |X| < 2^(-16312), goto 8.3 # 7064# 8.2 Restore FPCR; return ans := X - 2^(-16382). # 7065# Exit. # 7066# 8.3 X := X * 2^(140). # 7067# 8.4 Restore FPCR; ans := ans - 2^(-16382). # 7068# Return ans := ans*2^(140). Exit # 7069# Notes: The idea is to return "X - tiny" under the user # 7070# precision and rounding modes. 
To avoid unnecessary # 7071# inefficiency, we stay away from denormalized numbers # 7072# the best we can. For |X| >= 2^(-16312), the # 7073# straightforward 8.2 generates the inexact exception as # 7074# the case warrants. # 7075# # 7076# Step 9. Calculate exp(X)-1, |X| < 1/4, by a polynomial # 7077# p = X + X*X*(B1 + X*(B2 + ... + X*B12)) # 7078# Notes: a) In order to reduce memory access, the coefficients # 7079# are made as "short" as possible: B1 (which is 1/2), B9 # 7080# to B12 are single precision; B3 to B8 are double # 7081# precision; and B2 is double extended. # 7082# b) Even with the restriction above, # 7083# |p - (exp(X)-1)| < |X| 2^(-70.6) # 7084# for all |X| <= 0.251. # 7085# Note that 0.251 is slightly bigger than 1/4. # 7086# c) To fully preserve accuracy, the polynomial is # 7087# computed as # 7088# X + ( S*B1 + Q ) where S = X*X and # 7089# Q = X*S*(B2 + X*(B3 + ... + X*B12)) # 7090# d) To fully utilize the pipeline, Q is separated into # 7091# two independent pieces of roughly equal complexity # 7092# Q = [ X*S*(B2 + S*(B4 + ... + S*B12)) ] + # 7093# [ S*S*(B3 + S*(B5 + ... + S*B11)) ] # 7094# # 7095# Step 10. Calculate exp(X)-1 for |X| >= 70 log 2. # 7096# 10.1 If X >= 70log2 , exp(X) - 1 = exp(X) for all # 7097# practical purposes. Therefore, go to Step 1 of setox. # 7098# 10.2 If X <= -70log2, exp(X) - 1 = -1 for all practical # 7099# purposes. # 7100# ans := -1 # 7101# Restore user FPCR # 7102# Return ans := ans + 2^(-126). Exit. # 7103# Notes: 10.2 will always create an inexact and return -1 + tiny # 7104# in the user rounding precision and mode. # 7105# # 7106######################################################################### 7107 7108L2: long 0x3FDC0000,0x82E30865,0x4361C4C6,0x00000000 7109 7110EEXPA3: long 0x3FA55555,0x55554CC1 7111EEXPA2: long 0x3FC55555,0x55554A54 7112 7113EM1A4: long 0x3F811111,0x11174385 7114EM1A3: long 0x3FA55555,0x55554F5A 7115 7116EM1A2: long 0x3FC55555,0x55555555,0x00000000,0x00000000 7117 7118EM1B8: long 0x3EC71DE3,0xA5774682 7119EM1B7: long 0x3EFA01A0,0x19D7CB68 7120 7121EM1B6: long 0x3F2A01A0,0x1A019DF3 7122EM1B5: long 0x3F56C16C,0x16C170E2 7123 7124EM1B4: long 0x3F811111,0x11111111 7125EM1B3: long 0x3FA55555,0x55555555 7126 7127EM1B2: long 0x3FFC0000,0xAAAAAAAA,0xAAAAAAAB 7128 long 0x00000000 7129 7130TWO140: long 0x48B00000,0x00000000 7131TWON140: 7132 long 0x37300000,0x00000000 7133 7134EEXPTBL: 7135 long 0x3FFF0000,0x80000000,0x00000000,0x00000000 7136 long 0x3FFF0000,0x8164D1F3,0xBC030774,0x9F841A9B 7137 long 0x3FFF0000,0x82CD8698,0xAC2BA1D8,0x9FC1D5B9 7138 long 0x3FFF0000,0x843A28C3,0xACDE4048,0xA0728369 7139 long 0x3FFF0000,0x85AAC367,0xCC487B14,0x1FC5C95C 7140 long 0x3FFF0000,0x871F6196,0x9E8D1010,0x1EE85C9F 7141 long 0x3FFF0000,0x88980E80,0x92DA8528,0x9FA20729 7142 long 0x3FFF0000,0x8A14D575,0x496EFD9C,0xA07BF9AF 7143 long 0x3FFF0000,0x8B95C1E3,0xEA8BD6E8,0xA0020DCF 7144 long 0x3FFF0000,0x8D1ADF5B,0x7E5BA9E4,0x205A63DA 7145 long 0x3FFF0000,0x8EA4398B,0x45CD53C0,0x1EB70051 7146 long 0x3FFF0000,0x9031DC43,0x1466B1DC,0x1F6EB029 7147 long 0x3FFF0000,0x91C3D373,0xAB11C338,0xA0781494 7148 long 0x3FFF0000,0x935A2B2F,0x13E6E92C,0x9EB319B0 7149 long 0x3FFF0000,0x94F4EFA8,0xFEF70960,0x2017457D 7150 long 0x3FFF0000,0x96942D37,0x20185A00,0x1F11D537 7151 long 0x3FFF0000,0x9837F051,0x8DB8A970,0x9FB952DD 7152 long 0x3FFF0000,0x99E04593,0x20B7FA64,0x1FE43087 7153 long 0x3FFF0000,0x9B8D39B9,0xD54E5538,0x1FA2A818 7154 long 0x3FFF0000,0x9D3ED9A7,0x2CFFB750,0x1FDE494D 7155 long 0x3FFF0000,0x9EF53260,0x91A111AC,0x20504890 7156 long 
0x3FFF0000,0xA0B0510F,0xB9714FC4,0xA073691C 7157 long 0x3FFF0000,0xA2704303,0x0C496818,0x1F9B7A05 7158 long 0x3FFF0000,0xA43515AE,0x09E680A0,0xA0797126 7159 long 0x3FFF0000,0xA5FED6A9,0xB15138EC,0xA071A140 7160 long 0x3FFF0000,0xA7CD93B4,0xE9653568,0x204F62DA 7161 long 0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x1F283C4A 7162 long 0x3FFF0000,0xAB7A39B5,0xA93ED338,0x9F9A7FDC 7163 long 0x3FFF0000,0xAD583EEA,0x42A14AC8,0xA05B3FAC 7164 long 0x3FFF0000,0xAF3B78AD,0x690A4374,0x1FDF2610 7165 long 0x3FFF0000,0xB123F581,0xD2AC2590,0x9F705F90 7166 long 0x3FFF0000,0xB311C412,0xA9112488,0x201F678A 7167 long 0x3FFF0000,0xB504F333,0xF9DE6484,0x1F32FB13 7168 long 0x3FFF0000,0xB6FD91E3,0x28D17790,0x20038B30 7169 long 0x3FFF0000,0xB8FBAF47,0x62FB9EE8,0x200DC3CC 7170 long 0x3FFF0000,0xBAFF5AB2,0x133E45FC,0x9F8B2AE6 7171 long 0x3FFF0000,0xBD08A39F,0x580C36C0,0xA02BBF70 7172 long 0x3FFF0000,0xBF1799B6,0x7A731084,0xA00BF518 7173 long 0x3FFF0000,0xC12C4CCA,0x66709458,0xA041DD41 7174 long 0x3FFF0000,0xC346CCDA,0x24976408,0x9FDF137B 7175 long 0x3FFF0000,0xC5672A11,0x5506DADC,0x201F1568 7176 long 0x3FFF0000,0xC78D74C8,0xABB9B15C,0x1FC13A2E 7177 long 0x3FFF0000,0xC9B9BD86,0x6E2F27A4,0xA03F8F03 7178 long 0x3FFF0000,0xCBEC14FE,0xF2727C5C,0x1FF4907D 7179 long 0x3FFF0000,0xCE248C15,0x1F8480E4,0x9E6E53E4 7180 long 0x3FFF0000,0xD06333DA,0xEF2B2594,0x1FD6D45C 7181 long 0x3FFF0000,0xD2A81D91,0xF12AE45C,0xA076EDB9 7182 long 0x3FFF0000,0xD4F35AAB,0xCFEDFA20,0x9FA6DE21 7183 long 0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x1EE69A2F 7184 long 0x3FFF0000,0xD99D15C2,0x78AFD7B4,0x207F439F 7185 long 0x3FFF0000,0xDBFBB797,0xDAF23754,0x201EC207 7186 long 0x3FFF0000,0xDE60F482,0x5E0E9124,0x9E8BE175 7187 long 0x3FFF0000,0xE0CCDEEC,0x2A94E110,0x20032C4B 7188 long 0x3FFF0000,0xE33F8972,0xBE8A5A50,0x2004DFF5 7189 long 0x3FFF0000,0xE5B906E7,0x7C8348A8,0x1E72F47A 7190 long 0x3FFF0000,0xE8396A50,0x3C4BDC68,0x1F722F22 7191 long 0x3FFF0000,0xEAC0C6E7,0xDD243930,0xA017E945 7192 long 0x3FFF0000,0xED4F301E,0xD9942B84,0x1F401A5B 7193 long 0x3FFF0000,0xEFE4B99B,0xDCDAF5CC,0x9FB9A9E3 7194 long 0x3FFF0000,0xF281773C,0x59FFB138,0x20744C05 7195 long 0x3FFF0000,0xF5257D15,0x2486CC2C,0x1F773A19 7196 long 0x3FFF0000,0xF7D0DF73,0x0AD13BB8,0x1FFE90D5 7197 long 0x3FFF0000,0xFA83B2DB,0x722A033C,0xA041ED22 7198 long 0x3FFF0000,0xFD3E0C0C,0xF486C174,0x1F853F3A 7199 7200 set ADJFLAG,L_SCR2 7201 set SCALE,FP_SCR0 7202 set ADJSCALE,FP_SCR1 7203 set SC,FP_SCR0 7204 set ONEBYSC,FP_SCR1 7205 7206 global setox 7207setox: 7208#--entry point for EXP(X), here X is finite, non-zero, and not NaN's 7209 7210#--Step 1. 7211 mov.l (%a0),%d1 # load part of input X 7212 and.l &0x7FFF0000,%d1 # biased expo. of X 7213 cmp.l %d1,&0x3FBE0000 # 2^(-65) 7214 bge.b EXPC1 # normal case 7215 bra EXPSM 7216 7217EXPC1: 7218#--The case |X| >= 2^(-65) 7219 mov.w 4(%a0),%d1 # expo. and partial sig. of |X| 7220 cmp.l %d1,&0x400CB167 # 16380 log2 trunc. 16 bits 7221 blt.b EXPMAIN # normal case 7222 bra EEXPBIG 7223 7224EXPMAIN: 7225#--Step 2. 7226#--This is the normal branch: 2^(-65) <= |X| < 16380 log2. 
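#
# Illustrative note (not assembled): a minimal C model, assuming a C99
# <math.h>, of Steps 2-6 as performed below for the normal branch only.
# The real code reads T (and its single-precision tail t) from EEXPTBL,
# computes R with the two-part constant L1+L2, and evaluates exp(R)-1 with
# a short polynomial; exp2(), expm1() and scalbn() stand in for those
# pieces here.
#
#    #include <math.h>
#
#    double exp_by_table(double x)         /* 2^(-65) <= |x| < 16380*log2 */
#    {
#        const double ln2 = 0.69314718055994530942;
#        double n = rint(x * (64.0 / ln2));  /* N = round-to-nearest int  */
#        int    N = (int)n;
#        int    J = N & 63;                  /* table index, 0..63        */
#        int    M = (N - J) / 64;            /* N = 64*M + J              */
#        double R = x - n * (ln2 / 64.0);    /* reduced argument          */
#        double p = expm1(R);                /* polynomial of Step 4      */
#        double T = exp2(J / 64.0);          /* 2^(J/64), from EEXPTBL    */
#        return scalbn(T + T * p, M);        /* 2^M * 2^(J/64) * exp(R)   */
#    }
#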
7227 fmov.x (%a0),%fp0 # load input from (a0) 7228 7229 fmov.x %fp0,%fp1 7230 fmul.s &0x42B8AA3B,%fp0 # 64/log2 * X 7231 fmovm.x &0xc,-(%sp) # save fp2 {%fp2/%fp3} 7232 mov.l &0,ADJFLAG(%a6) 7233 fmov.l %fp0,%d1 # N = int( X * 64/log2 ) 7234 lea EEXPTBL(%pc),%a1 7235 fmov.l %d1,%fp0 # convert to floating-format 7236 7237 mov.l %d1,L_SCR1(%a6) # save N temporarily 7238 and.l &0x3F,%d1 # D0 is J = N mod 64 7239 lsl.l &4,%d1 7240 add.l %d1,%a1 # address of 2^(J/64) 7241 mov.l L_SCR1(%a6),%d1 7242 asr.l &6,%d1 # D0 is M 7243 add.w &0x3FFF,%d1 # biased expo. of 2^(M) 7244 mov.w L2(%pc),L_SCR1(%a6) # prefetch L2, no need in CB 7245 7246EXPCONT1: 7247#--Step 3. 7248#--fp1,fp2 saved on the stack. fp0 is N, fp1 is X, 7249#--a0 points to 2^(J/64), D0 is biased expo. of 2^(M) 7250 fmov.x %fp0,%fp2 7251 fmul.s &0xBC317218,%fp0 # N * L1, L1 = lead(-log2/64) 7252 fmul.x L2(%pc),%fp2 # N * L2, L1+L2 = -log2/64 7253 fadd.x %fp1,%fp0 # X + N*L1 7254 fadd.x %fp2,%fp0 # fp0 is R, reduced arg. 7255 7256#--Step 4. 7257#--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL 7258#-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5)))) 7259#--TO FULLY UTILIZE THE PIPELINE, WE COMPUTE S = R*R 7260#--[R+R*S*(A2+S*A4)] + [S*(A1+S*(A3+S*A5))] 7261 7262 fmov.x %fp0,%fp1 7263 fmul.x %fp1,%fp1 # fp1 IS S = R*R 7264 7265 fmov.s &0x3AB60B70,%fp2 # fp2 IS A5 7266 7267 fmul.x %fp1,%fp2 # fp2 IS S*A5 7268 fmov.x %fp1,%fp3 7269 fmul.s &0x3C088895,%fp3 # fp3 IS S*A4 7270 7271 fadd.d EEXPA3(%pc),%fp2 # fp2 IS A3+S*A5 7272 fadd.d EEXPA2(%pc),%fp3 # fp3 IS A2+S*A4 7273 7274 fmul.x %fp1,%fp2 # fp2 IS S*(A3+S*A5) 7275 mov.w %d1,SCALE(%a6) # SCALE is 2^(M) in extended 7276 mov.l &0x80000000,SCALE+4(%a6) 7277 clr.l SCALE+8(%a6) 7278 7279 fmul.x %fp1,%fp3 # fp3 IS S*(A2+S*A4) 7280 7281 fadd.s &0x3F000000,%fp2 # fp2 IS A1+S*(A3+S*A5) 7282 fmul.x %fp0,%fp3 # fp3 IS R*S*(A2+S*A4) 7283 7284 fmul.x %fp1,%fp2 # fp2 IS S*(A1+S*(A3+S*A5)) 7285 fadd.x %fp3,%fp0 # fp0 IS R+R*S*(A2+S*A4), 7286 7287 fmov.x (%a1)+,%fp1 # fp1 is lead. pt. of 2^(J/64) 7288 fadd.x %fp2,%fp0 # fp0 is EXP(R) - 1 7289 7290#--Step 5 7291#--final reconstruction process 7292#--EXP(X) = 2^M * ( 2^(J/64) + 2^(J/64)*(EXP(R)-1) ) 7293 7294 fmul.x %fp1,%fp0 # 2^(J/64)*(Exp(R)-1) 7295 fmovm.x (%sp)+,&0x30 # fp2 restored {%fp2/%fp3} 7296 fadd.s (%a1),%fp0 # accurate 2^(J/64) 7297 7298 fadd.x %fp1,%fp0 # 2^(J/64) + 2^(J/64)*... 
7299 mov.l ADJFLAG(%a6),%d1 7300 7301#--Step 6 7302 tst.l %d1 7303 beq.b NORMAL 7304ADJUST: 7305 fmul.x ADJSCALE(%a6),%fp0 7306NORMAL: 7307 fmov.l %d0,%fpcr # restore user FPCR 7308 mov.b &FMUL_OP,%d1 # last inst is MUL 7309 fmul.x SCALE(%a6),%fp0 # multiply 2^(M) 7310 bra t_catch 7311 7312EXPSM: 7313#--Step 7 7314 fmovm.x (%a0),&0x80 # load X 7315 fmov.l %d0,%fpcr 7316 fadd.s &0x3F800000,%fp0 # 1+X in user mode 7317 bra t_pinx2 7318 7319EEXPBIG: 7320#--Step 8 7321 cmp.l %d1,&0x400CB27C # 16480 log2 7322 bgt.b EXP2BIG 7323#--Steps 8.2 -- 8.6 7324 fmov.x (%a0),%fp0 # load input from (a0) 7325 7326 fmov.x %fp0,%fp1 7327 fmul.s &0x42B8AA3B,%fp0 # 64/log2 * X 7328 fmovm.x &0xc,-(%sp) # save fp2 {%fp2/%fp3} 7329 mov.l &1,ADJFLAG(%a6) 7330 fmov.l %fp0,%d1 # N = int( X * 64/log2 ) 7331 lea EEXPTBL(%pc),%a1 7332 fmov.l %d1,%fp0 # convert to floating-format 7333 mov.l %d1,L_SCR1(%a6) # save N temporarily 7334 and.l &0x3F,%d1 # D0 is J = N mod 64 7335 lsl.l &4,%d1 7336 add.l %d1,%a1 # address of 2^(J/64) 7337 mov.l L_SCR1(%a6),%d1 7338 asr.l &6,%d1 # D0 is K 7339 mov.l %d1,L_SCR1(%a6) # save K temporarily 7340 asr.l &1,%d1 # D0 is M1 7341 sub.l %d1,L_SCR1(%a6) # a1 is M 7342 add.w &0x3FFF,%d1 # biased expo. of 2^(M1) 7343 mov.w %d1,ADJSCALE(%a6) # ADJSCALE := 2^(M1) 7344 mov.l &0x80000000,ADJSCALE+4(%a6) 7345 clr.l ADJSCALE+8(%a6) 7346 mov.l L_SCR1(%a6),%d1 # D0 is M 7347 add.w &0x3FFF,%d1 # biased expo. of 2^(M) 7348 bra.w EXPCONT1 # go back to Step 3 7349 7350EXP2BIG: 7351#--Step 9 7352 tst.b (%a0) # is X positive or negative? 7353 bmi t_unfl2 7354 bra t_ovfl2 7355 7356 global setoxd 7357setoxd: 7358#--entry point for EXP(X), X is denormalized 7359 mov.l (%a0),-(%sp) 7360 andi.l &0x80000000,(%sp) 7361 ori.l &0x00800000,(%sp) # sign(X)*2^(-126) 7362 7363 fmov.s &0x3F800000,%fp0 7364 7365 fmov.l %d0,%fpcr 7366 fadd.s (%sp)+,%fp0 7367 bra t_pinx2 7368 7369 global setoxm1 7370setoxm1: 7371#--entry point for EXPM1(X), here X is finite, non-zero, non-NaN 7372 7373#--Step 1. 7374#--Step 1.1 7375 mov.l (%a0),%d1 # load part of input X 7376 and.l &0x7FFF0000,%d1 # biased expo. of X 7377 cmp.l %d1,&0x3FFD0000 # 1/4 7378 bge.b EM1CON1 # |X| >= 1/4 7379 bra EM1SM 7380 7381EM1CON1: 7382#--Step 1.3 7383#--The case |X| >= 1/4 7384 mov.w 4(%a0),%d1 # expo. and partial sig. of |X| 7385 cmp.l %d1,&0x4004C215 # 70log2 rounded up to 16 bits 7386 ble.b EM1MAIN # 1/4 <= |X| <= 70log2 7387 bra EM1BIG 7388 7389EM1MAIN: 7390#--Step 2. 7391#--This is the case: 1/4 <= |X| <= 70 log2. 7392 fmov.x (%a0),%fp0 # load input from (a0) 7393 7394 fmov.x %fp0,%fp1 7395 fmul.s &0x42B8AA3B,%fp0 # 64/log2 * X 7396 fmovm.x &0xc,-(%sp) # save fp2 {%fp2/%fp3} 7397 fmov.l %fp0,%d1 # N = int( X * 64/log2 ) 7398 lea EEXPTBL(%pc),%a1 7399 fmov.l %d1,%fp0 # convert to floating-format 7400 7401 mov.l %d1,L_SCR1(%a6) # save N temporarily 7402 and.l &0x3F,%d1 # D0 is J = N mod 64 7403 lsl.l &4,%d1 7404 add.l %d1,%a1 # address of 2^(J/64) 7405 mov.l L_SCR1(%a6),%d1 7406 asr.l &6,%d1 # D0 is M 7407 mov.l %d1,L_SCR1(%a6) # save a copy of M 7408 7409#--Step 3. 7410#--fp1,fp2 saved on the stack. fp0 is N, fp1 is X, 7411#--a0 points to 2^(J/64), D0 and a1 both contain M 7412 fmov.x %fp0,%fp2 7413 fmul.s &0xBC317218,%fp0 # N * L1, L1 = lead(-log2/64) 7414 fmul.x L2(%pc),%fp2 # N * L2, L1+L2 = -log2/64 7415 fadd.x %fp1,%fp0 # X + N*L1 7416 fadd.x %fp2,%fp0 # fp0 is R, reduced arg. 7417 add.w &0x3FFF,%d1 # D0 is biased expo. of 2^M 7418 7419#--Step 4. 
7420#--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL 7421#-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*(A5 + R*A6))))) 7422#--TO FULLY UTILIZE THE PIPELINE, WE COMPUTE S = R*R 7423#--[R*S*(A2+S*(A4+S*A6))] + [R+S*(A1+S*(A3+S*A5))] 7424 7425 fmov.x %fp0,%fp1 7426 fmul.x %fp1,%fp1 # fp1 IS S = R*R 7427 7428 fmov.s &0x3950097B,%fp2 # fp2 IS a6 7429 7430 fmul.x %fp1,%fp2 # fp2 IS S*A6 7431 fmov.x %fp1,%fp3 7432 fmul.s &0x3AB60B6A,%fp3 # fp3 IS S*A5 7433 7434 fadd.d EM1A4(%pc),%fp2 # fp2 IS A4+S*A6 7435 fadd.d EM1A3(%pc),%fp3 # fp3 IS A3+S*A5 7436 mov.w %d1,SC(%a6) # SC is 2^(M) in extended 7437 mov.l &0x80000000,SC+4(%a6) 7438 clr.l SC+8(%a6) 7439 7440 fmul.x %fp1,%fp2 # fp2 IS S*(A4+S*A6) 7441 mov.l L_SCR1(%a6),%d1 # D0 is M 7442 neg.w %d1 # D0 is -M 7443 fmul.x %fp1,%fp3 # fp3 IS S*(A3+S*A5) 7444 add.w &0x3FFF,%d1 # biased expo. of 2^(-M) 7445 fadd.d EM1A2(%pc),%fp2 # fp2 IS A2+S*(A4+S*A6) 7446 fadd.s &0x3F000000,%fp3 # fp3 IS A1+S*(A3+S*A5) 7447 7448 fmul.x %fp1,%fp2 # fp2 IS S*(A2+S*(A4+S*A6)) 7449 or.w &0x8000,%d1 # signed/expo. of -2^(-M) 7450 mov.w %d1,ONEBYSC(%a6) # OnebySc is -2^(-M) 7451 mov.l &0x80000000,ONEBYSC+4(%a6) 7452 clr.l ONEBYSC+8(%a6) 7453 fmul.x %fp3,%fp1 # fp1 IS S*(A1+S*(A3+S*A5)) 7454 7455 fmul.x %fp0,%fp2 # fp2 IS R*S*(A2+S*(A4+S*A6)) 7456 fadd.x %fp1,%fp0 # fp0 IS R+S*(A1+S*(A3+S*A5)) 7457 7458 fadd.x %fp2,%fp0 # fp0 IS EXP(R)-1 7459 7460 fmovm.x (%sp)+,&0x30 # fp2 restored {%fp2/%fp3} 7461 7462#--Step 5 7463#--Compute 2^(J/64)*p 7464 7465 fmul.x (%a1),%fp0 # 2^(J/64)*(Exp(R)-1) 7466 7467#--Step 6 7468#--Step 6.1 7469 mov.l L_SCR1(%a6),%d1 # retrieve M 7470 cmp.l %d1,&63 7471 ble.b MLE63 7472#--Step 6.2 M >= 64 7473 fmov.s 12(%a1),%fp1 # fp1 is t 7474 fadd.x ONEBYSC(%a6),%fp1 # fp1 is t+OnebySc 7475 fadd.x %fp1,%fp0 # p+(t+OnebySc), fp1 released 7476 fadd.x (%a1),%fp0 # T+(p+(t+OnebySc)) 7477 bra EM1SCALE 7478MLE63: 7479#--Step 6.3 M <= 63 7480 cmp.l %d1,&-3 7481 bge.b MGEN3 7482MLTN3: 7483#--Step 6.4 M <= -4 7484 fadd.s 12(%a1),%fp0 # p+t 7485 fadd.x (%a1),%fp0 # T+(p+t) 7486 fadd.x ONEBYSC(%a6),%fp0 # OnebySc + (T+(p+t)) 7487 bra EM1SCALE 7488MGEN3: 7489#--Step 6.5 -3 <= M <= 63 7490 fmov.x (%a1)+,%fp1 # fp1 is T 7491 fadd.s (%a1),%fp0 # fp0 is p+t 7492 fadd.x ONEBYSC(%a6),%fp1 # fp1 is T+OnebySc 7493 fadd.x %fp1,%fp0 # (T+OnebySc)+(p+t) 7494 7495EM1SCALE: 7496#--Step 6.6 7497 fmov.l %d0,%fpcr 7498 fmul.x SC(%a6),%fp0 7499 bra t_inx2 7500 7501EM1SM: 7502#--Step 7 |X| < 1/4. 
7503 cmp.l %d1,&0x3FBE0000 # 2^(-65) 7504 bge.b EM1POLY 7505 7506EM1TINY: 7507#--Step 8 |X| < 2^(-65) 7508 cmp.l %d1,&0x00330000 # 2^(-16312) 7509 blt.b EM12TINY 7510#--Step 8.2 7511 mov.l &0x80010000,SC(%a6) # SC is -2^(-16382) 7512 mov.l &0x80000000,SC+4(%a6) 7513 clr.l SC+8(%a6) 7514 fmov.x (%a0),%fp0 7515 fmov.l %d0,%fpcr 7516 mov.b &FADD_OP,%d1 # last inst is ADD 7517 fadd.x SC(%a6),%fp0 7518 bra t_catch 7519 7520EM12TINY: 7521#--Step 8.3 7522 fmov.x (%a0),%fp0 7523 fmul.d TWO140(%pc),%fp0 7524 mov.l &0x80010000,SC(%a6) 7525 mov.l &0x80000000,SC+4(%a6) 7526 clr.l SC+8(%a6) 7527 fadd.x SC(%a6),%fp0 7528 fmov.l %d0,%fpcr 7529 mov.b &FMUL_OP,%d1 # last inst is MUL 7530 fmul.d TWON140(%pc),%fp0 7531 bra t_catch 7532 7533EM1POLY: 7534#--Step 9 exp(X)-1 by a simple polynomial 7535 fmov.x (%a0),%fp0 # fp0 is X 7536 fmul.x %fp0,%fp0 # fp0 is S := X*X 7537 fmovm.x &0xc,-(%sp) # save fp2 {%fp2/%fp3} 7538 fmov.s &0x2F30CAA8,%fp1 # fp1 is B12 7539 fmul.x %fp0,%fp1 # fp1 is S*B12 7540 fmov.s &0x310F8290,%fp2 # fp2 is B11 7541 fadd.s &0x32D73220,%fp1 # fp1 is B10+S*B12 7542 7543 fmul.x %fp0,%fp2 # fp2 is S*B11 7544 fmul.x %fp0,%fp1 # fp1 is S*(B10 + ... 7545 7546 fadd.s &0x3493F281,%fp2 # fp2 is B9+S*... 7547 fadd.d EM1B8(%pc),%fp1 # fp1 is B8+S*... 7548 7549 fmul.x %fp0,%fp2 # fp2 is S*(B9+... 7550 fmul.x %fp0,%fp1 # fp1 is S*(B8+... 7551 7552 fadd.d EM1B7(%pc),%fp2 # fp2 is B7+S*... 7553 fadd.d EM1B6(%pc),%fp1 # fp1 is B6+S*... 7554 7555 fmul.x %fp0,%fp2 # fp2 is S*(B7+... 7556 fmul.x %fp0,%fp1 # fp1 is S*(B6+... 7557 7558 fadd.d EM1B5(%pc),%fp2 # fp2 is B5+S*... 7559 fadd.d EM1B4(%pc),%fp1 # fp1 is B4+S*... 7560 7561 fmul.x %fp0,%fp2 # fp2 is S*(B5+... 7562 fmul.x %fp0,%fp1 # fp1 is S*(B4+... 7563 7564 fadd.d EM1B3(%pc),%fp2 # fp2 is B3+S*... 7565 fadd.x EM1B2(%pc),%fp1 # fp1 is B2+S*... 7566 7567 fmul.x %fp0,%fp2 # fp2 is S*(B3+... 7568 fmul.x %fp0,%fp1 # fp1 is S*(B2+... 7569 7570 fmul.x %fp0,%fp2 # fp2 is S*S*(B3+...) 7571 fmul.x (%a0),%fp1 # fp1 is X*S*(B2... 7572 7573 fmul.s &0x3F000000,%fp0 # fp0 is S*B1 7574 fadd.x %fp2,%fp1 # fp1 is Q 7575 7576 fmovm.x (%sp)+,&0x30 # fp2 restored {%fp2/%fp3} 7577 7578 fadd.x %fp1,%fp0 # fp0 is S*B1+Q 7579 7580 fmov.l %d0,%fpcr 7581 fadd.x (%a0),%fp0 7582 bra t_inx2 7583 7584EM1BIG: 7585#--Step 10 |X| > 70 log2 7586 mov.l (%a0),%d1 7587 cmp.l %d1,&0 7588 bgt.w EXPC1 7589#--Step 10.2 7590 fmov.s &0xBF800000,%fp0 # fp0 is -1 7591 fmov.l %d0,%fpcr 7592 fadd.s &0x00800000,%fp0 # -1 + 2^(-126) 7593 bra t_minx2 7594 7595 global setoxm1d 7596setoxm1d: 7597#--entry point for EXPM1(X), here X is denormalized 7598#--Step 0. 7599 bra t_extdnrm 7600 7601######################################################################### 7602# sgetexp(): returns the exponent portion of the input argument. # 7603# The exponent bias is removed and the exponent value is # 7604# returned as an extended precision number in fp0. # 7605# sgetexpd(): handles denormalized numbers. # 7606# # 7607# sgetman(): extracts the mantissa of the input argument. The # 7608# mantissa is converted to an extended precision number w/ # 7609# an exponent of $3fff and is returned in fp0. The range of # 7610# the result is [1.0 - 2.0). # 7611# sgetmand(): handles denormalized numbers. 
# 7612# # 7613# INPUT *************************************************************** # 7614# a0 = pointer to extended precision input # 7615# # 7616# OUTPUT ************************************************************** # 7617# fp0 = exponent(X) or mantissa(X) # 7618# # 7619######################################################################### 7620 7621 global sgetexp 7622sgetexp: 7623 mov.w SRC_EX(%a0),%d0 # get the exponent 7624 bclr &0xf,%d0 # clear the sign bit 7625 subi.w &0x3fff,%d0 # subtract off the bias 7626 fmov.w %d0,%fp0 # return exp in fp0 7627 blt.b sgetexpn # it's negative 7628 rts 7629 7630sgetexpn: 7631 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit 7632 rts 7633 7634 global sgetexpd 7635sgetexpd: 7636 bsr.l norm # normalize 7637 neg.w %d0 # new exp = -(shft amt) 7638 subi.w &0x3fff,%d0 # subtract off the bias 7639 fmov.w %d0,%fp0 # return exp in fp0 7640 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit 7641 rts 7642 7643 global sgetman 7644sgetman: 7645 mov.w SRC_EX(%a0),%d0 # get the exp 7646 ori.w &0x7fff,%d0 # clear old exp 7647 bclr &0xe,%d0 # make it the new exp +-3fff 7648 7649# here, we build the result in a tmp location so as not to disturb the input 7650 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) # copy to tmp loc 7651 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) # copy to tmp loc 7652 mov.w %d0,FP_SCR0_EX(%a6) # insert new exponent 7653 fmov.x FP_SCR0(%a6),%fp0 # put new value back in fp0 7654 bmi.b sgetmann # it's negative 7655 rts 7656 7657sgetmann: 7658 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit 7659 rts 7660 7661# 7662# For denormalized numbers, shift the mantissa until the j-bit = 1, 7663# then load the exponent with +/1 $3fff. 7664# 7665 global sgetmand 7666sgetmand: 7667 bsr.l norm # normalize exponent 7668 bra.b sgetman 7669 7670######################################################################### 7671# scosh(): computes the hyperbolic cosine of a normalized input # 7672# scoshd(): computes the hyperbolic cosine of a denormalized input # 7673# # 7674# INPUT *************************************************************** # 7675# a0 = pointer to extended precision input # 7676# d0 = round precision,mode # 7677# # 7678# OUTPUT ************************************************************** # 7679# fp0 = cosh(X) # 7680# # 7681# ACCURACY and MONOTONICITY ******************************************* # 7682# The returned result is within 3 ulps in 64 significant bit, # 7683# i.e. within 0.5001 ulp to 53 bits if the result is subsequently # 7684# rounded to double precision. The result is provably monotonic # 7685# in double precision. # 7686# # 7687# ALGORITHM *********************************************************** # 7688# # 7689# COSH # 7690# 1. If |X| > 16380 log2, go to 3. # 7691# # 7692# 2. (|X| <= 16380 log2) Cosh(X) is obtained by the formulae # 7693# y = |X|, z = exp(Y), and # 7694# cosh(X) = (1/2)*( z + 1/z ). # 7695# Exit. # 7696# # 7697# 3. (|X| > 16380 log2). If |X| > 16480 log2, go to 5. # 7698# # 7699# 4. (16380 log2 < |X| <= 16480 log2) # 7700# cosh(X) = sign(X) * exp(|X|)/2. # 7701# However, invoking exp(|X|) may cause premature # 7702# overflow. Thus, we calculate sinh(X) as follows: # 7703# Y := |X| # 7704# Fact := 2**(16380) # 7705# Y' := Y - 16381 log2 # 7706# cosh(X) := Fact * exp(Y'). # 7707# Exit. # 7708# # 7709# 5. (|X| > 16480 log2) sinh(X) must overflow. Return # 7710# Huge*Huge to generate overflow and an infinity with # 7711# the appropriate sign. Huge is the largest finite number # 7712# in extended format. Exit. 
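#
# Illustrative note (not assembled): a minimal C sketch, assuming a C99
# <math.h>, of the two finite branches above.  CUTOFF and BIG are
# double-precision stand-ins for the extended-precision constants
# 16380 log2 and 16380 used by the code below.
#
#    #include <math.h>
#
#    #define CUTOFF 700.0               /* plays the role of 16380*log2 */
#    #define BIG    1000                /* plays the role of 16380      */
#
#    double cosh_model(double x)
#    {
#        const double ln2 = 0.69314718055994530942;
#        double y = fabs(x);
#        if (y <= CUTOFF) {
#            double z = exp(y);
#            return 0.5 * (z + 1.0 / z);   /* (exp(y) + exp(-y)) / 2     */
#        }
#        /* exp(y)/2 = 2^BIG * exp(y - (BIG+1)*log2): no early overflow  */
#        return ldexp(exp(y - (BIG + 1) * ln2), BIG);
#    }
#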
# 7713# # 7714######################################################################### 7715 7716TWO16380: 7717 long 0x7FFB0000,0x80000000,0x00000000,0x00000000 7718 7719 global scosh 7720scosh: 7721 fmov.x (%a0),%fp0 # LOAD INPUT 7722 7723 mov.l (%a0),%d1 7724 mov.w 4(%a0),%d1 7725 and.l &0x7FFFFFFF,%d1 7726 cmp.l %d1,&0x400CB167 7727 bgt.b COSHBIG 7728 7729#--THIS IS THE USUAL CASE, |X| < 16380 LOG2 7730#--COSH(X) = (1/2) * ( EXP(X) + 1/EXP(X) ) 7731 7732 fabs.x %fp0 # |X| 7733 7734 mov.l %d0,-(%sp) 7735 clr.l %d0 7736 fmovm.x &0x01,-(%sp) # save |X| to stack 7737 lea (%sp),%a0 # pass ptr to |X| 7738 bsr setox # FP0 IS EXP(|X|) 7739 add.l &0xc,%sp # erase |X| from stack 7740 fmul.s &0x3F000000,%fp0 # (1/2)EXP(|X|) 7741 mov.l (%sp)+,%d0 7742 7743 fmov.s &0x3E800000,%fp1 # (1/4) 7744 fdiv.x %fp0,%fp1 # 1/(2 EXP(|X|)) 7745 7746 fmov.l %d0,%fpcr 7747 mov.b &FADD_OP,%d1 # last inst is ADD 7748 fadd.x %fp1,%fp0 7749 bra t_catch 7750 7751COSHBIG: 7752 cmp.l %d1,&0x400CB2B3 7753 bgt.b COSHHUGE 7754 7755 fabs.x %fp0 7756 fsub.d T1(%pc),%fp0 # (|X|-16381LOG2_LEAD) 7757 fsub.d T2(%pc),%fp0 # |X| - 16381 LOG2, ACCURATE 7758 7759 mov.l %d0,-(%sp) 7760 clr.l %d0 7761 fmovm.x &0x01,-(%sp) # save fp0 to stack 7762 lea (%sp),%a0 # pass ptr to fp0 7763 bsr setox 7764 add.l &0xc,%sp # clear fp0 from stack 7765 mov.l (%sp)+,%d0 7766 7767 fmov.l %d0,%fpcr 7768 mov.b &FMUL_OP,%d1 # last inst is MUL 7769 fmul.x TWO16380(%pc),%fp0 7770 bra t_catch 7771 7772COSHHUGE: 7773 bra t_ovfl2 7774 7775 global scoshd 7776#--COSH(X) = 1 FOR DENORMALIZED X 7777scoshd: 7778 fmov.s &0x3F800000,%fp0 7779 7780 fmov.l %d0,%fpcr 7781 fadd.s &0x00800000,%fp0 7782 bra t_pinx2 7783 7784######################################################################### 7785# ssinh(): computes the hyperbolic sine of a normalized input # 7786# ssinhd(): computes the hyperbolic sine of a denormalized input # 7787# # 7788# INPUT *************************************************************** # 7789# a0 = pointer to extended precision input # 7790# d0 = round precision,mode # 7791# # 7792# OUTPUT ************************************************************** # 7793# fp0 = sinh(X) # 7794# # 7795# ACCURACY and MONOTONICITY ******************************************* # 7796# The returned result is within 3 ulps in 64 significant bit, # 7797# i.e. within 0.5001 ulp to 53 bits if the result is subsequently # 7798# rounded to double precision. The result is provably monotonic # 7799# in double precision. # 7800# # 7801# ALGORITHM *********************************************************** # 7802# # 7803# SINH # 7804# 1. If |X| > 16380 log2, go to 3. # 7805# # 7806# 2. (|X| <= 16380 log2) Sinh(X) is obtained by the formula # 7807# y = |X|, sgn = sign(X), and z = expm1(Y), # 7808# sinh(X) = sgn*(1/2)*( z + z/(1+z) ). # 7809# Exit. # 7810# # 7811# 3. If |X| > 16480 log2, go to 5. # 7812# # 7813# 4. (16380 log2 < |X| <= 16480 log2) # 7814# sinh(X) = sign(X) * exp(|X|)/2. # 7815# However, invoking exp(|X|) may cause premature overflow. # 7816# Thus, we calculate sinh(X) as follows: # 7817# Y := |X| # 7818# sgn := sign(X) # 7819# sgnFact := sgn * 2**(16380) # 7820# Y' := Y - 16381 log2 # 7821# sinh(X) := sgnFact * exp(Y'). # 7822# Exit. # 7823# # 7824# 5. (|X| > 16480 log2) sinh(X) must overflow. Return # 7825# sign(X)*Huge*Huge to generate overflow and an infinity with # 7826# the appropriate sign. Huge is the largest finite number in # 7827# extended format. Exit. 
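#
# Illustrative note (not assembled): a minimal C sketch, assuming a C99
# <math.h>, of step 2 above.  expm1() keeps full accuracy when |X| is
# small (z is then close to X), matching the use of setoxm1 in the code
# below.
#
#    #include <math.h>
#
#    /* sinh(x) = sign(x) * ( z + z/(1+z) ) / 2,  z = expm1(|x|) */
#    double sinh_model(double x)
#    {
#        double z = expm1(fabs(x));
#        return copysign(0.5 * (z + z / (1.0 + z)), x);
#    }
#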
# 7828# # 7829######################################################################### 7830 7831 global ssinh 7832ssinh: 7833 fmov.x (%a0),%fp0 # LOAD INPUT 7834 7835 mov.l (%a0),%d1 7836 mov.w 4(%a0),%d1 7837 mov.l %d1,%a1 # save (compacted) operand 7838 and.l &0x7FFFFFFF,%d1 7839 cmp.l %d1,&0x400CB167 7840 bgt.b SINHBIG 7841 7842#--THIS IS THE USUAL CASE, |X| < 16380 LOG2 7843#--Y = |X|, Z = EXPM1(Y), SINH(X) = SIGN(X)*(1/2)*( Z + Z/(1+Z) ) 7844 7845 fabs.x %fp0 # Y = |X| 7846 7847 movm.l &0x8040,-(%sp) # {a1/d0} 7848 fmovm.x &0x01,-(%sp) # save Y on stack 7849 lea (%sp),%a0 # pass ptr to Y 7850 clr.l %d0 7851 bsr setoxm1 # FP0 IS Z = EXPM1(Y) 7852 add.l &0xc,%sp # clear Y from stack 7853 fmov.l &0,%fpcr 7854 movm.l (%sp)+,&0x0201 # {a1/d0} 7855 7856 fmov.x %fp0,%fp1 7857 fadd.s &0x3F800000,%fp1 # 1+Z 7858 fmov.x %fp0,-(%sp) 7859 fdiv.x %fp1,%fp0 # Z/(1+Z) 7860 mov.l %a1,%d1 7861 and.l &0x80000000,%d1 7862 or.l &0x3F000000,%d1 7863 fadd.x (%sp)+,%fp0 7864 mov.l %d1,-(%sp) 7865 7866 fmov.l %d0,%fpcr 7867 mov.b &FMUL_OP,%d1 # last inst is MUL 7868 fmul.s (%sp)+,%fp0 # last fp inst - possible exceptions set 7869 bra t_catch 7870 7871SINHBIG: 7872 cmp.l %d1,&0x400CB2B3 7873 bgt t_ovfl 7874 fabs.x %fp0 7875 fsub.d T1(%pc),%fp0 # (|X|-16381LOG2_LEAD) 7876 mov.l &0,-(%sp) 7877 mov.l &0x80000000,-(%sp) 7878 mov.l %a1,%d1 7879 and.l &0x80000000,%d1 7880 or.l &0x7FFB0000,%d1 7881 mov.l %d1,-(%sp) # EXTENDED FMT 7882 fsub.d T2(%pc),%fp0 # |X| - 16381 LOG2, ACCURATE 7883 7884 mov.l %d0,-(%sp) 7885 clr.l %d0 7886 fmovm.x &0x01,-(%sp) # save fp0 on stack 7887 lea (%sp),%a0 # pass ptr to fp0 7888 bsr setox 7889 add.l &0xc,%sp # clear fp0 from stack 7890 7891 mov.l (%sp)+,%d0 7892 fmov.l %d0,%fpcr 7893 mov.b &FMUL_OP,%d1 # last inst is MUL 7894 fmul.x (%sp)+,%fp0 # possible exception 7895 bra t_catch 7896 7897 global ssinhd 7898#--SINH(X) = X FOR DENORMALIZED X 7899ssinhd: 7900 bra t_extdnrm 7901 7902######################################################################### 7903# stanh(): computes the hyperbolic tangent of a normalized input # 7904# stanhd(): computes the hyperbolic tangent of a denormalized input # 7905# # 7906# INPUT *************************************************************** # 7907# a0 = pointer to extended precision input # 7908# d0 = round precision,mode # 7909# # 7910# OUTPUT ************************************************************** # 7911# fp0 = tanh(X) # 7912# # 7913# ACCURACY and MONOTONICITY ******************************************* # 7914# The returned result is within 3 ulps in 64 significant bit, # 7915# i.e. within 0.5001 ulp to 53 bits if the result is subsequently # 7916# rounded to double precision. The result is provably monotonic # 7917# in double precision. # 7918# # 7919# ALGORITHM *********************************************************** # 7920# # 7921# TANH # 7922# 1. If |X| >= (5/2) log2 or |X| <= 2**(-40), go to 3. # 7923# # 7924# 2. (2**(-40) < |X| < (5/2) log2) Calculate tanh(X) by # 7925# sgn := sign(X), y := 2|X|, z := expm1(Y), and # 7926# tanh(X) = sgn*( z/(2+z) ). # 7927# Exit. # 7928# # 7929# 3. (|X| <= 2**(-40) or |X| >= (5/2) log2). If |X| < 1, # 7930# go to 7. # 7931# # 7932# 4. (|X| >= (5/2) log2) If |X| >= 50 log2, go to 6. # 7933# # 7934# 5. ((5/2) log2 <= |X| < 50 log2) Calculate tanh(X) by # 7935# sgn := sign(X), y := 2|X|, z := exp(Y), # 7936# tanh(X) = sgn - [ sgn*2/(1+z) ]. # 7937# Exit. # 7938# # 7939# 6. (|X| >= 50 log2) Tanh(X) = +-1 (round to nearest). 
Thus, we # 7940# calculate Tanh(X) by # 7941# sgn := sign(X), Tiny := 2**(-126), # 7942# tanh(X) := sgn - sgn*Tiny. # 7943# Exit. # 7944# # 7945# 7. (|X| < 2**(-40)). Tanh(X) = X. Exit. # 7946# # 7947######################################################################### 7948 7949 set X,FP_SCR0 7950 set XFRAC,X+4 7951 7952 set SGN,L_SCR3 7953 7954 set V,FP_SCR0 7955 7956 global stanh 7957stanh: 7958 fmov.x (%a0),%fp0 # LOAD INPUT 7959 7960 fmov.x %fp0,X(%a6) 7961 mov.l (%a0),%d1 7962 mov.w 4(%a0),%d1 7963 mov.l %d1,X(%a6) 7964 and.l &0x7FFFFFFF,%d1 7965 cmp.l %d1, &0x3fd78000 # is |X| < 2^(-40)? 7966 blt.w TANHBORS # yes 7967 cmp.l %d1, &0x3fffddce # is |X| > (5/2)LOG2? 7968 bgt.w TANHBORS # yes 7969 7970#--THIS IS THE USUAL CASE 7971#--Y = 2|X|, Z = EXPM1(Y), TANH(X) = SIGN(X) * Z / (Z+2). 7972 7973 mov.l X(%a6),%d1 7974 mov.l %d1,SGN(%a6) 7975 and.l &0x7FFF0000,%d1 7976 add.l &0x00010000,%d1 # EXPONENT OF 2|X| 7977 mov.l %d1,X(%a6) 7978 and.l &0x80000000,SGN(%a6) 7979 fmov.x X(%a6),%fp0 # FP0 IS Y = 2|X| 7980 7981 mov.l %d0,-(%sp) 7982 clr.l %d0 7983 fmovm.x &0x1,-(%sp) # save Y on stack 7984 lea (%sp),%a0 # pass ptr to Y 7985 bsr setoxm1 # FP0 IS Z = EXPM1(Y) 7986 add.l &0xc,%sp # clear Y from stack 7987 mov.l (%sp)+,%d0 7988 7989 fmov.x %fp0,%fp1 7990 fadd.s &0x40000000,%fp1 # Z+2 7991 mov.l SGN(%a6),%d1 7992 fmov.x %fp1,V(%a6) 7993 eor.l %d1,V(%a6) 7994 7995 fmov.l %d0,%fpcr # restore users round prec,mode 7996 fdiv.x V(%a6),%fp0 7997 bra t_inx2 7998 7999TANHBORS: 8000 cmp.l %d1,&0x3FFF8000 8001 blt.w TANHSM 8002 8003 cmp.l %d1,&0x40048AA1 8004 bgt.w TANHHUGE 8005 8006#-- (5/2) LOG2 < |X| < 50 LOG2, 8007#--TANH(X) = 1 - (2/[EXP(2X)+1]). LET Y = 2|X|, SGN = SIGN(X), 8008#--TANH(X) = SGN - SGN*2/[EXP(Y)+1]. 8009 8010 mov.l X(%a6),%d1 8011 mov.l %d1,SGN(%a6) 8012 and.l &0x7FFF0000,%d1 8013 add.l &0x00010000,%d1 # EXPO OF 2|X| 8014 mov.l %d1,X(%a6) # Y = 2|X| 8015 and.l &0x80000000,SGN(%a6) 8016 mov.l SGN(%a6),%d1 8017 fmov.x X(%a6),%fp0 # Y = 2|X| 8018 8019 mov.l %d0,-(%sp) 8020 clr.l %d0 8021 fmovm.x &0x01,-(%sp) # save Y on stack 8022 lea (%sp),%a0 # pass ptr to Y 8023 bsr setox # FP0 IS EXP(Y) 8024 add.l &0xc,%sp # clear Y from stack 8025 mov.l (%sp)+,%d0 8026 mov.l SGN(%a6),%d1 8027 fadd.s &0x3F800000,%fp0 # EXP(Y)+1 8028 8029 eor.l &0xC0000000,%d1 # -SIGN(X)*2 8030 fmov.s %d1,%fp1 # -SIGN(X)*2 IN SGL FMT 8031 fdiv.x %fp0,%fp1 # -SIGN(X)2 / [EXP(Y)+1 ] 8032 8033 mov.l SGN(%a6),%d1 8034 or.l &0x3F800000,%d1 # SGN 8035 fmov.s %d1,%fp0 # SGN IN SGL FMT 8036 8037 fmov.l %d0,%fpcr # restore users round prec,mode 8038 mov.b &FADD_OP,%d1 # last inst is ADD 8039 fadd.x %fp1,%fp0 8040 bra t_inx2 8041 8042TANHSM: 8043 fmov.l %d0,%fpcr # restore users round prec,mode 8044 mov.b &FMOV_OP,%d1 # last inst is MOVE 8045 fmov.x X(%a6),%fp0 # last inst - possible exception set 8046 bra t_catch 8047 8048#---RETURN SGN(X) - SGN(X)EPS 8049TANHHUGE: 8050 mov.l X(%a6),%d1 8051 and.l &0x80000000,%d1 8052 or.l &0x3F800000,%d1 8053 fmov.s %d1,%fp0 8054 and.l &0x80000000,%d1 8055 eor.l &0x80800000,%d1 # -SIGN(X)*EPS 8056 8057 fmov.l %d0,%fpcr # restore users round prec,mode 8058 fadd.s %d1,%fp0 8059 bra t_inx2 8060 8061 global stanhd 8062#--TANH(X) = X FOR DENORMALIZED X 8063stanhd: 8064 bra t_extdnrm 8065 8066######################################################################### 8067# slogn(): computes the natural logarithm of a normalized input # 8068# slognd(): computes the natural logarithm of a denormalized input # 8069# slognp1(): computes the log(1+X) of a normalized input # 8070# slognp1d(): 
computes the log(1+X) of a denormalized input # 8071# # 8072# INPUT *************************************************************** # 8073# a0 = pointer to extended precision input # 8074# d0 = round precision,mode # 8075# # 8076# OUTPUT ************************************************************** # 8077# fp0 = log(X) or log(1+X) # 8078# # 8079# ACCURACY and MONOTONICITY ******************************************* # 8080# The returned result is within 2 ulps in 64 significant bit, # 8081# i.e. within 0.5001 ulp to 53 bits if the result is subsequently # 8082# rounded to double precision. The result is provably monotonic # 8083# in double precision. # 8084# # 8085# ALGORITHM *********************************************************** # 8086# LOGN: # 8087# Step 1. If |X-1| < 1/16, approximate log(X) by an odd # 8088# polynomial in u, where u = 2(X-1)/(X+1). Otherwise, # 8089# move on to Step 2. # 8090# # 8091# Step 2. X = 2**k * Y where 1 <= Y < 2. Define F to be the first # 8092# seven significant bits of Y plus 2**(-7), i.e. # 8093# F = 1.xxxxxx1 in base 2 where the six "x" match those # 8094# of Y. Note that |Y-F| <= 2**(-7). # 8095# # 8096# Step 3. Define u = (Y-F)/F. Approximate log(1+u) by a # 8097# polynomial in u, log(1+u) = poly. # 8098# # 8099# Step 4. Reconstruct # 8100# log(X) = log( 2**k * Y ) = k*log(2) + log(F) + log(1+u) # 8101# by k*log(2) + (log(F) + poly). The values of log(F) are # 8102# calculated beforehand and stored in the program. # 8103# # 8104# lognp1: # 8105# Step 1: If |X| < 1/16, approximate log(1+X) by an odd # 8106# polynomial in u where u = 2X/(2+X). Otherwise, move on # 8107# to Step 2. # 8108# # 8109# Step 2: Let 1+X = 2**k * Y, where 1 <= Y < 2. Define F as done # 8110# in Step 2 of the algorithm for LOGN and compute # 8111# log(1+X) as k*log(2) + log(F) + poly where poly # 8112# approximates log(1+u), u = (Y-F)/F. # 8113# # 8114# Implementation Notes: # 8115# Note 1. There are 64 different possible values for F, thus 64 # 8116# log(F)'s need to be tabulated. Moreover, the values of # 8117# 1/F are also tabulated so that the division in (Y-F)/F # 8118# can be performed by a multiplication. # 8119# # 8120# Note 2. In Step 2 of lognp1, in order to preserved accuracy, # 8121# the value Y-F has to be calculated carefully when # 8122# 1/2 <= X < 3/2. # 8123# # 8124# Note 3. To fully exploit the pipeline, polynomials are usually # 8125# separated into two parts evaluated independently before # 8126# being added up. 
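#
# Illustrative note (not assembled): a minimal C model, assuming a C99
# <math.h>, of Steps 2-4 above for the LOGN case.  The FPSP fetches both
# log(F) and 1/F from LOGTBL and replaces log1p(u) by a polynomial; the
# |X-1| < 1/16 path of Step 1 is omitted here.
#
#    #include <math.h>
#
#    double log_by_table(double x)          /* x > 0, finite, normalized  */
#    {
#        const double ln2 = 0.69314718055994530942;
#        int k;
#        double Y = 2.0 * frexp(x, &k);     /* x = 2^(k-1)*Y, 1 <= Y < 2  */
#        k -= 1;
#        double F = floor(64.0 * Y) / 64.0 + 1.0 / 128.0;  /* 1.xxxxxx1   */
#        double u = (Y - F) / F;            /* |u| <= 2^(-7)              */
#        return k * ln2 + log(F) + log1p(u);
#    }
#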
# 8127# # 8128######################################################################### 8129LOGOF2: 8130 long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000 8131 8132one: 8133 long 0x3F800000 8134zero: 8135 long 0x00000000 8136infty: 8137 long 0x7F800000 8138negone: 8139 long 0xBF800000 8140 8141LOGA6: 8142 long 0x3FC2499A,0xB5E4040B 8143LOGA5: 8144 long 0xBFC555B5,0x848CB7DB 8145 8146LOGA4: 8147 long 0x3FC99999,0x987D8730 8148LOGA3: 8149 long 0xBFCFFFFF,0xFF6F7E97 8150 8151LOGA2: 8152 long 0x3FD55555,0x555555A4 8153LOGA1: 8154 long 0xBFE00000,0x00000008 8155 8156LOGB5: 8157 long 0x3F175496,0xADD7DAD6 8158LOGB4: 8159 long 0x3F3C71C2,0xFE80C7E0 8160 8161LOGB3: 8162 long 0x3F624924,0x928BCCFF 8163LOGB2: 8164 long 0x3F899999,0x999995EC 8165 8166LOGB1: 8167 long 0x3FB55555,0x55555555 8168TWO: 8169 long 0x40000000,0x00000000 8170 8171LTHOLD: 8172 long 0x3f990000,0x80000000,0x00000000,0x00000000 8173 8174LOGTBL: 8175 long 0x3FFE0000,0xFE03F80F,0xE03F80FE,0x00000000 8176 long 0x3FF70000,0xFF015358,0x833C47E2,0x00000000 8177 long 0x3FFE0000,0xFA232CF2,0x52138AC0,0x00000000 8178 long 0x3FF90000,0xBDC8D83E,0xAD88D549,0x00000000 8179 long 0x3FFE0000,0xF6603D98,0x0F6603DA,0x00000000 8180 long 0x3FFA0000,0x9CF43DCF,0xF5EAFD48,0x00000000 8181 long 0x3FFE0000,0xF2B9D648,0x0F2B9D65,0x00000000 8182 long 0x3FFA0000,0xDA16EB88,0xCB8DF614,0x00000000 8183 long 0x3FFE0000,0xEF2EB71F,0xC4345238,0x00000000 8184 long 0x3FFB0000,0x8B29B775,0x1BD70743,0x00000000 8185 long 0x3FFE0000,0xEBBDB2A5,0xC1619C8C,0x00000000 8186 long 0x3FFB0000,0xA8D839F8,0x30C1FB49,0x00000000 8187 long 0x3FFE0000,0xE865AC7B,0x7603A197,0x00000000 8188 long 0x3FFB0000,0xC61A2EB1,0x8CD907AD,0x00000000 8189 long 0x3FFE0000,0xE525982A,0xF70C880E,0x00000000 8190 long 0x3FFB0000,0xE2F2A47A,0xDE3A18AF,0x00000000 8191 long 0x3FFE0000,0xE1FC780E,0x1FC780E2,0x00000000 8192 long 0x3FFB0000,0xFF64898E,0xDF55D551,0x00000000 8193 long 0x3FFE0000,0xDEE95C4C,0xA037BA57,0x00000000 8194 long 0x3FFC0000,0x8DB956A9,0x7B3D0148,0x00000000 8195 long 0x3FFE0000,0xDBEB61EE,0xD19C5958,0x00000000 8196 long 0x3FFC0000,0x9B8FE100,0xF47BA1DE,0x00000000 8197 long 0x3FFE0000,0xD901B203,0x6406C80E,0x00000000 8198 long 0x3FFC0000,0xA9372F1D,0x0DA1BD17,0x00000000 8199 long 0x3FFE0000,0xD62B80D6,0x2B80D62C,0x00000000 8200 long 0x3FFC0000,0xB6B07F38,0xCE90E46B,0x00000000 8201 long 0x3FFE0000,0xD3680D36,0x80D3680D,0x00000000 8202 long 0x3FFC0000,0xC3FD0329,0x06488481,0x00000000 8203 long 0x3FFE0000,0xD0B69FCB,0xD2580D0B,0x00000000 8204 long 0x3FFC0000,0xD11DE0FF,0x15AB18CA,0x00000000 8205 long 0x3FFE0000,0xCE168A77,0x25080CE1,0x00000000 8206 long 0x3FFC0000,0xDE1433A1,0x6C66B150,0x00000000 8207 long 0x3FFE0000,0xCB8727C0,0x65C393E0,0x00000000 8208 long 0x3FFC0000,0xEAE10B5A,0x7DDC8ADD,0x00000000 8209 long 0x3FFE0000,0xC907DA4E,0x871146AD,0x00000000 8210 long 0x3FFC0000,0xF7856E5E,0xE2C9B291,0x00000000 8211 long 0x3FFE0000,0xC6980C69,0x80C6980C,0x00000000 8212 long 0x3FFD0000,0x82012CA5,0xA68206D7,0x00000000 8213 long 0x3FFE0000,0xC4372F85,0x5D824CA6,0x00000000 8214 long 0x3FFD0000,0x882C5FCD,0x7256A8C5,0x00000000 8215 long 0x3FFE0000,0xC1E4BBD5,0x95F6E947,0x00000000 8216 long 0x3FFD0000,0x8E44C60B,0x4CCFD7DE,0x00000000 8217 long 0x3FFE0000,0xBFA02FE8,0x0BFA02FF,0x00000000 8218 long 0x3FFD0000,0x944AD09E,0xF4351AF6,0x00000000 8219 long 0x3FFE0000,0xBD691047,0x07661AA3,0x00000000 8220 long 0x3FFD0000,0x9A3EECD4,0xC3EAA6B2,0x00000000 8221 long 0x3FFE0000,0xBB3EE721,0xA54D880C,0x00000000 8222 long 0x3FFD0000,0xA0218434,0x353F1DE8,0x00000000 8223 long 
0x3FFE0000,0xB92143FA,0x36F5E02E,0x00000000 8224 long 0x3FFD0000,0xA5F2FCAB,0xBBC506DA,0x00000000 8225 long 0x3FFE0000,0xB70FBB5A,0x19BE3659,0x00000000 8226 long 0x3FFD0000,0xABB3B8BA,0x2AD362A5,0x00000000 8227 long 0x3FFE0000,0xB509E68A,0x9B94821F,0x00000000 8228 long 0x3FFD0000,0xB1641795,0xCE3CA97B,0x00000000 8229 long 0x3FFE0000,0xB30F6352,0x8917C80B,0x00000000 8230 long 0x3FFD0000,0xB7047551,0x5D0F1C61,0x00000000 8231 long 0x3FFE0000,0xB11FD3B8,0x0B11FD3C,0x00000000 8232 long 0x3FFD0000,0xBC952AFE,0xEA3D13E1,0x00000000 8233 long 0x3FFE0000,0xAF3ADDC6,0x80AF3ADE,0x00000000 8234 long 0x3FFD0000,0xC2168ED0,0xF458BA4A,0x00000000 8235 long 0x3FFE0000,0xAD602B58,0x0AD602B6,0x00000000 8236 long 0x3FFD0000,0xC788F439,0xB3163BF1,0x00000000 8237 long 0x3FFE0000,0xAB8F69E2,0x8359CD11,0x00000000 8238 long 0x3FFD0000,0xCCECAC08,0xBF04565D,0x00000000 8239 long 0x3FFE0000,0xA9C84A47,0xA07F5638,0x00000000 8240 long 0x3FFD0000,0xD2420487,0x2DD85160,0x00000000 8241 long 0x3FFE0000,0xA80A80A8,0x0A80A80B,0x00000000 8242 long 0x3FFD0000,0xD7894992,0x3BC3588A,0x00000000 8243 long 0x3FFE0000,0xA655C439,0x2D7B73A8,0x00000000 8244 long 0x3FFD0000,0xDCC2C4B4,0x9887DACC,0x00000000 8245 long 0x3FFE0000,0xA4A9CF1D,0x96833751,0x00000000 8246 long 0x3FFD0000,0xE1EEBD3E,0x6D6A6B9E,0x00000000 8247 long 0x3FFE0000,0xA3065E3F,0xAE7CD0E0,0x00000000 8248 long 0x3FFD0000,0xE70D785C,0x2F9F5BDC,0x00000000 8249 long 0x3FFE0000,0xA16B312E,0xA8FC377D,0x00000000 8250 long 0x3FFD0000,0xEC1F392C,0x5179F283,0x00000000 8251 long 0x3FFE0000,0x9FD809FD,0x809FD80A,0x00000000 8252 long 0x3FFD0000,0xF12440D3,0xE36130E6,0x00000000 8253 long 0x3FFE0000,0x9E4CAD23,0xDD5F3A20,0x00000000 8254 long 0x3FFD0000,0xF61CCE92,0x346600BB,0x00000000 8255 long 0x3FFE0000,0x9CC8E160,0xC3FB19B9,0x00000000 8256 long 0x3FFD0000,0xFB091FD3,0x8145630A,0x00000000 8257 long 0x3FFE0000,0x9B4C6F9E,0xF03A3CAA,0x00000000 8258 long 0x3FFD0000,0xFFE97042,0xBFA4C2AD,0x00000000 8259 long 0x3FFE0000,0x99D722DA,0xBDE58F06,0x00000000 8260 long 0x3FFE0000,0x825EFCED,0x49369330,0x00000000 8261 long 0x3FFE0000,0x9868C809,0x868C8098,0x00000000 8262 long 0x3FFE0000,0x84C37A7A,0xB9A905C9,0x00000000 8263 long 0x3FFE0000,0x97012E02,0x5C04B809,0x00000000 8264 long 0x3FFE0000,0x87224C2E,0x8E645FB7,0x00000000 8265 long 0x3FFE0000,0x95A02568,0x095A0257,0x00000000 8266 long 0x3FFE0000,0x897B8CAC,0x9F7DE298,0x00000000 8267 long 0x3FFE0000,0x94458094,0x45809446,0x00000000 8268 long 0x3FFE0000,0x8BCF55DE,0xC4CD05FE,0x00000000 8269 long 0x3FFE0000,0x92F11384,0x0497889C,0x00000000 8270 long 0x3FFE0000,0x8E1DC0FB,0x89E125E5,0x00000000 8271 long 0x3FFE0000,0x91A2B3C4,0xD5E6F809,0x00000000 8272 long 0x3FFE0000,0x9066E68C,0x955B6C9B,0x00000000 8273 long 0x3FFE0000,0x905A3863,0x3E06C43B,0x00000000 8274 long 0x3FFE0000,0x92AADE74,0xC7BE59E0,0x00000000 8275 long 0x3FFE0000,0x8F1779D9,0xFDC3A219,0x00000000 8276 long 0x3FFE0000,0x94E9BFF6,0x15845643,0x00000000 8277 long 0x3FFE0000,0x8DDA5202,0x37694809,0x00000000 8278 long 0x3FFE0000,0x9723A1B7,0x20134203,0x00000000 8279 long 0x3FFE0000,0x8CA29C04,0x6514E023,0x00000000 8280 long 0x3FFE0000,0x995899C8,0x90EB8990,0x00000000 8281 long 0x3FFE0000,0x8B70344A,0x139BC75A,0x00000000 8282 long 0x3FFE0000,0x9B88BDAA,0x3A3DAE2F,0x00000000 8283 long 0x3FFE0000,0x8A42F870,0x5669DB46,0x00000000 8284 long 0x3FFE0000,0x9DB4224F,0xFFE1157C,0x00000000 8285 long 0x3FFE0000,0x891AC73A,0xE9819B50,0x00000000 8286 long 0x3FFE0000,0x9FDADC26,0x8B7A12DA,0x00000000 8287 long 0x3FFE0000,0x87F78087,0xF78087F8,0x00000000 8288 long 0x3FFE0000,0xA1FCFF17,0xCE733BD4,0x00000000 
8289 long 0x3FFE0000,0x86D90544,0x7A34ACC6,0x00000000 8290 long 0x3FFE0000,0xA41A9E8F,0x5446FB9F,0x00000000 8291 long 0x3FFE0000,0x85BF3761,0x2CEE3C9B,0x00000000 8292 long 0x3FFE0000,0xA633CD7E,0x6771CD8B,0x00000000 8293 long 0x3FFE0000,0x84A9F9C8,0x084A9F9D,0x00000000 8294 long 0x3FFE0000,0xA8489E60,0x0B435A5E,0x00000000 8295 long 0x3FFE0000,0x83993052,0x3FBE3368,0x00000000 8296 long 0x3FFE0000,0xAA59233C,0xCCA4BD49,0x00000000 8297 long 0x3FFE0000,0x828CBFBE,0xB9A020A3,0x00000000 8298 long 0x3FFE0000,0xAC656DAE,0x6BCC4985,0x00000000 8299 long 0x3FFE0000,0x81848DA8,0xFAF0D277,0x00000000 8300 long 0x3FFE0000,0xAE6D8EE3,0x60BB2468,0x00000000 8301 long 0x3FFE0000,0x80808080,0x80808081,0x00000000 8302 long 0x3FFE0000,0xB07197A2,0x3C46C654,0x00000000 8303 8304 set ADJK,L_SCR1 8305 8306 set X,FP_SCR0 8307 set XDCARE,X+2 8308 set XFRAC,X+4 8309 8310 set F,FP_SCR1 8311 set FFRAC,F+4 8312 8313 set KLOG2,FP_SCR0 8314 8315 set SAVEU,FP_SCR0 8316 8317 global slogn 8318#--ENTRY POINT FOR LOG(X) FOR X FINITE, NON-ZERO, NOT NAN'S 8319slogn: 8320 fmov.x (%a0),%fp0 # LOAD INPUT 8321 mov.l &0x00000000,ADJK(%a6) 8322 8323LOGBGN: 8324#--FPCR SAVED AND CLEARED, INPUT IS 2^(ADJK)*FP0, FP0 CONTAINS 8325#--A FINITE, NON-ZERO, NORMALIZED NUMBER. 8326 8327 mov.l (%a0),%d1 8328 mov.w 4(%a0),%d1 8329 8330 mov.l (%a0),X(%a6) 8331 mov.l 4(%a0),X+4(%a6) 8332 mov.l 8(%a0),X+8(%a6) 8333 8334 cmp.l %d1,&0 # CHECK IF X IS NEGATIVE 8335 blt.w LOGNEG # LOG OF NEGATIVE ARGUMENT IS INVALID 8336# X IS POSITIVE, CHECK IF X IS NEAR 1 8337 cmp.l %d1,&0x3ffef07d # IS X < 15/16? 8338 blt.b LOGMAIN # YES 8339 cmp.l %d1,&0x3fff8841 # IS X > 17/16? 8340 ble.w LOGNEAR1 # NO 8341 8342LOGMAIN: 8343#--THIS SHOULD BE THE USUAL CASE, X NOT VERY CLOSE TO 1 8344 8345#--X = 2^(K) * Y, 1 <= Y < 2. THUS, Y = 1.XXXXXXXX....XX IN BINARY. 8346#--WE DEFINE F = 1.XXXXXX1, I.E. FIRST 7 BITS OF Y AND ATTACH A 1. 8347#--THE IDEA IS THAT LOG(X) = K*LOG2 + LOG(Y) 8348#-- = K*LOG2 + LOG(F) + LOG(1 + (Y-F)/F). 8349#--NOTE THAT U = (Y-F)/F IS VERY SMALL AND THUS APPROXIMATING 8350#--LOG(1+U) CAN BE VERY EFFICIENT. 8351#--ALSO NOTE THAT THE VALUE 1/F IS STORED IN A TABLE SO THAT NO 8352#--DIVISION IS NEEDED TO CALCULATE (Y-F)/F. 8353 8354#--GET K, Y, F, AND ADDRESS OF 1/F. 8355 asr.l &8,%d1 8356 asr.l &8,%d1 # SHIFTED 16 BITS, BIASED EXPO. OF X 8357 sub.l &0x3FFF,%d1 # THIS IS K 8358 add.l ADJK(%a6),%d1 # ADJUST K, ORIGINAL INPUT MAY BE DENORM. 8359 lea LOGTBL(%pc),%a0 # BASE ADDRESS OF 1/F AND LOG(F) 8360 fmov.l %d1,%fp1 # CONVERT K TO FLOATING-POINT FORMAT 8361 8362#--WHILE THE CONVERSION IS GOING ON, WE GET F AND ADDRESS OF 1/F 8363 mov.l &0x3FFF0000,X(%a6) # X IS NOW Y, I.E. 
2^(-K)*X 8364 mov.l XFRAC(%a6),FFRAC(%a6) 8365 and.l &0xFE000000,FFRAC(%a6) # FIRST 7 BITS OF Y 8366 or.l &0x01000000,FFRAC(%a6) # GET F: ATTACH A 1 AT THE EIGHTH BIT 8367 mov.l FFRAC(%a6),%d1 # READY TO GET ADDRESS OF 1/F 8368 and.l &0x7E000000,%d1 8369 asr.l &8,%d1 8370 asr.l &8,%d1 8371 asr.l &4,%d1 # SHIFTED 20, D0 IS THE DISPLACEMENT 8372 add.l %d1,%a0 # A0 IS THE ADDRESS FOR 1/F 8373 8374 fmov.x X(%a6),%fp0 8375 mov.l &0x3fff0000,F(%a6) 8376 clr.l F+8(%a6) 8377 fsub.x F(%a6),%fp0 # Y-F 8378 fmovm.x &0xc,-(%sp) # SAVE FP2-3 WHILE FP0 IS NOT READY 8379#--SUMMARY: FP0 IS Y-F, A0 IS ADDRESS OF 1/F, FP1 IS K 8380#--REGISTERS SAVED: FPCR, FP1, FP2 8381 8382LP1CONT1: 8383#--AN RE-ENTRY POINT FOR LOGNP1 8384 fmul.x (%a0),%fp0 # FP0 IS U = (Y-F)/F 8385 fmul.x LOGOF2(%pc),%fp1 # GET K*LOG2 WHILE FP0 IS NOT READY 8386 fmov.x %fp0,%fp2 8387 fmul.x %fp2,%fp2 # FP2 IS V=U*U 8388 fmov.x %fp1,KLOG2(%a6) # PUT K*LOG2 IN MEMEORY, FREE FP1 8389 8390#--LOG(1+U) IS APPROXIMATED BY 8391#--U + V*(A1+U*(A2+U*(A3+U*(A4+U*(A5+U*A6))))) WHICH IS 8392#--[U + V*(A1+V*(A3+V*A5))] + [U*V*(A2+V*(A4+V*A6))] 8393 8394 fmov.x %fp2,%fp3 8395 fmov.x %fp2,%fp1 8396 8397 fmul.d LOGA6(%pc),%fp1 # V*A6 8398 fmul.d LOGA5(%pc),%fp2 # V*A5 8399 8400 fadd.d LOGA4(%pc),%fp1 # A4+V*A6 8401 fadd.d LOGA3(%pc),%fp2 # A3+V*A5 8402 8403 fmul.x %fp3,%fp1 # V*(A4+V*A6) 8404 fmul.x %fp3,%fp2 # V*(A3+V*A5) 8405 8406 fadd.d LOGA2(%pc),%fp1 # A2+V*(A4+V*A6) 8407 fadd.d LOGA1(%pc),%fp2 # A1+V*(A3+V*A5) 8408 8409 fmul.x %fp3,%fp1 # V*(A2+V*(A4+V*A6)) 8410 add.l &16,%a0 # ADDRESS OF LOG(F) 8411 fmul.x %fp3,%fp2 # V*(A1+V*(A3+V*A5)) 8412 8413 fmul.x %fp0,%fp1 # U*V*(A2+V*(A4+V*A6)) 8414 fadd.x %fp2,%fp0 # U+V*(A1+V*(A3+V*A5)) 8415 8416 fadd.x (%a0),%fp1 # LOG(F)+U*V*(A2+V*(A4+V*A6)) 8417 fmovm.x (%sp)+,&0x30 # RESTORE FP2-3 8418 fadd.x %fp1,%fp0 # FP0 IS LOG(F) + LOG(1+U) 8419 8420 fmov.l %d0,%fpcr 8421 fadd.x KLOG2(%a6),%fp0 # FINAL ADD 8422 bra t_inx2 8423 8424 8425LOGNEAR1: 8426 8427# if the input is exactly equal to one, then exit through ld_pzero. 8428# if these 2 lines weren't here, the correct answer would be returned 8429# but the INEX2 bit would be set. 8430 fcmp.b %fp0,&0x1 # is it equal to one? 8431 fbeq.l ld_pzero # yes 8432 8433#--REGISTERS SAVED: FPCR, FP1. FP0 CONTAINS THE INPUT. 
8434 fmov.x %fp0,%fp1 8435 fsub.s one(%pc),%fp1 # FP1 IS X-1 8436 fadd.s one(%pc),%fp0 # FP0 IS X+1 8437 fadd.x %fp1,%fp1 # FP1 IS 2(X-1) 8438#--LOG(X) = LOG(1+U/2)-LOG(1-U/2) WHICH IS AN ODD POLYNOMIAL 8439#--IN U, U = 2(X-1)/(X+1) = FP1/FP0 8440 8441LP1CONT2: 8442#--THIS IS AN RE-ENTRY POINT FOR LOGNP1 8443 fdiv.x %fp0,%fp1 # FP1 IS U 8444 fmovm.x &0xc,-(%sp) # SAVE FP2-3 8445#--REGISTERS SAVED ARE NOW FPCR,FP1,FP2,FP3 8446#--LET V=U*U, W=V*V, CALCULATE 8447#--U + U*V*(B1 + V*(B2 + V*(B3 + V*(B4 + V*B5)))) BY 8448#--U + U*V*( [B1 + W*(B3 + W*B5)] + [V*(B2 + W*B4)] ) 8449 fmov.x %fp1,%fp0 8450 fmul.x %fp0,%fp0 # FP0 IS V 8451 fmov.x %fp1,SAVEU(%a6) # STORE U IN MEMORY, FREE FP1 8452 fmov.x %fp0,%fp1 8453 fmul.x %fp1,%fp1 # FP1 IS W 8454 8455 fmov.d LOGB5(%pc),%fp3 8456 fmov.d LOGB4(%pc),%fp2 8457 8458 fmul.x %fp1,%fp3 # W*B5 8459 fmul.x %fp1,%fp2 # W*B4 8460 8461 fadd.d LOGB3(%pc),%fp3 # B3+W*B5 8462 fadd.d LOGB2(%pc),%fp2 # B2+W*B4 8463 8464 fmul.x %fp3,%fp1 # W*(B3+W*B5), FP3 RELEASED 8465 8466 fmul.x %fp0,%fp2 # V*(B2+W*B4) 8467 8468 fadd.d LOGB1(%pc),%fp1 # B1+W*(B3+W*B5) 8469 fmul.x SAVEU(%a6),%fp0 # FP0 IS U*V 8470 8471 fadd.x %fp2,%fp1 # B1+W*(B3+W*B5) + V*(B2+W*B4), FP2 RELEASED 8472 fmovm.x (%sp)+,&0x30 # FP2-3 RESTORED 8473 8474 fmul.x %fp1,%fp0 # U*V*( [B1+W*(B3+W*B5)] + [V*(B2+W*B4)] ) 8475 8476 fmov.l %d0,%fpcr 8477 fadd.x SAVEU(%a6),%fp0 8478 bra t_inx2 8479 8480#--REGISTERS SAVED FPCR. LOG(-VE) IS INVALID 8481LOGNEG: 8482 bra t_operr 8483 8484 global slognd 8485slognd: 8486#--ENTRY POINT FOR LOG(X) FOR DENORMALIZED INPUT 8487 8488 mov.l &-100,ADJK(%a6) # INPUT = 2^(ADJK) * FP0 8489 8490#----normalize the input value by left shifting k bits (k to be determined 8491#----below), adjusting exponent and storing -k to ADJK 8492#----the value TWOTO100 is no longer needed. 8493#----Note that this code assumes the denormalized input is NON-ZERO. 8494 8495 movm.l &0x3f00,-(%sp) # save some registers {d2-d7} 8496 mov.l (%a0),%d3 # D3 is exponent of smallest norm. 
# 8497 mov.l 4(%a0),%d4 8498 mov.l 8(%a0),%d5 # (D4,D5) is (Hi_X,Lo_X) 8499 clr.l %d2 # D2 used for holding K 8500 8501 tst.l %d4 8502 bne.b Hi_not0 8503 8504Hi_0: 8505 mov.l %d5,%d4 8506 clr.l %d5 8507 mov.l &32,%d2 8508 clr.l %d6 8509 bfffo %d4{&0:&32},%d6 8510 lsl.l %d6,%d4 8511 add.l %d6,%d2 # (D3,D4,D5) is normalized 8512 8513 mov.l %d3,X(%a6) 8514 mov.l %d4,XFRAC(%a6) 8515 mov.l %d5,XFRAC+4(%a6) 8516 neg.l %d2 8517 mov.l %d2,ADJK(%a6) 8518 fmov.x X(%a6),%fp0 8519 movm.l (%sp)+,&0xfc # restore registers {d2-d7} 8520 lea X(%a6),%a0 8521 bra.w LOGBGN # begin regular log(X) 8522 8523Hi_not0: 8524 clr.l %d6 8525 bfffo %d4{&0:&32},%d6 # find first 1 8526 mov.l %d6,%d2 # get k 8527 lsl.l %d6,%d4 8528 mov.l %d5,%d7 # a copy of D5 8529 lsl.l %d6,%d5 8530 neg.l %d6 8531 add.l &32,%d6 8532 lsr.l %d6,%d7 8533 or.l %d7,%d4 # (D3,D4,D5) normalized 8534 8535 mov.l %d3,X(%a6) 8536 mov.l %d4,XFRAC(%a6) 8537 mov.l %d5,XFRAC+4(%a6) 8538 neg.l %d2 8539 mov.l %d2,ADJK(%a6) 8540 fmov.x X(%a6),%fp0 8541 movm.l (%sp)+,&0xfc # restore registers {d2-d7} 8542 lea X(%a6),%a0 8543 bra.w LOGBGN # begin regular log(X) 8544 8545 global slognp1 8546#--ENTRY POINT FOR LOG(1+X) FOR X FINITE, NON-ZERO, NOT NAN'S 8547slognp1: 8548 fmov.x (%a0),%fp0 # LOAD INPUT 8549 fabs.x %fp0 # test magnitude 8550 fcmp.x %fp0,LTHOLD(%pc) # compare with min threshold 8551 fbgt.w LP1REAL # if greater, continue 8552 fmov.l %d0,%fpcr 8553 mov.b &FMOV_OP,%d1 # last inst is MOVE 8554 fmov.x (%a0),%fp0 # return signed argument 8555 bra t_catch 8556 8557LP1REAL: 8558 fmov.x (%a0),%fp0 # LOAD INPUT 8559 mov.l &0x00000000,ADJK(%a6) 8560 fmov.x %fp0,%fp1 # FP1 IS INPUT Z 8561 fadd.s one(%pc),%fp0 # X := ROUND(1+Z) 8562 fmov.x %fp0,X(%a6) 8563 mov.w XFRAC(%a6),XDCARE(%a6) 8564 mov.l X(%a6),%d1 8565 cmp.l %d1,&0 8566 ble.w LP1NEG0 # LOG OF ZERO OR -VE 8567 cmp.l %d1,&0x3ffe8000 # IS BOUNDS [1/2,3/2]? 8568 blt.w LOGMAIN 8569 cmp.l %d1,&0x3fffc000 8570 bgt.w LOGMAIN 8571#--IF 1+Z > 3/2 OR 1+Z < 1/2, THEN X, WHICH IS ROUNDING 1+Z, 8572#--CONTAINS AT LEAST 63 BITS OF INFORMATION OF Z. IN THAT CASE, 8573#--SIMPLY INVOKE LOG(X) FOR LOG(1+Z). 8574 8575LP1NEAR1: 8576#--NEXT SEE IF EXP(-1/16) < X < EXP(1/16) 8577 cmp.l %d1,&0x3ffef07d 8578 blt.w LP1CARE 8579 cmp.l %d1,&0x3fff8841 8580 bgt.w LP1CARE 8581 8582LP1ONE16: 8583#--EXP(-1/16) < X < EXP(1/16). LOG(1+Z) = LOG(1+U/2) - LOG(1-U/2) 8584#--WHERE U = 2Z/(2+Z) = 2Z/(1+X). 8585 fadd.x %fp1,%fp1 # FP1 IS 2Z 8586 fadd.s one(%pc),%fp0 # FP0 IS 1+X 8587#--U = FP1/FP0 8588 bra.w LP1CONT2 8589 8590LP1CARE: 8591#--HERE WE USE THE USUAL TABLE DRIVEN APPROACH. CARE HAS TO BE 8592#--TAKEN BECAUSE 1+Z CAN HAVE 67 BITS OF INFORMATION AND WE MUST 8593#--PRESERVE ALL THE INFORMATION. BECAUSE 1+Z IS IN [1/2,3/2], 8594#--THERE ARE ONLY TWO CASES. 8595#--CASE 1: 1+Z < 1, THEN K = -1 AND Y-F = (2-F) + 2Z 8596#--CASE 2: 1+Z > 1, THEN K = 0 AND Y-F = (1-F) + Z 8597#--ON RETURNING TO LP1CONT1, WE MUST HAVE K IN FP1, ADDRESS OF 8598#--(1/F) IN A0, Y-F IN FP0, AND FP2 SAVED. 
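#
# For illustration only, the two cases above can be written in C
# (the helper name is made up; f is the 1.xxxxxx1 value built below
# from the fraction of X, and z is the original input):
#
#	/* careful Y-F for log(1+z) when 1/2 <= 1+z < 3/2 */
#	double y_minus_f(double z, double f)
#	{
#	    if (1.0 + z < 1.0)              /* case 1: k = -1, y = 2+2z */
#	        return (2.0 - f) + 2.0 * z; /* 2-f and 2z are exact     */
#	    else                            /* case 2: k = 0,  y = 1+z  */
#	        return (1.0 - f) + z;       /* 1-f is exact             */
#	}
#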
8599 8600 mov.l XFRAC(%a6),FFRAC(%a6) 8601 and.l &0xFE000000,FFRAC(%a6) 8602 or.l &0x01000000,FFRAC(%a6) # F OBTAINED 8603 cmp.l %d1,&0x3FFF8000 # SEE IF 1+Z > 1 8604 bge.b KISZERO 8605 8606KISNEG1: 8607 fmov.s TWO(%pc),%fp0 8608 mov.l &0x3fff0000,F(%a6) 8609 clr.l F+8(%a6) 8610 fsub.x F(%a6),%fp0 # 2-F 8611 mov.l FFRAC(%a6),%d1 8612 and.l &0x7E000000,%d1 8613 asr.l &8,%d1 8614 asr.l &8,%d1 8615 asr.l &4,%d1 # D0 CONTAINS DISPLACEMENT FOR 1/F 8616 fadd.x %fp1,%fp1 # GET 2Z 8617 fmovm.x &0xc,-(%sp) # SAVE FP2 {%fp2/%fp3} 8618 fadd.x %fp1,%fp0 # FP0 IS Y-F = (2-F)+2Z 8619 lea LOGTBL(%pc),%a0 # A0 IS ADDRESS OF 1/F 8620 add.l %d1,%a0 8621 fmov.s negone(%pc),%fp1 # FP1 IS K = -1 8622 bra.w LP1CONT1 8623 8624KISZERO: 8625 fmov.s one(%pc),%fp0 8626 mov.l &0x3fff0000,F(%a6) 8627 clr.l F+8(%a6) 8628 fsub.x F(%a6),%fp0 # 1-F 8629 mov.l FFRAC(%a6),%d1 8630 and.l &0x7E000000,%d1 8631 asr.l &8,%d1 8632 asr.l &8,%d1 8633 asr.l &4,%d1 8634 fadd.x %fp1,%fp0 # FP0 IS Y-F 8635 fmovm.x &0xc,-(%sp) # FP2 SAVED {%fp2/%fp3} 8636 lea LOGTBL(%pc),%a0 8637 add.l %d1,%a0 # A0 IS ADDRESS OF 1/F 8638 fmov.s zero(%pc),%fp1 # FP1 IS K = 0 8639 bra.w LP1CONT1 8640 8641LP1NEG0: 8642#--FPCR SAVED. D0 IS X IN COMPACT FORM. 8643 cmp.l %d1,&0 8644 blt.b LP1NEG 8645LP1ZERO: 8646 fmov.s negone(%pc),%fp0 8647 8648 fmov.l %d0,%fpcr 8649 bra t_dz 8650 8651LP1NEG: 8652 fmov.s zero(%pc),%fp0 8653 8654 fmov.l %d0,%fpcr 8655 bra t_operr 8656 8657 global slognp1d 8658#--ENTRY POINT FOR LOG(1+Z) FOR DENORMALIZED INPUT 8659# Simply return the denorm 8660slognp1d: 8661 bra t_extdnrm 8662 8663######################################################################### 8664# satanh(): computes the inverse hyperbolic tangent of a norm input # 8665# satanhd(): computes the inverse hyperbolic tangent of a denorm input # 8666# # 8667# INPUT *************************************************************** # 8668# a0 = pointer to extended precision input # 8669# d0 = round precision,mode # 8670# # 8671# OUTPUT ************************************************************** # 8672# fp0 = arctanh(X) # 8673# # 8674# ACCURACY and MONOTONICITY ******************************************* # 8675# The returned result is within 3 ulps in 64 significant bit, # 8676# i.e. within 0.5001 ulp to 53 bits if the result is subsequently # 8677# rounded to double precision. The result is provably monotonic # 8678# in double precision. # 8679# # 8680# ALGORITHM *********************************************************** # 8681# # 8682# ATANH # 8683# 1. If |X| >= 1, go to 3. # 8684# # 8685# 2. (|X| < 1) Calculate atanh(X) by # 8686# sgn := sign(X) # 8687# y := |X| # 8688# z := 2y/(1-y) # 8689# atanh(X) := sgn * (1/2) * logp1(z) # 8690# Exit. # 8691# # 8692# 3. If |X| > 1, go to 5. # 8693# # 8694# 4. (|X| = 1) Generate infinity with an appropriate sign and # 8695# divide-by-zero by # 8696# sgn := sign(X) # 8697# atan(X) := sgn / (+0). # 8698# Exit. # 8699# # 8700# 5. (|X| > 1) Generate an invalid operation by 0 * infinity. # 8701# Exit. # 8702# # 8703######################################################################### 8704 8705 global satanh 8706satanh: 8707 mov.l (%a0),%d1 8708 mov.w 4(%a0),%d1 8709 and.l &0x7FFFFFFF,%d1 8710 cmp.l %d1,&0x3FFF8000 8711 bge.b ATANHBIG 8712 8713#--THIS IS THE USUAL CASE, |X| < 1 8714#--Y = |X|, Z = 2Y/(1-Y), ATANH(X) = SIGN(X) * (1/2) * LOG1P(Z). 
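#
# For illustration only, Step 2 of the ATANH algorithm above written
# in C (made-up helper name; the package reaches the same result by
# calling slognp1):
#
#	#include <math.h>
#
#	double atanh_sketch(double x)        /* assumes |x| < 1 */
#	{
#	    double y = fabs(x);
#	    double z = 2.0 * y / (1.0 - y);
#	    return copysign(0.5 * log1p(z), x);
#	}
#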
8715 8716 fabs.x (%a0),%fp0 # Y = |X| 8717 fmov.x %fp0,%fp1 8718 fneg.x %fp1 # -Y 8719 fadd.x %fp0,%fp0 # 2Y 8720 fadd.s &0x3F800000,%fp1 # 1-Y 8721 fdiv.x %fp1,%fp0 # 2Y/(1-Y) 8722 mov.l (%a0),%d1 8723 and.l &0x80000000,%d1 8724 or.l &0x3F000000,%d1 # SIGN(X)*HALF 8725 mov.l %d1,-(%sp) 8726 8727 mov.l %d0,-(%sp) # save rnd prec,mode 8728 clr.l %d0 # pass ext prec,RN 8729 fmovm.x &0x01,-(%sp) # save Z on stack 8730 lea (%sp),%a0 # pass ptr to Z 8731 bsr slognp1 # LOG1P(Z) 8732 add.l &0xc,%sp # clear Z from stack 8733 8734 mov.l (%sp)+,%d0 # fetch old prec,mode 8735 fmov.l %d0,%fpcr # load it 8736 mov.b &FMUL_OP,%d1 # last inst is MUL 8737 fmul.s (%sp)+,%fp0 8738 bra t_catch 8739 8740ATANHBIG: 8741 fabs.x (%a0),%fp0 # |X| 8742 fcmp.s %fp0,&0x3F800000 8743 fbgt t_operr 8744 bra t_dz 8745 8746 global satanhd 8747#--ATANH(X) = X FOR DENORMALIZED X 8748satanhd: 8749 bra t_extdnrm 8750 8751######################################################################### 8752# slog10(): computes the base-10 logarithm of a normalized input # 8753# slog10d(): computes the base-10 logarithm of a denormalized input # 8754# slog2(): computes the base-2 logarithm of a normalized input # 8755# slog2d(): computes the base-2 logarithm of a denormalized input # 8756# # 8757# INPUT *************************************************************** # 8758# a0 = pointer to extended precision input # 8759# d0 = round precision,mode # 8760# # 8761# OUTPUT ************************************************************** # 8762# fp0 = log_10(X) or log_2(X) # 8763# # 8764# ACCURACY and MONOTONICITY ******************************************* # 8765# The returned result is within 1.7 ulps in 64 significant bit, # 8766# i.e. within 0.5003 ulp to 53 bits if the result is subsequently # 8767# rounded to double precision. The result is provably monotonic # 8768# in double precision. # 8769# # 8770# ALGORITHM *********************************************************** # 8771# # 8772# slog10d: # 8773# # 8774# Step 0. If X < 0, create a NaN and raise the invalid operation # 8775# flag. Otherwise, save FPCR in D1; set FpCR to default. # 8776# Notes: Default means round-to-nearest mode, no floating-point # 8777# traps, and precision control = double extended. # 8778# # 8779# Step 1. Call slognd to obtain Y = log(X), the natural log of X. # 8780# Notes: Even if X is denormalized, log(X) is always normalized. # 8781# # 8782# Step 2. Compute log_10(X) = log(X) * (1/log(10)). # 8783# 2.1 Restore the user FPCR # 8784# 2.2 Return ans := Y * INV_L10. # 8785# # 8786# slog10: # 8787# # 8788# Step 0. If X < 0, create a NaN and raise the invalid operation # 8789# flag. Otherwise, save FPCR in D1; set FpCR to default. # 8790# Notes: Default means round-to-nearest mode, no floating-point # 8791# traps, and precision control = double extended. # 8792# # 8793# Step 1. Call sLogN to obtain Y = log(X), the natural log of X. # 8794# # 8795# Step 2. Compute log_10(X) = log(X) * (1/log(10)). # 8796# 2.1 Restore the user FPCR # 8797# 2.2 Return ans := Y * INV_L10. # 8798# # 8799# sLog2d: # 8800# # 8801# Step 0. If X < 0, create a NaN and raise the invalid operation # 8802# flag. Otherwise, save FPCR in D1; set FpCR to default. # 8803# Notes: Default means round-to-nearest mode, no floating-point # 8804# traps, and precision control = double extended. # 8805# # 8806# Step 1. Call slognd to obtain Y = log(X), the natural log of X. # 8807# Notes: Even if X is denormalized, log(X) is always normalized. # 8808# # 8809# Step 2. 
Compute log_10(X) = log(X) * (1/log(2)). # 8810# 2.1 Restore the user FPCR # 8811# 2.2 Return ans := Y * INV_L2. # 8812# # 8813# sLog2: # 8814# # 8815# Step 0. If X < 0, create a NaN and raise the invalid operation # 8816# flag. Otherwise, save FPCR in D1; set FpCR to default. # 8817# Notes: Default means round-to-nearest mode, no floating-point # 8818# traps, and precision control = double extended. # 8819# # 8820# Step 1. If X is not an integer power of two, i.e., X != 2^k, # 8821# go to Step 3. # 8822# # 8823# Step 2. Return k. # 8824# 2.1 Get integer k, X = 2^k. # 8825# 2.2 Restore the user FPCR. # 8826# 2.3 Return ans := convert-to-double-extended(k). # 8827# # 8828# Step 3. Call sLogN to obtain Y = log(X), the natural log of X. # 8829# # 8830# Step 4. Compute log_2(X) = log(X) * (1/log(2)). # 8831# 4.1 Restore the user FPCR # 8832# 4.2 Return ans := Y * INV_L2. # 8833# # 8834######################################################################### 8835 8836INV_L10: 8837 long 0x3FFD0000,0xDE5BD8A9,0x37287195,0x00000000 8838 8839INV_L2: 8840 long 0x3FFF0000,0xB8AA3B29,0x5C17F0BC,0x00000000 8841 8842 global slog10 8843#--entry point for Log10(X), X is normalized 8844slog10: 8845 fmov.b &0x1,%fp0 8846 fcmp.x %fp0,(%a0) # if operand == 1, 8847 fbeq.l ld_pzero # return an EXACT zero 8848 8849 mov.l (%a0),%d1 8850 blt.w invalid 8851 mov.l %d0,-(%sp) 8852 clr.l %d0 8853 bsr slogn # log(X), X normal. 8854 fmov.l (%sp)+,%fpcr 8855 fmul.x INV_L10(%pc),%fp0 8856 bra t_inx2 8857 8858 global slog10d 8859#--entry point for Log10(X), X is denormalized 8860slog10d: 8861 mov.l (%a0),%d1 8862 blt.w invalid 8863 mov.l %d0,-(%sp) 8864 clr.l %d0 8865 bsr slognd # log(X), X denorm. 8866 fmov.l (%sp)+,%fpcr 8867 fmul.x INV_L10(%pc),%fp0 8868 bra t_minx2 8869 8870 global slog2 8871#--entry point for Log2(X), X is normalized 8872slog2: 8873 mov.l (%a0),%d1 8874 blt.w invalid 8875 8876 mov.l 8(%a0),%d1 8877 bne.b continue # X is not 2^k 8878 8879 mov.l 4(%a0),%d1 8880 and.l &0x7FFFFFFF,%d1 8881 bne.b continue 8882 8883#--X = 2^k. 8884 mov.w (%a0),%d1 8885 and.l &0x00007FFF,%d1 8886 sub.l &0x3FFF,%d1 8887 beq.l ld_pzero 8888 fmov.l %d0,%fpcr 8889 fmov.l %d1,%fp0 8890 bra t_inx2 8891 8892continue: 8893 mov.l %d0,-(%sp) 8894 clr.l %d0 8895 bsr slogn # log(X), X normal. 8896 fmov.l (%sp)+,%fpcr 8897 fmul.x INV_L2(%pc),%fp0 8898 bra t_inx2 8899 8900invalid: 8901 bra t_operr 8902 8903 global slog2d 8904#--entry point for Log2(X), X is denormalized 8905slog2d: 8906 mov.l (%a0),%d1 8907 blt.w invalid 8908 mov.l %d0,-(%sp) 8909 clr.l %d0 8910 bsr slognd # log(X), X denorm. 8911 fmov.l (%sp)+,%fpcr 8912 fmul.x INV_L2(%pc),%fp0 8913 bra t_minx2 8914 8915######################################################################### 8916# stwotox(): computes 2**X for a normalized input # 8917# stwotoxd(): computes 2**X for a denormalized input # 8918# stentox(): computes 10**X for a normalized input # 8919# stentoxd(): computes 10**X for a denormalized input # 8920# # 8921# INPUT *************************************************************** # 8922# a0 = pointer to extended precision input # 8923# d0 = round precision,mode # 8924# # 8925# OUTPUT ************************************************************** # 8926# fp0 = 2**X or 10**X # 8927# # 8928# ACCURACY and MONOTONICITY ******************************************* # 8929# The returned result is within 2 ulps in 64 significant bit, # 8930# i.e. within 0.5001 ulp to 53 bits if the result is subsequently # 8931# rounded to double precision. 
The result is provably monotonic # 8932# in double precision. # 8933# # 8934# ALGORITHM *********************************************************** # 8935# # 8936# twotox # 8937# 1. If |X| > 16480, go to ExpBig. # 8938# # 8939# 2. If |X| < 2**(-70), go to ExpSm. # 8940# # 8941# 3. Decompose X as X = N/64 + r where |r| <= 1/128. Furthermore # 8942# decompose N as # 8943# N = 64(M + M') + j, j = 0,1,2,...,63. # 8944# # 8945# 4. Overwrite r := r * log2. Then # 8946# 2**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r). # 8947# Go to expr to compute that expression. # 8948# # 8949# tentox # 8950# 1. If |X| > 16480*log_10(2) (base 10 log of 2), go to ExpBig. # 8951# # 8952# 2. If |X| < 2**(-70), go to ExpSm. # 8953# # 8954# 3. Set y := X*log_2(10)*64 (base 2 log of 10). Set # 8955# N := round-to-int(y). Decompose N as # 8956# N = 64(M + M') + j, j = 0,1,2,...,63. # 8957# # 8958# 4. Define r as # 8959# r := ((X - N*L1)-N*L2) * L10 # 8960# where L1, L2 are the leading and trailing parts of # 8961# log_10(2)/64 and L10 is the natural log of 10. Then # 8962# 10**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r). # 8963# Go to expr to compute that expression. # 8964# # 8965# expr # 8966# 1. Fetch 2**(j/64) from table as Fact1 and Fact2. # 8967# # 8968# 2. Overwrite Fact1 and Fact2 by # 8969# Fact1 := 2**(M) * Fact1 # 8970# Fact2 := 2**(M) * Fact2 # 8971# Thus Fact1 + Fact2 = 2**(M) * 2**(j/64). # 8972# # 8973# 3. Calculate P where 1 + P approximates exp(r): # 8974# P = r + r*r*(A1+r*(A2+...+r*A5)). # 8975# # 8976# 4. Let AdjFact := 2**(M'). Return # 8977# AdjFact * ( Fact1 + ((Fact1*P) + Fact2) ). # 8978# Exit. # 8979# # 8980# ExpBig # 8981# 1. Generate overflow by Huge * Huge if X > 0; otherwise, # 8982# generate underflow by Tiny * Tiny. # 8983# # 8984# ExpSm # 8985# 1. Return 1 + X. 
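#
# For illustration only, the twotox decomposition above in C (made-up
# helper name; exp2()/exp() stand in for the 2^(j/64) table and the
# exp(r) polynomial, and M and M' are folded into a single ldexp()):
#
#	#include <math.h>
#
#	double two_to_x(double x)           /* assumes |x| <= 16480     */
#	{
#	    int n = (int)lrint(64.0 * x);   /* N = round-to-int(64 X)   */
#	    int j = n & 63;                 /* N = 64*L + j             */
#	    int l = (n - j) / 64;
#
#	    double r = (x - n / 64.0) * log(2.0);   /* |r| <= log2/128  */
#	    return ldexp(exp2(j / 64.0) * exp(r), l);
#	}
#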
# 8986# # 8987######################################################################### 8988 8989L2TEN64: 8990 long 0x406A934F,0x0979A371 # 64LOG10/LOG2 8991L10TWO1: 8992 long 0x3F734413,0x509F8000 # LOG2/64LOG10 8993 8994L10TWO2: 8995 long 0xBFCD0000,0xC0219DC1,0xDA994FD2,0x00000000 8996 8997LOG10: long 0x40000000,0x935D8DDD,0xAAA8AC17,0x00000000 8998 8999LOG2: long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000 9000 9001EXPA5: long 0x3F56C16D,0x6F7BD0B2 9002EXPA4: long 0x3F811112,0x302C712C 9003EXPA3: long 0x3FA55555,0x55554CC1 9004EXPA2: long 0x3FC55555,0x55554A54 9005EXPA1: long 0x3FE00000,0x00000000,0x00000000,0x00000000 9006 9007TEXPTBL: 9008 long 0x3FFF0000,0x80000000,0x00000000,0x3F738000 9009 long 0x3FFF0000,0x8164D1F3,0xBC030773,0x3FBEF7CA 9010 long 0x3FFF0000,0x82CD8698,0xAC2BA1D7,0x3FBDF8A9 9011 long 0x3FFF0000,0x843A28C3,0xACDE4046,0x3FBCD7C9 9012 long 0x3FFF0000,0x85AAC367,0xCC487B15,0xBFBDE8DA 9013 long 0x3FFF0000,0x871F6196,0x9E8D1010,0x3FBDE85C 9014 long 0x3FFF0000,0x88980E80,0x92DA8527,0x3FBEBBF1 9015 long 0x3FFF0000,0x8A14D575,0x496EFD9A,0x3FBB80CA 9016 long 0x3FFF0000,0x8B95C1E3,0xEA8BD6E7,0xBFBA8373 9017 long 0x3FFF0000,0x8D1ADF5B,0x7E5BA9E6,0xBFBE9670 9018 long 0x3FFF0000,0x8EA4398B,0x45CD53C0,0x3FBDB700 9019 long 0x3FFF0000,0x9031DC43,0x1466B1DC,0x3FBEEEB0 9020 long 0x3FFF0000,0x91C3D373,0xAB11C336,0x3FBBFD6D 9021 long 0x3FFF0000,0x935A2B2F,0x13E6E92C,0xBFBDB319 9022 long 0x3FFF0000,0x94F4EFA8,0xFEF70961,0x3FBDBA2B 9023 long 0x3FFF0000,0x96942D37,0x20185A00,0x3FBE91D5 9024 long 0x3FFF0000,0x9837F051,0x8DB8A96F,0x3FBE8D5A 9025 long 0x3FFF0000,0x99E04593,0x20B7FA65,0xBFBCDE7B 9026 long 0x3FFF0000,0x9B8D39B9,0xD54E5539,0xBFBEBAAF 9027 long 0x3FFF0000,0x9D3ED9A7,0x2CFFB751,0xBFBD86DA 9028 long 0x3FFF0000,0x9EF53260,0x91A111AE,0xBFBEBEDD 9029 long 0x3FFF0000,0xA0B0510F,0xB9714FC2,0x3FBCC96E 9030 long 0x3FFF0000,0xA2704303,0x0C496819,0xBFBEC90B 9031 long 0x3FFF0000,0xA43515AE,0x09E6809E,0x3FBBD1DB 9032 long 0x3FFF0000,0xA5FED6A9,0xB15138EA,0x3FBCE5EB 9033 long 0x3FFF0000,0xA7CD93B4,0xE965356A,0xBFBEC274 9034 long 0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x3FBEA83C 9035 long 0x3FFF0000,0xAB7A39B5,0xA93ED337,0x3FBECB00 9036 long 0x3FFF0000,0xAD583EEA,0x42A14AC6,0x3FBE9301 9037 long 0x3FFF0000,0xAF3B78AD,0x690A4375,0xBFBD8367 9038 long 0x3FFF0000,0xB123F581,0xD2AC2590,0xBFBEF05F 9039 long 0x3FFF0000,0xB311C412,0xA9112489,0x3FBDFB3C 9040 long 0x3FFF0000,0xB504F333,0xF9DE6484,0x3FBEB2FB 9041 long 0x3FFF0000,0xB6FD91E3,0x28D17791,0x3FBAE2CB 9042 long 0x3FFF0000,0xB8FBAF47,0x62FB9EE9,0x3FBCDC3C 9043 long 0x3FFF0000,0xBAFF5AB2,0x133E45FB,0x3FBEE9AA 9044 long 0x3FFF0000,0xBD08A39F,0x580C36BF,0xBFBEAEFD 9045 long 0x3FFF0000,0xBF1799B6,0x7A731083,0xBFBCBF51 9046 long 0x3FFF0000,0xC12C4CCA,0x66709456,0x3FBEF88A 9047 long 0x3FFF0000,0xC346CCDA,0x24976407,0x3FBD83B2 9048 long 0x3FFF0000,0xC5672A11,0x5506DADD,0x3FBDF8AB 9049 long 0x3FFF0000,0xC78D74C8,0xABB9B15D,0xBFBDFB17 9050 long 0x3FFF0000,0xC9B9BD86,0x6E2F27A3,0xBFBEFE3C 9051 long 0x3FFF0000,0xCBEC14FE,0xF2727C5D,0xBFBBB6F8 9052 long 0x3FFF0000,0xCE248C15,0x1F8480E4,0xBFBCEE53 9053 long 0x3FFF0000,0xD06333DA,0xEF2B2595,0xBFBDA4AE 9054 long 0x3FFF0000,0xD2A81D91,0xF12AE45A,0x3FBC9124 9055 long 0x3FFF0000,0xD4F35AAB,0xCFEDFA1F,0x3FBEB243 9056 long 0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x3FBDE69A 9057 long 0x3FFF0000,0xD99D15C2,0x78AFD7B6,0xBFB8BC61 9058 long 0x3FFF0000,0xDBFBB797,0xDAF23755,0x3FBDF610 9059 long 0x3FFF0000,0xDE60F482,0x5E0E9124,0xBFBD8BE1 9060 long 0x3FFF0000,0xE0CCDEEC,0x2A94E111,0x3FBACB12 9061 long 
0x3FFF0000,0xE33F8972,0xBE8A5A51,0x3FBB9BFE 9062 long 0x3FFF0000,0xE5B906E7,0x7C8348A8,0x3FBCF2F4 9063 long 0x3FFF0000,0xE8396A50,0x3C4BDC68,0x3FBEF22F 9064 long 0x3FFF0000,0xEAC0C6E7,0xDD24392F,0xBFBDBF4A 9065 long 0x3FFF0000,0xED4F301E,0xD9942B84,0x3FBEC01A 9066 long 0x3FFF0000,0xEFE4B99B,0xDCDAF5CB,0x3FBE8CAC 9067 long 0x3FFF0000,0xF281773C,0x59FFB13A,0xBFBCBB3F 9068 long 0x3FFF0000,0xF5257D15,0x2486CC2C,0x3FBEF73A 9069 long 0x3FFF0000,0xF7D0DF73,0x0AD13BB9,0xBFB8B795 9070 long 0x3FFF0000,0xFA83B2DB,0x722A033A,0x3FBEF84B 9071 long 0x3FFF0000,0xFD3E0C0C,0xF486C175,0xBFBEF581 9072 9073 set INT,L_SCR1 9074 9075 set X,FP_SCR0 9076 set XDCARE,X+2 9077 set XFRAC,X+4 9078 9079 set ADJFACT,FP_SCR0 9080 9081 set FACT1,FP_SCR0 9082 set FACT1HI,FACT1+4 9083 set FACT1LOW,FACT1+8 9084 9085 set FACT2,FP_SCR1 9086 set FACT2HI,FACT2+4 9087 set FACT2LOW,FACT2+8 9088 9089 global stwotox 9090#--ENTRY POINT FOR 2**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S 9091stwotox: 9092 fmovm.x (%a0),&0x80 # LOAD INPUT 9093 9094 mov.l (%a0),%d1 9095 mov.w 4(%a0),%d1 9096 fmov.x %fp0,X(%a6) 9097 and.l &0x7FFFFFFF,%d1 9098 9099 cmp.l %d1,&0x3FB98000 # |X| >= 2**(-70)? 9100 bge.b TWOOK1 9101 bra.w EXPBORS 9102 9103TWOOK1: 9104 cmp.l %d1,&0x400D80C0 # |X| > 16480? 9105 ble.b TWOMAIN 9106 bra.w EXPBORS 9107 9108TWOMAIN: 9109#--USUAL CASE, 2^(-70) <= |X| <= 16480 9110 9111 fmov.x %fp0,%fp1 9112 fmul.s &0x42800000,%fp1 # 64 * X 9113 fmov.l %fp1,INT(%a6) # N = ROUND-TO-INT(64 X) 9114 mov.l %d2,-(%sp) 9115 lea TEXPTBL(%pc),%a1 # LOAD ADDRESS OF TABLE OF 2^(J/64) 9116 fmov.l INT(%a6),%fp1 # N --> FLOATING FMT 9117 mov.l INT(%a6),%d1 9118 mov.l %d1,%d2 9119 and.l &0x3F,%d1 # D0 IS J 9120 asl.l &4,%d1 # DISPLACEMENT FOR 2^(J/64) 9121 add.l %d1,%a1 # ADDRESS FOR 2^(J/64) 9122 asr.l &6,%d2 # d2 IS L, N = 64L + J 9123 mov.l %d2,%d1 9124 asr.l &1,%d1 # D0 IS M 9125 sub.l %d1,%d2 # d2 IS M', N = 64(M+M') + J 9126 add.l &0x3FFF,%d2 9127 9128#--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64), 9129#--D0 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN. 9130#--ADJFACT = 2^(M'). 9131#--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2. 
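#
# For illustration only: the code that follows applies 2^(M) and 2^(M')
# by adjusting the biased exponent fields of FACT1/FACT2 and ADJFACT
# directly instead of multiplying. A C sketch of the same trick for
# IEEE doubles (bias 1023), with a made-up helper name:
#
#	#include <stdint.h>
#	#include <string.h>
#
#	double scale_by_pow2(double v, int m)   /* v normal, no overflow */
#	{
#	    uint64_t bits;
#	    memcpy(&bits, &v, sizeof bits);
#	    bits += (uint64_t)(int64_t)m << 52;  /* bump exponent field  */
#	    memcpy(&v, &bits, sizeof bits);
#	    return v;
#	}
#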
9132 9133 fmovm.x &0x0c,-(%sp) # save fp2/fp3 9134 9135 fmul.s &0x3C800000,%fp1 # (1/64)*N 9136 mov.l (%a1)+,FACT1(%a6) 9137 mov.l (%a1)+,FACT1HI(%a6) 9138 mov.l (%a1)+,FACT1LOW(%a6) 9139 mov.w (%a1)+,FACT2(%a6) 9140 9141 fsub.x %fp1,%fp0 # X - (1/64)*INT(64 X) 9142 9143 mov.w (%a1)+,FACT2HI(%a6) 9144 clr.w FACT2HI+2(%a6) 9145 clr.l FACT2LOW(%a6) 9146 add.w %d1,FACT1(%a6) 9147 fmul.x LOG2(%pc),%fp0 # FP0 IS R 9148 add.w %d1,FACT2(%a6) 9149 9150 bra.w expr 9151 9152EXPBORS: 9153#--FPCR, D0 SAVED 9154 cmp.l %d1,&0x3FFF8000 9155 bgt.b TEXPBIG 9156 9157#--|X| IS SMALL, RETURN 1 + X 9158 9159 fmov.l %d0,%fpcr # restore users round prec,mode 9160 fadd.s &0x3F800000,%fp0 # RETURN 1 + X 9161 bra t_pinx2 9162 9163TEXPBIG: 9164#--|X| IS LARGE, GENERATE OVERFLOW IF X > 0; ELSE GENERATE UNDERFLOW 9165#--REGISTERS SAVE SO FAR ARE FPCR AND D0 9166 mov.l X(%a6),%d1 9167 cmp.l %d1,&0 9168 blt.b EXPNEG 9169 9170 bra t_ovfl2 # t_ovfl expects positive value 9171 9172EXPNEG: 9173 bra t_unfl2 # t_unfl expects positive value 9174 9175 global stwotoxd 9176stwotoxd: 9177#--ENTRY POINT FOR 2**(X) FOR DENORMALIZED ARGUMENT 9178 9179 fmov.l %d0,%fpcr # set user's rounding mode/precision 9180 fmov.s &0x3F800000,%fp0 # RETURN 1 + X 9181 mov.l (%a0),%d1 9182 or.l &0x00800001,%d1 9183 fadd.s %d1,%fp0 9184 bra t_pinx2 9185 9186 global stentox 9187#--ENTRY POINT FOR 10**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S 9188stentox: 9189 fmovm.x (%a0),&0x80 # LOAD INPUT 9190 9191 mov.l (%a0),%d1 9192 mov.w 4(%a0),%d1 9193 fmov.x %fp0,X(%a6) 9194 and.l &0x7FFFFFFF,%d1 9195 9196 cmp.l %d1,&0x3FB98000 # |X| >= 2**(-70)? 9197 bge.b TENOK1 9198 bra.w EXPBORS 9199 9200TENOK1: 9201 cmp.l %d1,&0x400B9B07 # |X| <= 16480*log2/log10 ? 9202 ble.b TENMAIN 9203 bra.w EXPBORS 9204 9205TENMAIN: 9206#--USUAL CASE, 2^(-70) <= |X| <= 16480 LOG 2 / LOG 10 9207 9208 fmov.x %fp0,%fp1 9209 fmul.d L2TEN64(%pc),%fp1 # X*64*LOG10/LOG2 9210 fmov.l %fp1,INT(%a6) # N=INT(X*64*LOG10/LOG2) 9211 mov.l %d2,-(%sp) 9212 lea TEXPTBL(%pc),%a1 # LOAD ADDRESS OF TABLE OF 2^(J/64) 9213 fmov.l INT(%a6),%fp1 # N --> FLOATING FMT 9214 mov.l INT(%a6),%d1 9215 mov.l %d1,%d2 9216 and.l &0x3F,%d1 # D0 IS J 9217 asl.l &4,%d1 # DISPLACEMENT FOR 2^(J/64) 9218 add.l %d1,%a1 # ADDRESS FOR 2^(J/64) 9219 asr.l &6,%d2 # d2 IS L, N = 64L + J 9220 mov.l %d2,%d1 9221 asr.l &1,%d1 # D0 IS M 9222 sub.l %d1,%d2 # d2 IS M', N = 64(M+M') + J 9223 add.l &0x3FFF,%d2 9224 9225#--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64), 9226#--D0 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN. 9227#--ADJFACT = 2^(M'). 9228#--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2. 9229 fmovm.x &0x0c,-(%sp) # save fp2/fp3 9230 9231 fmov.x %fp1,%fp2 9232 9233 fmul.d L10TWO1(%pc),%fp1 # N*(LOG2/64LOG10)_LEAD 9234 mov.l (%a1)+,FACT1(%a6) 9235 9236 fmul.x L10TWO2(%pc),%fp2 # N*(LOG2/64LOG10)_TRAIL 9237 9238 mov.l (%a1)+,FACT1HI(%a6) 9239 mov.l (%a1)+,FACT1LOW(%a6) 9240 fsub.x %fp1,%fp0 # X - N L_LEAD 9241 mov.w (%a1)+,FACT2(%a6) 9242 9243 fsub.x %fp2,%fp0 # X - N L_TRAIL 9244 9245 mov.w (%a1)+,FACT2HI(%a6) 9246 clr.w FACT2HI+2(%a6) 9247 clr.l FACT2LOW(%a6) 9248 9249 fmul.x LOG10(%pc),%fp0 # FP0 IS R 9250 add.w %d1,FACT1(%a6) 9251 add.w %d1,FACT2(%a6) 9252 9253expr: 9254#--FPCR, FP2, FP3 ARE SAVED IN ORDER AS SHOWN. 9255#--ADJFACT CONTAINS 2**(M'), FACT1 + FACT2 = 2**(M) * 2**(J/64). 9256#--FP0 IS R. 
THE FOLLOWING CODE COMPUTES 9257#-- 2**(M'+M) * 2**(J/64) * EXP(R) 9258 9259 fmov.x %fp0,%fp1 9260 fmul.x %fp1,%fp1 # FP1 IS S = R*R 9261 9262 fmov.d EXPA5(%pc),%fp2 # FP2 IS A5 9263 fmov.d EXPA4(%pc),%fp3 # FP3 IS A4 9264 9265 fmul.x %fp1,%fp2 # FP2 IS S*A5 9266 fmul.x %fp1,%fp3 # FP3 IS S*A4 9267 9268 fadd.d EXPA3(%pc),%fp2 # FP2 IS A3+S*A5 9269 fadd.d EXPA2(%pc),%fp3 # FP3 IS A2+S*A4 9270 9271 fmul.x %fp1,%fp2 # FP2 IS S*(A3+S*A5) 9272 fmul.x %fp1,%fp3 # FP3 IS S*(A2+S*A4) 9273 9274 fadd.d EXPA1(%pc),%fp2 # FP2 IS A1+S*(A3+S*A5) 9275 fmul.x %fp0,%fp3 # FP3 IS R*S*(A2+S*A4) 9276 9277 fmul.x %fp1,%fp2 # FP2 IS S*(A1+S*(A3+S*A5)) 9278 fadd.x %fp3,%fp0 # FP0 IS R+R*S*(A2+S*A4) 9279 fadd.x %fp2,%fp0 # FP0 IS EXP(R) - 1 9280 9281 fmovm.x (%sp)+,&0x30 # restore fp2/fp3 9282 9283#--FINAL RECONSTRUCTION PROCESS 9284#--EXP(X) = 2^M*2^(J/64) + 2^M*2^(J/64)*(EXP(R)-1) - (1 OR 0) 9285 9286 fmul.x FACT1(%a6),%fp0 9287 fadd.x FACT2(%a6),%fp0 9288 fadd.x FACT1(%a6),%fp0 9289 9290 fmov.l %d0,%fpcr # restore users round prec,mode 9291 mov.w %d2,ADJFACT(%a6) # INSERT EXPONENT 9292 mov.l (%sp)+,%d2 9293 mov.l &0x80000000,ADJFACT+4(%a6) 9294 clr.l ADJFACT+8(%a6) 9295 mov.b &FMUL_OP,%d1 # last inst is MUL 9296 fmul.x ADJFACT(%a6),%fp0 # FINAL ADJUSTMENT 9297 bra t_catch 9298 9299 global stentoxd 9300stentoxd: 9301#--ENTRY POINT FOR 10**(X) FOR DENORMALIZED ARGUMENT 9302 9303 fmov.l %d0,%fpcr # set user's rounding mode/precision 9304 fmov.s &0x3F800000,%fp0 # RETURN 1 + X 9305 mov.l (%a0),%d1 9306 or.l &0x00800001,%d1 9307 fadd.s %d1,%fp0 9308 bra t_pinx2 9309 9310######################################################################### 9311# smovcr(): returns the ROM constant at the offset specified in d1 # 9312# rounded to the mode and precision specified in d0. # 9313# # 9314# INPUT *************************************************************** # 9315# d0 = rnd prec,mode # 9316# d1 = ROM offset # 9317# # 9318# OUTPUT ************************************************************** # 9319# fp0 = the ROM constant rounded to the user's rounding mode,prec # 9320# # 9321######################################################################### 9322 9323 global smovcr 9324smovcr: 9325 mov.l %d1,-(%sp) # save rom offset for a sec 9326 9327 lsr.b &0x4,%d0 # shift ctrl bits to lo 9328 mov.l %d0,%d1 # make a copy 9329 andi.w &0x3,%d1 # extract rnd mode 9330 andi.w &0xc,%d0 # extract rnd prec 9331 swap %d0 # put rnd prec in hi 9332 mov.w %d1,%d0 # put rnd mode in lo 9333 9334 mov.l (%sp)+,%d1 # get rom offset 9335 9336# 9337# check range of offset 9338# 9339 tst.b %d1 # if zero, offset is to pi 9340 beq.b pi_tbl # it is pi 9341 cmpi.b %d1,&0x0a # check range $01 - $0a 9342 ble.b z_val # if in this range, return zero 9343 cmpi.b %d1,&0x0e # check range $0b - $0e 9344 ble.b sm_tbl # valid constants in this range 9345 cmpi.b %d1,&0x2f # check range $10 - $2f 9346 ble.b z_val # if in this range, return zero 9347 cmpi.b %d1,&0x3f # check range $30 - $3f 9348 ble.b bg_tbl # valid constants in this range 9349 9350z_val: 9351 bra.l ld_pzero # return a zero 9352 9353# 9354# the answer is PI rounded to the proper precision. 9355# 9356# fetch a pointer to the answer table relating to the proper rounding 9357# precision. 9358# 9359pi_tbl: 9360 tst.b %d0 # is rmode RN? 9361 bne.b pi_not_rn # no 9362pi_rn: 9363 lea.l PIRN(%pc),%a0 # yes; load PI RN table addr 9364 bra.w set_finx 9365pi_not_rn: 9366 cmpi.b %d0,&rp_mode # is rmode RP? 
9367 beq.b pi_rp # yes 9368pi_rzrm: 9369 lea.l PIRZRM(%pc),%a0 # no; load PI RZ,RM table addr 9370 bra.b set_finx 9371pi_rp: 9372 lea.l PIRP(%pc),%a0 # load PI RP table addr 9373 bra.b set_finx 9374 9375# 9376# the answer is one of: 9377# $0B log10(2) (inexact) 9378# $0C e (inexact) 9379# $0D log2(e) (inexact) 9380# $0E log10(e) (exact) 9381# 9382# fetch a pointer to the answer table relating to the proper rounding 9383# precision. 9384# 9385sm_tbl: 9386 subi.b &0xb,%d1 # make offset in 0-4 range 9387 tst.b %d0 # is rmode RN? 9388 bne.b sm_not_rn # no 9389sm_rn: 9390 lea.l SMALRN(%pc),%a0 # yes; load RN table addr 9391sm_tbl_cont: 9392 cmpi.b %d1,&0x2 # is result log10(e)? 9393 ble.b set_finx # no; answer is inexact 9394 bra.b no_finx # yes; answer is exact 9395sm_not_rn: 9396 cmpi.b %d0,&rp_mode # is rmode RP? 9397 beq.b sm_rp # yes 9398sm_rzrm: 9399 lea.l SMALRZRM(%pc),%a0 # no; load RZ,RM table addr 9400 bra.b sm_tbl_cont 9401sm_rp: 9402 lea.l SMALRP(%pc),%a0 # load RP table addr 9403 bra.b sm_tbl_cont 9404 9405# 9406# the answer is one of: 9407# $30 ln(2) (inexact) 9408# $31 ln(10) (inexact) 9409# $32 10^0 (exact) 9410# $33 10^1 (exact) 9411# $34 10^2 (exact) 9412# $35 10^4 (exact) 9413# $36 10^8 (exact) 9414# $37 10^16 (exact) 9415# $38 10^32 (inexact) 9416# $39 10^64 (inexact) 9417# $3A 10^128 (inexact) 9418# $3B 10^256 (inexact) 9419# $3C 10^512 (inexact) 9420# $3D 10^1024 (inexact) 9421# $3E 10^2048 (inexact) 9422# $3F 10^4096 (inexact) 9423# 9424# fetch a pointer to the answer table relating to the proper rounding 9425# precision. 9426# 9427bg_tbl: 9428 subi.b &0x30,%d1 # make offset in 0-f range 9429 tst.b %d0 # is rmode RN? 9430 bne.b bg_not_rn # no 9431bg_rn: 9432 lea.l BIGRN(%pc),%a0 # yes; load RN table addr 9433bg_tbl_cont: 9434 cmpi.b %d1,&0x1 # is offset <= $31? 9435 ble.b set_finx # yes; answer is inexact 9436 cmpi.b %d1,&0x7 # is $32 <= offset <= $37? 9437 ble.b no_finx # yes; answer is exact 9438 bra.b set_finx # no; answer is inexact 9439bg_not_rn: 9440 cmpi.b %d0,&rp_mode # is rmode RP? 9441 beq.b bg_rp # yes 9442bg_rzrm: 9443 lea.l BIGRZRM(%pc),%a0 # no; load RZ,RM table addr 9444 bra.b bg_tbl_cont 9445bg_rp: 9446 lea.l BIGRP(%pc),%a0 # load RP table addr 9447 bra.b bg_tbl_cont 9448 9449# answer is inexact, so set INEX2 and AINEX in the user's FPSR. 9450set_finx: 9451 ori.l &inx2a_mask,USER_FPSR(%a6) # set INEX2/AINEX 9452no_finx: 9453 mulu.w &0xc,%d1 # offset points into tables 9454 swap %d0 # put rnd prec in lo word 9455 tst.b %d0 # is precision extended? 9456 9457 bne.b not_ext # if xprec, do not call round 9458 9459# Precision is extended 9460 fmovm.x (%a0,%d1.w),&0x80 # return result in fp0 9461 rts 9462 9463# Precision is single or double 9464not_ext: 9465 swap %d0 # rnd prec in upper word 9466 9467# call round() to round the answer to the proper precision. 9468# exponents out of range for single or double DO NOT cause underflow 9469# or overflow. 
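#
# For illustration only: in the round-to-nearest case, the _round call
# below amounts to rounding the constant's 64-bit mantissa to 24
# (single) or 53 (double) bits. A C sketch with a made-up helper (the
# real _round interface is different):
#
#	#include <stdint.h>
#
#	uint64_t round_mantissa(uint64_t man, int bits, int *exp)
#	{
#	    int drop = 64 - bits;                 /* bits is 24 or 53   */
#	    uint64_t kept  = man >> drop;
#	    uint64_t guard = (man >> (drop - 1)) & 1;
#	    uint64_t rest  = man & ((1ULL << (drop - 1)) - 1);
#
#	    if (guard && (rest || (kept & 1)))    /* ties go to even    */
#	        kept++;
#	    if (kept >> bits) {                   /* rounded up to 2.0  */
#	        kept >>= 1;                       /* back to 1.000...0  */
#	        (*exp)++;
#	    }
#	    return kept << drop;
#	}
#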
9470 mov.w 0x0(%a0,%d1.w),FP_SCR1_EX(%a6) # load first word 9471 mov.l 0x4(%a0,%d1.w),FP_SCR1_HI(%a6) # load second word 9472 mov.l 0x8(%a0,%d1.w),FP_SCR1_LO(%a6) # load third word 9473 mov.l %d0,%d1 9474 clr.l %d0 # clear g,r,s 9475 lea FP_SCR1(%a6),%a0 # pass ptr to answer 9476 clr.w LOCAL_SGN(%a0) # sign always positive 9477 bsr.l _round # round the mantissa 9478 9479 fmovm.x (%a0),&0x80 # return rounded result in fp0 9480 rts 9481 9482 align 0x4 9483 9484PIRN: long 0x40000000,0xc90fdaa2,0x2168c235 # pi 9485PIRZRM: long 0x40000000,0xc90fdaa2,0x2168c234 # pi 9486PIRP: long 0x40000000,0xc90fdaa2,0x2168c235 # pi 9487 9488SMALRN: long 0x3ffd0000,0x9a209a84,0xfbcff798 # log10(2) 9489 long 0x40000000,0xadf85458,0xa2bb4a9a # e 9490 long 0x3fff0000,0xb8aa3b29,0x5c17f0bc # log2(e) 9491 long 0x3ffd0000,0xde5bd8a9,0x37287195 # log10(e) 9492 long 0x00000000,0x00000000,0x00000000 # 0.0 9493 9494SMALRZRM: 9495 long 0x3ffd0000,0x9a209a84,0xfbcff798 # log10(2) 9496 long 0x40000000,0xadf85458,0xa2bb4a9a # e 9497 long 0x3fff0000,0xb8aa3b29,0x5c17f0bb # log2(e) 9498 long 0x3ffd0000,0xde5bd8a9,0x37287195 # log10(e) 9499 long 0x00000000,0x00000000,0x00000000 # 0.0 9500 9501SMALRP: long 0x3ffd0000,0x9a209a84,0xfbcff799 # log10(2) 9502 long 0x40000000,0xadf85458,0xa2bb4a9b # e 9503 long 0x3fff0000,0xb8aa3b29,0x5c17f0bc # log2(e) 9504 long 0x3ffd0000,0xde5bd8a9,0x37287195 # log10(e) 9505 long 0x00000000,0x00000000,0x00000000 # 0.0 9506 9507BIGRN: long 0x3ffe0000,0xb17217f7,0xd1cf79ac # ln(2) 9508 long 0x40000000,0x935d8ddd,0xaaa8ac17 # ln(10) 9509 9510 long 0x3fff0000,0x80000000,0x00000000 # 10 ^ 0 9511 long 0x40020000,0xA0000000,0x00000000 # 10 ^ 1 9512 long 0x40050000,0xC8000000,0x00000000 # 10 ^ 2 9513 long 0x400C0000,0x9C400000,0x00000000 # 10 ^ 4 9514 long 0x40190000,0xBEBC2000,0x00000000 # 10 ^ 8 9515 long 0x40340000,0x8E1BC9BF,0x04000000 # 10 ^ 16 9516 long 0x40690000,0x9DC5ADA8,0x2B70B59E # 10 ^ 32 9517 long 0x40D30000,0xC2781F49,0xFFCFA6D5 # 10 ^ 64 9518 long 0x41A80000,0x93BA47C9,0x80E98CE0 # 10 ^ 128 9519 long 0x43510000,0xAA7EEBFB,0x9DF9DE8E # 10 ^ 256 9520 long 0x46A30000,0xE319A0AE,0xA60E91C7 # 10 ^ 512 9521 long 0x4D480000,0xC9767586,0x81750C17 # 10 ^ 1024 9522 long 0x5A920000,0x9E8B3B5D,0xC53D5DE5 # 10 ^ 2048 9523 long 0x75250000,0xC4605202,0x8A20979B # 10 ^ 4096 9524 9525BIGRZRM: 9526 long 0x3ffe0000,0xb17217f7,0xd1cf79ab # ln(2) 9527 long 0x40000000,0x935d8ddd,0xaaa8ac16 # ln(10) 9528 9529 long 0x3fff0000,0x80000000,0x00000000 # 10 ^ 0 9530 long 0x40020000,0xA0000000,0x00000000 # 10 ^ 1 9531 long 0x40050000,0xC8000000,0x00000000 # 10 ^ 2 9532 long 0x400C0000,0x9C400000,0x00000000 # 10 ^ 4 9533 long 0x40190000,0xBEBC2000,0x00000000 # 10 ^ 8 9534 long 0x40340000,0x8E1BC9BF,0x04000000 # 10 ^ 16 9535 long 0x40690000,0x9DC5ADA8,0x2B70B59D # 10 ^ 32 9536 long 0x40D30000,0xC2781F49,0xFFCFA6D5 # 10 ^ 64 9537 long 0x41A80000,0x93BA47C9,0x80E98CDF # 10 ^ 128 9538 long 0x43510000,0xAA7EEBFB,0x9DF9DE8D # 10 ^ 256 9539 long 0x46A30000,0xE319A0AE,0xA60E91C6 # 10 ^ 512 9540 long 0x4D480000,0xC9767586,0x81750C17 # 10 ^ 1024 9541 long 0x5A920000,0x9E8B3B5D,0xC53D5DE4 # 10 ^ 2048 9542 long 0x75250000,0xC4605202,0x8A20979A # 10 ^ 4096 9543 9544BIGRP: 9545 long 0x3ffe0000,0xb17217f7,0xd1cf79ac # ln(2) 9546 long 0x40000000,0x935d8ddd,0xaaa8ac17 # ln(10) 9547 9548 long 0x3fff0000,0x80000000,0x00000000 # 10 ^ 0 9549 long 0x40020000,0xA0000000,0x00000000 # 10 ^ 1 9550 long 0x40050000,0xC8000000,0x00000000 # 10 ^ 2 9551 long 0x400C0000,0x9C400000,0x00000000 # 10 ^ 4 9552 long 0x40190000,0xBEBC2000,0x00000000 # 10 ^ 8 
9553 long 0x40340000,0x8E1BC9BF,0x04000000 # 10 ^ 16 9554 long 0x40690000,0x9DC5ADA8,0x2B70B59E # 10 ^ 32 9555 long 0x40D30000,0xC2781F49,0xFFCFA6D6 # 10 ^ 64 9556 long 0x41A80000,0x93BA47C9,0x80E98CE0 # 10 ^ 128 9557 long 0x43510000,0xAA7EEBFB,0x9DF9DE8E # 10 ^ 256 9558 long 0x46A30000,0xE319A0AE,0xA60E91C7 # 10 ^ 512 9559 long 0x4D480000,0xC9767586,0x81750C18 # 10 ^ 1024 9560 long 0x5A920000,0x9E8B3B5D,0xC53D5DE5 # 10 ^ 2048 9561 long 0x75250000,0xC4605202,0x8A20979B # 10 ^ 4096 9562 9563######################################################################### 9564# sscale(): computes the destination operand scaled by the source # 9565# operand. If the absoulute value of the source operand is # 9566# >= 2^14, an overflow or underflow is returned. # 9567# # 9568# INPUT *************************************************************** # 9569# a0 = pointer to double-extended source operand X # 9570# a1 = pointer to double-extended destination operand Y # 9571# # 9572# OUTPUT ************************************************************** # 9573# fp0 = scale(X,Y) # 9574# # 9575######################################################################### 9576 9577set SIGN, L_SCR1 9578 9579 global sscale 9580sscale: 9581 mov.l %d0,-(%sp) # store off ctrl bits for now 9582 9583 mov.w DST_EX(%a1),%d1 # get dst exponent 9584 smi.b SIGN(%a6) # use SIGN to hold dst sign 9585 andi.l &0x00007fff,%d1 # strip sign from dst exp 9586 9587 mov.w SRC_EX(%a0),%d0 # check src bounds 9588 andi.w &0x7fff,%d0 # clr src sign bit 9589 cmpi.w %d0,&0x3fff # is src ~ ZERO? 9590 blt.w src_small # yes 9591 cmpi.w %d0,&0x400c # no; is src too big? 9592 bgt.w src_out # yes 9593 9594# 9595# Source is within 2^14 range. 9596# 9597src_ok: 9598 fintrz.x SRC(%a0),%fp0 # calc int of src 9599 fmov.l %fp0,%d0 # int src to d0 9600# don't want any accrued bits from the fintrz showing up later since 9601# we may need to read the fpsr for the last fp op in t_catch2(). 9602 fmov.l &0x0,%fpsr 9603 9604 tst.b DST_HI(%a1) # is dst denormalized? 9605 bmi.b sok_norm 9606 9607# the dst is a DENORM. normalize the DENORM and add the adjustment to 9608# the src value. then, jump to the norm part of the routine. 9609sok_dnrm: 9610 mov.l %d0,-(%sp) # save src for now 9611 9612 mov.w DST_EX(%a1),FP_SCR0_EX(%a6) # make a copy 9613 mov.l DST_HI(%a1),FP_SCR0_HI(%a6) 9614 mov.l DST_LO(%a1),FP_SCR0_LO(%a6) 9615 9616 lea FP_SCR0(%a6),%a0 # pass ptr to DENORM 9617 bsr.l norm # normalize the DENORM 9618 neg.l %d0 9619 add.l (%sp)+,%d0 # add adjustment to src 9620 9621 fmovm.x FP_SCR0(%a6),&0x80 # load normalized DENORM 9622 9623 cmpi.w %d0,&-0x3fff # is the shft amt really low? 9624 bge.b sok_norm2 # thank goodness no 9625 9626# the multiply factor that we're trying to create should be a denorm 9627# for the multiply to work. Therefore, we're going to actually do a 9628# multiply with a denorm which will cause an unimplemented data type 9629# exception to be put into the machine which will be caught and corrected 9630# later. we don't do this with the DENORMs above because this method 9631# is slower. but, don't fret, I don't see it being used much either. 9632 fmov.l (%sp)+,%fpcr # restore user fpcr 9633 mov.l &0x80000000,%d1 # load normalized mantissa 9634 subi.l &-0x3fff,%d0 # how many should we shift? 9635 neg.l %d0 # make it positive 9636 cmpi.b %d0,&0x20 # is it > 32? 
9637 bge.b sok_dnrm_32 # yes 9638 lsr.l %d0,%d1 # no; bit stays in upper lw 9639 clr.l -(%sp) # insert zero low mantissa 9640 mov.l %d1,-(%sp) # insert new high mantissa 9641 clr.l -(%sp) # make zero exponent 9642 bra.b sok_norm_cont 9643sok_dnrm_32: 9644 subi.b &0x20,%d0 # get shift count 9645 lsr.l %d0,%d1 # make low mantissa longword 9646 mov.l %d1,-(%sp) # insert new low mantissa 9647 clr.l -(%sp) # insert zero high mantissa 9648 clr.l -(%sp) # make zero exponent 9649 bra.b sok_norm_cont 9650 9651# the src will force the dst to a DENORM value or worse. so, let's 9652# create an fp multiply that will create the result. 9653sok_norm: 9654 fmovm.x DST(%a1),&0x80 # load fp0 with normalized src 9655sok_norm2: 9656 fmov.l (%sp)+,%fpcr # restore user fpcr 9657 9658 addi.w &0x3fff,%d0 # turn src amt into exp value 9659 swap %d0 # put exponent in high word 9660 clr.l -(%sp) # insert new exponent 9661 mov.l &0x80000000,-(%sp) # insert new high mantissa 9662 mov.l %d0,-(%sp) # insert new lo mantissa 9663 9664sok_norm_cont: 9665 fmov.l %fpcr,%d0 # d0 needs fpcr for t_catch2 9666 mov.b &FMUL_OP,%d1 # last inst is MUL 9667 fmul.x (%sp)+,%fp0 # do the multiply 9668 bra t_catch2 # catch any exceptions 9669 9670# 9671# Source is outside of 2^14 range. Test the sign and branch 9672# to the appropriate exception handler. 9673# 9674src_out: 9675 mov.l (%sp)+,%d0 # restore ctrl bits 9676 exg %a0,%a1 # swap src,dst ptrs 9677 tst.b SRC_EX(%a1) # is src negative? 9678 bmi t_unfl # yes; underflow 9679 bra t_ovfl_sc # no; overflow 9680 9681# 9682# The source input is below 1, so we check for denormalized numbers 9683# and set unfl. 9684# 9685src_small: 9686 tst.b DST_HI(%a1) # is dst denormalized? 9687 bpl.b ssmall_done # yes 9688 9689 mov.l (%sp)+,%d0 9690 fmov.l %d0,%fpcr # no; load control bits 9691 mov.b &FMOV_OP,%d1 # last inst is MOVE 9692 fmov.x DST(%a1),%fp0 # simply return dest 9693 bra t_catch2 9694ssmall_done: 9695 mov.l (%sp)+,%d0 # load control bits into d1 9696 mov.l %a1,%a0 # pass ptr to dst 9697 bra t_resdnrm 9698 9699######################################################################### 9700# smod(): computes the fp MOD of the input values X,Y. # 9701# srem(): computes the fp (IEEE) REM of the input values X,Y. # 9702# # 9703# INPUT *************************************************************** # 9704# a0 = pointer to extended precision input X # 9705# a1 = pointer to extended precision input Y # 9706# d0 = round precision,mode # 9707# # 9708# The input operands X and Y can be either normalized or # 9709# denormalized. # 9710# # 9711# OUTPUT ************************************************************** # 9712# fp0 = FREM(X,Y) or FMOD(X,Y) # 9713# # 9714# ALGORITHM *********************************************************** # 9715# # 9716# Step 1. Save and strip signs of X and Y: signX := sign(X), # 9717# signY := sign(Y), X := |X|, Y := |Y|, # 9718# signQ := signX EOR signY. Record whether MOD or REM # 9719# is requested. # 9720# # 9721# Step 2. Set L := expo(X)-expo(Y), k := 0, Q := 0. # 9722# If (L < 0) then # 9723# R := X, go to Step 4. # 9724# else # 9725# R := 2^(-L)X, j := L. # 9726# endif # 9727# # 9728# Step 3. Perform MOD(X,Y) # 9729# 3.1 If R = Y, go to Step 9. # 9730# 3.2 If R > Y, then { R := R - Y, Q := Q + 1} # 9731# 3.3 If j = 0, go to Step 4. # 9732# 3.4 k := k + 1, j := j - 1, Q := 2Q, R := 2R. Go to # 9733# Step 3.1. # 9734# # 9735# Step 4. At this point, R = X - QY = MOD(X,Y). Set # 9736# Last_Subtract := false (used in Step 7 below). 
If # 9737# MOD is requested, go to Step 6. # 9738# # 9739# Step 5. R = MOD(X,Y), but REM(X,Y) is requested. # 9740# 5.1 If R < Y/2, then R = MOD(X,Y) = REM(X,Y). Go to # 9741# Step 6. # 9742# 5.2 If R > Y/2, then { set Last_Subtract := true, # 9743# Q := Q + 1, Y := signY*Y }. Go to Step 6. # 9744# 5.3 This is the tricky case of R = Y/2. If Q is odd, # 9745# then { Q := Q + 1, signX := -signX }. # 9746# # 9747# Step 6. R := signX*R. # 9748# # 9749# Step 7. If Last_Subtract = true, R := R - Y. # 9750# # 9751# Step 8. Return signQ, last 7 bits of Q, and R as required. # 9752# # 9753# Step 9. At this point, R = 2^(-j)*X - Q Y = Y. Thus, # 9754# X = 2^(j)*(Q+1)Y. set Q := 2^(j)*(Q+1), # 9755# R := 0. Return signQ, last 7 bits of Q, and R. # 9756# # 9757######################################################################### 9758 9759 set Mod_Flag,L_SCR3 9760 set Sc_Flag,L_SCR3+1 9761 9762 set SignY,L_SCR2 9763 set SignX,L_SCR2+2 9764 set SignQ,L_SCR3+2 9765 9766 set Y,FP_SCR0 9767 set Y_Hi,Y+4 9768 set Y_Lo,Y+8 9769 9770 set R,FP_SCR1 9771 set R_Hi,R+4 9772 set R_Lo,R+8 9773 9774Scale: 9775 long 0x00010000,0x80000000,0x00000000,0x00000000 9776 9777 global smod 9778smod: 9779 clr.b FPSR_QBYTE(%a6) 9780 mov.l %d0,-(%sp) # save ctrl bits 9781 clr.b Mod_Flag(%a6) 9782 bra.b Mod_Rem 9783 9784 global srem 9785srem: 9786 clr.b FPSR_QBYTE(%a6) 9787 mov.l %d0,-(%sp) # save ctrl bits 9788 mov.b &0x1,Mod_Flag(%a6) 9789 9790Mod_Rem: 9791#..Save sign of X and Y 9792 movm.l &0x3f00,-(%sp) # save data registers 9793 mov.w SRC_EX(%a0),%d3 9794 mov.w %d3,SignY(%a6) 9795 and.l &0x00007FFF,%d3 # Y := |Y| 9796 9797# 9798 mov.l SRC_HI(%a0),%d4 9799 mov.l SRC_LO(%a0),%d5 # (D3,D4,D5) is |Y| 9800 9801 tst.l %d3 9802 bne.b Y_Normal 9803 9804 mov.l &0x00003FFE,%d3 # $3FFD + 1 9805 tst.l %d4 9806 bne.b HiY_not0 9807 9808HiY_0: 9809 mov.l %d5,%d4 9810 clr.l %d5 9811 sub.l &32,%d3 9812 clr.l %d6 9813 bfffo %d4{&0:&32},%d6 9814 lsl.l %d6,%d4 9815 sub.l %d6,%d3 # (D3,D4,D5) is normalized 9816# ...with bias $7FFD 9817 bra.b Chk_X 9818 9819HiY_not0: 9820 clr.l %d6 9821 bfffo %d4{&0:&32},%d6 9822 sub.l %d6,%d3 9823 lsl.l %d6,%d4 9824 mov.l %d5,%d7 # a copy of D5 9825 lsl.l %d6,%d5 9826 neg.l %d6 9827 add.l &32,%d6 9828 lsr.l %d6,%d7 9829 or.l %d7,%d4 # (D3,D4,D5) normalized 9830# ...with bias $7FFD 9831 bra.b Chk_X 9832 9833Y_Normal: 9834 add.l &0x00003FFE,%d3 # (D3,D4,D5) normalized 9835# ...with bias $7FFD 9836 9837Chk_X: 9838 mov.w DST_EX(%a1),%d0 9839 mov.w %d0,SignX(%a6) 9840 mov.w SignY(%a6),%d1 9841 eor.l %d0,%d1 9842 and.l &0x00008000,%d1 9843 mov.w %d1,SignQ(%a6) # sign(Q) obtained 9844 and.l &0x00007FFF,%d0 9845 mov.l DST_HI(%a1),%d1 9846 mov.l DST_LO(%a1),%d2 # (D0,D1,D2) is |X| 9847 tst.l %d0 9848 bne.b X_Normal 9849 mov.l &0x00003FFE,%d0 9850 tst.l %d1 9851 bne.b HiX_not0 9852 9853HiX_0: 9854 mov.l %d2,%d1 9855 clr.l %d2 9856 sub.l &32,%d0 9857 clr.l %d6 9858 bfffo %d1{&0:&32},%d6 9859 lsl.l %d6,%d1 9860 sub.l %d6,%d0 # (D0,D1,D2) is normalized 9861# ...with bias $7FFD 9862 bra.b Init 9863 9864HiX_not0: 9865 clr.l %d6 9866 bfffo %d1{&0:&32},%d6 9867 sub.l %d6,%d0 9868 lsl.l %d6,%d1 9869 mov.l %d2,%d7 # a copy of D2 9870 lsl.l %d6,%d2 9871 neg.l %d6 9872 add.l &32,%d6 9873 lsr.l %d6,%d7 9874 or.l %d7,%d1 # (D0,D1,D2) normalized 9875# ...with bias $7FFD 9876 bra.b Init 9877 9878X_Normal: 9879 add.l &0x00003FFE,%d0 # (D0,D1,D2) normalized 9880# ...with bias $7FFD 9881 9882Init: 9883# 9884 mov.l %d3,L_SCR1(%a6) # save biased exp(Y) 9885 mov.l %d0,-(%sp) # save biased exp(X) 9886 sub.l %d3,%d0 # L := expo(X)-expo(Y) 
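#
# For illustration only, Steps 2 and 3 above sketched in C on doubles
# (the code below works on the raw mantissas instead; helper name made
# up; assumes x >= 0, y > 0, both finite, and keeps the whole quotient
# rather than just its last 7 bits):
#
#	#include <math.h>
#
#	double mod_by_subtract(double x, double y, unsigned *q)
#	{
#	    int ex, ey;
#	    frexp(x, &ex);  frexp(y, &ey);
#	    int l = ex - ey;              /* L = expo(X) - expo(Y)      */
#	    *q = 0;
#	    if (l < 0) return x;          /* already reduced            */
#
#	    double r = ldexp(x, -l);      /* R = 2^(-L) * X, R < 2Y     */
#	    for (int j = l; ; j--) {
#	        if (r >= y) { r -= y; (*q)++; }
#	        if (j == 0) break;
#	        r *= 2.0;  *q <<= 1;
#	    }
#	    return r;                     /* R = MOD(X,Y) = X - Q*Y     */
#	}
#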
9887 9888 clr.l %d6 # D6 := carry <- 0 9889 clr.l %d3 # D3 is Q 9890 mov.l &0,%a1 # A1 is k; j+k=L, Q=0 9891 9892#..(Carry,D1,D2) is R 9893 tst.l %d0 9894 bge.b Mod_Loop_pre 9895 9896#..expo(X) < expo(Y). Thus X = mod(X,Y) 9897# 9898 mov.l (%sp)+,%d0 # restore d0 9899 bra.w Get_Mod 9900 9901Mod_Loop_pre: 9902 addq.l &0x4,%sp # erase exp(X) 9903#..At this point R = 2^(-L)X; Q = 0; k = 0; and k+j = L 9904Mod_Loop: 9905 tst.l %d6 # test carry bit 9906 bgt.b R_GT_Y 9907 9908#..At this point carry = 0, R = (D1,D2), Y = (D4,D5) 9909 cmp.l %d1,%d4 # compare hi(R) and hi(Y) 9910 bne.b R_NE_Y 9911 cmp.l %d2,%d5 # compare lo(R) and lo(Y) 9912 bne.b R_NE_Y 9913 9914#..At this point, R = Y 9915 bra.w Rem_is_0 9916 9917R_NE_Y: 9918#..use the borrow of the previous compare 9919 bcs.b R_LT_Y # borrow is set iff R < Y 9920 9921R_GT_Y: 9922#..If Carry is set, then Y < (Carry,D1,D2) < 2Y. Otherwise, Carry = 0 9923#..and Y < (D1,D2) < 2Y. Either way, perform R - Y 9924 sub.l %d5,%d2 # lo(R) - lo(Y) 9925 subx.l %d4,%d1 # hi(R) - hi(Y) 9926 clr.l %d6 # clear carry 9927 addq.l &1,%d3 # Q := Q + 1 9928 9929R_LT_Y: 9930#..At this point, Carry=0, R < Y. R = 2^(k-L)X - QY; k+j = L; j >= 0. 9931 tst.l %d0 # see if j = 0. 9932 beq.b PostLoop 9933 9934 add.l %d3,%d3 # Q := 2Q 9935 add.l %d2,%d2 # lo(R) = 2lo(R) 9936 roxl.l &1,%d1 # hi(R) = 2hi(R) + carry 9937 scs %d6 # set Carry if 2(R) overflows 9938 addq.l &1,%a1 # k := k+1 9939 subq.l &1,%d0 # j := j - 1 9940#..At this point, R=(Carry,D1,D2) = 2^(k-L)X - QY, j+k=L, j >= 0, R < 2Y. 9941 9942 bra.b Mod_Loop 9943 9944PostLoop: 9945#..k = L, j = 0, Carry = 0, R = (D1,D2) = X - QY, R < Y. 9946 9947#..normalize R. 9948 mov.l L_SCR1(%a6),%d0 # new biased expo of R 9949 tst.l %d1 9950 bne.b HiR_not0 9951 9952HiR_0: 9953 mov.l %d2,%d1 9954 clr.l %d2 9955 sub.l &32,%d0 9956 clr.l %d6 9957 bfffo %d1{&0:&32},%d6 9958 lsl.l %d6,%d1 9959 sub.l %d6,%d0 # (D0,D1,D2) is normalized 9960# ...with bias $7FFD 9961 bra.b Get_Mod 9962 9963HiR_not0: 9964 clr.l %d6 9965 bfffo %d1{&0:&32},%d6 9966 bmi.b Get_Mod # already normalized 9967 sub.l %d6,%d0 9968 lsl.l %d6,%d1 9969 mov.l %d2,%d7 # a copy of D2 9970 lsl.l %d6,%d2 9971 neg.l %d6 9972 add.l &32,%d6 9973 lsr.l %d6,%d7 9974 or.l %d7,%d1 # (D0,D1,D2) normalized 9975 9976# 9977Get_Mod: 9978 cmp.l %d0,&0x000041FE 9979 bge.b No_Scale 9980Do_Scale: 9981 mov.w %d0,R(%a6) 9982 mov.l %d1,R_Hi(%a6) 9983 mov.l %d2,R_Lo(%a6) 9984 mov.l L_SCR1(%a6),%d6 9985 mov.w %d6,Y(%a6) 9986 mov.l %d4,Y_Hi(%a6) 9987 mov.l %d5,Y_Lo(%a6) 9988 fmov.x R(%a6),%fp0 # no exception 9989 mov.b &1,Sc_Flag(%a6) 9990 bra.b ModOrRem 9991No_Scale: 9992 mov.l %d1,R_Hi(%a6) 9993 mov.l %d2,R_Lo(%a6) 9994 sub.l &0x3FFE,%d0 9995 mov.w %d0,R(%a6) 9996 mov.l L_SCR1(%a6),%d6 9997 sub.l &0x3FFE,%d6 9998 mov.l %d6,L_SCR1(%a6) 9999 fmov.x R(%a6),%fp0 10000 mov.w %d6,Y(%a6) 10001 mov.l %d4,Y_Hi(%a6) 10002 mov.l %d5,Y_Lo(%a6) 10003 clr.b Sc_Flag(%a6) 10004 10005# 10006ModOrRem: 10007 tst.b Mod_Flag(%a6) 10008 beq.b Fix_Sign 10009 10010 mov.l L_SCR1(%a6),%d6 # new biased expo(Y) 10011 subq.l &1,%d6 # biased expo(Y/2) 10012 cmp.l %d0,%d6 10013 blt.b Fix_Sign 10014 bgt.b Last_Sub 10015 10016 cmp.l %d1,%d4 10017 bne.b Not_EQ 10018 cmp.l %d2,%d5 10019 bne.b Not_EQ 10020 bra.w Tie_Case 10021 10022Not_EQ: 10023 bcs.b Fix_Sign 10024 10025Last_Sub: 10026# 10027 fsub.x Y(%a6),%fp0 # no exceptions 10028 addq.l &1,%d3 # Q := Q + 1 10029 10030# 10031Fix_Sign: 10032#..Get sign of X 10033 mov.w SignX(%a6),%d6 10034 bge.b Get_Q 10035 fneg.x %fp0 10036 10037#..Get Q 10038# 10039Get_Q: 10040 clr.l %d6 
10041 mov.w SignQ(%a6),%d6 # D6 is sign(Q) 10042 mov.l &8,%d7 10043 lsr.l %d7,%d6 10044 and.l &0x0000007F,%d3 # 7 bits of Q 10045 or.l %d6,%d3 # sign and bits of Q 10046# swap %d3 10047# fmov.l %fpsr,%d6 10048# and.l &0xFF00FFFF,%d6 10049# or.l %d3,%d6 10050# fmov.l %d6,%fpsr # put Q in fpsr 10051 mov.b %d3,FPSR_QBYTE(%a6) # put Q in fpsr 10052 10053# 10054Restore: 10055 movm.l (%sp)+,&0xfc # {%d2-%d7} 10056 mov.l (%sp)+,%d0 10057 fmov.l %d0,%fpcr 10058 tst.b Sc_Flag(%a6) 10059 beq.b Finish 10060 mov.b &FMUL_OP,%d1 # last inst is MUL 10061 fmul.x Scale(%pc),%fp0 # may cause underflow 10062 bra t_catch2 10063# the '040 package did this apparently to see if the dst operand for the 10064# preceding fmul was a denorm. but, it better not have been since the 10065# algorithm just got done playing with fp0 and expected no exceptions 10066# as a result. trust me... 10067# bra t_avoid_unsupp # check for denorm as a 10068# ;result of the scaling 10069 10070Finish: 10071 mov.b &FMOV_OP,%d1 # last inst is MOVE 10072 fmov.x %fp0,%fp0 # capture exceptions & round 10073 bra t_catch2 10074 10075Rem_is_0: 10076#..R = 2^(-j)X - Q Y = Y, thus R = 0 and quotient = 2^j (Q+1) 10077 addq.l &1,%d3 10078 cmp.l %d0,&8 # D0 is j 10079 bge.b Q_Big 10080 10081 lsl.l %d0,%d3 10082 bra.b Set_R_0 10083 10084Q_Big: 10085 clr.l %d3 10086 10087Set_R_0: 10088 fmov.s &0x00000000,%fp0 10089 clr.b Sc_Flag(%a6) 10090 bra.w Fix_Sign 10091 10092Tie_Case: 10093#..Check parity of Q 10094 mov.l %d3,%d6 10095 and.l &0x00000001,%d6 10096 tst.l %d6 10097 beq.w Fix_Sign # Q is even 10098 10099#..Q is odd, Q := Q + 1, signX := -signX 10100 addq.l &1,%d3 10101 mov.w SignX(%a6),%d6 10102 eor.l &0x00008000,%d6 10103 mov.w %d6,SignX(%a6) 10104 bra.w Fix_Sign 10105 10106qnan: long 0x7fff0000, 0xffffffff, 0xffffffff 10107 10108######################################################################### 10109# XDEF **************************************************************** # 10110# t_dz(): Handle DZ exception during transcendental emulation. # 10111# Sets N bit according to sign of source operand. # 10112# t_dz2(): Handle DZ exception during transcendental emulation. # 10113# Sets N bit always. # 10114# # 10115# XREF **************************************************************** # 10116# None # 10117# # 10118# INPUT *************************************************************** # 10119# a0 = pointer to source operand # 10120# # 10121# OUTPUT ************************************************************** # 10122# fp0 = default result # 10123# # 10124# ALGORITHM *********************************************************** # 10125# - Store properly signed INF into fp0. # 10126# - Set FPSR exception status dz bit, ccode inf bit, and # 10127# accrued dz bit. # 10128# # 10129######################################################################### 10130 10131 global t_dz 10132t_dz: 10133 tst.b SRC_EX(%a0) # no; is src negative? 
10134 bmi.b t_dz2 # yes 10135 10136dz_pinf: 10137 fmov.s &0x7f800000,%fp0 # return +INF in fp0 10138 ori.l &dzinf_mask,USER_FPSR(%a6) # set I/DZ/ADZ 10139 rts 10140 10141 global t_dz2 10142t_dz2: 10143 fmov.s &0xff800000,%fp0 # return -INF in fp0 10144 ori.l &dzinf_mask+neg_mask,USER_FPSR(%a6) # set N/I/DZ/ADZ 10145 rts 10146 10147################################################################# 10148# OPERR exception: # 10149# - set FPSR exception status operr bit, condition code # 10150# nan bit; Store default NAN into fp0 # 10151################################################################# 10152 global t_operr 10153t_operr: 10154 ori.l &opnan_mask,USER_FPSR(%a6) # set NaN/OPERR/AIOP 10155 fmovm.x qnan(%pc),&0x80 # return default NAN in fp0 10156 rts 10157 10158################################################################# 10159# Extended DENORM: # 10160# - For all functions that have a denormalized input and # 10161# that f(x)=x, this is the entry point. # 10162# - we only return the EXOP here if either underflow or # 10163# inexact is enabled. # 10164################################################################# 10165 10166# Entry point for scale w/ extended denorm. The function does 10167# NOT set INEX2/AUNFL/AINEX. 10168 global t_resdnrm 10169t_resdnrm: 10170 ori.l &unfl_mask,USER_FPSR(%a6) # set UNFL 10171 bra.b xdnrm_con 10172 10173 global t_extdnrm 10174t_extdnrm: 10175 ori.l &unfinx_mask,USER_FPSR(%a6) # set UNFL/INEX2/AUNFL/AINEX 10176 10177xdnrm_con: 10178 mov.l %a0,%a1 # make copy of src ptr 10179 mov.l %d0,%d1 # make copy of rnd prec,mode 10180 andi.b &0xc0,%d1 # extended precision? 10181 bne.b xdnrm_sd # no 10182 10183# result precision is extended. 10184 tst.b LOCAL_EX(%a0) # is denorm negative? 10185 bpl.b xdnrm_exit # no 10186 10187 bset &neg_bit,FPSR_CC(%a6) # yes; set 'N' ccode bit 10188 bra.b xdnrm_exit 10189 10190# result precision is single or double 10191xdnrm_sd: 10192 mov.l %a1,-(%sp) 10193 tst.b LOCAL_EX(%a0) # is denorm pos or neg? 10194 smi.b %d1 # set d0 accordingly 10195 bsr.l unf_sub 10196 mov.l (%sp)+,%a1 10197xdnrm_exit: 10198 fmovm.x (%a0),&0x80 # return default result in fp0 10199 10200 mov.b FPCR_ENABLE(%a6),%d0 10201 andi.b &0x0a,%d0 # is UNFL or INEX enabled? 10202 bne.b xdnrm_ena # yes 10203 rts 10204 10205################ 10206# unfl enabled # 10207################ 10208# we have a DENORM that needs to be converted into an EXOP. 10209# so, normalize the mantissa, add 0x6000 to the new exponent, 10210# and return the result in fp1. 10211xdnrm_ena: 10212 mov.w LOCAL_EX(%a1),FP_SCR0_EX(%a6) 10213 mov.l LOCAL_HI(%a1),FP_SCR0_HI(%a6) 10214 mov.l LOCAL_LO(%a1),FP_SCR0_LO(%a6) 10215 10216 lea FP_SCR0(%a6),%a0 10217 bsr.l norm # normalize mantissa 10218 addi.l &0x6000,%d0 # add extra bias 10219 andi.w &0x8000,FP_SCR0_EX(%a6) # keep old sign 10220 or.w %d0,FP_SCR0_EX(%a6) # insert new exponent 10221 10222 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1 10223 rts 10224 10225################################################################# 10226# UNFL exception: # 10227# - This routine is for cases where even an EXOP isn't # 10228# large enough to hold the range of this result. # 10229# In such a case, the EXOP equals zero. # 10230# - Return the default result to the proper precision # 10231# with the sign of this result being the same as that # 10232# of the src operand. # 10233# - t_unfl2() is provided to force the result sign to # 10234# positive which is the desired result for fetox(). 
# 10235################################################################# 10236 global t_unfl 10237t_unfl: 10238 ori.l &unfinx_mask,USER_FPSR(%a6) # set UNFL/INEX2/AUNFL/AINEX 10239 10240 tst.b (%a0) # is result pos or neg? 10241 smi.b %d1 # set d1 accordingly 10242 bsr.l unf_sub # calc default unfl result 10243 fmovm.x (%a0),&0x80 # return default result in fp0 10244 10245 fmov.s &0x00000000,%fp1 # return EXOP in fp1 10246 rts 10247 10248# t_unfl2 ALWAYS tells unf_sub to create a positive result 10249 global t_unfl2 10250t_unfl2: 10251 ori.l &unfinx_mask,USER_FPSR(%a6) # set UNFL/INEX2/AUNFL/AINEX 10252 10253 sf.b %d1 # set d0 to represent positive 10254 bsr.l unf_sub # calc default unfl result 10255 fmovm.x (%a0),&0x80 # return default result in fp0 10256 10257 fmov.s &0x0000000,%fp1 # return EXOP in fp1 10258 rts 10259 10260################################################################# 10261# OVFL exception: # 10262# - This routine is for cases where even an EXOP isn't # 10263# large enough to hold the range of this result. # 10264# - Return the default result to the proper precision # 10265# with the sign of this result being the same as that # 10266# of the src operand. # 10267# - t_ovfl2() is provided to force the result sign to # 10268# positive which is the desired result for fcosh(). # 10269# - t_ovfl_sc() is provided for scale() which only sets # 10270# the inexact bits if the number is inexact for the # 10271# precision indicated. # 10272################################################################# 10273 10274 global t_ovfl_sc 10275t_ovfl_sc: 10276 ori.l &ovfl_inx_mask,USER_FPSR(%a6) # set OVFL/AOVFL/AINEX 10277 10278 mov.b %d0,%d1 # fetch rnd mode/prec 10279 andi.b &0xc0,%d1 # extract rnd prec 10280 beq.b ovfl_work # prec is extended 10281 10282 tst.b LOCAL_HI(%a0) # is dst a DENORM? 10283 bmi.b ovfl_sc_norm # no 10284 10285# dst op is a DENORM. we have to normalize the mantissa to see if the 10286# result would be inexact for the given precision. make a copy of the 10287# dst so we don't screw up the version passed to us. 10288 mov.w LOCAL_EX(%a0),FP_SCR0_EX(%a6) 10289 mov.l LOCAL_HI(%a0),FP_SCR0_HI(%a6) 10290 mov.l LOCAL_LO(%a0),FP_SCR0_LO(%a6) 10291 lea FP_SCR0(%a6),%a0 # pass ptr to FP_SCR0 10292 movm.l &0xc080,-(%sp) # save d0-d1/a0 10293 bsr.l norm # normalize mantissa 10294 movm.l (%sp)+,&0x0103 # restore d0-d1/a0 10295 10296ovfl_sc_norm: 10297 cmpi.b %d1,&0x40 # is prec dbl? 10298 bne.b ovfl_sc_dbl # no; sgl 10299ovfl_sc_sgl: 10300 tst.l LOCAL_LO(%a0) # is lo lw of sgl set? 10301 bne.b ovfl_sc_inx # yes 10302 tst.b 3+LOCAL_HI(%a0) # is lo byte of hi lw set? 10303 bne.b ovfl_sc_inx # yes 10304 bra.b ovfl_work # don't set INEX2 10305ovfl_sc_dbl: 10306 mov.l LOCAL_LO(%a0),%d1 # are any of lo 11 bits of 10307 andi.l &0x7ff,%d1 # dbl mantissa set? 10308 beq.b ovfl_work # no; don't set INEX2 10309ovfl_sc_inx: 10310 ori.l &inex2_mask,USER_FPSR(%a6) # set INEX2 10311 bra.b ovfl_work # continue 10312 10313 global t_ovfl 10314t_ovfl: 10315 ori.l &ovfinx_mask,USER_FPSR(%a6) # set OVFL/INEX2/AOVFL/AINEX 10316 10317ovfl_work: 10318 tst.b LOCAL_EX(%a0) # what is the sign? 
10319 smi.b %d1 # set d1 accordingly 10320 bsr.l ovf_res # calc default ovfl result 10321 mov.b %d0,FPSR_CC(%a6) # insert new ccodes 10322 fmovm.x (%a0),&0x80 # return default result in fp0 10323 10324 fmov.s &0x00000000,%fp1 # return EXOP in fp1 10325 rts 10326 10327# t_ovfl2 ALWAYS tells ovf_res to create a positive result 10328 global t_ovfl2 10329t_ovfl2: 10330 ori.l &ovfinx_mask,USER_FPSR(%a6) # set OVFL/INEX2/AOVFL/AINEX 10331 10332 sf.b %d1 # clear sign flag for positive 10333 bsr.l ovf_res # calc default ovfl result 10334 mov.b %d0,FPSR_CC(%a6) # insert new ccodes 10335 fmovm.x (%a0),&0x80 # return default result in fp0 10336 10337 fmov.s &0x00000000,%fp1 # return EXOP in fp1 10338 rts 10339 10340################################################################# 10341# t_catch(): # 10342# - the last operation of a transcendental emulation # 10343# routine may have caused an underflow or overflow. # 10344# we find out if this occurred by doing an fsave and # 10345# checking the exception bit. if one did occur, then we # 10346# jump to fgen_except() which creates the default # 10347# result and EXOP for us. # 10348################################################################# 10349 global t_catch 10350t_catch: 10351 10352 fsave -(%sp) 10353 tst.b 0x2(%sp) 10354 bmi.b catch 10355 add.l &0xc,%sp 10356 10357################################################################# 10358# INEX2 exception: # 10359# - The inex2 and ainex bits are set. # 10360################################################################# 10361 global t_inx2 10362t_inx2: 10363 fblt.w t_minx2 10364 fbeq.w inx2_zero 10365 10366 global t_pinx2 10367t_pinx2: 10368 ori.w &inx2a_mask,2+USER_FPSR(%a6) # set INEX2/AINEX 10369 rts 10370 10371 global t_minx2 10372t_minx2: 10373 ori.l &inx2a_mask+neg_mask,USER_FPSR(%a6) # set N/INEX2/AINEX 10374 rts 10375 10376inx2_zero: 10377 mov.b &z_bmask,FPSR_CC(%a6) 10378 ori.w &inx2a_mask,2+USER_FPSR(%a6) # set INEX2/AINEX 10379 rts 10380 10381# an underflow or overflow exception occurred. 10382# we must set INEX/AINEX since the fmul/fdiv/fmov emulation may not! 
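#
# In outline, catch/catch2 below are equivalent to the following
# illustrative pseudo-C (names are descriptive, not the package's):
#
#      fpsr_except |= INEX2 | AINEX;   /* catch only; catch2 skips this    */
#      fgen_except();                  /* build default result and EXOP    */
#      discard_fsave_frame();          /* pop the 12-byte frame, then rts  */
#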
10383catch: 10384 ori.w &inx2a_mask,FPSR_EXCEPT(%a6) 10385catch2: 10386 bsr.l fgen_except 10387 add.l &0xc,%sp 10388 rts 10389 10390 global t_catch2 10391t_catch2: 10392 10393 fsave -(%sp) 10394 10395 tst.b 0x2(%sp) 10396 bmi.b catch2 10397 add.l &0xc,%sp 10398 10399 fmov.l %fpsr,%d0 10400 or.l %d0,USER_FPSR(%a6) 10401 10402 rts 10403 10404######################################################################### 10405 10406######################################################################### 10407# unf_res(): underflow default result calculation for transcendentals # 10408# # 10409# INPUT: # 10410# d0 : rnd mode,precision # 10411# d1.b : sign bit of result ('11111111 = (-) ; '00000000 = (+)) # 10412# OUTPUT: # 10413# a0 : points to result (in instruction memory) # 10414######################################################################### 10415unf_sub: 10416 ori.l &unfinx_mask,USER_FPSR(%a6) 10417 10418 andi.w &0x10,%d1 # keep sign bit in 4th spot 10419 10420 lsr.b &0x4,%d0 # shift rnd prec,mode to lo bits 10421 andi.b &0xf,%d0 # strip hi rnd mode bit 10422 or.b %d1,%d0 # concat {sgn,mode,prec} 10423 10424 mov.l %d0,%d1 # make a copy 10425 lsl.b &0x1,%d1 # mult index 2 by 2 10426 10427 mov.b (tbl_unf_cc.b,%pc,%d0.w*1),FPSR_CC(%a6) # insert ccode bits 10428 lea (tbl_unf_result.b,%pc,%d1.w*8),%a0 # grab result ptr 10429 rts 10430 10431tbl_unf_cc: 10432 byte 0x4, 0x4, 0x4, 0x0 10433 byte 0x4, 0x4, 0x4, 0x0 10434 byte 0x4, 0x4, 0x4, 0x0 10435 byte 0x0, 0x0, 0x0, 0x0 10436 byte 0x8+0x4, 0x8+0x4, 0x8, 0x8+0x4 10437 byte 0x8+0x4, 0x8+0x4, 0x8, 0x8+0x4 10438 byte 0x8+0x4, 0x8+0x4, 0x8, 0x8+0x4 10439 10440tbl_unf_result: 10441 long 0x00000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext 10442 long 0x00000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext 10443 long 0x00000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext 10444 long 0x00000000, 0x00000000, 0x00000001, 0x0 # MIN; ext 10445 10446 long 0x3f810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl 10447 long 0x3f810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl 10448 long 0x3f810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl 10449 long 0x3f810000, 0x00000100, 0x00000000, 0x0 # MIN; sgl 10450 10451 long 0x3c010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl 10452 long 0x3c010000, 0x00000000, 0x00000000, 0x0 # ZER0;dbl 10453 long 0x3c010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl 10454 long 0x3c010000, 0x00000000, 0x00000800, 0x0 # MIN; dbl 10455 10456 long 0x0,0x0,0x0,0x0 10457 long 0x0,0x0,0x0,0x0 10458 long 0x0,0x0,0x0,0x0 10459 long 0x0,0x0,0x0,0x0 10460 10461 long 0x80000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext 10462 long 0x80000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext 10463 long 0x80000000, 0x00000000, 0x00000001, 0x0 # MIN; ext 10464 long 0x80000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext 10465 10466 long 0xbf810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl 10467 long 0xbf810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl 10468 long 0xbf810000, 0x00000100, 0x00000000, 0x0 # MIN; sgl 10469 long 0xbf810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl 10470 10471 long 0xbc010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl 10472 long 0xbc010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl 10473 long 0xbc010000, 0x00000000, 0x00000800, 0x0 # MIN; dbl 10474 long 0xbc010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl 10475 10476############################################################ 10477 10478######################################################################### 10479# src_zero(): Return signed zero according to sign of src operand. 
# 10480######################################################################### 10481 global src_zero 10482src_zero: 10483 tst.b SRC_EX(%a0) # get sign of src operand 10484 bmi.b ld_mzero # if neg, load neg zero 10485 10486# 10487# ld_pzero(): return a positive zero. 10488# 10489 global ld_pzero 10490ld_pzero: 10491 fmov.s &0x00000000,%fp0 # load +0 10492 mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit 10493 rts 10494 10495# ld_mzero(): return a negative zero. 10496 global ld_mzero 10497ld_mzero: 10498 fmov.s &0x80000000,%fp0 # load -0 10499 mov.b &neg_bmask+z_bmask,FPSR_CC(%a6) # set 'N','Z' ccode bits 10500 rts 10501 10502######################################################################### 10503# dst_zero(): Return signed zero according to sign of dst operand. # 10504######################################################################### 10505 global dst_zero 10506dst_zero: 10507 tst.b DST_EX(%a1) # get sign of dst operand 10508 bmi.b ld_mzero # if neg, load neg zero 10509 bra.b ld_pzero # load positive zero 10510 10511######################################################################### 10512# src_inf(): Return signed inf according to sign of src operand. # 10513######################################################################### 10514 global src_inf 10515src_inf: 10516 tst.b SRC_EX(%a0) # get sign of src operand 10517 bmi.b ld_minf # if negative branch 10518 10519# 10520# ld_pinf(): return a positive infinity. 10521# 10522 global ld_pinf 10523ld_pinf: 10524 fmov.s &0x7f800000,%fp0 # load +INF 10525 mov.b &inf_bmask,FPSR_CC(%a6) # set 'INF' ccode bit 10526 rts 10527 10528# 10529# ld_minf():return a negative infinity. 10530# 10531 global ld_minf 10532ld_minf: 10533 fmov.s &0xff800000,%fp0 # load -INF 10534 mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits 10535 rts 10536 10537######################################################################### 10538# dst_inf(): Return signed inf according to sign of dst operand. # 10539######################################################################### 10540 global dst_inf 10541dst_inf: 10542 tst.b DST_EX(%a1) # get sign of dst operand 10543 bmi.b ld_minf # if negative branch 10544 bra.b ld_pinf 10545 10546 global szr_inf 10547################################################################# 10548# szr_inf(): Return +ZERO for a negative src operand or # 10549# +INF for a positive src operand. # 10550# Routine used for fetox, ftwotox, and ftentox. # 10551################################################################# 10552szr_inf: 10553 tst.b SRC_EX(%a0) # check sign of source 10554 bmi.b ld_pzero 10555 bra.b ld_pinf 10556 10557######################################################################### 10558# sopr_inf(): Return +INF for a positive src operand or # 10559# jump to operand error routine for a negative src operand. # 10560# Routine used for flogn, flognp1, flog10, and flog2. # 10561######################################################################### 10562 global sopr_inf 10563sopr_inf: 10564 tst.b SRC_EX(%a0) # check sign of source 10565 bmi.w t_operr 10566 bra.b ld_pinf 10567 10568################################################################# 10569# setoxm1i(): Return minus one for a negative src operand or # 10570# positive infinity for a positive src operand. # 10571# Routine used for fetoxm1. 
# 10572################################################################# 10573 global setoxm1i 10574setoxm1i: 10575 tst.b SRC_EX(%a0) # check sign of source 10576 bmi.b ld_mone 10577 bra.b ld_pinf 10578 10579######################################################################### 10580# src_one(): Return signed one according to sign of src operand. # 10581######################################################################### 10582 global src_one 10583src_one: 10584 tst.b SRC_EX(%a0) # check sign of source 10585 bmi.b ld_mone 10586 10587# 10588# ld_pone(): return positive one. 10589# 10590 global ld_pone 10591ld_pone: 10592 fmov.s &0x3f800000,%fp0 # load +1 10593 clr.b FPSR_CC(%a6) 10594 rts 10595 10596# 10597# ld_mone(): return negative one. 10598# 10599 global ld_mone 10600ld_mone: 10601 fmov.s &0xbf800000,%fp0 # load -1 10602 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit 10603 rts 10604 10605ppiby2: long 0x3fff0000, 0xc90fdaa2, 0x2168c235 10606mpiby2: long 0xbfff0000, 0xc90fdaa2, 0x2168c235 10607 10608################################################################# 10609# spi_2(): Return signed PI/2 according to sign of src operand. # 10610################################################################# 10611 global spi_2 10612spi_2: 10613 tst.b SRC_EX(%a0) # check sign of source 10614 bmi.b ld_mpi2 10615 10616# 10617# ld_ppi2(): return positive PI/2. 10618# 10619 global ld_ppi2 10620ld_ppi2: 10621 fmov.l %d0,%fpcr 10622 fmov.x ppiby2(%pc),%fp0 # load +pi/2 10623 bra.w t_pinx2 # set INEX2 10624 10625# 10626# ld_mpi2(): return negative PI/2. 10627# 10628 global ld_mpi2 10629ld_mpi2: 10630 fmov.l %d0,%fpcr 10631 fmov.x mpiby2(%pc),%fp0 # load -pi/2 10632 bra.w t_minx2 # set INEX2 10633 10634#################################################### 10635# The following routines give support for fsincos. # 10636#################################################### 10637 10638# 10639# ssincosz(): When the src operand is ZERO, store a one in the 10640# cosine register and return a ZERO in fp0 w/ the same sign 10641# as the src operand. 10642# 10643 global ssincosz 10644ssincosz: 10645 fmov.s &0x3f800000,%fp1 10646 tst.b SRC_EX(%a0) # test sign 10647 bpl.b sincoszp 10648 fmov.s &0x80000000,%fp0 # return sin result in fp0 10649 mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) 10650 bra.b sto_cos # store cosine result 10651sincoszp: 10652 fmov.s &0x00000000,%fp0 # return sin result in fp0 10653 mov.b &z_bmask,FPSR_CC(%a6) 10654 bra.b sto_cos # store cosine result 10655 10656# 10657# ssincosi(): When the src operand is INF, store a QNAN in the cosine 10658# register and jump to the operand error routine for negative 10659# src operands. 10660# 10661 global ssincosi 10662ssincosi: 10663 fmov.x qnan(%pc),%fp1 # load NAN 10664 bsr.l sto_cos # store cosine result 10665 bra.w t_operr 10666 10667# 10668# ssincosqnan(): When the src operand is a QNAN, store the QNAN in the cosine 10669# register and branch to the src QNAN routine. 10670# 10671 global ssincosqnan 10672ssincosqnan: 10673 fmov.x LOCAL_EX(%a0),%fp1 10674 bsr.l sto_cos 10675 bra.w src_qnan 10676 10677# 10678# ssincossnan(): When the src operand is an SNAN, store the SNAN w/ the SNAN bit set 10679# in the cosine register and branch to the src SNAN routine. 
10680# 10681 global ssincossnan 10682ssincossnan: 10683 fmov.x LOCAL_EX(%a0),%fp1 10684 bsr.l sto_cos 10685 bra.w src_snan 10686 10687######################################################################## 10688 10689######################################################################### 10690# sto_cos(): store fp1 to the fpreg designated by the CMDREG dst field. # 10691# fp1 holds the result of the cosine portion of ssincos(). # 10692# the value in fp1 will not take any exceptions when moved. # 10693# INPUT: # 10694# fp1 : fp value to store # 10695# MODIFIED: # 10696# d0 # 10697######################################################################### 10698 global sto_cos 10699sto_cos: 10700 mov.b 1+EXC_CMDREG(%a6),%d0 10701 andi.w &0x7,%d0 10702 mov.w (tbl_sto_cos.b,%pc,%d0.w*2),%d0 10703 jmp (tbl_sto_cos.b,%pc,%d0.w*1) 10704 10705tbl_sto_cos: 10706 short sto_cos_0 - tbl_sto_cos 10707 short sto_cos_1 - tbl_sto_cos 10708 short sto_cos_2 - tbl_sto_cos 10709 short sto_cos_3 - tbl_sto_cos 10710 short sto_cos_4 - tbl_sto_cos 10711 short sto_cos_5 - tbl_sto_cos 10712 short sto_cos_6 - tbl_sto_cos 10713 short sto_cos_7 - tbl_sto_cos 10714 10715sto_cos_0: 10716 fmovm.x &0x40,EXC_FP0(%a6) 10717 rts 10718sto_cos_1: 10719 fmovm.x &0x40,EXC_FP1(%a6) 10720 rts 10721sto_cos_2: 10722 fmov.x %fp1,%fp2 10723 rts 10724sto_cos_3: 10725 fmov.x %fp1,%fp3 10726 rts 10727sto_cos_4: 10728 fmov.x %fp1,%fp4 10729 rts 10730sto_cos_5: 10731 fmov.x %fp1,%fp5 10732 rts 10733sto_cos_6: 10734 fmov.x %fp1,%fp6 10735 rts 10736sto_cos_7: 10737 fmov.x %fp1,%fp7 10738 rts 10739 10740################################################################## 10741 global smod_sdnrm 10742 global smod_snorm 10743smod_sdnrm: 10744smod_snorm: 10745 mov.b DTAG(%a6),%d1 10746 beq.l smod 10747 cmpi.b %d1,&ZERO 10748 beq.w smod_zro 10749 cmpi.b %d1,&INF 10750 beq.l t_operr 10751 cmpi.b %d1,&DENORM 10752 beq.l smod 10753 cmpi.b %d1,&SNAN 10754 beq.l dst_snan 10755 bra.l dst_qnan 10756 10757 global smod_szero 10758smod_szero: 10759 mov.b DTAG(%a6),%d1 10760 beq.l t_operr 10761 cmpi.b %d1,&ZERO 10762 beq.l t_operr 10763 cmpi.b %d1,&INF 10764 beq.l t_operr 10765 cmpi.b %d1,&DENORM 10766 beq.l t_operr 10767 cmpi.b %d1,&QNAN 10768 beq.l dst_qnan 10769 bra.l dst_snan 10770 10771 global smod_sinf 10772smod_sinf: 10773 mov.b DTAG(%a6),%d1 10774 beq.l smod_fpn 10775 cmpi.b %d1,&ZERO 10776 beq.l smod_zro 10777 cmpi.b %d1,&INF 10778 beq.l t_operr 10779 cmpi.b %d1,&DENORM 10780 beq.l smod_fpn 10781 cmpi.b %d1,&QNAN 10782 beq.l dst_qnan 10783 bra.l dst_snan 10784 10785smod_zro: 10786srem_zro: 10787 mov.b SRC_EX(%a0),%d1 # get src sign 10788 mov.b DST_EX(%a1),%d0 # get dst sign 10789 eor.b %d0,%d1 # get qbyte sign 10790 andi.b &0x80,%d1 10791 mov.b %d1,FPSR_QBYTE(%a6) 10792 tst.b %d0 10793 bpl.w ld_pzero 10794 bra.w ld_mzero 10795 10796smod_fpn: 10797srem_fpn: 10798 clr.b FPSR_QBYTE(%a6) 10799 mov.l %d0,-(%sp) 10800 mov.b SRC_EX(%a0),%d1 # get src sign 10801 mov.b DST_EX(%a1),%d0 # get dst sign 10802 eor.b %d0,%d1 # get qbyte sign 10803 andi.b &0x80,%d1 10804 mov.b %d1,FPSR_QBYTE(%a6) 10805 cmpi.b DTAG(%a6),&DENORM 10806 bne.b smod_nrm 10807 lea DST(%a1),%a0 10808 mov.l (%sp)+,%d0 10809 bra t_resdnrm 10810smod_nrm: 10811 fmov.l (%sp)+,%fpcr 10812 fmov.x DST(%a1),%fp0 10813 tst.b DST_EX(%a1) 10814 bmi.b smod_nrm_neg 10815 rts 10816 10817smod_nrm_neg: 10818 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode 10819 rts 10820 10821######################################################################### 10822 global srem_snorm 10823 global srem_sdnrm 
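# The srem_* entry points below decode the destination operand tag exactly
# as the smod_* points above do; an illustrative C view (assumed names):
#
#      switch (DTAG) {
#      case NORM: case DENORM: return srem();      /* do the real work     */
#      case ZERO:              return srem_zro();  /* rem(0,Y): signed 0   */
#      case INF:               return t_operr();   /* invalid operation    */
#      case QNAN:              return dst_qnan();
#      default:                return dst_snan();  /* SNAN destination     */
#      }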
10824srem_sdnrm: 10825srem_snorm: 10826 mov.b DTAG(%a6),%d1 10827 beq.l srem 10828 cmpi.b %d1,&ZERO 10829 beq.w srem_zro 10830 cmpi.b %d1,&INF 10831 beq.l t_operr 10832 cmpi.b %d1,&DENORM 10833 beq.l srem 10834 cmpi.b %d1,&QNAN 10835 beq.l dst_qnan 10836 bra.l dst_snan 10837 10838 global srem_szero 10839srem_szero: 10840 mov.b DTAG(%a6),%d1 10841 beq.l t_operr 10842 cmpi.b %d1,&ZERO 10843 beq.l t_operr 10844 cmpi.b %d1,&INF 10845 beq.l t_operr 10846 cmpi.b %d1,&DENORM 10847 beq.l t_operr 10848 cmpi.b %d1,&QNAN 10849 beq.l dst_qnan 10850 bra.l dst_snan 10851 10852 global srem_sinf 10853srem_sinf: 10854 mov.b DTAG(%a6),%d1 10855 beq.w srem_fpn 10856 cmpi.b %d1,&ZERO 10857 beq.w srem_zro 10858 cmpi.b %d1,&INF 10859 beq.l t_operr 10860 cmpi.b %d1,&DENORM 10861 beq.l srem_fpn 10862 cmpi.b %d1,&QNAN 10863 beq.l dst_qnan 10864 bra.l dst_snan 10865 10866######################################################################### 10867 global sscale_snorm 10868 global sscale_sdnrm 10869sscale_snorm: 10870sscale_sdnrm: 10871 mov.b DTAG(%a6),%d1 10872 beq.l sscale 10873 cmpi.b %d1,&ZERO 10874 beq.l dst_zero 10875 cmpi.b %d1,&INF 10876 beq.l dst_inf 10877 cmpi.b %d1,&DENORM 10878 beq.l sscale 10879 cmpi.b %d1,&QNAN 10880 beq.l dst_qnan 10881 bra.l dst_snan 10882 10883 global sscale_szero 10884sscale_szero: 10885 mov.b DTAG(%a6),%d1 10886 beq.l sscale 10887 cmpi.b %d1,&ZERO 10888 beq.l dst_zero 10889 cmpi.b %d1,&INF 10890 beq.l dst_inf 10891 cmpi.b %d1,&DENORM 10892 beq.l sscale 10893 cmpi.b %d1,&QNAN 10894 beq.l dst_qnan 10895 bra.l dst_snan 10896 10897 global sscale_sinf 10898sscale_sinf: 10899 mov.b DTAG(%a6),%d1 10900 beq.l t_operr 10901 cmpi.b %d1,&QNAN 10902 beq.l dst_qnan 10903 cmpi.b %d1,&SNAN 10904 beq.l dst_snan 10905 bra.l t_operr 10906 10907######################################################################## 10908 10909# 10910# sop_sqnan(): The src op for frem/fmod/fscale was a QNAN. 10911# 10912 global sop_sqnan 10913sop_sqnan: 10914 mov.b DTAG(%a6),%d1 10915 cmpi.b %d1,&QNAN 10916 beq.b dst_qnan 10917 cmpi.b %d1,&SNAN 10918 beq.b dst_snan 10919 bra.b src_qnan 10920 10921# 10922# sop_ssnan(): The src op for frem/fmod/fscale was an SNAN. 10923# 10924 global sop_ssnan 10925sop_ssnan: 10926 mov.b DTAG(%a6),%d1 10927 cmpi.b %d1,&QNAN 10928 beq.b dst_qnan_src_snan 10929 cmpi.b %d1,&SNAN 10930 beq.b dst_snan 10931 bra.b src_snan 10932 10933dst_qnan_src_snan: 10934 ori.l &snaniop_mask,USER_FPSR(%a6) # set NAN/SNAN/AIOP 10935 bra.b dst_qnan 10936 10937# 10938# dst_qnan(): Return the dst SNAN w/ the SNAN bit set. 10939# 10940 global dst_snan 10941dst_snan: 10942 fmov.x DST(%a1),%fp0 # the fmove sets the SNAN bit 10943 fmov.l %fpsr,%d0 # catch resulting status 10944 or.l %d0,USER_FPSR(%a6) # store status 10945 rts 10946 10947# 10948# dst_qnan(): Return the dst QNAN. 10949# 10950 global dst_qnan 10951dst_qnan: 10952 fmov.x DST(%a1),%fp0 # return the non-signalling nan 10953 tst.b DST_EX(%a1) # set ccodes according to QNAN sign 10954 bmi.b dst_qnan_m 10955dst_qnan_p: 10956 mov.b &nan_bmask,FPSR_CC(%a6) 10957 rts 10958dst_qnan_m: 10959 mov.b &neg_bmask+nan_bmask,FPSR_CC(%a6) 10960 rts 10961 10962# 10963# src_snan(): Return the src SNAN w/ the SNAN bit set. 10964# 10965 global src_snan 10966src_snan: 10967 fmov.x SRC(%a0),%fp0 # the fmove sets the SNAN bit 10968 fmov.l %fpsr,%d0 # catch resulting status 10969 or.l %d0,USER_FPSR(%a6) # store status 10970 rts 10971 10972# 10973# src_qnan(): Return the src QNAN. 
10974# 10975 global src_qnan 10976src_qnan: 10977 fmov.x SRC(%a0),%fp0 # return the non-signalling nan 10978 tst.b SRC_EX(%a0) # set ccodes according to QNAN sign 10979 bmi.b dst_qnan_m 10980src_qnan_p: 10981 mov.b &nan_bmask,FPSR_CC(%a6) 10982 rts 10983src_qnan_m: 10984 mov.b &neg_bmask+nan_bmask,FPSR_CC(%a6) 10985 rts 10986 10987# 10988# fkern2.s: 10989# These entry points are used by the exception handler 10990# routines where an instruction is selected by an index into 10991# a large jump table corresponding to a given instruction which 10992# has been decoded. Flow continues here where we now decode 10993# further according to the source operand type. 10994# 10995 10996 global fsinh 10997fsinh: 10998 mov.b STAG(%a6),%d1 10999 beq.l ssinh 11000 cmpi.b %d1,&ZERO 11001 beq.l src_zero 11002 cmpi.b %d1,&INF 11003 beq.l src_inf 11004 cmpi.b %d1,&DENORM 11005 beq.l ssinhd 11006 cmpi.b %d1,&QNAN 11007 beq.l src_qnan 11008 bra.l src_snan 11009 11010 global flognp1 11011flognp1: 11012 mov.b STAG(%a6),%d1 11013 beq.l slognp1 11014 cmpi.b %d1,&ZERO 11015 beq.l src_zero 11016 cmpi.b %d1,&INF 11017 beq.l sopr_inf 11018 cmpi.b %d1,&DENORM 11019 beq.l slognp1d 11020 cmpi.b %d1,&QNAN 11021 beq.l src_qnan 11022 bra.l src_snan 11023 11024 global fetoxm1 11025fetoxm1: 11026 mov.b STAG(%a6),%d1 11027 beq.l setoxm1 11028 cmpi.b %d1,&ZERO 11029 beq.l src_zero 11030 cmpi.b %d1,&INF 11031 beq.l setoxm1i 11032 cmpi.b %d1,&DENORM 11033 beq.l setoxm1d 11034 cmpi.b %d1,&QNAN 11035 beq.l src_qnan 11036 bra.l src_snan 11037 11038 global ftanh 11039ftanh: 11040 mov.b STAG(%a6),%d1 11041 beq.l stanh 11042 cmpi.b %d1,&ZERO 11043 beq.l src_zero 11044 cmpi.b %d1,&INF 11045 beq.l src_one 11046 cmpi.b %d1,&DENORM 11047 beq.l stanhd 11048 cmpi.b %d1,&QNAN 11049 beq.l src_qnan 11050 bra.l src_snan 11051 11052 global fatan 11053fatan: 11054 mov.b STAG(%a6),%d1 11055 beq.l satan 11056 cmpi.b %d1,&ZERO 11057 beq.l src_zero 11058 cmpi.b %d1,&INF 11059 beq.l spi_2 11060 cmpi.b %d1,&DENORM 11061 beq.l satand 11062 cmpi.b %d1,&QNAN 11063 beq.l src_qnan 11064 bra.l src_snan 11065 11066 global fasin 11067fasin: 11068 mov.b STAG(%a6),%d1 11069 beq.l sasin 11070 cmpi.b %d1,&ZERO 11071 beq.l src_zero 11072 cmpi.b %d1,&INF 11073 beq.l t_operr 11074 cmpi.b %d1,&DENORM 11075 beq.l sasind 11076 cmpi.b %d1,&QNAN 11077 beq.l src_qnan 11078 bra.l src_snan 11079 11080 global fatanh 11081fatanh: 11082 mov.b STAG(%a6),%d1 11083 beq.l satanh 11084 cmpi.b %d1,&ZERO 11085 beq.l src_zero 11086 cmpi.b %d1,&INF 11087 beq.l t_operr 11088 cmpi.b %d1,&DENORM 11089 beq.l satanhd 11090 cmpi.b %d1,&QNAN 11091 beq.l src_qnan 11092 bra.l src_snan 11093 11094 global fsine 11095fsine: 11096 mov.b STAG(%a6),%d1 11097 beq.l ssin 11098 cmpi.b %d1,&ZERO 11099 beq.l src_zero 11100 cmpi.b %d1,&INF 11101 beq.l t_operr 11102 cmpi.b %d1,&DENORM 11103 beq.l ssind 11104 cmpi.b %d1,&QNAN 11105 beq.l src_qnan 11106 bra.l src_snan 11107 11108 global ftan 11109ftan: 11110 mov.b STAG(%a6),%d1 11111 beq.l stan 11112 cmpi.b %d1,&ZERO 11113 beq.l src_zero 11114 cmpi.b %d1,&INF 11115 beq.l t_operr 11116 cmpi.b %d1,&DENORM 11117 beq.l stand 11118 cmpi.b %d1,&QNAN 11119 beq.l src_qnan 11120 bra.l src_snan 11121 11122 global fetox 11123fetox: 11124 mov.b STAG(%a6),%d1 11125 beq.l setox 11126 cmpi.b %d1,&ZERO 11127 beq.l ld_pone 11128 cmpi.b %d1,&INF 11129 beq.l szr_inf 11130 cmpi.b %d1,&DENORM 11131 beq.l setoxd 11132 cmpi.b %d1,&QNAN 11133 beq.l src_qnan 11134 bra.l src_snan 11135 11136 global ftwotox 11137ftwotox: 11138 mov.b STAG(%a6),%d1 11139 beq.l stwotox 11140 cmpi.b %d1,&ZERO 
11141 beq.l ld_pone 11142 cmpi.b %d1,&INF 11143 beq.l szr_inf 11144 cmpi.b %d1,&DENORM 11145 beq.l stwotoxd 11146 cmpi.b %d1,&QNAN 11147 beq.l src_qnan 11148 bra.l src_snan 11149 11150 global ftentox 11151ftentox: 11152 mov.b STAG(%a6),%d1 11153 beq.l stentox 11154 cmpi.b %d1,&ZERO 11155 beq.l ld_pone 11156 cmpi.b %d1,&INF 11157 beq.l szr_inf 11158 cmpi.b %d1,&DENORM 11159 beq.l stentoxd 11160 cmpi.b %d1,&QNAN 11161 beq.l src_qnan 11162 bra.l src_snan 11163 11164 global flogn 11165flogn: 11166 mov.b STAG(%a6),%d1 11167 beq.l slogn 11168 cmpi.b %d1,&ZERO 11169 beq.l t_dz2 11170 cmpi.b %d1,&INF 11171 beq.l sopr_inf 11172 cmpi.b %d1,&DENORM 11173 beq.l slognd 11174 cmpi.b %d1,&QNAN 11175 beq.l src_qnan 11176 bra.l src_snan 11177 11178 global flog10 11179flog10: 11180 mov.b STAG(%a6),%d1 11181 beq.l slog10 11182 cmpi.b %d1,&ZERO 11183 beq.l t_dz2 11184 cmpi.b %d1,&INF 11185 beq.l sopr_inf 11186 cmpi.b %d1,&DENORM 11187 beq.l slog10d 11188 cmpi.b %d1,&QNAN 11189 beq.l src_qnan 11190 bra.l src_snan 11191 11192 global flog2 11193flog2: 11194 mov.b STAG(%a6),%d1 11195 beq.l slog2 11196 cmpi.b %d1,&ZERO 11197 beq.l t_dz2 11198 cmpi.b %d1,&INF 11199 beq.l sopr_inf 11200 cmpi.b %d1,&DENORM 11201 beq.l slog2d 11202 cmpi.b %d1,&QNAN 11203 beq.l src_qnan 11204 bra.l src_snan 11205 11206 global fcosh 11207fcosh: 11208 mov.b STAG(%a6),%d1 11209 beq.l scosh 11210 cmpi.b %d1,&ZERO 11211 beq.l ld_pone 11212 cmpi.b %d1,&INF 11213 beq.l ld_pinf 11214 cmpi.b %d1,&DENORM 11215 beq.l scoshd 11216 cmpi.b %d1,&QNAN 11217 beq.l src_qnan 11218 bra.l src_snan 11219 11220 global facos 11221facos: 11222 mov.b STAG(%a6),%d1 11223 beq.l sacos 11224 cmpi.b %d1,&ZERO 11225 beq.l ld_ppi2 11226 cmpi.b %d1,&INF 11227 beq.l t_operr 11228 cmpi.b %d1,&DENORM 11229 beq.l sacosd 11230 cmpi.b %d1,&QNAN 11231 beq.l src_qnan 11232 bra.l src_snan 11233 11234 global fcos 11235fcos: 11236 mov.b STAG(%a6),%d1 11237 beq.l scos 11238 cmpi.b %d1,&ZERO 11239 beq.l ld_pone 11240 cmpi.b %d1,&INF 11241 beq.l t_operr 11242 cmpi.b %d1,&DENORM 11243 beq.l scosd 11244 cmpi.b %d1,&QNAN 11245 beq.l src_qnan 11246 bra.l src_snan 11247 11248 global fgetexp 11249fgetexp: 11250 mov.b STAG(%a6),%d1 11251 beq.l sgetexp 11252 cmpi.b %d1,&ZERO 11253 beq.l src_zero 11254 cmpi.b %d1,&INF 11255 beq.l t_operr 11256 cmpi.b %d1,&DENORM 11257 beq.l sgetexpd 11258 cmpi.b %d1,&QNAN 11259 beq.l src_qnan 11260 bra.l src_snan 11261 11262 global fgetman 11263fgetman: 11264 mov.b STAG(%a6),%d1 11265 beq.l sgetman 11266 cmpi.b %d1,&ZERO 11267 beq.l src_zero 11268 cmpi.b %d1,&INF 11269 beq.l t_operr 11270 cmpi.b %d1,&DENORM 11271 beq.l sgetmand 11272 cmpi.b %d1,&QNAN 11273 beq.l src_qnan 11274 bra.l src_snan 11275 11276 global fsincos 11277fsincos: 11278 mov.b STAG(%a6),%d1 11279 beq.l ssincos 11280 cmpi.b %d1,&ZERO 11281 beq.l ssincosz 11282 cmpi.b %d1,&INF 11283 beq.l ssincosi 11284 cmpi.b %d1,&DENORM 11285 beq.l ssincosd 11286 cmpi.b %d1,&QNAN 11287 beq.l ssincosqnan 11288 bra.l ssincossnan 11289 11290 global fmod 11291fmod: 11292 mov.b STAG(%a6),%d1 11293 beq.l smod_snorm 11294 cmpi.b %d1,&ZERO 11295 beq.l smod_szero 11296 cmpi.b %d1,&INF 11297 beq.l smod_sinf 11298 cmpi.b %d1,&DENORM 11299 beq.l smod_sdnrm 11300 cmpi.b %d1,&QNAN 11301 beq.l sop_sqnan 11302 bra.l sop_ssnan 11303 11304 global frem 11305frem: 11306 mov.b STAG(%a6),%d1 11307 beq.l srem_snorm 11308 cmpi.b %d1,&ZERO 11309 beq.l srem_szero 11310 cmpi.b %d1,&INF 11311 beq.l srem_sinf 11312 cmpi.b %d1,&DENORM 11313 beq.l srem_sdnrm 11314 cmpi.b %d1,&QNAN 11315 beq.l sop_sqnan 11316 bra.l sop_ssnan 11317 11318 
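#
# Each of these entry points performs the same source-tag decode; fscale
# below is typical.  An illustrative C view (names are descriptive only):
#
#      switch (STAG) {
#      case NORM:   return sscale_snorm();
#      case ZERO:   return sscale_szero();
#      case INF:    return sscale_sinf();
#      case DENORM: return sscale_sdnrm();
#      case QNAN:   return sop_sqnan();
#      default:     return sop_ssnan();     /* SNAN source                 */
#      }
#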
global fscale 11319fscale: 11320 mov.b STAG(%a6),%d1 11321 beq.l sscale_snorm 11322 cmpi.b %d1,&ZERO 11323 beq.l sscale_szero 11324 cmpi.b %d1,&INF 11325 beq.l sscale_sinf 11326 cmpi.b %d1,&DENORM 11327 beq.l sscale_sdnrm 11328 cmpi.b %d1,&QNAN 11329 beq.l sop_sqnan 11330 bra.l sop_ssnan 11331 11332######################################################################### 11333# XDEF **************************************************************** # 11334# fgen_except(): catch an exception during transcendental # 11335# emulation # 11336# # 11337# XREF **************************************************************** # 11338# fmul() - emulate a multiply instruction # 11339# fadd() - emulate an add instruction # 11340# fin() - emulate an fmove instruction # 11341# # 11342# INPUT *************************************************************** # 11343# fp0 = destination operand # 11344# d0 = type of instruction that took exception # 11345# fsave frame = source operand # 11346# # 11347# OUTPUT ************************************************************** # 11348# fp0 = result # 11349# fp1 = EXOP # 11350# # 11351# ALGORITHM *********************************************************** # 11352# An exception occurred on the last instruction of the # 11353# transcendental emulation. hopefully, this won't be happening much # 11354# because it will be VERY slow. # 11355# The only exceptions capable of passing through here are # 11356# Overflow, Underflow, and Unsupported Data Type. # 11357# # 11358######################################################################### 11359 11360 global fgen_except 11361fgen_except: 11362 cmpi.b 0x3(%sp),&0x7 # is exception UNSUPP? 11363 beq.b fge_unsupp # yes 11364 11365 mov.b &NORM,STAG(%a6) 11366 11367fge_cont: 11368 mov.b &NORM,DTAG(%a6) 11369 11370# ok, I have a problem with putting the dst op at FP_DST. the emulation 11371# routines aren't supposed to alter the operands but we've just squashed 11372# FP_DST here... 11373 11374# 8/17/93 - this turns out to be more of a "cleanliness" standpoint 11375# then a potential bug. to begin with, only the dyadic functions 11376# frem,fmod, and fscale would get the dst trashed here. But, for 11377# the 060SP, the FP_DST is never used again anyways. 11378 fmovm.x &0x80,FP_DST(%a6) # dst op is in fp0 11379 11380 lea 0x4(%sp),%a0 # pass: ptr to src op 11381 lea FP_DST(%a6),%a1 # pass: ptr to dst op 11382 11383 cmpi.b %d1,&FMOV_OP 11384 beq.b fge_fin # it was an "fmov" 11385 cmpi.b %d1,&FADD_OP 11386 beq.b fge_fadd # it was an "fadd" 11387fge_fmul: 11388 bsr.l fmul 11389 rts 11390fge_fadd: 11391 bsr.l fadd 11392 rts 11393fge_fin: 11394 bsr.l fin 11395 rts 11396 11397fge_unsupp: 11398 mov.b &DENORM,STAG(%a6) 11399 bra.b fge_cont 11400 11401# 11402# This table holds the offsets of the emulation routines for each individual 11403# math operation relative to the address of this table. Included are 11404# routines like fadd/fmul/fabs as well as the transcendentals. 11405# The location within the table is determined by the extension bits of the 11406# operation longword. 
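# Each entry is stored as a self-relative offset (routine - tbl_unsupp) so
# the table is position-independent.  The indexing code lives elsewhere in
# the package; as a sketch only (assumed names), the dispatch amounts to:
#
#      int32_t off = tbl_unsupp[op_extension];    /* low extension bits    */
#      void (*emu)(void) = (void (*)(void))((char *)tbl_unsupp + off);
#      emu();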
11407# 11408 11409 swbeg &109 11410tbl_unsupp: 11411 long fin - tbl_unsupp # 00: fmove 11412 long fint - tbl_unsupp # 01: fint 11413 long fsinh - tbl_unsupp # 02: fsinh 11414 long fintrz - tbl_unsupp # 03: fintrz 11415 long fsqrt - tbl_unsupp # 04: fsqrt 11416 long tbl_unsupp - tbl_unsupp 11417 long flognp1 - tbl_unsupp # 06: flognp1 11418 long tbl_unsupp - tbl_unsupp 11419 long fetoxm1 - tbl_unsupp # 08: fetoxm1 11420 long ftanh - tbl_unsupp # 09: ftanh 11421 long fatan - tbl_unsupp # 0a: fatan 11422 long tbl_unsupp - tbl_unsupp 11423 long fasin - tbl_unsupp # 0c: fasin 11424 long fatanh - tbl_unsupp # 0d: fatanh 11425 long fsine - tbl_unsupp # 0e: fsin 11426 long ftan - tbl_unsupp # 0f: ftan 11427 long fetox - tbl_unsupp # 10: fetox 11428 long ftwotox - tbl_unsupp # 11: ftwotox 11429 long ftentox - tbl_unsupp # 12: ftentox 11430 long tbl_unsupp - tbl_unsupp 11431 long flogn - tbl_unsupp # 14: flogn 11432 long flog10 - tbl_unsupp # 15: flog10 11433 long flog2 - tbl_unsupp # 16: flog2 11434 long tbl_unsupp - tbl_unsupp 11435 long fabs - tbl_unsupp # 18: fabs 11436 long fcosh - tbl_unsupp # 19: fcosh 11437 long fneg - tbl_unsupp # 1a: fneg 11438 long tbl_unsupp - tbl_unsupp 11439 long facos - tbl_unsupp # 1c: facos 11440 long fcos - tbl_unsupp # 1d: fcos 11441 long fgetexp - tbl_unsupp # 1e: fgetexp 11442 long fgetman - tbl_unsupp # 1f: fgetman 11443 long fdiv - tbl_unsupp # 20: fdiv 11444 long fmod - tbl_unsupp # 21: fmod 11445 long fadd - tbl_unsupp # 22: fadd 11446 long fmul - tbl_unsupp # 23: fmul 11447 long fsgldiv - tbl_unsupp # 24: fsgldiv 11448 long frem - tbl_unsupp # 25: frem 11449 long fscale - tbl_unsupp # 26: fscale 11450 long fsglmul - tbl_unsupp # 27: fsglmul 11451 long fsub - tbl_unsupp # 28: fsub 11452 long tbl_unsupp - tbl_unsupp 11453 long tbl_unsupp - tbl_unsupp 11454 long tbl_unsupp - tbl_unsupp 11455 long tbl_unsupp - tbl_unsupp 11456 long tbl_unsupp - tbl_unsupp 11457 long tbl_unsupp - tbl_unsupp 11458 long tbl_unsupp - tbl_unsupp 11459 long fsincos - tbl_unsupp # 30: fsincos 11460 long fsincos - tbl_unsupp # 31: fsincos 11461 long fsincos - tbl_unsupp # 32: fsincos 11462 long fsincos - tbl_unsupp # 33: fsincos 11463 long fsincos - tbl_unsupp # 34: fsincos 11464 long fsincos - tbl_unsupp # 35: fsincos 11465 long fsincos - tbl_unsupp # 36: fsincos 11466 long fsincos - tbl_unsupp # 37: fsincos 11467 long fcmp - tbl_unsupp # 38: fcmp 11468 long tbl_unsupp - tbl_unsupp 11469 long ftst - tbl_unsupp # 3a: ftst 11470 long tbl_unsupp - tbl_unsupp 11471 long tbl_unsupp - tbl_unsupp 11472 long tbl_unsupp - tbl_unsupp 11473 long tbl_unsupp - tbl_unsupp 11474 long tbl_unsupp - tbl_unsupp 11475 long fsin - tbl_unsupp # 40: fsmove 11476 long fssqrt - tbl_unsupp # 41: fssqrt 11477 long tbl_unsupp - tbl_unsupp 11478 long tbl_unsupp - tbl_unsupp 11479 long fdin - tbl_unsupp # 44: fdmove 11480 long fdsqrt - tbl_unsupp # 45: fdsqrt 11481 long tbl_unsupp - tbl_unsupp 11482 long tbl_unsupp - tbl_unsupp 11483 long tbl_unsupp - tbl_unsupp 11484 long tbl_unsupp - tbl_unsupp 11485 long tbl_unsupp - tbl_unsupp 11486 long tbl_unsupp - tbl_unsupp 11487 long tbl_unsupp - tbl_unsupp 11488 long tbl_unsupp - tbl_unsupp 11489 long tbl_unsupp - tbl_unsupp 11490 long tbl_unsupp - tbl_unsupp 11491 long tbl_unsupp - tbl_unsupp 11492 long tbl_unsupp - tbl_unsupp 11493 long tbl_unsupp - tbl_unsupp 11494 long tbl_unsupp - tbl_unsupp 11495 long tbl_unsupp - tbl_unsupp 11496 long tbl_unsupp - tbl_unsupp 11497 long tbl_unsupp - tbl_unsupp 11498 long tbl_unsupp - tbl_unsupp 11499 long fsabs - tbl_unsupp # 58: 
fsabs 11500 long tbl_unsupp - tbl_unsupp 11501 long fsneg - tbl_unsupp # 5a: fsneg 11502 long tbl_unsupp - tbl_unsupp 11503 long fdabs - tbl_unsupp # 5c: fdabs 11504 long tbl_unsupp - tbl_unsupp 11505 long fdneg - tbl_unsupp # 5e: fdneg 11506 long tbl_unsupp - tbl_unsupp 11507 long fsdiv - tbl_unsupp # 60: fsdiv 11508 long tbl_unsupp - tbl_unsupp 11509 long fsadd - tbl_unsupp # 62: fsadd 11510 long fsmul - tbl_unsupp # 63: fsmul 11511 long fddiv - tbl_unsupp # 64: fddiv 11512 long tbl_unsupp - tbl_unsupp 11513 long fdadd - tbl_unsupp # 66: fdadd 11514 long fdmul - tbl_unsupp # 67: fdmul 11515 long fssub - tbl_unsupp # 68: fssub 11516 long tbl_unsupp - tbl_unsupp 11517 long tbl_unsupp - tbl_unsupp 11518 long tbl_unsupp - tbl_unsupp 11519 long fdsub - tbl_unsupp # 6c: fdsub 11520 11521######################################################################### 11522# XDEF **************************************************************** # 11523# fmul(): emulates the fmul instruction # 11524# fsmul(): emulates the fsmul instruction # 11525# fdmul(): emulates the fdmul instruction # 11526# # 11527# XREF **************************************************************** # 11528# scale_to_zero_src() - scale src exponent to zero # 11529# scale_to_zero_dst() - scale dst exponent to zero # 11530# unf_res() - return default underflow result # 11531# ovf_res() - return default overflow result # 11532# res_qnan() - return QNAN result # 11533# res_snan() - return SNAN result # 11534# # 11535# INPUT *************************************************************** # 11536# a0 = pointer to extended precision source operand # 11537# a1 = pointer to extended precision destination operand # 11538# d0 rnd prec,mode # 11539# # 11540# OUTPUT ************************************************************** # 11541# fp0 = result # 11542# fp1 = EXOP (if exception occurred) # 11543# # 11544# ALGORITHM *********************************************************** # 11545# Handle NANs, infinities, and zeroes as special cases. Divide # 11546# norms/denorms into ext/sgl/dbl precision. # 11547# For norms/denorms, scale the exponents such that a multiply # 11548# instruction won't cause an exception. Use the regular fmul to # 11549# compute a result. Check if the regular operands would have taken # 11550# an exception. If so, return the default overflow/underflow result # 11551# and return the EXOP if exceptions are enabled. Else, scale the # 11552# result operand to the proper exponent. 
# 11553# # 11554######################################################################### 11555 11556 align 0x10 11557tbl_fmul_ovfl: 11558 long 0x3fff - 0x7ffe # ext_max 11559 long 0x3fff - 0x407e # sgl_max 11560 long 0x3fff - 0x43fe # dbl_max 11561tbl_fmul_unfl: 11562 long 0x3fff + 0x0001 # ext_unfl 11563 long 0x3fff - 0x3f80 # sgl_unfl 11564 long 0x3fff - 0x3c00 # dbl_unfl 11565 11566 global fsmul 11567fsmul: 11568 andi.b &0x30,%d0 # clear rnd prec 11569 ori.b &s_mode*0x10,%d0 # insert sgl prec 11570 bra.b fmul 11571 11572 global fdmul 11573fdmul: 11574 andi.b &0x30,%d0 11575 ori.b &d_mode*0x10,%d0 # insert dbl prec 11576 11577 global fmul 11578fmul: 11579 mov.l %d0,L_SCR3(%a6) # store rnd info 11580 11581 clr.w %d1 11582 mov.b DTAG(%a6),%d1 11583 lsl.b &0x3,%d1 11584 or.b STAG(%a6),%d1 # combine src tags 11585 bne.w fmul_not_norm # optimize on non-norm input 11586 11587fmul_norm: 11588 mov.w DST_EX(%a1),FP_SCR1_EX(%a6) 11589 mov.l DST_HI(%a1),FP_SCR1_HI(%a6) 11590 mov.l DST_LO(%a1),FP_SCR1_LO(%a6) 11591 11592 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) 11593 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) 11594 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) 11595 11596 bsr.l scale_to_zero_src # scale src exponent 11597 mov.l %d0,-(%sp) # save scale factor 1 11598 11599 bsr.l scale_to_zero_dst # scale dst exponent 11600 11601 add.l %d0,(%sp) # SCALE_FACTOR = scale1 + scale2 11602 11603 mov.w 2+L_SCR3(%a6),%d1 # fetch precision 11604 lsr.b &0x6,%d1 # shift to lo bits 11605 mov.l (%sp)+,%d0 # load S.F. 11606 cmp.l %d0,(tbl_fmul_ovfl.w,%pc,%d1.w*4) # would result ovfl? 11607 beq.w fmul_may_ovfl # result may rnd to overflow 11608 blt.w fmul_ovfl # result will overflow 11609 11610 cmp.l %d0,(tbl_fmul_unfl.w,%pc,%d1.w*4) # would result unfl? 11611 beq.w fmul_may_unfl # result may rnd to no unfl 11612 bgt.w fmul_unfl # result will underflow 11613 11614# 11615# NORMAL: 11616# - the result of the multiply operation will neither overflow nor underflow. 11617# - do the multiply to the proper precision and rounding mode. 11618# - scale the result exponent using the scale factor. if both operands were 11619# normalized then we really don't need to go through this scaling. but for now, 11620# this will do. 11621# 11622fmul_normal: 11623 fmovm.x FP_SCR1(%a6),&0x80 # load dst operand 11624 11625 fmov.l L_SCR3(%a6),%fpcr # set FPCR 11626 fmov.l &0x0,%fpsr # clear FPSR 11627 11628 fmul.x FP_SCR0(%a6),%fp0 # execute multiply 11629 11630 fmov.l %fpsr,%d1 # save status 11631 fmov.l &0x0,%fpcr # clear FPCR 11632 11633 or.l %d1,USER_FPSR(%a6) # save INEX2,N 11634 11635fmul_normal_exit: 11636 fmovm.x &0x80,FP_SCR0(%a6) # store out result 11637 mov.l %d2,-(%sp) # save d2 11638 mov.w FP_SCR0_EX(%a6),%d1 # load {sgn,exp} 11639 mov.l %d1,%d2 # make a copy 11640 andi.l &0x7fff,%d1 # strip sign 11641 andi.w &0x8000,%d2 # keep old sign 11642 sub.l %d0,%d1 # add scale factor 11643 or.w %d2,%d1 # concat old sign,new exp 11644 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent 11645 mov.l (%sp)+,%d2 # restore d2 11646 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0 11647 rts 11648 11649# 11650# OVERFLOW: 11651# - the result of the multiply operation is an overflow. 11652# - do the multiply to the proper precision and rounding mode in order to 11653# set the inexact bits. 11654# - calculate the default result and return it in fp0. 11655# - if overflow or inexact is enabled, we need a multiply result rounded to 11656# extended precision. if the original operation was extended, then we have this 11657# result. 
if the original operation was single or double, we have to do another 11658# multiply using extended precision and the correct rounding mode. the result 11659# of this operation then has its exponent scaled by -0x6000 to create the 11660# exceptional operand. 11661# 11662fmul_ovfl: 11663 fmovm.x FP_SCR1(%a6),&0x80 # load dst operand 11664 11665 fmov.l L_SCR3(%a6),%fpcr # set FPCR 11666 fmov.l &0x0,%fpsr # clear FPSR 11667 11668 fmul.x FP_SCR0(%a6),%fp0 # execute multiply 11669 11670 fmov.l %fpsr,%d1 # save status 11671 fmov.l &0x0,%fpcr # clear FPCR 11672 11673 or.l %d1,USER_FPSR(%a6) # save INEX2,N 11674 11675# save setting this until now because this is where fmul_may_ovfl may jump in 11676fmul_ovfl_tst: 11677 or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex 11678 11679 mov.b FPCR_ENABLE(%a6),%d1 11680 andi.b &0x13,%d1 # is OVFL or INEX enabled? 11681 bne.b fmul_ovfl_ena # yes 11682 11683# calculate the default result 11684fmul_ovfl_dis: 11685 btst &neg_bit,FPSR_CC(%a6) # is result negative? 11686 sne %d1 # set sign param accordingly 11687 mov.l L_SCR3(%a6),%d0 # pass rnd prec,mode 11688 bsr.l ovf_res # calculate default result 11689 or.b %d0,FPSR_CC(%a6) # set INF,N if applicable 11690 fmovm.x (%a0),&0x80 # return default result in fp0 11691 rts 11692 11693# 11694# OVFL is enabled; Create EXOP: 11695# - if precision is extended, then we have the EXOP. simply bias the exponent 11696# with an extra -0x6000. if the precision is single or double, we need to 11697# calculate a result rounded to extended precision. 11698# 11699fmul_ovfl_ena: 11700 mov.l L_SCR3(%a6),%d1 11701 andi.b &0xc0,%d1 # test the rnd prec 11702 bne.b fmul_ovfl_ena_sd # it's sgl or dbl 11703 11704fmul_ovfl_ena_cont: 11705 fmovm.x &0x80,FP_SCR0(%a6) # move result to stack 11706 11707 mov.l %d2,-(%sp) # save d2 11708 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp} 11709 mov.w %d1,%d2 # make a copy 11710 andi.l &0x7fff,%d1 # strip sign 11711 sub.l %d0,%d1 # add scale factor 11712 subi.l &0x6000,%d1 # subtract bias 11713 andi.w &0x7fff,%d1 # clear sign bit 11714 andi.w &0x8000,%d2 # keep old sign 11715 or.w %d2,%d1 # concat old sign,new exp 11716 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent 11717 mov.l (%sp)+,%d2 # restore d2 11718 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1 11719 bra.b fmul_ovfl_dis 11720 11721fmul_ovfl_ena_sd: 11722 fmovm.x FP_SCR1(%a6),&0x80 # load dst operand 11723 11724 mov.l L_SCR3(%a6),%d1 11725 andi.b &0x30,%d1 # keep rnd mode only 11726 fmov.l %d1,%fpcr # set FPCR 11727 11728 fmul.x FP_SCR0(%a6),%fp0 # execute multiply 11729 11730 fmov.l &0x0,%fpcr # clear FPCR 11731 bra.b fmul_ovfl_ena_cont 11732 11733# 11734# may OVERFLOW: 11735# - the result of the multiply operation MAY overflow. 11736# - do the multiply to the proper precision and rounding mode in order to 11737# set the inexact bits. 11738# - calculate the default result and return it in fp0. 11739# 11740fmul_may_ovfl: 11741 fmovm.x FP_SCR1(%a6),&0x80 # load dst op 11742 11743 fmov.l L_SCR3(%a6),%fpcr # set FPCR 11744 fmov.l &0x0,%fpsr # clear FPSR 11745 11746 fmul.x FP_SCR0(%a6),%fp0 # execute multiply 11747 11748 fmov.l %fpsr,%d1 # save status 11749 fmov.l &0x0,%fpcr # clear FPCR 11750 11751 or.l %d1,USER_FPSR(%a6) # save INEX2,N 11752 11753 fabs.x %fp0,%fp1 # make a copy of result 11754 fcmp.b %fp1,&0x2 # is |result| >= 2.b? 
11755 fbge.w fmul_ovfl_tst # yes; overflow has occurred 11756 11757# no, it didn't overflow; we have correct result 11758 bra.w fmul_normal_exit 11759 11760# 11761# UNDERFLOW: 11762# - the result of the multiply operation is an underflow. 11763# - do the multiply to the proper precision and rounding mode in order to 11764# set the inexact bits. 11765# - calculate the default result and return it in fp0. 11766# - if overflow or inexact is enabled, we need a multiply result rounded to 11767# extended precision. if the original operation was extended, then we have this 11768# result. if the original operation was single or double, we have to do another 11769# multiply using extended precision and the correct rounding mode. the result 11770# of this operation then has its exponent scaled by -0x6000 to create the 11771# exceptional operand. 11772# 11773fmul_unfl: 11774 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit 11775 11776# for fun, let's use only extended precision, round to zero. then, let 11777# the unf_res() routine figure out all the rest. 11778# will we get the correct answer. 11779 fmovm.x FP_SCR1(%a6),&0x80 # load dst operand 11780 11781 fmov.l &rz_mode*0x10,%fpcr # set FPCR 11782 fmov.l &0x0,%fpsr # clear FPSR 11783 11784 fmul.x FP_SCR0(%a6),%fp0 # execute multiply 11785 11786 fmov.l %fpsr,%d1 # save status 11787 fmov.l &0x0,%fpcr # clear FPCR 11788 11789 or.l %d1,USER_FPSR(%a6) # save INEX2,N 11790 11791 mov.b FPCR_ENABLE(%a6),%d1 11792 andi.b &0x0b,%d1 # is UNFL or INEX enabled? 11793 bne.b fmul_unfl_ena # yes 11794 11795fmul_unfl_dis: 11796 fmovm.x &0x80,FP_SCR0(%a6) # store out result 11797 11798 lea FP_SCR0(%a6),%a0 # pass: result addr 11799 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode 11800 bsr.l unf_res # calculate default result 11801 or.b %d0,FPSR_CC(%a6) # unf_res2 may have set 'Z' 11802 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0 11803 rts 11804 11805# 11806# UNFL is enabled. 11807# 11808fmul_unfl_ena: 11809 fmovm.x FP_SCR1(%a6),&0x40 # load dst op 11810 11811 mov.l L_SCR3(%a6),%d1 11812 andi.b &0xc0,%d1 # is precision extended? 11813 bne.b fmul_unfl_ena_sd # no, sgl or dbl 11814 11815# if the rnd mode is anything but RZ, then we have to re-do the above 11816# multiplication because we used RZ for all. 11817 fmov.l L_SCR3(%a6),%fpcr # set FPCR 11818 11819fmul_unfl_ena_cont: 11820 fmov.l &0x0,%fpsr # clear FPSR 11821 11822 fmul.x FP_SCR0(%a6),%fp1 # execute multiply 11823 11824 fmov.l &0x0,%fpcr # clear FPCR 11825 11826 fmovm.x &0x40,FP_SCR0(%a6) # save result to stack 11827 mov.l %d2,-(%sp) # save d2 11828 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp} 11829 mov.l %d1,%d2 # make a copy 11830 andi.l &0x7fff,%d1 # strip sign 11831 andi.w &0x8000,%d2 # keep old sign 11832 sub.l %d0,%d1 # add scale factor 11833 addi.l &0x6000,%d1 # add bias 11834 andi.w &0x7fff,%d1 11835 or.w %d2,%d1 # concat old sign,new exp 11836 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent 11837 mov.l (%sp)+,%d2 # restore d2 11838 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1 11839 bra.w fmul_unfl_dis 11840 11841fmul_unfl_ena_sd: 11842 mov.l L_SCR3(%a6),%d1 11843 andi.b &0x30,%d1 # use only rnd mode 11844 fmov.l %d1,%fpcr # set FPCR 11845 11846 bra.b fmul_unfl_ena_cont 11847 11848# MAY UNDERFLOW: 11849# -use the correct rounding mode and precision. this code favors operations 11850# that do not underflow. 
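# Decision sketch for the code below (illustrative C; scaling the operands
# to zero exponents puts the underflow boundary at a result magnitude of 2):
#
#      r = round(x_scaled * y_scaled);            /* user prec and mode    */
#      if (fabs(r) > 2.0) goto fmul_normal_exit;  /* no underflow          */
#      if (fabs(r) < 2.0) goto fmul_unfl;         /* underflow             */
#      /* |r| == 2: redo with round-to-zero to see the pre-rounded         */
#      /* magnitude, then take fmul_normal_exit or fmul_unfl accordingly.  */
#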
11851fmul_may_unfl: 11852 fmovm.x FP_SCR1(%a6),&0x80 # load dst operand 11853 11854 fmov.l L_SCR3(%a6),%fpcr # set FPCR 11855 fmov.l &0x0,%fpsr # clear FPSR 11856 11857 fmul.x FP_SCR0(%a6),%fp0 # execute multiply 11858 11859 fmov.l %fpsr,%d1 # save status 11860 fmov.l &0x0,%fpcr # clear FPCR 11861 11862 or.l %d1,USER_FPSR(%a6) # save INEX2,N 11863 11864 fabs.x %fp0,%fp1 # make a copy of result 11865 fcmp.b %fp1,&0x2 # is |result| > 2.b? 11866 fbgt.w fmul_normal_exit # no; no underflow occurred 11867 fblt.w fmul_unfl # yes; underflow occurred 11868 11869# 11870# we still don't know if underflow occurred. result is ~ equal to 2. but, 11871# we don't know if the result was an underflow that rounded up to a 2 or 11872# a normalized number that rounded down to a 2. so, redo the entire operation 11873# using RZ as the rounding mode to see what the pre-rounded result is. 11874# this case should be relatively rare. 11875# 11876 fmovm.x FP_SCR1(%a6),&0x40 # load dst operand 11877 11878 mov.l L_SCR3(%a6),%d1 11879 andi.b &0xc0,%d1 # keep rnd prec 11880 ori.b &rz_mode*0x10,%d1 # insert RZ 11881 11882 fmov.l %d1,%fpcr # set FPCR 11883 fmov.l &0x0,%fpsr # clear FPSR 11884 11885 fmul.x FP_SCR0(%a6),%fp1 # execute multiply 11886 11887 fmov.l &0x0,%fpcr # clear FPCR 11888 fabs.x %fp1 # make absolute value 11889 fcmp.b %fp1,&0x2 # is |result| < 2.b? 11890 fbge.w fmul_normal_exit # no; no underflow occurred 11891 bra.w fmul_unfl # yes, underflow occurred 11892 11893################################################################################ 11894 11895# 11896# Multiply: inputs are not both normalized; what are they? 11897# 11898fmul_not_norm: 11899 mov.w (tbl_fmul_op.b,%pc,%d1.w*2),%d1 11900 jmp (tbl_fmul_op.b,%pc,%d1.w) 11901 11902 swbeg &48 11903tbl_fmul_op: 11904 short fmul_norm - tbl_fmul_op # NORM x NORM 11905 short fmul_zero - tbl_fmul_op # NORM x ZERO 11906 short fmul_inf_src - tbl_fmul_op # NORM x INF 11907 short fmul_res_qnan - tbl_fmul_op # NORM x QNAN 11908 short fmul_norm - tbl_fmul_op # NORM x DENORM 11909 short fmul_res_snan - tbl_fmul_op # NORM x SNAN 11910 short tbl_fmul_op - tbl_fmul_op # 11911 short tbl_fmul_op - tbl_fmul_op # 11912 11913 short fmul_zero - tbl_fmul_op # ZERO x NORM 11914 short fmul_zero - tbl_fmul_op # ZERO x ZERO 11915 short fmul_res_operr - tbl_fmul_op # ZERO x INF 11916 short fmul_res_qnan - tbl_fmul_op # ZERO x QNAN 11917 short fmul_zero - tbl_fmul_op # ZERO x DENORM 11918 short fmul_res_snan - tbl_fmul_op # ZERO x SNAN 11919 short tbl_fmul_op - tbl_fmul_op # 11920 short tbl_fmul_op - tbl_fmul_op # 11921 11922 short fmul_inf_dst - tbl_fmul_op # INF x NORM 11923 short fmul_res_operr - tbl_fmul_op # INF x ZERO 11924 short fmul_inf_dst - tbl_fmul_op # INF x INF 11925 short fmul_res_qnan - tbl_fmul_op # INF x QNAN 11926 short fmul_inf_dst - tbl_fmul_op # INF x DENORM 11927 short fmul_res_snan - tbl_fmul_op # INF x SNAN 11928 short tbl_fmul_op - tbl_fmul_op # 11929 short tbl_fmul_op - tbl_fmul_op # 11930 11931 short fmul_res_qnan - tbl_fmul_op # QNAN x NORM 11932 short fmul_res_qnan - tbl_fmul_op # QNAN x ZERO 11933 short fmul_res_qnan - tbl_fmul_op # QNAN x INF 11934 short fmul_res_qnan - tbl_fmul_op # QNAN x QNAN 11935 short fmul_res_qnan - tbl_fmul_op # QNAN x DENORM 11936 short fmul_res_snan - tbl_fmul_op # QNAN x SNAN 11937 short tbl_fmul_op - tbl_fmul_op # 11938 short tbl_fmul_op - tbl_fmul_op # 11939 11940 short fmul_norm - tbl_fmul_op # NORM x NORM 11941 short fmul_zero - tbl_fmul_op # NORM x ZERO 11942 short fmul_inf_src - tbl_fmul_op # NORM x INF 11943 short 
fmul_res_qnan - tbl_fmul_op # NORM x QNAN 11944 short fmul_norm - tbl_fmul_op # NORM x DENORM 11945 short fmul_res_snan - tbl_fmul_op # NORM x SNAN 11946 short tbl_fmul_op - tbl_fmul_op # 11947 short tbl_fmul_op - tbl_fmul_op # 11948 11949 short fmul_res_snan - tbl_fmul_op # SNAN x NORM 11950 short fmul_res_snan - tbl_fmul_op # SNAN x ZERO 11951 short fmul_res_snan - tbl_fmul_op # SNAN x INF 11952 short fmul_res_snan - tbl_fmul_op # SNAN x QNAN 11953 short fmul_res_snan - tbl_fmul_op # SNAN x DENORM 11954 short fmul_res_snan - tbl_fmul_op # SNAN x SNAN 11955 short tbl_fmul_op - tbl_fmul_op # 11956 short tbl_fmul_op - tbl_fmul_op # 11957 11958fmul_res_operr: 11959 bra.l res_operr 11960fmul_res_snan: 11961 bra.l res_snan 11962fmul_res_qnan: 11963 bra.l res_qnan 11964 11965# 11966# Multiply: (Zero x Zero) || (Zero x norm) || (Zero x denorm) 11967# 11968 global fmul_zero # global for fsglmul 11969fmul_zero: 11970 mov.b SRC_EX(%a0),%d0 # exclusive or the signs 11971 mov.b DST_EX(%a1),%d1 11972 eor.b %d0,%d1 11973 bpl.b fmul_zero_p # result ZERO is pos. 11974fmul_zero_n: 11975 fmov.s &0x80000000,%fp0 # load -ZERO 11976 mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/N 11977 rts 11978fmul_zero_p: 11979 fmov.s &0x00000000,%fp0 # load +ZERO 11980 mov.b &z_bmask,FPSR_CC(%a6) # set Z 11981 rts 11982 11983# 11984# Multiply: (inf x inf) || (inf x norm) || (inf x denorm) 11985# 11986# Note: The j-bit for an infinity is a don't-care. However, to be 11987# strictly compatible w/ the 68881/882, we make sure to return an 11988# INF w/ the j-bit set if the input INF j-bit was set. Destination 11989# INFs take priority. 11990# 11991 global fmul_inf_dst # global for fsglmul 11992fmul_inf_dst: 11993 fmovm.x DST(%a1),&0x80 # return INF result in fp0 11994 mov.b SRC_EX(%a0),%d0 # exclusive or the signs 11995 mov.b DST_EX(%a1),%d1 11996 eor.b %d0,%d1 11997 bpl.b fmul_inf_dst_p # result INF is pos. 11998fmul_inf_dst_n: 11999 fabs.x %fp0 # clear result sign 12000 fneg.x %fp0 # set result sign 12001 mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N 12002 rts 12003fmul_inf_dst_p: 12004 fabs.x %fp0 # clear result sign 12005 mov.b &inf_bmask,FPSR_CC(%a6) # set INF 12006 rts 12007 12008 global fmul_inf_src # global for fsglmul 12009fmul_inf_src: 12010 fmovm.x SRC(%a0),&0x80 # return INF result in fp0 12011 mov.b SRC_EX(%a0),%d0 # exclusive or the signs 12012 mov.b DST_EX(%a1),%d1 12013 eor.b %d0,%d1 12014 bpl.b fmul_inf_dst_p # result INF is pos. 
12015 bra.b fmul_inf_dst_n 12016 12017######################################################################### 12018# XDEF **************************************************************** # 12019# fin(): emulates the fmove instruction # 12020# fsin(): emulates the fsmove instruction # 12021# fdin(): emulates the fdmove instruction # 12022# # 12023# XREF **************************************************************** # 12024# norm() - normalize mantissa for EXOP on denorm # 12025# scale_to_zero_src() - scale src exponent to zero # 12026# ovf_res() - return default overflow result # 12027# unf_res() - return default underflow result # 12028# res_qnan_1op() - return QNAN result # 12029# res_snan_1op() - return SNAN result # 12030# # 12031# INPUT *************************************************************** # 12032# a0 = pointer to extended precision source operand # 12033# d0 = round prec/mode # 12034# # 12035# OUTPUT ************************************************************** # 12036# fp0 = result # 12037# fp1 = EXOP (if exception occurred) # 12038# # 12039# ALGORITHM *********************************************************** # 12040# Handle NANs, infinities, and zeroes as special cases. Divide # 12041# norms into extended, single, and double precision. # 12042# Norms can be emulated w/ a regular fmove instruction. For # 12043# sgl/dbl, must scale exponent and perform an "fmove". Check to see # 12044# if the result would have overflowed/underflowed. If so, use unf_res() # 12045# or ovf_res() to return the default result. Also return EXOP if # 12046# exception is enabled. If no exception, return the default result. # 12047# Unnorms don't pass through here. # 12048# # 12049######################################################################### 12050 12051 global fsin 12052fsin: 12053 andi.b &0x30,%d0 # clear rnd prec 12054 ori.b &s_mode*0x10,%d0 # insert sgl precision 12055 bra.b fin 12056 12057 global fdin 12058fdin: 12059 andi.b &0x30,%d0 # clear rnd prec 12060 ori.b &d_mode*0x10,%d0 # insert dbl precision 12061 12062 global fin 12063fin: 12064 mov.l %d0,L_SCR3(%a6) # store rnd info 12065 12066 mov.b STAG(%a6),%d1 # fetch src optype tag 12067 bne.w fin_not_norm # optimize on non-norm input 12068 12069# 12070# FP MOVE IN: NORMs and DENORMs ONLY! 12071# 12072fin_norm: 12073 andi.b &0xc0,%d0 # is precision extended? 12074 bne.w fin_not_ext # no, so go handle dbl or sgl 12075 12076# 12077# precision selected is extended. so...we cannot get an underflow 12078# or overflow because of rounding to the correct precision. so... 12079# skip the scaling and unscaling... 12080# 12081 tst.b SRC_EX(%a0) # is the operand negative? 12082 bpl.b fin_norm_done # no 12083 bset &neg_bit,FPSR_CC(%a6) # yes, so set 'N' ccode bit 12084fin_norm_done: 12085 fmovm.x SRC(%a0),&0x80 # return result in fp0 12086 rts 12087 12088# 12089# for an extended precision DENORM, the UNFL exception bit is set 12090# the accrued bit is NOT set in this instance(no inexactness!) 12091# 12092fin_denorm: 12093 andi.b &0xc0,%d0 # is precision extended? 12094 bne.w fin_not_ext # no, so go handle dbl or sgl 12095 12096 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit 12097 tst.b SRC_EX(%a0) # is the operand negative? 12098 bpl.b fin_denorm_done # no 12099 bset &neg_bit,FPSR_CC(%a6) # yes, so set 'N' ccode bit 12100fin_denorm_done: 12101 fmovm.x SRC(%a0),&0x80 # return result in fp0 12102 btst &unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled? 
12103 bne.b fin_denorm_unfl_ena # yes 12104 rts 12105 12106# 12107# the input is an extended DENORM and underflow is enabled in the FPCR. 12108# normalize the mantissa and add the bias of 0x6000 to the resulting negative 12109# exponent and insert back into the operand. 12110# 12111fin_denorm_unfl_ena: 12112 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) 12113 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) 12114 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) 12115 lea FP_SCR0(%a6),%a0 # pass: ptr to operand 12116 bsr.l norm # normalize result 12117 neg.w %d0 # new exponent = -(shft val) 12118 addi.w &0x6000,%d0 # add new bias to exponent 12119 mov.w FP_SCR0_EX(%a6),%d1 # fetch old sign,exp 12120 andi.w &0x8000,%d1 # keep old sign 12121 andi.w &0x7fff,%d0 # clear sign position 12122 or.w %d1,%d0 # concat new exo,old sign 12123 mov.w %d0,FP_SCR0_EX(%a6) # insert new exponent 12124 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1 12125 rts 12126 12127# 12128# operand is to be rounded to single or double precision 12129# 12130fin_not_ext: 12131 cmpi.b %d0,&s_mode*0x10 # separate sgl/dbl prec 12132 bne.b fin_dbl 12133 12134# 12135# operand is to be rounded to single precision 12136# 12137fin_sgl: 12138 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) 12139 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) 12140 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) 12141 bsr.l scale_to_zero_src # calculate scale factor 12142 12143 cmpi.l %d0,&0x3fff-0x3f80 # will move in underflow? 12144 bge.w fin_sd_unfl # yes; go handle underflow 12145 cmpi.l %d0,&0x3fff-0x407e # will move in overflow? 12146 beq.w fin_sd_may_ovfl # maybe; go check 12147 blt.w fin_sd_ovfl # yes; go handle overflow 12148 12149# 12150# operand will NOT overflow or underflow when moved into the fp reg file 12151# 12152fin_sd_normal: 12153 fmov.l &0x0,%fpsr # clear FPSR 12154 fmov.l L_SCR3(%a6),%fpcr # set FPCR 12155 12156 fmov.x FP_SCR0(%a6),%fp0 # perform move 12157 12158 fmov.l %fpsr,%d1 # save FPSR 12159 fmov.l &0x0,%fpcr # clear FPCR 12160 12161 or.l %d1,USER_FPSR(%a6) # save INEX2,N 12162 12163fin_sd_normal_exit: 12164 mov.l %d2,-(%sp) # save d2 12165 fmovm.x &0x80,FP_SCR0(%a6) # store out result 12166 mov.w FP_SCR0_EX(%a6),%d1 # load {sgn,exp} 12167 mov.w %d1,%d2 # make a copy 12168 andi.l &0x7fff,%d1 # strip sign 12169 sub.l %d0,%d1 # add scale factor 12170 andi.w &0x8000,%d2 # keep old sign 12171 or.w %d1,%d2 # concat old sign,new exponent 12172 mov.w %d2,FP_SCR0_EX(%a6) # insert new exponent 12173 mov.l (%sp)+,%d2 # restore d2 12174 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0 12175 rts 12176 12177# 12178# operand is to be rounded to double precision 12179# 12180fin_dbl: 12181 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) 12182 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) 12183 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) 12184 bsr.l scale_to_zero_src # calculate scale factor 12185 12186 cmpi.l %d0,&0x3fff-0x3c00 # will move in underflow? 12187 bge.w fin_sd_unfl # yes; go handle underflow 12188 cmpi.l %d0,&0x3fff-0x43fe # will move in overflow? 12189 beq.w fin_sd_may_ovfl # maybe; go check 12190 blt.w fin_sd_ovfl # yes; go handle overflow 12191 bra.w fin_sd_normal # no; ho handle normalized op 12192 12193# 12194# operand WILL underflow when moved in to the fp register file 12195# 12196fin_sd_unfl: 12197 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit 12198 12199 tst.b FP_SCR0_EX(%a6) # is operand negative? 12200 bpl.b fin_sd_unfl_tst 12201 bset &neg_bit,FPSR_CC(%a6) # set 'N' ccode bit 12202 12203# if underflow or inexact is enabled, then go calculate the EXOP first. 
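#
# a rough C sketch of the exponent math used by fin_sd_unfl_ena below (not
# part of the package; 'se' is the packed sign/exponent word of the scaled
# operand and 'scale' is the value returned by scale_to_zero_src()):
#
#	unsigned short exop_se(unsigned short se, long scale)
#	{
#		unsigned short sign = se & 0x8000;	/* keep old sign */
#		long exp = (long)(se & 0x7fff) - scale + 0x6000; /* unscale, rebias */
#		return sign | (unsigned short)(exp & 0x7fff);
#	}
#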
12204fin_sd_unfl_tst: 12205 mov.b FPCR_ENABLE(%a6),%d1 12206 andi.b &0x0b,%d1 # is UNFL or INEX enabled? 12207 bne.b fin_sd_unfl_ena # yes 12208 12209fin_sd_unfl_dis: 12210 lea FP_SCR0(%a6),%a0 # pass: result addr 12211 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode 12212 bsr.l unf_res # calculate default result 12213 or.b %d0,FPSR_CC(%a6) # unf_res may have set 'Z' 12214 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0 12215 rts 12216 12217# 12218# operand will underflow AND underflow or inexact is enabled. 12219# Therefore, we must return the result rounded to extended precision. 12220# 12221fin_sd_unfl_ena: 12222 mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6) 12223 mov.l FP_SCR0_LO(%a6),FP_SCR1_LO(%a6) 12224 mov.w FP_SCR0_EX(%a6),%d1 # load current exponent 12225 12226 mov.l %d2,-(%sp) # save d2 12227 mov.w %d1,%d2 # make a copy 12228 andi.l &0x7fff,%d1 # strip sign 12229 sub.l %d0,%d1 # subtract scale factor 12230 andi.w &0x8000,%d2 # extract old sign 12231 addi.l &0x6000,%d1 # add new bias 12232 andi.w &0x7fff,%d1 12233 or.w %d1,%d2 # concat old sign,new exp 12234 mov.w %d2,FP_SCR1_EX(%a6) # insert new exponent 12235 fmovm.x FP_SCR1(%a6),&0x40 # return EXOP in fp1 12236 mov.l (%sp)+,%d2 # restore d2 12237 bra.b fin_sd_unfl_dis 12238 12239# 12240# operand WILL overflow. 12241# 12242fin_sd_ovfl: 12243 fmov.l &0x0,%fpsr # clear FPSR 12244 fmov.l L_SCR3(%a6),%fpcr # set FPCR 12245 12246 fmov.x FP_SCR0(%a6),%fp0 # perform move 12247 12248 fmov.l &0x0,%fpcr # clear FPCR 12249 fmov.l %fpsr,%d1 # save FPSR 12250 12251 or.l %d1,USER_FPSR(%a6) # save INEX2,N 12252 12253fin_sd_ovfl_tst: 12254 or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex 12255 12256 mov.b FPCR_ENABLE(%a6),%d1 12257 andi.b &0x13,%d1 # is OVFL or INEX enabled? 12258 bne.b fin_sd_ovfl_ena # yes 12259 12260# 12261# OVFL is not enabled; therefore, we must create the default result by 12262# calling ovf_res(). 12263# 12264fin_sd_ovfl_dis: 12265 btst &neg_bit,FPSR_CC(%a6) # is result negative? 12266 sne %d1 # set sign param accordingly 12267 mov.l L_SCR3(%a6),%d0 # pass: prec,mode 12268 bsr.l ovf_res # calculate default result 12269 or.b %d0,FPSR_CC(%a6) # set INF,N if applicable 12270 fmovm.x (%a0),&0x80 # return default result in fp0 12271 rts 12272 12273# 12274# OVFL is enabled. 12275# the INEX2 bit has already been updated by the round to the correct precision. 12276# now, round to extended(and don't alter the FPSR). 12277# 12278fin_sd_ovfl_ena: 12279 mov.l %d2,-(%sp) # save d2 12280 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp} 12281 mov.l %d1,%d2 # make a copy 12282 andi.l &0x7fff,%d1 # strip sign 12283 andi.w &0x8000,%d2 # keep old sign 12284 sub.l %d0,%d1 # add scale factor 12285 sub.l &0x6000,%d1 # subtract bias 12286 andi.w &0x7fff,%d1 12287 or.w %d2,%d1 12288 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent 12289 mov.l (%sp)+,%d2 # restore d2 12290 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1 12291 bra.b fin_sd_ovfl_dis 12292 12293# 12294# the move in MAY overflow. so... 12295# 12296fin_sd_may_ovfl: 12297 fmov.l &0x0,%fpsr # clear FPSR 12298 fmov.l L_SCR3(%a6),%fpcr # set FPCR 12299 12300 fmov.x FP_SCR0(%a6),%fp0 # perform the move 12301 12302 fmov.l %fpsr,%d1 # save status 12303 fmov.l &0x0,%fpcr # clear FPCR 12304 12305 or.l %d1,USER_FPSR(%a6) # save INEX2,N 12306 12307 fabs.x %fp0,%fp1 # make a copy of result 12308 fcmp.b %fp1,&0x2 # is |result| >= 2.b? 
12309 fbge.w fin_sd_ovfl_tst # yes; overflow has occurred 12310 12311# no, it didn't overflow; we have correct result 12312 bra.w fin_sd_normal_exit 12313 12314########################################################################## 12315 12316# 12317# operand is not a NORM: check its optype and branch accordingly 12318# 12319fin_not_norm: 12320 cmpi.b %d1,&DENORM # weed out DENORM 12321 beq.w fin_denorm 12322 cmpi.b %d1,&SNAN # weed out SNANs 12323 beq.l res_snan_1op 12324 cmpi.b %d1,&QNAN # weed out QNANs 12325 beq.l res_qnan_1op 12326 12327# 12328# do the fmove in; at this point, only possible ops are ZERO and INF. 12329# use fmov to determine ccodes. 12330# prec:mode should be zero at this point but it won't affect answer anyways. 12331# 12332 fmov.x SRC(%a0),%fp0 # do fmove in 12333 fmov.l %fpsr,%d0 # no exceptions possible 12334 rol.l &0x8,%d0 # put ccodes in lo byte 12335 mov.b %d0,FPSR_CC(%a6) # insert correct ccodes 12336 rts 12337 12338######################################################################### 12339# XDEF **************************************************************** # 12340# fdiv(): emulates the fdiv instruction # 12341# fsdiv(): emulates the fsdiv instruction # 12342# fddiv(): emulates the fddiv instruction # 12343# # 12344# XREF **************************************************************** # 12345# scale_to_zero_src() - scale src exponent to zero # 12346# scale_to_zero_dst() - scale dst exponent to zero # 12347# unf_res() - return default underflow result # 12348# ovf_res() - return default overflow result # 12349# res_qnan() - return QNAN result # 12350# res_snan() - return SNAN result # 12351# # 12352# INPUT *************************************************************** # 12353# a0 = pointer to extended precision source operand # 12354# a1 = pointer to extended precision destination operand # 12355# d0 rnd prec,mode # 12356# # 12357# OUTPUT ************************************************************** # 12358# fp0 = result # 12359# fp1 = EXOP (if exception occurred) # 12360# # 12361# ALGORITHM *********************************************************** # 12362# Handle NANs, infinities, and zeroes as special cases. Divide # 12363# norms/denorms into ext/sgl/dbl precision. # 12364# For norms/denorms, scale the exponents such that a divide # 12365# instruction won't cause an exception. Use the regular fdiv to # 12366# compute a result. Check if the regular operands would have taken # 12367# an exception. If so, return the default overflow/underflow result # 12368# and return the EXOP if exceptions are enabled. Else, scale the # 12369# result operand to the proper exponent. 
# 12370# # 12371######################################################################### 12372 12373 align 0x10 12374tbl_fdiv_unfl: 12375 long 0x3fff - 0x0000 # ext_unfl 12376 long 0x3fff - 0x3f81 # sgl_unfl 12377 long 0x3fff - 0x3c01 # dbl_unfl 12378 12379tbl_fdiv_ovfl: 12380 long 0x3fff - 0x7ffe # ext overflow exponent 12381 long 0x3fff - 0x407e # sgl overflow exponent 12382 long 0x3fff - 0x43fe # dbl overflow exponent 12383 12384 global fsdiv 12385fsdiv: 12386 andi.b &0x30,%d0 # clear rnd prec 12387 ori.b &s_mode*0x10,%d0 # insert sgl prec 12388 bra.b fdiv 12389 12390 global fddiv 12391fddiv: 12392 andi.b &0x30,%d0 # clear rnd prec 12393 ori.b &d_mode*0x10,%d0 # insert dbl prec 12394 12395 global fdiv 12396fdiv: 12397 mov.l %d0,L_SCR3(%a6) # store rnd info 12398 12399 clr.w %d1 12400 mov.b DTAG(%a6),%d1 12401 lsl.b &0x3,%d1 12402 or.b STAG(%a6),%d1 # combine src tags 12403 12404 bne.w fdiv_not_norm # optimize on non-norm input 12405 12406# 12407# DIVIDE: NORMs and DENORMs ONLY! 12408# 12409fdiv_norm: 12410 mov.w DST_EX(%a1),FP_SCR1_EX(%a6) 12411 mov.l DST_HI(%a1),FP_SCR1_HI(%a6) 12412 mov.l DST_LO(%a1),FP_SCR1_LO(%a6) 12413 12414 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) 12415 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) 12416 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) 12417 12418 bsr.l scale_to_zero_src # scale src exponent 12419 mov.l %d0,-(%sp) # save scale factor 1 12420 12421 bsr.l scale_to_zero_dst # scale dst exponent 12422 12423 neg.l (%sp) # SCALE FACTOR = scale1 - scale2 12424 add.l %d0,(%sp) 12425 12426 mov.w 2+L_SCR3(%a6),%d1 # fetch precision 12427 lsr.b &0x6,%d1 # shift to lo bits 12428 mov.l (%sp)+,%d0 # load S.F. 12429 cmp.l %d0,(tbl_fdiv_ovfl.b,%pc,%d1.w*4) # will result overflow? 12430 ble.w fdiv_may_ovfl # result will overflow 12431 12432 cmp.l %d0,(tbl_fdiv_unfl.w,%pc,%d1.w*4) # will result underflow? 
12433 beq.w fdiv_may_unfl # maybe 12434 bgt.w fdiv_unfl # yes; go handle underflow 12435 12436fdiv_normal: 12437 fmovm.x FP_SCR1(%a6),&0x80 # load dst op 12438 12439 fmov.l L_SCR3(%a6),%fpcr # save FPCR 12440 fmov.l &0x0,%fpsr # clear FPSR 12441 12442 fdiv.x FP_SCR0(%a6),%fp0 # perform divide 12443 12444 fmov.l %fpsr,%d1 # save FPSR 12445 fmov.l &0x0,%fpcr # clear FPCR 12446 12447 or.l %d1,USER_FPSR(%a6) # save INEX2,N 12448 12449fdiv_normal_exit: 12450 fmovm.x &0x80,FP_SCR0(%a6) # store result on stack 12451 mov.l %d2,-(%sp) # store d2 12452 mov.w FP_SCR0_EX(%a6),%d1 # load {sgn,exp} 12453 mov.l %d1,%d2 # make a copy 12454 andi.l &0x7fff,%d1 # strip sign 12455 andi.w &0x8000,%d2 # keep old sign 12456 sub.l %d0,%d1 # add scale factor 12457 or.w %d2,%d1 # concat old sign,new exp 12458 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent 12459 mov.l (%sp)+,%d2 # restore d2 12460 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0 12461 rts 12462 12463tbl_fdiv_ovfl2: 12464 long 0x7fff 12465 long 0x407f 12466 long 0x43ff 12467 12468fdiv_no_ovfl: 12469 mov.l (%sp)+,%d0 # restore scale factor 12470 bra.b fdiv_normal_exit 12471 12472fdiv_may_ovfl: 12473 mov.l %d0,-(%sp) # save scale factor 12474 12475 fmovm.x FP_SCR1(%a6),&0x80 # load dst op 12476 12477 fmov.l L_SCR3(%a6),%fpcr # set FPCR 12478 fmov.l &0x0,%fpsr # set FPSR 12479 12480 fdiv.x FP_SCR0(%a6),%fp0 # execute divide 12481 12482 fmov.l %fpsr,%d0 12483 fmov.l &0x0,%fpcr 12484 12485 or.l %d0,USER_FPSR(%a6) # save INEX,N 12486 12487 fmovm.x &0x01,-(%sp) # save result to stack 12488 mov.w (%sp),%d0 # fetch new exponent 12489 add.l &0xc,%sp # clear result from stack 12490 andi.l &0x7fff,%d0 # strip sign 12491 sub.l (%sp),%d0 # add scale factor 12492 cmp.l %d0,(tbl_fdiv_ovfl2.b,%pc,%d1.w*4) 12493 blt.b fdiv_no_ovfl 12494 mov.l (%sp)+,%d0 12495 12496fdiv_ovfl_tst: 12497 or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex 12498 12499 mov.b FPCR_ENABLE(%a6),%d1 12500 andi.b &0x13,%d1 # is OVFL or INEX enabled? 12501 bne.b fdiv_ovfl_ena # yes 12502 12503fdiv_ovfl_dis: 12504 btst &neg_bit,FPSR_CC(%a6) # is result negative? 12505 sne %d1 # set sign param accordingly 12506 mov.l L_SCR3(%a6),%d0 # pass prec:rnd 12507 bsr.l ovf_res # calculate default result 12508 or.b %d0,FPSR_CC(%a6) # set INF if applicable 12509 fmovm.x (%a0),&0x80 # return default result in fp0 12510 rts 12511 12512fdiv_ovfl_ena: 12513 mov.l L_SCR3(%a6),%d1 12514 andi.b &0xc0,%d1 # is precision extended? 
12515 bne.b fdiv_ovfl_ena_sd # no, do sgl or dbl 12516 12517fdiv_ovfl_ena_cont: 12518 fmovm.x &0x80,FP_SCR0(%a6) # move result to stack 12519 12520 mov.l %d2,-(%sp) # save d2 12521 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp} 12522 mov.w %d1,%d2 # make a copy 12523 andi.l &0x7fff,%d1 # strip sign 12524 sub.l %d0,%d1 # add scale factor 12525 subi.l &0x6000,%d1 # subtract bias 12526 andi.w &0x7fff,%d1 # clear sign bit 12527 andi.w &0x8000,%d2 # keep old sign 12528 or.w %d2,%d1 # concat old sign,new exp 12529 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent 12530 mov.l (%sp)+,%d2 # restore d2 12531 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1 12532 bra.b fdiv_ovfl_dis 12533 12534fdiv_ovfl_ena_sd: 12535 fmovm.x FP_SCR1(%a6),&0x80 # load dst operand 12536 12537 mov.l L_SCR3(%a6),%d1 12538 andi.b &0x30,%d1 # keep rnd mode 12539 fmov.l %d1,%fpcr # set FPCR 12540 12541 fdiv.x FP_SCR0(%a6),%fp0 # execute divide 12542 12543 fmov.l &0x0,%fpcr # clear FPCR 12544 bra.b fdiv_ovfl_ena_cont 12545 12546fdiv_unfl: 12547 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit 12548 12549 fmovm.x FP_SCR1(%a6),&0x80 # load dst op 12550 12551 fmov.l &rz_mode*0x10,%fpcr # set FPCR 12552 fmov.l &0x0,%fpsr # clear FPSR 12553 12554 fdiv.x FP_SCR0(%a6),%fp0 # execute divide 12555 12556 fmov.l %fpsr,%d1 # save status 12557 fmov.l &0x0,%fpcr # clear FPCR 12558 12559 or.l %d1,USER_FPSR(%a6) # save INEX2,N 12560 12561 mov.b FPCR_ENABLE(%a6),%d1 12562 andi.b &0x0b,%d1 # is UNFL or INEX enabled? 12563 bne.b fdiv_unfl_ena # yes 12564 12565fdiv_unfl_dis: 12566 fmovm.x &0x80,FP_SCR0(%a6) # store out result 12567 12568 lea FP_SCR0(%a6),%a0 # pass: result addr 12569 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode 12570 bsr.l unf_res # calculate default result 12571 or.b %d0,FPSR_CC(%a6) # 'Z' may have been set 12572 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0 12573 rts 12574 12575# 12576# UNFL is enabled. 12577# 12578fdiv_unfl_ena: 12579 fmovm.x FP_SCR1(%a6),&0x40 # load dst op 12580 12581 mov.l L_SCR3(%a6),%d1 12582 andi.b &0xc0,%d1 # is precision extended? 
12583 bne.b fdiv_unfl_ena_sd # no, sgl or dbl 12584 12585 fmov.l L_SCR3(%a6),%fpcr # set FPCR 12586 12587fdiv_unfl_ena_cont: 12588 fmov.l &0x0,%fpsr # clear FPSR 12589 12590 fdiv.x FP_SCR0(%a6),%fp1 # execute divide 12591 12592 fmov.l &0x0,%fpcr # clear FPCR 12593 12594 fmovm.x &0x40,FP_SCR0(%a6) # save result to stack 12595 mov.l %d2,-(%sp) # save d2 12596 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp} 12597 mov.l %d1,%d2 # make a copy 12598 andi.l &0x7fff,%d1 # strip sign 12599 andi.w &0x8000,%d2 # keep old sign 12600 sub.l %d0,%d1 # add scale factoer 12601 addi.l &0x6000,%d1 # add bias 12602 andi.w &0x7fff,%d1 12603 or.w %d2,%d1 # concat old sign,new exp 12604 mov.w %d1,FP_SCR0_EX(%a6) # insert new exp 12605 mov.l (%sp)+,%d2 # restore d2 12606 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1 12607 bra.w fdiv_unfl_dis 12608 12609fdiv_unfl_ena_sd: 12610 mov.l L_SCR3(%a6),%d1 12611 andi.b &0x30,%d1 # use only rnd mode 12612 fmov.l %d1,%fpcr # set FPCR 12613 12614 bra.b fdiv_unfl_ena_cont 12615 12616# 12617# the divide operation MAY underflow: 12618# 12619fdiv_may_unfl: 12620 fmovm.x FP_SCR1(%a6),&0x80 # load dst op 12621 12622 fmov.l L_SCR3(%a6),%fpcr # set FPCR 12623 fmov.l &0x0,%fpsr # clear FPSR 12624 12625 fdiv.x FP_SCR0(%a6),%fp0 # execute divide 12626 12627 fmov.l %fpsr,%d1 # save status 12628 fmov.l &0x0,%fpcr # clear FPCR 12629 12630 or.l %d1,USER_FPSR(%a6) # save INEX2,N 12631 12632 fabs.x %fp0,%fp1 # make a copy of result 12633 fcmp.b %fp1,&0x1 # is |result| > 1.b? 12634 fbgt.w fdiv_normal_exit # no; no underflow occurred 12635 fblt.w fdiv_unfl # yes; underflow occurred 12636 12637# 12638# we still don't know if underflow occurred. result is ~ equal to 1. but, 12639# we don't know if the result was an underflow that rounded up to a 1 12640# or a normalized number that rounded down to a 1. so, redo the entire 12641# operation using RZ as the rounding mode to see what the pre-rounded 12642# result is. this case should be relatively rare. 12643# 12644 fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1 12645 12646 mov.l L_SCR3(%a6),%d1 12647 andi.b &0xc0,%d1 # keep rnd prec 12648 ori.b &rz_mode*0x10,%d1 # insert RZ 12649 12650 fmov.l %d1,%fpcr # set FPCR 12651 fmov.l &0x0,%fpsr # clear FPSR 12652 12653 fdiv.x FP_SCR0(%a6),%fp1 # execute divide 12654 12655 fmov.l &0x0,%fpcr # clear FPCR 12656 fabs.x %fp1 # make absolute value 12657 fcmp.b %fp1,&0x1 # is |result| < 1.b? 12658 fbge.w fdiv_normal_exit # no; no underflow occurred 12659 bra.w fdiv_unfl # yes; underflow occurred 12660 12661############################################################################ 12662 12663# 12664# Divide: inputs are not both normalized; what are they? 
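#
# in rough C terms (not part of the package), the dispatch that follows is a
# pc-relative table of 16-bit offsets indexed by the combined tag formed at
# fdiv():
#
#	short off = tbl_fdiv_op[tag];	/* tag = (DTAG << 3) | STAG */
#	/* control then transfers to the code at (tbl_fdiv_op + off) */
#
# assuming the tag encodings follow the table order (NORM=0, ZERO=1, INF=2,
# QNAN=3, DENORM=4, SNAN=5), a ZERO dst divided by an INF src gives
# tag = (1<<3)|2 = 10, whose entry is (fdiv_zero_load - tbl_fdiv_op), so the
# jmp lands at fdiv_zero_load.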
12665# 12666fdiv_not_norm: 12667 mov.w (tbl_fdiv_op.b,%pc,%d1.w*2),%d1 12668 jmp (tbl_fdiv_op.b,%pc,%d1.w*1) 12669 12670 swbeg &48 12671tbl_fdiv_op: 12672 short fdiv_norm - tbl_fdiv_op # NORM / NORM 12673 short fdiv_inf_load - tbl_fdiv_op # NORM / ZERO 12674 short fdiv_zero_load - tbl_fdiv_op # NORM / INF 12675 short fdiv_res_qnan - tbl_fdiv_op # NORM / QNAN 12676 short fdiv_norm - tbl_fdiv_op # NORM / DENORM 12677 short fdiv_res_snan - tbl_fdiv_op # NORM / SNAN 12678 short tbl_fdiv_op - tbl_fdiv_op # 12679 short tbl_fdiv_op - tbl_fdiv_op # 12680 12681 short fdiv_zero_load - tbl_fdiv_op # ZERO / NORM 12682 short fdiv_res_operr - tbl_fdiv_op # ZERO / ZERO 12683 short fdiv_zero_load - tbl_fdiv_op # ZERO / INF 12684 short fdiv_res_qnan - tbl_fdiv_op # ZERO / QNAN 12685 short fdiv_zero_load - tbl_fdiv_op # ZERO / DENORM 12686 short fdiv_res_snan - tbl_fdiv_op # ZERO / SNAN 12687 short tbl_fdiv_op - tbl_fdiv_op # 12688 short tbl_fdiv_op - tbl_fdiv_op # 12689 12690 short fdiv_inf_dst - tbl_fdiv_op # INF / NORM 12691 short fdiv_inf_dst - tbl_fdiv_op # INF / ZERO 12692 short fdiv_res_operr - tbl_fdiv_op # INF / INF 12693 short fdiv_res_qnan - tbl_fdiv_op # INF / QNAN 12694 short fdiv_inf_dst - tbl_fdiv_op # INF / DENORM 12695 short fdiv_res_snan - tbl_fdiv_op # INF / SNAN 12696 short tbl_fdiv_op - tbl_fdiv_op # 12697 short tbl_fdiv_op - tbl_fdiv_op # 12698 12699 short fdiv_res_qnan - tbl_fdiv_op # QNAN / NORM 12700 short fdiv_res_qnan - tbl_fdiv_op # QNAN / ZERO 12701 short fdiv_res_qnan - tbl_fdiv_op # QNAN / INF 12702 short fdiv_res_qnan - tbl_fdiv_op # QNAN / QNAN 12703 short fdiv_res_qnan - tbl_fdiv_op # QNAN / DENORM 12704 short fdiv_res_snan - tbl_fdiv_op # QNAN / SNAN 12705 short tbl_fdiv_op - tbl_fdiv_op # 12706 short tbl_fdiv_op - tbl_fdiv_op # 12707 12708 short fdiv_norm - tbl_fdiv_op # DENORM / NORM 12709 short fdiv_inf_load - tbl_fdiv_op # DENORM / ZERO 12710 short fdiv_zero_load - tbl_fdiv_op # DENORM / INF 12711 short fdiv_res_qnan - tbl_fdiv_op # DENORM / QNAN 12712 short fdiv_norm - tbl_fdiv_op # DENORM / DENORM 12713 short fdiv_res_snan - tbl_fdiv_op # DENORM / SNAN 12714 short tbl_fdiv_op - tbl_fdiv_op # 12715 short tbl_fdiv_op - tbl_fdiv_op # 12716 12717 short fdiv_res_snan - tbl_fdiv_op # SNAN / NORM 12718 short fdiv_res_snan - tbl_fdiv_op # SNAN / ZERO 12719 short fdiv_res_snan - tbl_fdiv_op # SNAN / INF 12720 short fdiv_res_snan - tbl_fdiv_op # SNAN / QNAN 12721 short fdiv_res_snan - tbl_fdiv_op # SNAN / DENORM 12722 short fdiv_res_snan - tbl_fdiv_op # SNAN / SNAN 12723 short tbl_fdiv_op - tbl_fdiv_op # 12724 short tbl_fdiv_op - tbl_fdiv_op # 12725 12726fdiv_res_qnan: 12727 bra.l res_qnan 12728fdiv_res_snan: 12729 bra.l res_snan 12730fdiv_res_operr: 12731 bra.l res_operr 12732 12733 global fdiv_zero_load # global for fsgldiv 12734fdiv_zero_load: 12735 mov.b SRC_EX(%a0),%d0 # result sign is exclusive 12736 mov.b DST_EX(%a1),%d1 # or of input signs. 12737 eor.b %d0,%d1 12738 bpl.b fdiv_zero_load_p # result is positive 12739 fmov.s &0x80000000,%fp0 # load a -ZERO 12740 mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/N 12741 rts 12742fdiv_zero_load_p: 12743 fmov.s &0x00000000,%fp0 # load a +ZERO 12744 mov.b &z_bmask,FPSR_CC(%a6) # set Z 12745 rts 12746 12747# 12748# The destination was In Range and the source was a ZERO. The result, 12749# Therefore, is an INF w/ the proper sign. 12750# So, determine the sign and return a new INF (w/ the j-bit cleared). 
12751# 12752 global fdiv_inf_load # global for fsgldiv 12753fdiv_inf_load: 12754 ori.w &dz_mask+adz_mask,2+USER_FPSR(%a6) # no; set DZ/ADZ 12755 mov.b SRC_EX(%a0),%d0 # load both signs 12756 mov.b DST_EX(%a1),%d1 12757 eor.b %d0,%d1 12758 bpl.b fdiv_inf_load_p # result is positive 12759 fmov.s &0xff800000,%fp0 # make result -INF 12760 mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N 12761 rts 12762fdiv_inf_load_p: 12763 fmov.s &0x7f800000,%fp0 # make result +INF 12764 mov.b &inf_bmask,FPSR_CC(%a6) # set INF 12765 rts 12766 12767# 12768# The destination was an INF w/ an In Range or ZERO source, the result is 12769# an INF w/ the proper sign. 12770# The 68881/882 returns the destination INF w/ the new sign(if the j-bit of the 12771# dst INF is set, then then j-bit of the result INF is also set). 12772# 12773 global fdiv_inf_dst # global for fsgldiv 12774fdiv_inf_dst: 12775 mov.b DST_EX(%a1),%d0 # load both signs 12776 mov.b SRC_EX(%a0),%d1 12777 eor.b %d0,%d1 12778 bpl.b fdiv_inf_dst_p # result is positive 12779 12780 fmovm.x DST(%a1),&0x80 # return result in fp0 12781 fabs.x %fp0 # clear sign bit 12782 fneg.x %fp0 # set sign bit 12783 mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/NEG 12784 rts 12785 12786fdiv_inf_dst_p: 12787 fmovm.x DST(%a1),&0x80 # return result in fp0 12788 fabs.x %fp0 # return positive INF 12789 mov.b &inf_bmask,FPSR_CC(%a6) # set INF 12790 rts 12791 12792######################################################################### 12793# XDEF **************************************************************** # 12794# fneg(): emulates the fneg instruction # 12795# fsneg(): emulates the fsneg instruction # 12796# fdneg(): emulates the fdneg instruction # 12797# # 12798# XREF **************************************************************** # 12799# norm() - normalize a denorm to provide EXOP # 12800# scale_to_zero_src() - scale sgl/dbl source exponent # 12801# ovf_res() - return default overflow result # 12802# unf_res() - return default underflow result # 12803# res_qnan_1op() - return QNAN result # 12804# res_snan_1op() - return SNAN result # 12805# # 12806# INPUT *************************************************************** # 12807# a0 = pointer to extended precision source operand # 12808# d0 = rnd prec,mode # 12809# # 12810# OUTPUT ************************************************************** # 12811# fp0 = result # 12812# fp1 = EXOP (if exception occurred) # 12813# # 12814# ALGORITHM *********************************************************** # 12815# Handle NANs, zeroes, and infinities as special cases. Separate # 12816# norms/denorms into ext/sgl/dbl precisions. Extended precision can be # 12817# emulated by simply setting sign bit. Sgl/dbl operands must be scaled # 12818# and an actual fneg performed to see if overflow/underflow would have # 12819# occurred. If so, return default underflow/overflow result. Else, # 12820# scale the result exponent and return result. FPSR gets set based on # 12821# the result value. 
# 12822# # 12823######################################################################### 12824 12825 global fsneg 12826fsneg: 12827 andi.b &0x30,%d0 # clear rnd prec 12828 ori.b &s_mode*0x10,%d0 # insert sgl precision 12829 bra.b fneg 12830 12831 global fdneg 12832fdneg: 12833 andi.b &0x30,%d0 # clear rnd prec 12834 ori.b &d_mode*0x10,%d0 # insert dbl prec 12835 12836 global fneg 12837fneg: 12838 mov.l %d0,L_SCR3(%a6) # store rnd info 12839 mov.b STAG(%a6),%d1 12840 bne.w fneg_not_norm # optimize on non-norm input 12841 12842# 12843# NEGATE SIGN : norms and denorms ONLY! 12844# 12845fneg_norm: 12846 andi.b &0xc0,%d0 # is precision extended? 12847 bne.w fneg_not_ext # no; go handle sgl or dbl 12848 12849# 12850# precision selected is extended. so...we can not get an underflow 12851# or overflow because of rounding to the correct precision. so... 12852# skip the scaling and unscaling... 12853# 12854 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) 12855 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) 12856 mov.w SRC_EX(%a0),%d0 12857 eori.w &0x8000,%d0 # negate sign 12858 bpl.b fneg_norm_load # sign is positive 12859 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit 12860fneg_norm_load: 12861 mov.w %d0,FP_SCR0_EX(%a6) 12862 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0 12863 rts 12864 12865# 12866# for an extended precision DENORM, the UNFL exception bit is set 12867# the accrued bit is NOT set in this instance(no inexactness!) 12868# 12869fneg_denorm: 12870 andi.b &0xc0,%d0 # is precision extended? 12871 bne.b fneg_not_ext # no; go handle sgl or dbl 12872 12873 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit 12874 12875 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) 12876 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) 12877 mov.w SRC_EX(%a0),%d0 12878 eori.w &0x8000,%d0 # negate sign 12879 bpl.b fneg_denorm_done # no 12880 mov.b &neg_bmask,FPSR_CC(%a6) # yes, set 'N' ccode bit 12881fneg_denorm_done: 12882 mov.w %d0,FP_SCR0_EX(%a6) 12883 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0 12884 12885 btst &unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled? 12886 bne.b fneg_ext_unfl_ena # yes 12887 rts 12888 12889# 12890# the input is an extended DENORM and underflow is enabled in the FPCR. 12891# normalize the mantissa and add the bias of 0x6000 to the resulting negative 12892# exponent and insert back into the operand. 12893# 12894fneg_ext_unfl_ena: 12895 lea FP_SCR0(%a6),%a0 # pass: ptr to operand 12896 bsr.l norm # normalize result 12897 neg.w %d0 # new exponent = -(shft val) 12898 addi.w &0x6000,%d0 # add new bias to exponent 12899 mov.w FP_SCR0_EX(%a6),%d1 # fetch old sign,exp 12900 andi.w &0x8000,%d1 # keep old sign 12901 andi.w &0x7fff,%d0 # clear sign position 12902 or.w %d1,%d0 # concat old sign, new exponent 12903 mov.w %d0,FP_SCR0_EX(%a6) # insert new exponent 12904 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1 12905 rts 12906 12907# 12908# operand is either single or double 12909# 12910fneg_not_ext: 12911 cmpi.b %d0,&s_mode*0x10 # separate sgl/dbl prec 12912 bne.b fneg_dbl 12913 12914# 12915# operand is to be rounded to single precision 12916# 12917fneg_sgl: 12918 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) 12919 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) 12920 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) 12921 bsr.l scale_to_zero_src # calculate scale factor 12922 12923 cmpi.l %d0,&0x3fff-0x3f80 # will move in underflow? 12924 bge.w fneg_sd_unfl # yes; go handle underflow 12925 cmpi.l %d0,&0x3fff-0x407e # will move in overflow? 
12926 beq.w fneg_sd_may_ovfl # maybe; go check 12927 blt.w fneg_sd_ovfl # yes; go handle overflow 12928 12929# 12930# operand will NOT overflow or underflow when moved in to the fp reg file 12931# 12932fneg_sd_normal: 12933 fmov.l &0x0,%fpsr # clear FPSR 12934 fmov.l L_SCR3(%a6),%fpcr # set FPCR 12935 12936 fneg.x FP_SCR0(%a6),%fp0 # perform negation 12937 12938 fmov.l %fpsr,%d1 # save FPSR 12939 fmov.l &0x0,%fpcr # clear FPCR 12940 12941 or.l %d1,USER_FPSR(%a6) # save INEX2,N 12942 12943fneg_sd_normal_exit: 12944 mov.l %d2,-(%sp) # save d2 12945 fmovm.x &0x80,FP_SCR0(%a6) # store out result 12946 mov.w FP_SCR0_EX(%a6),%d1 # load sgn,exp 12947 mov.w %d1,%d2 # make a copy 12948 andi.l &0x7fff,%d1 # strip sign 12949 sub.l %d0,%d1 # add scale factor 12950 andi.w &0x8000,%d2 # keep old sign 12951 or.w %d1,%d2 # concat old sign,new exp 12952 mov.w %d2,FP_SCR0_EX(%a6) # insert new exponent 12953 mov.l (%sp)+,%d2 # restore d2 12954 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0 12955 rts 12956 12957# 12958# operand is to be rounded to double precision 12959# 12960fneg_dbl: 12961 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) 12962 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) 12963 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) 12964 bsr.l scale_to_zero_src # calculate scale factor 12965 12966 cmpi.l %d0,&0x3fff-0x3c00 # will move in underflow? 12967 bge.b fneg_sd_unfl # yes; go handle underflow 12968 cmpi.l %d0,&0x3fff-0x43fe # will move in overflow? 12969 beq.w fneg_sd_may_ovfl # maybe; go check 12970 blt.w fneg_sd_ovfl # yes; go handle overflow 12971 bra.w fneg_sd_normal # no; ho handle normalized op 12972 12973# 12974# operand WILL underflow when moved in to the fp register file 12975# 12976fneg_sd_unfl: 12977 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit 12978 12979 eori.b &0x80,FP_SCR0_EX(%a6) # negate sign 12980 bpl.b fneg_sd_unfl_tst 12981 bset &neg_bit,FPSR_CC(%a6) # set 'N' ccode bit 12982 12983# if underflow or inexact is enabled, go calculate EXOP first. 12984fneg_sd_unfl_tst: 12985 mov.b FPCR_ENABLE(%a6),%d1 12986 andi.b &0x0b,%d1 # is UNFL or INEX enabled? 12987 bne.b fneg_sd_unfl_ena # yes 12988 12989fneg_sd_unfl_dis: 12990 lea FP_SCR0(%a6),%a0 # pass: result addr 12991 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode 12992 bsr.l unf_res # calculate default result 12993 or.b %d0,FPSR_CC(%a6) # unf_res may have set 'Z' 12994 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0 12995 rts 12996 12997# 12998# operand will underflow AND underflow is enabled. 12999# Therefore, we must return the result rounded to extended precision. 13000# 13001fneg_sd_unfl_ena: 13002 mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6) 13003 mov.l FP_SCR0_LO(%a6),FP_SCR1_LO(%a6) 13004 mov.w FP_SCR0_EX(%a6),%d1 # load current exponent 13005 13006 mov.l %d2,-(%sp) # save d2 13007 mov.l %d1,%d2 # make a copy 13008 andi.l &0x7fff,%d1 # strip sign 13009 andi.w &0x8000,%d2 # keep old sign 13010 sub.l %d0,%d1 # subtract scale factor 13011 addi.l &0x6000,%d1 # add new bias 13012 andi.w &0x7fff,%d1 13013 or.w %d2,%d1 # concat new sign,new exp 13014 mov.w %d1,FP_SCR1_EX(%a6) # insert new exp 13015 fmovm.x FP_SCR1(%a6),&0x40 # return EXOP in fp1 13016 mov.l (%sp)+,%d2 # restore d2 13017 bra.b fneg_sd_unfl_dis 13018 13019# 13020# operand WILL overflow. 
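# when OVFL or INEX is enabled, fneg_sd_ovfl_ena below builds the EXOP the
# same way as the underflow case but with the bias subtracted; in rough C
# terms (not part of the package; 'se' and 'scale' as in the underflow
# sketch above):
#
#	unsigned short sign = se & 0x8000;	/* keep old sign */
#	long exp = (long)(se & 0x7fff) - scale - 0x6000; /* unscale, unbias */
#	exop_se = sign | (unsigned short)(exp & 0x7fff);
#
# otherwise ovf_res() supplies the default rounded result.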
13021# 13022fneg_sd_ovfl: 13023 fmov.l &0x0,%fpsr # clear FPSR 13024 fmov.l L_SCR3(%a6),%fpcr # set FPCR 13025 13026 fneg.x FP_SCR0(%a6),%fp0 # perform negation 13027 13028 fmov.l &0x0,%fpcr # clear FPCR 13029 fmov.l %fpsr,%d1 # save FPSR 13030 13031 or.l %d1,USER_FPSR(%a6) # save INEX2,N 13032 13033fneg_sd_ovfl_tst: 13034 or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex 13035 13036 mov.b FPCR_ENABLE(%a6),%d1 13037 andi.b &0x13,%d1 # is OVFL or INEX enabled? 13038 bne.b fneg_sd_ovfl_ena # yes 13039 13040# 13041# OVFL is not enabled; therefore, we must create the default result by 13042# calling ovf_res(). 13043# 13044fneg_sd_ovfl_dis: 13045 btst &neg_bit,FPSR_CC(%a6) # is result negative? 13046 sne %d1 # set sign param accordingly 13047 mov.l L_SCR3(%a6),%d0 # pass: prec,mode 13048 bsr.l ovf_res # calculate default result 13049 or.b %d0,FPSR_CC(%a6) # set INF,N if applicable 13050 fmovm.x (%a0),&0x80 # return default result in fp0 13051 rts 13052 13053# 13054# OVFL is enabled. 13055# the INEX2 bit has already been updated by the round to the correct precision. 13056# now, round to extended(and don't alter the FPSR). 13057# 13058fneg_sd_ovfl_ena: 13059 mov.l %d2,-(%sp) # save d2 13060 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp} 13061 mov.l %d1,%d2 # make a copy 13062 andi.l &0x7fff,%d1 # strip sign 13063 andi.w &0x8000,%d2 # keep old sign 13064 sub.l %d0,%d1 # add scale factor 13065 subi.l &0x6000,%d1 # subtract bias 13066 andi.w &0x7fff,%d1 13067 or.w %d2,%d1 # concat sign,exp 13068 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent 13069 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1 13070 mov.l (%sp)+,%d2 # restore d2 13071 bra.b fneg_sd_ovfl_dis 13072 13073# 13074# the move in MAY underflow. so... 13075# 13076fneg_sd_may_ovfl: 13077 fmov.l &0x0,%fpsr # clear FPSR 13078 fmov.l L_SCR3(%a6),%fpcr # set FPCR 13079 13080 fneg.x FP_SCR0(%a6),%fp0 # perform negation 13081 13082 fmov.l %fpsr,%d1 # save status 13083 fmov.l &0x0,%fpcr # clear FPCR 13084 13085 or.l %d1,USER_FPSR(%a6) # save INEX2,N 13086 13087 fabs.x %fp0,%fp1 # make a copy of result 13088 fcmp.b %fp1,&0x2 # is |result| >= 2.b? 13089 fbge.w fneg_sd_ovfl_tst # yes; overflow has occurred 13090 13091# no, it didn't overflow; we have correct result 13092 bra.w fneg_sd_normal_exit 13093 13094########################################################################## 13095 13096# 13097# input is not normalized; what is it? 13098# 13099fneg_not_norm: 13100 cmpi.b %d1,&DENORM # weed out DENORM 13101 beq.w fneg_denorm 13102 cmpi.b %d1,&SNAN # weed out SNAN 13103 beq.l res_snan_1op 13104 cmpi.b %d1,&QNAN # weed out QNAN 13105 beq.l res_qnan_1op 13106 13107# 13108# do the fneg; at this point, only possible ops are ZERO and INF. 13109# use fneg to determine ccodes. 13110# prec:mode should be zero at this point but it won't affect answer anyways. 
13111# 13112 fneg.x SRC_EX(%a0),%fp0 # do fneg 13113 fmov.l %fpsr,%d0 13114 rol.l &0x8,%d0 # put ccodes in lo byte 13115 mov.b %d0,FPSR_CC(%a6) # insert correct ccodes 13116 rts 13117 13118######################################################################### 13119# XDEF **************************************************************** # 13120# ftst(): emulates the ftest instruction # 13121# # 13122# XREF **************************************************************** # 13123# res{s,q}nan_1op() - set NAN result for monadic instruction # 13124# # 13125# INPUT *************************************************************** # 13126# a0 = pointer to extended precision source operand # 13127# # 13128# OUTPUT ************************************************************** # 13129# none # 13130# # 13131# ALGORITHM *********************************************************** # 13132# Check the source operand tag (STAG) and set the FPCR according # 13133# to the operand type and sign. # 13134# # 13135######################################################################### 13136 13137 global ftst 13138ftst: 13139 mov.b STAG(%a6),%d1 13140 bne.b ftst_not_norm # optimize on non-norm input 13141 13142# 13143# Norm: 13144# 13145ftst_norm: 13146 tst.b SRC_EX(%a0) # is operand negative? 13147 bmi.b ftst_norm_m # yes 13148 rts 13149ftst_norm_m: 13150 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit 13151 rts 13152 13153# 13154# input is not normalized; what is it? 13155# 13156ftst_not_norm: 13157 cmpi.b %d1,&ZERO # weed out ZERO 13158 beq.b ftst_zero 13159 cmpi.b %d1,&INF # weed out INF 13160 beq.b ftst_inf 13161 cmpi.b %d1,&SNAN # weed out SNAN 13162 beq.l res_snan_1op 13163 cmpi.b %d1,&QNAN # weed out QNAN 13164 beq.l res_qnan_1op 13165 13166# 13167# Denorm: 13168# 13169ftst_denorm: 13170 tst.b SRC_EX(%a0) # is operand negative? 13171 bmi.b ftst_denorm_m # yes 13172 rts 13173ftst_denorm_m: 13174 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit 13175 rts 13176 13177# 13178# Infinity: 13179# 13180ftst_inf: 13181 tst.b SRC_EX(%a0) # is operand negative? 13182 bmi.b ftst_inf_m # yes 13183ftst_inf_p: 13184 mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit 13185 rts 13186ftst_inf_m: 13187 mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'I','N' ccode bits 13188 rts 13189 13190# 13191# Zero: 13192# 13193ftst_zero: 13194 tst.b SRC_EX(%a0) # is operand negative? 13195 bmi.b ftst_zero_m # yes 13196ftst_zero_p: 13197 mov.b &z_bmask,FPSR_CC(%a6) # set 'N' ccode bit 13198 rts 13199ftst_zero_m: 13200 mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits 13201 rts 13202 13203######################################################################### 13204# XDEF **************************************************************** # 13205# fint(): emulates the fint instruction # 13206# # 13207# XREF **************************************************************** # 13208# res_{s,q}nan_1op() - set NAN result for monadic operation # 13209# # 13210# INPUT *************************************************************** # 13211# a0 = pointer to extended precision source operand # 13212# d0 = round precision/mode # 13213# # 13214# OUTPUT ************************************************************** # 13215# fp0 = result # 13216# # 13217# ALGORITHM *********************************************************** # 13218# Separate according to operand type. Unnorms don't pass through # 13219# here. For norms, load the rounding mode/prec, execute a "fint", then # 13220# store the resulting FPSR bits. 
# 13221# For denorms, force the j-bit to a one and do the same as for # 13222# norms. Denorms are so low that the answer will either be a zero or a # 13223# one. # 13224# For zeroes/infs/NANs, return the same while setting the FPSR # 13225# as appropriate. # 13226# # 13227######################################################################### 13228 13229 global fint 13230fint: 13231 mov.b STAG(%a6),%d1 13232 bne.b fint_not_norm # optimize on non-norm input 13233 13234# 13235# Norm: 13236# 13237fint_norm: 13238 andi.b &0x30,%d0 # set prec = ext 13239 13240 fmov.l %d0,%fpcr # set FPCR 13241 fmov.l &0x0,%fpsr # clear FPSR 13242 13243 fint.x SRC(%a0),%fp0 # execute fint 13244 13245 fmov.l &0x0,%fpcr # clear FPCR 13246 fmov.l %fpsr,%d0 # save FPSR 13247 or.l %d0,USER_FPSR(%a6) # set exception bits 13248 13249 rts 13250 13251# 13252# input is not normalized; what is it? 13253# 13254fint_not_norm: 13255 cmpi.b %d1,&ZERO # weed out ZERO 13256 beq.b fint_zero 13257 cmpi.b %d1,&INF # weed out INF 13258 beq.b fint_inf 13259 cmpi.b %d1,&DENORM # weed out DENORM 13260 beq.b fint_denorm 13261 cmpi.b %d1,&SNAN # weed out SNAN 13262 beq.l res_snan_1op 13263 bra.l res_qnan_1op # weed out QNAN 13264 13265# 13266# Denorm: 13267# 13268# for DENORMs, the result will be either (+/-)ZERO or (+/-)1. 13269# also, the INEX2 and AINEX exception bits will be set. 13270# so, we could either set these manually or force the DENORM 13271# to a very small NORM and ship it to the NORM routine. 13272# I do the latter. 13273# 13274fint_denorm: 13275 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp 13276 mov.b &0x80,FP_SCR0_HI(%a6) # force DENORM ==> small NORM 13277 lea FP_SCR0(%a6),%a0 13278 bra.b fint_norm 13279 13280# 13281# Zero: 13282# 13283fint_zero: 13284 tst.b SRC_EX(%a0) # is ZERO negative? 13285 bmi.b fint_zero_m # yes 13286fint_zero_p: 13287 fmov.s &0x00000000,%fp0 # return +ZERO in fp0 13288 mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit 13289 rts 13290fint_zero_m: 13291 fmov.s &0x80000000,%fp0 # return -ZERO in fp0 13292 mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits 13293 rts 13294 13295# 13296# Infinity: 13297# 13298fint_inf: 13299 fmovm.x SRC(%a0),&0x80 # return result in fp0 13300 tst.b SRC_EX(%a0) # is INF negative? 13301 bmi.b fint_inf_m # yes 13302fint_inf_p: 13303 mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit 13304 rts 13305fint_inf_m: 13306 mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits 13307 rts 13308 13309######################################################################### 13310# XDEF **************************************************************** # 13311# fintrz(): emulates the fintrz instruction # 13312# # 13313# XREF **************************************************************** # 13314# res_{s,q}nan_1op() - set NAN result for monadic operation # 13315# # 13316# INPUT *************************************************************** # 13317# a0 = pointer to extended precision source operand # 13318# d0 = round precision/mode # 13319# # 13320# OUTPUT ************************************************************** # 13321# fp0 = result # 13322# # 13323# ALGORITHM *********************************************************** # 13324# Separate according to operand type. Unnorms don't pass through # 13325# here. For norms, load the rounding mode/prec, execute a "fintrz", # 13326# then store the resulting FPSR bits. # 13327# For denorms, force the j-bit to a one and do the same as for # 13328# norms. 
Denorms are so low that the answer will either be a zero or a # 13329# one. # 13330# For zeroes/infs/NANs, return the same while setting the FPSR # 13331# as appropriate. # 13332# # 13333######################################################################### 13334 13335 global fintrz 13336fintrz: 13337 mov.b STAG(%a6),%d1 13338 bne.b fintrz_not_norm # optimize on non-norm input 13339 13340# 13341# Norm: 13342# 13343fintrz_norm: 13344 fmov.l &0x0,%fpsr # clear FPSR 13345 13346 fintrz.x SRC(%a0),%fp0 # execute fintrz 13347 13348 fmov.l %fpsr,%d0 # save FPSR 13349 or.l %d0,USER_FPSR(%a6) # set exception bits 13350 13351 rts 13352 13353# 13354# input is not normalized; what is it? 13355# 13356fintrz_not_norm: 13357 cmpi.b %d1,&ZERO # weed out ZERO 13358 beq.b fintrz_zero 13359 cmpi.b %d1,&INF # weed out INF 13360 beq.b fintrz_inf 13361 cmpi.b %d1,&DENORM # weed out DENORM 13362 beq.b fintrz_denorm 13363 cmpi.b %d1,&SNAN # weed out SNAN 13364 beq.l res_snan_1op 13365 bra.l res_qnan_1op # weed out QNAN 13366 13367# 13368# Denorm: 13369# 13370# for DENORMs, the result will be (+/-)ZERO. 13371# also, the INEX2 and AINEX exception bits will be set. 13372# so, we could either set these manually or force the DENORM 13373# to a very small NORM and ship it to the NORM routine. 13374# I do the latter. 13375# 13376fintrz_denorm: 13377 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp 13378 mov.b &0x80,FP_SCR0_HI(%a6) # force DENORM ==> small NORM 13379 lea FP_SCR0(%a6),%a0 13380 bra.b fintrz_norm 13381 13382# 13383# Zero: 13384# 13385fintrz_zero: 13386 tst.b SRC_EX(%a0) # is ZERO negative? 13387 bmi.b fintrz_zero_m # yes 13388fintrz_zero_p: 13389 fmov.s &0x00000000,%fp0 # return +ZERO in fp0 13390 mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit 13391 rts 13392fintrz_zero_m: 13393 fmov.s &0x80000000,%fp0 # return -ZERO in fp0 13394 mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits 13395 rts 13396 13397# 13398# Infinity: 13399# 13400fintrz_inf: 13401 fmovm.x SRC(%a0),&0x80 # return result in fp0 13402 tst.b SRC_EX(%a0) # is INF negative? 13403 bmi.b fintrz_inf_m # yes 13404fintrz_inf_p: 13405 mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit 13406 rts 13407fintrz_inf_m: 13408 mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits 13409 rts 13410 13411######################################################################### 13412# XDEF **************************************************************** # 13413# fabs(): emulates the fabs instruction # 13414# fsabs(): emulates the fsabs instruction # 13415# fdabs(): emulates the fdabs instruction # 13416# # 13417# XREF **************************************************************** # 13418# norm() - normalize denorm mantissa to provide EXOP # 13419# scale_to_zero_src() - make exponent. = 0; get scale factor # 13420# unf_res() - calculate underflow result # 13421# ovf_res() - calculate overflow result # 13422# res_{s,q}nan_1op() - set NAN result for monadic operation # 13423# # 13424# INPUT *************************************************************** # 13425# a0 = pointer to extended precision source operand # 13426# d0 = rnd precision/mode # 13427# # 13428# OUTPUT ************************************************************** # 13429# fp0 = result # 13430# fp1 = EXOP (if exception occurred) # 13431# # 13432# ALGORITHM *********************************************************** # 13433# Handle NANs, infinities, and zeroes as special cases. Divide # 13434# norms into extended, single, and double precision. 
# 13435# Simply clear sign for extended precision norm. Ext prec denorm # 13436# gets an EXOP created for it since it's an underflow. # 13437# Double and single precision can overflow and underflow. First, # 13438# scale the operand such that the exponent is zero. Perform an "fabs" # 13439# using the correct rnd mode/prec. Check to see if the original # 13440# exponent would take an exception. If so, use unf_res() or ovf_res() # 13441# to calculate the default result. Also, create the EXOP for the # 13442# exceptional case. If no exception should occur, insert the correct # 13443# result exponent and return. # 13444# Unnorms don't pass through here. # 13445# # 13446######################################################################### 13447 13448 global fsabs 13449fsabs: 13450 andi.b &0x30,%d0 # clear rnd prec 13451 ori.b &s_mode*0x10,%d0 # insert sgl precision 13452 bra.b fabs 13453 13454 global fdabs 13455fdabs: 13456 andi.b &0x30,%d0 # clear rnd prec 13457 ori.b &d_mode*0x10,%d0 # insert dbl precision 13458 13459 global fabs 13460fabs: 13461 mov.l %d0,L_SCR3(%a6) # store rnd info 13462 mov.b STAG(%a6),%d1 13463 bne.w fabs_not_norm # optimize on non-norm input 13464 13465# 13466# ABSOLUTE VALUE: norms and denorms ONLY! 13467# 13468fabs_norm: 13469 andi.b &0xc0,%d0 # is precision extended? 13470 bne.b fabs_not_ext # no; go handle sgl or dbl 13471 13472# 13473# precision selected is extended. so...we can not get an underflow 13474# or overflow because of rounding to the correct precision. so... 13475# skip the scaling and unscaling... 13476# 13477 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) 13478 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) 13479 mov.w SRC_EX(%a0),%d1 13480 bclr &15,%d1 # force absolute value 13481 mov.w %d1,FP_SCR0_EX(%a6) # insert exponent 13482 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0 13483 rts 13484 13485# 13486# for an extended precision DENORM, the UNFL exception bit is set 13487# the accrued bit is NOT set in this instance(no inexactness!) 13488# 13489fabs_denorm: 13490 andi.b &0xc0,%d0 # is precision extended? 13491 bne.b fabs_not_ext # no 13492 13493 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit 13494 13495 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) 13496 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) 13497 mov.w SRC_EX(%a0),%d0 13498 bclr &15,%d0 # clear sign 13499 mov.w %d0,FP_SCR0_EX(%a6) # insert exponent 13500 13501 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0 13502 13503 btst &unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled? 13504 bne.b fabs_ext_unfl_ena 13505 rts 13506 13507# 13508# the input is an extended DENORM and underflow is enabled in the FPCR. 13509# normalize the mantissa and add the bias of 0x6000 to the resulting negative 13510# exponent and insert back into the operand. 
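# in rough C terms (not part of the package), with 'shift' being the
# left-shift count returned by norm() and 'sign' the old sign bit:
#
#	exop_se = sign | (unsigned short)((0x6000 - shift) & 0x7fff);
#
# e.g. a denorm whose mantissa needed a shift of, say, 0x10 gets an EXOP
# exponent of 0x6000 - 0x10 = 0x5ff0.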
13511# 13512fabs_ext_unfl_ena: 13513 lea FP_SCR0(%a6),%a0 # pass: ptr to operand 13514 bsr.l norm # normalize result 13515 neg.w %d0 # new exponent = -(shft val) 13516 addi.w &0x6000,%d0 # add new bias to exponent 13517 mov.w FP_SCR0_EX(%a6),%d1 # fetch old sign,exp 13518 andi.w &0x8000,%d1 # keep old sign 13519 andi.w &0x7fff,%d0 # clear sign position 13520 or.w %d1,%d0 # concat old sign, new exponent 13521 mov.w %d0,FP_SCR0_EX(%a6) # insert new exponent 13522 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1 13523 rts 13524 13525# 13526# operand is either single or double 13527# 13528fabs_not_ext: 13529 cmpi.b %d0,&s_mode*0x10 # separate sgl/dbl prec 13530 bne.b fabs_dbl 13531 13532# 13533# operand is to be rounded to single precision 13534# 13535fabs_sgl: 13536 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) 13537 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) 13538 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) 13539 bsr.l scale_to_zero_src # calculate scale factor 13540 13541 cmpi.l %d0,&0x3fff-0x3f80 # will move in underflow? 13542 bge.w fabs_sd_unfl # yes; go handle underflow 13543 cmpi.l %d0,&0x3fff-0x407e # will move in overflow? 13544 beq.w fabs_sd_may_ovfl # maybe; go check 13545 blt.w fabs_sd_ovfl # yes; go handle overflow 13546 13547# 13548# operand will NOT overflow or underflow when moved in to the fp reg file 13549# 13550fabs_sd_normal: 13551 fmov.l &0x0,%fpsr # clear FPSR 13552 fmov.l L_SCR3(%a6),%fpcr # set FPCR 13553 13554 fabs.x FP_SCR0(%a6),%fp0 # perform absolute 13555 13556 fmov.l %fpsr,%d1 # save FPSR 13557 fmov.l &0x0,%fpcr # clear FPCR 13558 13559 or.l %d1,USER_FPSR(%a6) # save INEX2,N 13560 13561fabs_sd_normal_exit: 13562 mov.l %d2,-(%sp) # save d2 13563 fmovm.x &0x80,FP_SCR0(%a6) # store out result 13564 mov.w FP_SCR0_EX(%a6),%d1 # load sgn,exp 13565 mov.l %d1,%d2 # make a copy 13566 andi.l &0x7fff,%d1 # strip sign 13567 sub.l %d0,%d1 # add scale factor 13568 andi.w &0x8000,%d2 # keep old sign 13569 or.w %d1,%d2 # concat old sign,new exp 13570 mov.w %d2,FP_SCR0_EX(%a6) # insert new exponent 13571 mov.l (%sp)+,%d2 # restore d2 13572 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0 13573 rts 13574 13575# 13576# operand is to be rounded to double precision 13577# 13578fabs_dbl: 13579 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) 13580 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) 13581 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) 13582 bsr.l scale_to_zero_src # calculate scale factor 13583 13584 cmpi.l %d0,&0x3fff-0x3c00 # will move in underflow? 13585 bge.b fabs_sd_unfl # yes; go handle underflow 13586 cmpi.l %d0,&0x3fff-0x43fe # will move in overflow? 13587 beq.w fabs_sd_may_ovfl # maybe; go check 13588 blt.w fabs_sd_ovfl # yes; go handle overflow 13589 bra.w fabs_sd_normal # no; ho handle normalized op 13590 13591# 13592# operand WILL underflow when moved in to the fp register file 13593# 13594fabs_sd_unfl: 13595 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit 13596 13597 bclr &0x7,FP_SCR0_EX(%a6) # force absolute value 13598 13599# if underflow or inexact is enabled, go calculate EXOP first. 13600 mov.b FPCR_ENABLE(%a6),%d1 13601 andi.b &0x0b,%d1 # is UNFL or INEX enabled? 13602 bne.b fabs_sd_unfl_ena # yes 13603 13604fabs_sd_unfl_dis: 13605 lea FP_SCR0(%a6),%a0 # pass: result addr 13606 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode 13607 bsr.l unf_res # calculate default result 13608 or.b %d0,FPSR_CC(%a6) # set possible 'Z' ccode 13609 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0 13610 rts 13611 13612# 13613# operand will underflow AND underflow is enabled. 
13614# Therefore, we must return the result rounded to extended precision. 13615# 13616fabs_sd_unfl_ena: 13617 mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6) 13618 mov.l FP_SCR0_LO(%a6),FP_SCR1_LO(%a6) 13619 mov.w FP_SCR0_EX(%a6),%d1 # load current exponent 13620 13621 mov.l %d2,-(%sp) # save d2 13622 mov.l %d1,%d2 # make a copy 13623 andi.l &0x7fff,%d1 # strip sign 13624 andi.w &0x8000,%d2 # keep old sign 13625 sub.l %d0,%d1 # subtract scale factor 13626 addi.l &0x6000,%d1 # add new bias 13627 andi.w &0x7fff,%d1 13628 or.w %d2,%d1 # concat new sign,new exp 13629 mov.w %d1,FP_SCR1_EX(%a6) # insert new exp 13630 fmovm.x FP_SCR1(%a6),&0x40 # return EXOP in fp1 13631 mov.l (%sp)+,%d2 # restore d2 13632 bra.b fabs_sd_unfl_dis 13633 13634# 13635# operand WILL overflow. 13636# 13637fabs_sd_ovfl: 13638 fmov.l &0x0,%fpsr # clear FPSR 13639 fmov.l L_SCR3(%a6),%fpcr # set FPCR 13640 13641 fabs.x FP_SCR0(%a6),%fp0 # perform absolute 13642 13643 fmov.l &0x0,%fpcr # clear FPCR 13644 fmov.l %fpsr,%d1 # save FPSR 13645 13646 or.l %d1,USER_FPSR(%a6) # save INEX2,N 13647 13648fabs_sd_ovfl_tst: 13649 or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex 13650 13651 mov.b FPCR_ENABLE(%a6),%d1 13652 andi.b &0x13,%d1 # is OVFL or INEX enabled? 13653 bne.b fabs_sd_ovfl_ena # yes 13654 13655# 13656# OVFL is not enabled; therefore, we must create the default result by 13657# calling ovf_res(). 13658# 13659fabs_sd_ovfl_dis: 13660 btst &neg_bit,FPSR_CC(%a6) # is result negative? 13661 sne %d1 # set sign param accordingly 13662 mov.l L_SCR3(%a6),%d0 # pass: prec,mode 13663 bsr.l ovf_res # calculate default result 13664 or.b %d0,FPSR_CC(%a6) # set INF,N if applicable 13665 fmovm.x (%a0),&0x80 # return default result in fp0 13666 rts 13667 13668# 13669# OVFL is enabled. 13670# the INEX2 bit has already been updated by the round to the correct precision. 13671# now, round to extended(and don't alter the FPSR). 13672# 13673fabs_sd_ovfl_ena: 13674 mov.l %d2,-(%sp) # save d2 13675 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp} 13676 mov.l %d1,%d2 # make a copy 13677 andi.l &0x7fff,%d1 # strip sign 13678 andi.w &0x8000,%d2 # keep old sign 13679 sub.l %d0,%d1 # add scale factor 13680 subi.l &0x6000,%d1 # subtract bias 13681 andi.w &0x7fff,%d1 13682 or.w %d2,%d1 # concat sign,exp 13683 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent 13684 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1 13685 mov.l (%sp)+,%d2 # restore d2 13686 bra.b fabs_sd_ovfl_dis 13687 13688# 13689# the move in MAY underflow. so... 13690# 13691fabs_sd_may_ovfl: 13692 fmov.l &0x0,%fpsr # clear FPSR 13693 fmov.l L_SCR3(%a6),%fpcr # set FPCR 13694 13695 fabs.x FP_SCR0(%a6),%fp0 # perform absolute 13696 13697 fmov.l %fpsr,%d1 # save status 13698 fmov.l &0x0,%fpcr # clear FPCR 13699 13700 or.l %d1,USER_FPSR(%a6) # save INEX2,N 13701 13702 fabs.x %fp0,%fp1 # make a copy of result 13703 fcmp.b %fp1,&0x2 # is |result| >= 2.b? 13704 fbge.w fabs_sd_ovfl_tst # yes; overflow has occurred 13705 13706# no, it didn't overflow; we have correct result 13707 bra.w fabs_sd_normal_exit 13708 13709########################################################################## 13710 13711# 13712# input is not normalized; what is it? 
13713# 13714fabs_not_norm: 13715 cmpi.b %d1,&DENORM # weed out DENORM 13716 beq.w fabs_denorm 13717 cmpi.b %d1,&SNAN # weed out SNAN 13718 beq.l res_snan_1op 13719 cmpi.b %d1,&QNAN # weed out QNAN 13720 beq.l res_qnan_1op 13721 13722 fabs.x SRC(%a0),%fp0 # force absolute value 13723 13724 cmpi.b %d1,&INF # weed out INF 13725 beq.b fabs_inf 13726fabs_zero: 13727 mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit 13728 rts 13729fabs_inf: 13730 mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit 13731 rts 13732 13733######################################################################### 13734# XDEF **************************************************************** # 13735# fcmp(): fp compare op routine # 13736# # 13737# XREF **************************************************************** # 13738# res_qnan() - return QNAN result # 13739# res_snan() - return SNAN result # 13740# # 13741# INPUT *************************************************************** # 13742# a0 = pointer to extended precision source operand # 13743# a1 = pointer to extended precision destination operand # 13744# d0 = round prec/mode # 13745# # 13746# OUTPUT ************************************************************** # 13747# None # 13748# # 13749# ALGORITHM *********************************************************** # 13750# Handle NANs and denorms as special cases. For everything else, # 13751# just use the actual fcmp instruction to produce the correct condition # 13752# codes. # 13753# # 13754######################################################################### 13755 13756 global fcmp 13757fcmp: 13758 clr.w %d1 13759 mov.b DTAG(%a6),%d1 13760 lsl.b &0x3,%d1 13761 or.b STAG(%a6),%d1 13762 bne.b fcmp_not_norm # optimize on non-norm input 13763 13764# 13765# COMPARE FP OPs : NORMs, ZEROs, INFs, and "corrected" DENORMs 13766# 13767fcmp_norm: 13768 fmovm.x DST(%a1),&0x80 # load dst op 13769 13770 fcmp.x %fp0,SRC(%a0) # do compare 13771 13772 fmov.l %fpsr,%d0 # save FPSR 13773 rol.l &0x8,%d0 # extract ccode bits 13774 mov.b %d0,FPSR_CC(%a6) # set ccode bits(no exc bits are set) 13775 13776 rts 13777 13778# 13779# fcmp: inputs are not both normalized; what are they? 
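# (d1 was built above as DTAG*8 + STAG; with the usual tag encoding
# (NORM=0,ZERO=1,INF=2,QNAN=3,DENORM=4,SNAN=5), a NORM dst and a DENORM
# src give d1 = 4, and the word-sized entries below are indexed by
# d1.w*2, selecting fcmp_nrm_dnrm.)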
13780# 13781fcmp_not_norm: 13782 mov.w (tbl_fcmp_op.b,%pc,%d1.w*2),%d1 13783 jmp (tbl_fcmp_op.b,%pc,%d1.w*1) 13784 13785 swbeg &48 13786tbl_fcmp_op: 13787 short fcmp_norm - tbl_fcmp_op # NORM - NORM 13788 short fcmp_norm - tbl_fcmp_op # NORM - ZERO 13789 short fcmp_norm - tbl_fcmp_op # NORM - INF 13790 short fcmp_res_qnan - tbl_fcmp_op # NORM - QNAN 13791 short fcmp_nrm_dnrm - tbl_fcmp_op # NORM - DENORM 13792 short fcmp_res_snan - tbl_fcmp_op # NORM - SNAN 13793 short tbl_fcmp_op - tbl_fcmp_op # 13794 short tbl_fcmp_op - tbl_fcmp_op # 13795 13796 short fcmp_norm - tbl_fcmp_op # ZERO - NORM 13797 short fcmp_norm - tbl_fcmp_op # ZERO - ZERO 13798 short fcmp_norm - tbl_fcmp_op # ZERO - INF 13799 short fcmp_res_qnan - tbl_fcmp_op # ZERO - QNAN 13800 short fcmp_dnrm_s - tbl_fcmp_op # ZERO - DENORM 13801 short fcmp_res_snan - tbl_fcmp_op # ZERO - SNAN 13802 short tbl_fcmp_op - tbl_fcmp_op # 13803 short tbl_fcmp_op - tbl_fcmp_op # 13804 13805 short fcmp_norm - tbl_fcmp_op # INF - NORM 13806 short fcmp_norm - tbl_fcmp_op # INF - ZERO 13807 short fcmp_norm - tbl_fcmp_op # INF - INF 13808 short fcmp_res_qnan - tbl_fcmp_op # INF - QNAN 13809 short fcmp_dnrm_s - tbl_fcmp_op # INF - DENORM 13810 short fcmp_res_snan - tbl_fcmp_op # INF - SNAN 13811 short tbl_fcmp_op - tbl_fcmp_op # 13812 short tbl_fcmp_op - tbl_fcmp_op # 13813 13814 short fcmp_res_qnan - tbl_fcmp_op # QNAN - NORM 13815 short fcmp_res_qnan - tbl_fcmp_op # QNAN - ZERO 13816 short fcmp_res_qnan - tbl_fcmp_op # QNAN - INF 13817 short fcmp_res_qnan - tbl_fcmp_op # QNAN - QNAN 13818 short fcmp_res_qnan - tbl_fcmp_op # QNAN - DENORM 13819 short fcmp_res_snan - tbl_fcmp_op # QNAN - SNAN 13820 short tbl_fcmp_op - tbl_fcmp_op # 13821 short tbl_fcmp_op - tbl_fcmp_op # 13822 13823 short fcmp_dnrm_nrm - tbl_fcmp_op # DENORM - NORM 13824 short fcmp_dnrm_d - tbl_fcmp_op # DENORM - ZERO 13825 short fcmp_dnrm_d - tbl_fcmp_op # DENORM - INF 13826 short fcmp_res_qnan - tbl_fcmp_op # DENORM - QNAN 13827 short fcmp_dnrm_sd - tbl_fcmp_op # DENORM - DENORM 13828 short fcmp_res_snan - tbl_fcmp_op # DENORM - SNAN 13829 short tbl_fcmp_op - tbl_fcmp_op # 13830 short tbl_fcmp_op - tbl_fcmp_op # 13831 13832 short fcmp_res_snan - tbl_fcmp_op # SNAN - NORM 13833 short fcmp_res_snan - tbl_fcmp_op # SNAN - ZERO 13834 short fcmp_res_snan - tbl_fcmp_op # SNAN - INF 13835 short fcmp_res_snan - tbl_fcmp_op # SNAN - QNAN 13836 short fcmp_res_snan - tbl_fcmp_op # SNAN - DENORM 13837 short fcmp_res_snan - tbl_fcmp_op # SNAN - SNAN 13838 short tbl_fcmp_op - tbl_fcmp_op # 13839 short tbl_fcmp_op - tbl_fcmp_op # 13840 13841# unlike all other functions for QNAN and SNAN, fcmp does NOT set the 13842# 'N' bit for a negative QNAN or SNAN input so we must squelch it here. 13843fcmp_res_qnan: 13844 bsr.l res_qnan 13845 andi.b &0xf7,FPSR_CC(%a6) 13846 rts 13847fcmp_res_snan: 13848 bsr.l res_snan 13849 andi.b &0xf7,FPSR_CC(%a6) 13850 rts 13851 13852# 13853# DENORMs are a little more difficult. 13854# If you have a 2 DENORMs, then you can just force the j-bit to a one 13855# and use the fcmp_norm routine. 13856# If you have a DENORM and an INF or ZERO, just force the DENORM's j-bit to a one 13857# and use the fcmp_norm routine. 13858# If you have a DENORM and a NORM with opposite signs, then use fcmp_norm, also. 
13859# But with a DENORM and a NORM of the same sign, the neg bit is set if the 13860# (1) signs are (+) and the DENORM is the dst or 13861# (2) signs are (-) and the DENORM is the src 13862# 13863 13864fcmp_dnrm_s: 13865 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) 13866 mov.l SRC_HI(%a0),%d0 13867 bset &31,%d0 # DENORM src; make into small norm 13868 mov.l %d0,FP_SCR0_HI(%a6) 13869 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) 13870 lea FP_SCR0(%a6),%a0 13871 bra.w fcmp_norm 13872 13873fcmp_dnrm_d: 13874 mov.l DST_EX(%a1),FP_SCR0_EX(%a6) 13875 mov.l DST_HI(%a1),%d0 13876 bset &31,%d0 # DENORM src; make into small norm 13877 mov.l %d0,FP_SCR0_HI(%a6) 13878 mov.l DST_LO(%a1),FP_SCR0_LO(%a6) 13879 lea FP_SCR0(%a6),%a1 13880 bra.w fcmp_norm 13881 13882fcmp_dnrm_sd: 13883 mov.w DST_EX(%a1),FP_SCR1_EX(%a6) 13884 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) 13885 mov.l DST_HI(%a1),%d0 13886 bset &31,%d0 # DENORM dst; make into small norm 13887 mov.l %d0,FP_SCR1_HI(%a6) 13888 mov.l SRC_HI(%a0),%d0 13889 bset &31,%d0 # DENORM dst; make into small norm 13890 mov.l %d0,FP_SCR0_HI(%a6) 13891 mov.l DST_LO(%a1),FP_SCR1_LO(%a6) 13892 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) 13893 lea FP_SCR1(%a6),%a1 13894 lea FP_SCR0(%a6),%a0 13895 bra.w fcmp_norm 13896 13897fcmp_nrm_dnrm: 13898 mov.b SRC_EX(%a0),%d0 # determine if like signs 13899 mov.b DST_EX(%a1),%d1 13900 eor.b %d0,%d1 13901 bmi.w fcmp_dnrm_s 13902 13903# signs are the same, so must determine the answer ourselves. 13904 tst.b %d0 # is src op negative? 13905 bmi.b fcmp_nrm_dnrm_m # yes 13906 rts 13907fcmp_nrm_dnrm_m: 13908 mov.b &neg_bmask,FPSR_CC(%a6) # set 'Z' ccode bit 13909 rts 13910 13911fcmp_dnrm_nrm: 13912 mov.b SRC_EX(%a0),%d0 # determine if like signs 13913 mov.b DST_EX(%a1),%d1 13914 eor.b %d0,%d1 13915 bmi.w fcmp_dnrm_d 13916 13917# signs are the same, so must determine the answer ourselves. 13918 tst.b %d0 # is src op negative? 13919 bpl.b fcmp_dnrm_nrm_m # no 13920 rts 13921fcmp_dnrm_nrm_m: 13922 mov.b &neg_bmask,FPSR_CC(%a6) # set 'Z' ccode bit 13923 rts 13924 13925######################################################################### 13926# XDEF **************************************************************** # 13927# fsglmul(): emulates the fsglmul instruction # 13928# # 13929# XREF **************************************************************** # 13930# scale_to_zero_src() - scale src exponent to zero # 13931# scale_to_zero_dst() - scale dst exponent to zero # 13932# unf_res4() - return default underflow result for sglop # 13933# ovf_res() - return default overflow result # 13934# res_qnan() - return QNAN result # 13935# res_snan() - return SNAN result # 13936# # 13937# INPUT *************************************************************** # 13938# a0 = pointer to extended precision source operand # 13939# a1 = pointer to extended precision destination operand # 13940# d0 rnd prec,mode # 13941# # 13942# OUTPUT ************************************************************** # 13943# fp0 = result # 13944# fp1 = EXOP (if exception occurred) # 13945# # 13946# ALGORITHM *********************************************************** # 13947# Handle NANs, infinities, and zeroes as special cases. Divide # 13948# norms/denorms into ext/sgl/dbl precision. # 13949# For norms/denorms, scale the exponents such that a multiply # 13950# instruction won't cause an exception. Use the regular fsglmul to # 13951# compute a result. Check if the regular operands would have taken # 13952# an exception. 
If so, return the default overflow/underflow result # 13953# and return the EXOP if exceptions are enabled. Else, scale the # 13954# result operand to the proper exponent. # 13955# # 13956######################################################################### 13957 13958 global fsglmul 13959fsglmul: 13960 mov.l %d0,L_SCR3(%a6) # store rnd info 13961 13962 clr.w %d1 13963 mov.b DTAG(%a6),%d1 13964 lsl.b &0x3,%d1 13965 or.b STAG(%a6),%d1 13966 13967 bne.w fsglmul_not_norm # optimize on non-norm input 13968 13969fsglmul_norm: 13970 mov.w DST_EX(%a1),FP_SCR1_EX(%a6) 13971 mov.l DST_HI(%a1),FP_SCR1_HI(%a6) 13972 mov.l DST_LO(%a1),FP_SCR1_LO(%a6) 13973 13974 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) 13975 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) 13976 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) 13977 13978 bsr.l scale_to_zero_src # scale exponent 13979 mov.l %d0,-(%sp) # save scale factor 1 13980 13981 bsr.l scale_to_zero_dst # scale dst exponent 13982 13983 add.l (%sp)+,%d0 # SCALE_FACTOR = scale1 + scale2 13984 13985 cmpi.l %d0,&0x3fff-0x7ffe # would result ovfl? 13986 beq.w fsglmul_may_ovfl # result may rnd to overflow 13987 blt.w fsglmul_ovfl # result will overflow 13988 13989 cmpi.l %d0,&0x3fff+0x0001 # would result unfl? 13990 beq.w fsglmul_may_unfl # result may rnd to no unfl 13991 bgt.w fsglmul_unfl # result will underflow 13992 13993fsglmul_normal: 13994 fmovm.x FP_SCR1(%a6),&0x80 # load dst op 13995 13996 fmov.l L_SCR3(%a6),%fpcr # set FPCR 13997 fmov.l &0x0,%fpsr # clear FPSR 13998 13999 fsglmul.x FP_SCR0(%a6),%fp0 # execute sgl multiply 14000 14001 fmov.l %fpsr,%d1 # save status 14002 fmov.l &0x0,%fpcr # clear FPCR 14003 14004 or.l %d1,USER_FPSR(%a6) # save INEX2,N 14005 14006fsglmul_normal_exit: 14007 fmovm.x &0x80,FP_SCR0(%a6) # store out result 14008 mov.l %d2,-(%sp) # save d2 14009 mov.w FP_SCR0_EX(%a6),%d1 # load {sgn,exp} 14010 mov.l %d1,%d2 # make a copy 14011 andi.l &0x7fff,%d1 # strip sign 14012 andi.w &0x8000,%d2 # keep old sign 14013 sub.l %d0,%d1 # add scale factor 14014 or.w %d2,%d1 # concat old sign,new exp 14015 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent 14016 mov.l (%sp)+,%d2 # restore d2 14017 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0 14018 rts 14019 14020fsglmul_ovfl: 14021 fmovm.x FP_SCR1(%a6),&0x80 # load dst op 14022 14023 fmov.l L_SCR3(%a6),%fpcr # set FPCR 14024 fmov.l &0x0,%fpsr # clear FPSR 14025 14026 fsglmul.x FP_SCR0(%a6),%fp0 # execute sgl multiply 14027 14028 fmov.l %fpsr,%d1 # save status 14029 fmov.l &0x0,%fpcr # clear FPCR 14030 14031 or.l %d1,USER_FPSR(%a6) # save INEX2,N 14032 14033fsglmul_ovfl_tst: 14034 14035# save setting this until now because this is where fsglmul_may_ovfl may jump in 14036 or.l &ovfl_inx_mask, USER_FPSR(%a6) # set ovfl/aovfl/ainex 14037 14038 mov.b FPCR_ENABLE(%a6),%d1 14039 andi.b &0x13,%d1 # is OVFL or INEX enabled? 14040 bne.b fsglmul_ovfl_ena # yes 14041 14042fsglmul_ovfl_dis: 14043 btst &neg_bit,FPSR_CC(%a6) # is result negative? 
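# (the sne below leaves d1.b = 0xff for a negative result and 0x00
# otherwise; this is the sign parameter that ovf_res() expects.)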
14044 sne %d1 # set sign param accordingly 14045 mov.l L_SCR3(%a6),%d0 # pass prec:rnd 14046 andi.b &0x30,%d0 # force prec = ext 14047 bsr.l ovf_res # calculate default result 14048 or.b %d0,FPSR_CC(%a6) # set INF,N if applicable 14049 fmovm.x (%a0),&0x80 # return default result in fp0 14050 rts 14051 14052fsglmul_ovfl_ena: 14053 fmovm.x &0x80,FP_SCR0(%a6) # move result to stack 14054 14055 mov.l %d2,-(%sp) # save d2 14056 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp} 14057 mov.l %d1,%d2 # make a copy 14058 andi.l &0x7fff,%d1 # strip sign 14059 sub.l %d0,%d1 # add scale factor 14060 subi.l &0x6000,%d1 # subtract bias 14061 andi.w &0x7fff,%d1 14062 andi.w &0x8000,%d2 # keep old sign 14063 or.w %d2,%d1 # concat old sign,new exp 14064 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent 14065 mov.l (%sp)+,%d2 # restore d2 14066 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1 14067 bra.b fsglmul_ovfl_dis 14068 14069fsglmul_may_ovfl: 14070 fmovm.x FP_SCR1(%a6),&0x80 # load dst op 14071 14072 fmov.l L_SCR3(%a6),%fpcr # set FPCR 14073 fmov.l &0x0,%fpsr # clear FPSR 14074 14075 fsglmul.x FP_SCR0(%a6),%fp0 # execute sgl multiply 14076 14077 fmov.l %fpsr,%d1 # save status 14078 fmov.l &0x0,%fpcr # clear FPCR 14079 14080 or.l %d1,USER_FPSR(%a6) # save INEX2,N 14081 14082 fabs.x %fp0,%fp1 # make a copy of result 14083 fcmp.b %fp1,&0x2 # is |result| >= 2.b? 14084 fbge.w fsglmul_ovfl_tst # yes; overflow has occurred 14085 14086# no, it didn't overflow; we have correct result 14087 bra.w fsglmul_normal_exit 14088 14089fsglmul_unfl: 14090 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit 14091 14092 fmovm.x FP_SCR1(%a6),&0x80 # load dst op 14093 14094 fmov.l &rz_mode*0x10,%fpcr # set FPCR 14095 fmov.l &0x0,%fpsr # clear FPSR 14096 14097 fsglmul.x FP_SCR0(%a6),%fp0 # execute sgl multiply 14098 14099 fmov.l %fpsr,%d1 # save status 14100 fmov.l &0x0,%fpcr # clear FPCR 14101 14102 or.l %d1,USER_FPSR(%a6) # save INEX2,N 14103 14104 mov.b FPCR_ENABLE(%a6),%d1 14105 andi.b &0x0b,%d1 # is UNFL or INEX enabled? 14106 bne.b fsglmul_unfl_ena # yes 14107 14108fsglmul_unfl_dis: 14109 fmovm.x &0x80,FP_SCR0(%a6) # store out result 14110 14111 lea FP_SCR0(%a6),%a0 # pass: result addr 14112 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode 14113 bsr.l unf_res4 # calculate default result 14114 or.b %d0,FPSR_CC(%a6) # 'Z' bit may have been set 14115 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0 14116 rts 14117 14118# 14119# UNFL is enabled. 
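# The EXOP returned here is the result with its exponent re-biased upward:
# exp(EXOP) = ((scaled exp) - (scale factor) + 0x6000) & 0x7fff, with the
# original sign OR'ed back in. That is what the exponent math below does.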
14120# 14121fsglmul_unfl_ena: 14122 fmovm.x FP_SCR1(%a6),&0x40 # load dst op 14123 14124 fmov.l L_SCR3(%a6),%fpcr # set FPCR 14125 fmov.l &0x0,%fpsr # clear FPSR 14126 14127 fsglmul.x FP_SCR0(%a6),%fp1 # execute sgl multiply 14128 14129 fmov.l &0x0,%fpcr # clear FPCR 14130 14131 fmovm.x &0x40,FP_SCR0(%a6) # save result to stack 14132 mov.l %d2,-(%sp) # save d2 14133 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp} 14134 mov.l %d1,%d2 # make a copy 14135 andi.l &0x7fff,%d1 # strip sign 14136 andi.w &0x8000,%d2 # keep old sign 14137 sub.l %d0,%d1 # add scale factor 14138 addi.l &0x6000,%d1 # add bias 14139 andi.w &0x7fff,%d1 14140 or.w %d2,%d1 # concat old sign,new exp 14141 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent 14142 mov.l (%sp)+,%d2 # restore d2 14143 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1 14144 bra.w fsglmul_unfl_dis 14145 14146fsglmul_may_unfl: 14147 fmovm.x FP_SCR1(%a6),&0x80 # load dst op 14148 14149 fmov.l L_SCR3(%a6),%fpcr # set FPCR 14150 fmov.l &0x0,%fpsr # clear FPSR 14151 14152 fsglmul.x FP_SCR0(%a6),%fp0 # execute sgl multiply 14153 14154 fmov.l %fpsr,%d1 # save status 14155 fmov.l &0x0,%fpcr # clear FPCR 14156 14157 or.l %d1,USER_FPSR(%a6) # save INEX2,N 14158 14159 fabs.x %fp0,%fp1 # make a copy of result 14160 fcmp.b %fp1,&0x2 # is |result| > 2.b? 14161 fbgt.w fsglmul_normal_exit # no; no underflow occurred 14162 fblt.w fsglmul_unfl # yes; underflow occurred 14163 14164# 14165# we still don't know if underflow occurred. result is ~ equal to 2. but, 14166# we don't know if the result was an underflow that rounded up to a 2 or 14167# a normalized number that rounded down to a 2. so, redo the entire operation 14168# using RZ as the rounding mode to see what the pre-rounded result is. 14169# this case should be relatively rare. 14170# 14171 fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1 14172 14173 mov.l L_SCR3(%a6),%d1 14174 andi.b &0xc0,%d1 # keep rnd prec 14175 ori.b &rz_mode*0x10,%d1 # insert RZ 14176 14177 fmov.l %d1,%fpcr # set FPCR 14178 fmov.l &0x0,%fpsr # clear FPSR 14179 14180 fsglmul.x FP_SCR0(%a6),%fp1 # execute sgl multiply 14181 14182 fmov.l &0x0,%fpcr # clear FPCR 14183 fabs.x %fp1 # make absolute value 14184 fcmp.b %fp1,&0x2 # is |result| < 2.b? 14185 fbge.w fsglmul_normal_exit # no; no underflow occurred 14186 bra.w fsglmul_unfl # yes, underflow occurred 14187 14188############################################################################## 14189 14190# 14191# Single Precision Multiply: inputs are not both normalized; what are they? 
14192# 14193fsglmul_not_norm: 14194 mov.w (tbl_fsglmul_op.b,%pc,%d1.w*2),%d1 14195 jmp (tbl_fsglmul_op.b,%pc,%d1.w*1) 14196 14197 swbeg &48 14198tbl_fsglmul_op: 14199 short fsglmul_norm - tbl_fsglmul_op # NORM x NORM 14200 short fsglmul_zero - tbl_fsglmul_op # NORM x ZERO 14201 short fsglmul_inf_src - tbl_fsglmul_op # NORM x INF 14202 short fsglmul_res_qnan - tbl_fsglmul_op # NORM x QNAN 14203 short fsglmul_norm - tbl_fsglmul_op # NORM x DENORM 14204 short fsglmul_res_snan - tbl_fsglmul_op # NORM x SNAN 14205 short tbl_fsglmul_op - tbl_fsglmul_op # 14206 short tbl_fsglmul_op - tbl_fsglmul_op # 14207 14208 short fsglmul_zero - tbl_fsglmul_op # ZERO x NORM 14209 short fsglmul_zero - tbl_fsglmul_op # ZERO x ZERO 14210 short fsglmul_res_operr - tbl_fsglmul_op # ZERO x INF 14211 short fsglmul_res_qnan - tbl_fsglmul_op # ZERO x QNAN 14212 short fsglmul_zero - tbl_fsglmul_op # ZERO x DENORM 14213 short fsglmul_res_snan - tbl_fsglmul_op # ZERO x SNAN 14214 short tbl_fsglmul_op - tbl_fsglmul_op # 14215 short tbl_fsglmul_op - tbl_fsglmul_op # 14216 14217 short fsglmul_inf_dst - tbl_fsglmul_op # INF x NORM 14218 short fsglmul_res_operr - tbl_fsglmul_op # INF x ZERO 14219 short fsglmul_inf_dst - tbl_fsglmul_op # INF x INF 14220 short fsglmul_res_qnan - tbl_fsglmul_op # INF x QNAN 14221 short fsglmul_inf_dst - tbl_fsglmul_op # INF x DENORM 14222 short fsglmul_res_snan - tbl_fsglmul_op # INF x SNAN 14223 short tbl_fsglmul_op - tbl_fsglmul_op # 14224 short tbl_fsglmul_op - tbl_fsglmul_op # 14225 14226 short fsglmul_res_qnan - tbl_fsglmul_op # QNAN x NORM 14227 short fsglmul_res_qnan - tbl_fsglmul_op # QNAN x ZERO 14228 short fsglmul_res_qnan - tbl_fsglmul_op # QNAN x INF 14229 short fsglmul_res_qnan - tbl_fsglmul_op # QNAN x QNAN 14230 short fsglmul_res_qnan - tbl_fsglmul_op # QNAN x DENORM 14231 short fsglmul_res_snan - tbl_fsglmul_op # QNAN x SNAN 14232 short tbl_fsglmul_op - tbl_fsglmul_op # 14233 short tbl_fsglmul_op - tbl_fsglmul_op # 14234 14235 short fsglmul_norm - tbl_fsglmul_op # NORM x NORM 14236 short fsglmul_zero - tbl_fsglmul_op # NORM x ZERO 14237 short fsglmul_inf_src - tbl_fsglmul_op # NORM x INF 14238 short fsglmul_res_qnan - tbl_fsglmul_op # NORM x QNAN 14239 short fsglmul_norm - tbl_fsglmul_op # NORM x DENORM 14240 short fsglmul_res_snan - tbl_fsglmul_op # NORM x SNAN 14241 short tbl_fsglmul_op - tbl_fsglmul_op # 14242 short tbl_fsglmul_op - tbl_fsglmul_op # 14243 14244 short fsglmul_res_snan - tbl_fsglmul_op # SNAN x NORM 14245 short fsglmul_res_snan - tbl_fsglmul_op # SNAN x ZERO 14246 short fsglmul_res_snan - tbl_fsglmul_op # SNAN x INF 14247 short fsglmul_res_snan - tbl_fsglmul_op # SNAN x QNAN 14248 short fsglmul_res_snan - tbl_fsglmul_op # SNAN x DENORM 14249 short fsglmul_res_snan - tbl_fsglmul_op # SNAN x SNAN 14250 short tbl_fsglmul_op - tbl_fsglmul_op # 14251 short tbl_fsglmul_op - tbl_fsglmul_op # 14252 14253fsglmul_res_operr: 14254 bra.l res_operr 14255fsglmul_res_snan: 14256 bra.l res_snan 14257fsglmul_res_qnan: 14258 bra.l res_qnan 14259fsglmul_zero: 14260 bra.l fmul_zero 14261fsglmul_inf_src: 14262 bra.l fmul_inf_src 14263fsglmul_inf_dst: 14264 bra.l fmul_inf_dst 14265 14266######################################################################### 14267# XDEF **************************************************************** # 14268# fsgldiv(): emulates the fsgldiv instruction # 14269# # 14270# XREF **************************************************************** # 14271# scale_to_zero_src() - scale src exponent to zero # 14272# scale_to_zero_dst() - scale dst exponent to 
zero # 14273# unf_res4() - return default underflow result for sglop # 14274# ovf_res() - return default overflow result # 14275# res_qnan() - return QNAN result # 14276# res_snan() - return SNAN result # 14277# # 14278# INPUT *************************************************************** # 14279# a0 = pointer to extended precision source operand # 14280# a1 = pointer to extended precision destination operand # 14281# d0 rnd prec,mode # 14282# # 14283# OUTPUT ************************************************************** # 14284# fp0 = result # 14285# fp1 = EXOP (if exception occurred) # 14286# # 14287# ALGORITHM *********************************************************** # 14288# Handle NANs, infinities, and zeroes as special cases. Divide # 14289# norms/denorms into ext/sgl/dbl precision. # 14290# For norms/denorms, scale the exponents such that a divide # 14291# instruction won't cause an exception. Use the regular fsgldiv to # 14292# compute a result. Check if the regular operands would have taken # 14293# an exception. If so, return the default overflow/underflow result # 14294# and return the EXOP if exceptions are enabled. Else, scale the # 14295# result operand to the proper exponent. # 14296# # 14297######################################################################### 14298 14299 global fsgldiv 14300fsgldiv: 14301 mov.l %d0,L_SCR3(%a6) # store rnd info 14302 14303 clr.w %d1 14304 mov.b DTAG(%a6),%d1 14305 lsl.b &0x3,%d1 14306 or.b STAG(%a6),%d1 # combine src tags 14307 14308 bne.w fsgldiv_not_norm # optimize on non-norm input 14309 14310# 14311# DIVIDE: NORMs and DENORMs ONLY! 14312# 14313fsgldiv_norm: 14314 mov.w DST_EX(%a1),FP_SCR1_EX(%a6) 14315 mov.l DST_HI(%a1),FP_SCR1_HI(%a6) 14316 mov.l DST_LO(%a1),FP_SCR1_LO(%a6) 14317 14318 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) 14319 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) 14320 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) 14321 14322 bsr.l scale_to_zero_src # calculate scale factor 1 14323 mov.l %d0,-(%sp) # save scale factor 1 14324 14325 bsr.l scale_to_zero_dst # calculate scale factor 2 14326 14327 neg.l (%sp) # S.F. = scale1 - scale2 14328 add.l %d0,(%sp) 14329 14330 mov.w 2+L_SCR3(%a6),%d1 # fetch precision,mode 14331 lsr.b &0x6,%d1 14332 mov.l (%sp)+,%d0 14333 cmpi.l %d0,&0x3fff-0x7ffe 14334 ble.w fsgldiv_may_ovfl 14335 14336 cmpi.l %d0,&0x3fff-0x0000 # will result underflow? 
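# (the scaled quotient exponent is roughly 0x3fff, so the true exponent is
# about 0x3fff - d0; any d0 above 0x3fff leaves a true exponent at or below
# zero, so the divide must underflow. d0 equal to 0x3fff is the borderline
# "maybe" case.)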
14337 beq.w fsgldiv_may_unfl # maybe 14338 bgt.w fsgldiv_unfl # yes; go handle underflow 14339 14340fsgldiv_normal: 14341 fmovm.x FP_SCR1(%a6),&0x80 # load dst op 14342 14343 fmov.l L_SCR3(%a6),%fpcr # save FPCR 14344 fmov.l &0x0,%fpsr # clear FPSR 14345 14346 fsgldiv.x FP_SCR0(%a6),%fp0 # perform sgl divide 14347 14348 fmov.l %fpsr,%d1 # save FPSR 14349 fmov.l &0x0,%fpcr # clear FPCR 14350 14351 or.l %d1,USER_FPSR(%a6) # save INEX2,N 14352 14353fsgldiv_normal_exit: 14354 fmovm.x &0x80,FP_SCR0(%a6) # store result on stack 14355 mov.l %d2,-(%sp) # save d2 14356 mov.w FP_SCR0_EX(%a6),%d1 # load {sgn,exp} 14357 mov.l %d1,%d2 # make a copy 14358 andi.l &0x7fff,%d1 # strip sign 14359 andi.w &0x8000,%d2 # keep old sign 14360 sub.l %d0,%d1 # add scale factor 14361 or.w %d2,%d1 # concat old sign,new exp 14362 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent 14363 mov.l (%sp)+,%d2 # restore d2 14364 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0 14365 rts 14366 14367fsgldiv_may_ovfl: 14368 fmovm.x FP_SCR1(%a6),&0x80 # load dst op 14369 14370 fmov.l L_SCR3(%a6),%fpcr # set FPCR 14371 fmov.l &0x0,%fpsr # set FPSR 14372 14373 fsgldiv.x FP_SCR0(%a6),%fp0 # execute divide 14374 14375 fmov.l %fpsr,%d1 14376 fmov.l &0x0,%fpcr 14377 14378 or.l %d1,USER_FPSR(%a6) # save INEX,N 14379 14380 fmovm.x &0x01,-(%sp) # save result to stack 14381 mov.w (%sp),%d1 # fetch new exponent 14382 add.l &0xc,%sp # clear result 14383 andi.l &0x7fff,%d1 # strip sign 14384 sub.l %d0,%d1 # add scale factor 14385 cmp.l %d1,&0x7fff # did divide overflow? 14386 blt.b fsgldiv_normal_exit 14387 14388fsgldiv_ovfl_tst: 14389 or.w &ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex 14390 14391 mov.b FPCR_ENABLE(%a6),%d1 14392 andi.b &0x13,%d1 # is OVFL or INEX enabled? 14393 bne.b fsgldiv_ovfl_ena # yes 14394 14395fsgldiv_ovfl_dis: 14396 btst &neg_bit,FPSR_CC(%a6) # is result negative 14397 sne %d1 # set sign param accordingly 14398 mov.l L_SCR3(%a6),%d0 # pass prec:rnd 14399 andi.b &0x30,%d0 # kill precision 14400 bsr.l ovf_res # calculate default result 14401 or.b %d0,FPSR_CC(%a6) # set INF if applicable 14402 fmovm.x (%a0),&0x80 # return default result in fp0 14403 rts 14404 14405fsgldiv_ovfl_ena: 14406 fmovm.x &0x80,FP_SCR0(%a6) # move result to stack 14407 14408 mov.l %d2,-(%sp) # save d2 14409 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp} 14410 mov.l %d1,%d2 # make a copy 14411 andi.l &0x7fff,%d1 # strip sign 14412 andi.w &0x8000,%d2 # keep old sign 14413 sub.l %d0,%d1 # add scale factor 14414 subi.l &0x6000,%d1 # subtract new bias 14415 andi.w &0x7fff,%d1 # clear ms bit 14416 or.w %d2,%d1 # concat old sign,new exp 14417 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent 14418 mov.l (%sp)+,%d2 # restore d2 14419 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1 14420 bra.b fsgldiv_ovfl_dis 14421 14422fsgldiv_unfl: 14423 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit 14424 14425 fmovm.x FP_SCR1(%a6),&0x80 # load dst op 14426 14427 fmov.l &rz_mode*0x10,%fpcr # set FPCR 14428 fmov.l &0x0,%fpsr # clear FPSR 14429 14430 fsgldiv.x FP_SCR0(%a6),%fp0 # execute sgl divide 14431 14432 fmov.l %fpsr,%d1 # save status 14433 fmov.l &0x0,%fpcr # clear FPCR 14434 14435 or.l %d1,USER_FPSR(%a6) # save INEX2,N 14436 14437 mov.b FPCR_ENABLE(%a6),%d1 14438 andi.b &0x0b,%d1 # is UNFL or INEX enabled? 
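# (0x0b masks the UNFL, INEX2, and INEX1 enable bits of FPCR_ENABLE.)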
14439 bne.b fsgldiv_unfl_ena # yes 14440 14441fsgldiv_unfl_dis: 14442 fmovm.x &0x80,FP_SCR0(%a6) # store out result 14443 14444 lea FP_SCR0(%a6),%a0 # pass: result addr 14445 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode 14446 bsr.l unf_res4 # calculate default result 14447 or.b %d0,FPSR_CC(%a6) # 'Z' bit may have been set 14448 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0 14449 rts 14450 14451# 14452# UNFL is enabled. 14453# 14454fsgldiv_unfl_ena: 14455 fmovm.x FP_SCR1(%a6),&0x40 # load dst op 14456 14457 fmov.l L_SCR3(%a6),%fpcr # set FPCR 14458 fmov.l &0x0,%fpsr # clear FPSR 14459 14460 fsgldiv.x FP_SCR0(%a6),%fp1 # execute sgl divide 14461 14462 fmov.l &0x0,%fpcr # clear FPCR 14463 14464 fmovm.x &0x40,FP_SCR0(%a6) # save result to stack 14465 mov.l %d2,-(%sp) # save d2 14466 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp} 14467 mov.l %d1,%d2 # make a copy 14468 andi.l &0x7fff,%d1 # strip sign 14469 andi.w &0x8000,%d2 # keep old sign 14470 sub.l %d0,%d1 # add scale factor 14471 addi.l &0x6000,%d1 # add bias 14472 andi.w &0x7fff,%d1 # clear top bit 14473 or.w %d2,%d1 # concat old sign, new exp 14474 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent 14475 mov.l (%sp)+,%d2 # restore d2 14476 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1 14477 bra.b fsgldiv_unfl_dis 14478 14479# 14480# the divide operation MAY underflow: 14481# 14482fsgldiv_may_unfl: 14483 fmovm.x FP_SCR1(%a6),&0x80 # load dst op 14484 14485 fmov.l L_SCR3(%a6),%fpcr # set FPCR 14486 fmov.l &0x0,%fpsr # clear FPSR 14487 14488 fsgldiv.x FP_SCR0(%a6),%fp0 # execute sgl divide 14489 14490 fmov.l %fpsr,%d1 # save status 14491 fmov.l &0x0,%fpcr # clear FPCR 14492 14493 or.l %d1,USER_FPSR(%a6) # save INEX2,N 14494 14495 fabs.x %fp0,%fp1 # make a copy of result 14496 fcmp.b %fp1,&0x1 # is |result| > 1.b? 14497 fbgt.w fsgldiv_normal_exit # no; no underflow occurred 14498 fblt.w fsgldiv_unfl # yes; underflow occurred 14499 14500# 14501# we still don't know if underflow occurred. result is ~ equal to 1. but, 14502# we don't know if the result was an underflow that rounded up to a 1 14503# or a normalized number that rounded down to a 1. so, redo the entire 14504# operation using RZ as the rounding mode to see what the pre-rounded 14505# result is. this case should be relatively rare. 14506# 14507 fmovm.x FP_SCR1(%a6),&0x40 # load dst op into %fp1 14508 14509 clr.l %d1 # clear scratch register 14510 ori.b &rz_mode*0x10,%d1 # force RZ rnd mode 14511 14512 fmov.l %d1,%fpcr # set FPCR 14513 fmov.l &0x0,%fpsr # clear FPSR 14514 14515 fsgldiv.x FP_SCR0(%a6),%fp1 # execute sgl divide 14516 14517 fmov.l &0x0,%fpcr # clear FPCR 14518 fabs.x %fp1 # make absolute value 14519 fcmp.b %fp1,&0x1 # is |result| < 1.b? 14520 fbge.w fsgldiv_normal_exit # no; no underflow occurred 14521 bra.w fsgldiv_unfl # yes; underflow occurred 14522 14523############################################################################ 14524 14525# 14526# Divide: inputs are not both normalized; what are they? 
14527# 14528fsgldiv_not_norm: 14529 mov.w (tbl_fsgldiv_op.b,%pc,%d1.w*2),%d1 14530 jmp (tbl_fsgldiv_op.b,%pc,%d1.w*1) 14531 14532 swbeg &48 14533tbl_fsgldiv_op: 14534 short fsgldiv_norm - tbl_fsgldiv_op # NORM / NORM 14535 short fsgldiv_inf_load - tbl_fsgldiv_op # NORM / ZERO 14536 short fsgldiv_zero_load - tbl_fsgldiv_op # NORM / INF 14537 short fsgldiv_res_qnan - tbl_fsgldiv_op # NORM / QNAN 14538 short fsgldiv_norm - tbl_fsgldiv_op # NORM / DENORM 14539 short fsgldiv_res_snan - tbl_fsgldiv_op # NORM / SNAN 14540 short tbl_fsgldiv_op - tbl_fsgldiv_op # 14541 short tbl_fsgldiv_op - tbl_fsgldiv_op # 14542 14543 short fsgldiv_zero_load - tbl_fsgldiv_op # ZERO / NORM 14544 short fsgldiv_res_operr - tbl_fsgldiv_op # ZERO / ZERO 14545 short fsgldiv_zero_load - tbl_fsgldiv_op # ZERO / INF 14546 short fsgldiv_res_qnan - tbl_fsgldiv_op # ZERO / QNAN 14547 short fsgldiv_zero_load - tbl_fsgldiv_op # ZERO / DENORM 14548 short fsgldiv_res_snan - tbl_fsgldiv_op # ZERO / SNAN 14549 short tbl_fsgldiv_op - tbl_fsgldiv_op # 14550 short tbl_fsgldiv_op - tbl_fsgldiv_op # 14551 14552 short fsgldiv_inf_dst - tbl_fsgldiv_op # INF / NORM 14553 short fsgldiv_inf_dst - tbl_fsgldiv_op # INF / ZERO 14554 short fsgldiv_res_operr - tbl_fsgldiv_op # INF / INF 14555 short fsgldiv_res_qnan - tbl_fsgldiv_op # INF / QNAN 14556 short fsgldiv_inf_dst - tbl_fsgldiv_op # INF / DENORM 14557 short fsgldiv_res_snan - tbl_fsgldiv_op # INF / SNAN 14558 short tbl_fsgldiv_op - tbl_fsgldiv_op # 14559 short tbl_fsgldiv_op - tbl_fsgldiv_op # 14560 14561 short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / NORM 14562 short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / ZERO 14563 short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / INF 14564 short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / QNAN 14565 short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / DENORM 14566 short fsgldiv_res_snan - tbl_fsgldiv_op # QNAN / SNAN 14567 short tbl_fsgldiv_op - tbl_fsgldiv_op # 14568 short tbl_fsgldiv_op - tbl_fsgldiv_op # 14569 14570 short fsgldiv_norm - tbl_fsgldiv_op # DENORM / NORM 14571 short fsgldiv_inf_load - tbl_fsgldiv_op # DENORM / ZERO 14572 short fsgldiv_zero_load - tbl_fsgldiv_op # DENORM / INF 14573 short fsgldiv_res_qnan - tbl_fsgldiv_op # DENORM / QNAN 14574 short fsgldiv_norm - tbl_fsgldiv_op # DENORM / DENORM 14575 short fsgldiv_res_snan - tbl_fsgldiv_op # DENORM / SNAN 14576 short tbl_fsgldiv_op - tbl_fsgldiv_op # 14577 short tbl_fsgldiv_op - tbl_fsgldiv_op # 14578 14579 short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / NORM 14580 short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / ZERO 14581 short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / INF 14582 short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / QNAN 14583 short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / DENORM 14584 short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / SNAN 14585 short tbl_fsgldiv_op - tbl_fsgldiv_op # 14586 short tbl_fsgldiv_op - tbl_fsgldiv_op # 14587 14588fsgldiv_res_qnan: 14589 bra.l res_qnan 14590fsgldiv_res_snan: 14591 bra.l res_snan 14592fsgldiv_res_operr: 14593 bra.l res_operr 14594fsgldiv_inf_load: 14595 bra.l fdiv_inf_load 14596fsgldiv_zero_load: 14597 bra.l fdiv_zero_load 14598fsgldiv_inf_dst: 14599 bra.l fdiv_inf_dst 14600 14601######################################################################### 14602# XDEF **************************************************************** # 14603# fadd(): emulates the fadd instruction # 14604# fsadd(): emulates the fadd instruction # 14605# fdadd(): emulates the fdadd instruction # 14606# # 14607# XREF 
**************************************************************** # 14608# addsub_scaler2() - scale the operands so they won't take exc # 14609# ovf_res() - return default overflow result # 14610# unf_res() - return default underflow result # 14611# res_qnan() - set QNAN result # 14612# res_snan() - set SNAN result # 14613# res_operr() - set OPERR result # 14614# scale_to_zero_src() - set src operand exponent equal to zero # 14615# scale_to_zero_dst() - set dst operand exponent equal to zero # 14616# # 14617# INPUT *************************************************************** # 14618# a0 = pointer to extended precision source operand # 14619# a1 = pointer to extended precision destination operand # 14620# # 14621# OUTPUT ************************************************************** # 14622# fp0 = result # 14623# fp1 = EXOP (if exception occurred) # 14624# # 14625# ALGORITHM *********************************************************** # 14626# Handle NANs, infinities, and zeroes as special cases. Divide # 14627# norms into extended, single, and double precision. # 14628# Do addition after scaling exponents such that exception won't # 14629# occur. Then, check result exponent to see if exception would have # 14630# occurred. If so, return default result and maybe EXOP. Else, insert # 14631# the correct result exponent and return. Set FPSR bits as appropriate. # 14632# # 14633######################################################################### 14634 14635 global fsadd 14636fsadd: 14637 andi.b &0x30,%d0 # clear rnd prec 14638 ori.b &s_mode*0x10,%d0 # insert sgl prec 14639 bra.b fadd 14640 14641 global fdadd 14642fdadd: 14643 andi.b &0x30,%d0 # clear rnd prec 14644 ori.b &d_mode*0x10,%d0 # insert dbl prec 14645 14646 global fadd 14647fadd: 14648 mov.l %d0,L_SCR3(%a6) # store rnd info 14649 14650 clr.w %d1 14651 mov.b DTAG(%a6),%d1 14652 lsl.b &0x3,%d1 14653 or.b STAG(%a6),%d1 # combine src tags 14654 14655 bne.w fadd_not_norm # optimize on non-norm input 14656 14657# 14658# ADD: norms and denorms 14659# 14660fadd_norm: 14661 bsr.l addsub_scaler2 # scale exponents 14662 14663fadd_zero_entry: 14664 fmovm.x FP_SCR1(%a6),&0x80 # load dst op 14665 14666 fmov.l &0x0,%fpsr # clear FPSR 14667 fmov.l L_SCR3(%a6),%fpcr # set FPCR 14668 14669 fadd.x FP_SCR0(%a6),%fp0 # execute add 14670 14671 fmov.l &0x0,%fpcr # clear FPCR 14672 fmov.l %fpsr,%d1 # fetch INEX2,N,Z 14673 14674 or.l %d1,USER_FPSR(%a6) # save exc and ccode bits 14675 14676 fbeq.w fadd_zero_exit # if result is zero, end now 14677 14678 mov.l %d2,-(%sp) # save d2 14679 14680 fmovm.x &0x01,-(%sp) # save result to stack 14681 14682 mov.w 2+L_SCR3(%a6),%d1 14683 lsr.b &0x6,%d1 14684 14685 mov.w (%sp),%d2 # fetch new sign, exp 14686 andi.l &0x7fff,%d2 # strip sign 14687 sub.l %d0,%d2 # add scale factor 14688 14689 cmp.l %d2,(tbl_fadd_ovfl.b,%pc,%d1.w*4) # is it an overflow? 14690 bge.b fadd_ovfl # yes 14691 14692 cmp.l %d2,(tbl_fadd_unfl.b,%pc,%d1.w*4) # is it an underflow? 
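# (d1 is the rounding precision from bits <7:6> of the stored control word:
# 0 = ext, 1 = sgl, 2 = dbl; it indexes the long-word tables below, so sgl,
# for instance, is checked against 0x407f for overflow and 0x3f81 for
# underflow.)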
14693 blt.w fadd_unfl # yes 14694 beq.w fadd_may_unfl # maybe; go find out 14695 14696fadd_normal: 14697 mov.w (%sp),%d1 14698 andi.w &0x8000,%d1 # keep sign 14699 or.w %d2,%d1 # concat sign,new exp 14700 mov.w %d1,(%sp) # insert new exponent 14701 14702 fmovm.x (%sp)+,&0x80 # return result in fp0 14703 14704 mov.l (%sp)+,%d2 # restore d2 14705 rts 14706 14707fadd_zero_exit: 14708# fmov.s &0x00000000,%fp0 # return zero in fp0 14709 rts 14710 14711tbl_fadd_ovfl: 14712 long 0x7fff # ext ovfl 14713 long 0x407f # sgl ovfl 14714 long 0x43ff # dbl ovfl 14715 14716tbl_fadd_unfl: 14717 long 0x0000 # ext unfl 14718 long 0x3f81 # sgl unfl 14719 long 0x3c01 # dbl unfl 14720 14721fadd_ovfl: 14722 or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex 14723 14724 mov.b FPCR_ENABLE(%a6),%d1 14725 andi.b &0x13,%d1 # is OVFL or INEX enabled? 14726 bne.b fadd_ovfl_ena # yes 14727 14728 add.l &0xc,%sp 14729fadd_ovfl_dis: 14730 btst &neg_bit,FPSR_CC(%a6) # is result negative? 14731 sne %d1 # set sign param accordingly 14732 mov.l L_SCR3(%a6),%d0 # pass prec:rnd 14733 bsr.l ovf_res # calculate default result 14734 or.b %d0,FPSR_CC(%a6) # set INF,N if applicable 14735 fmovm.x (%a0),&0x80 # return default result in fp0 14736 mov.l (%sp)+,%d2 # restore d2 14737 rts 14738 14739fadd_ovfl_ena: 14740 mov.b L_SCR3(%a6),%d1 14741 andi.b &0xc0,%d1 # is precision extended? 14742 bne.b fadd_ovfl_ena_sd # no; prec = sgl or dbl 14743 14744fadd_ovfl_ena_cont: 14745 mov.w (%sp),%d1 14746 andi.w &0x8000,%d1 # keep sign 14747 subi.l &0x6000,%d2 # add extra bias 14748 andi.w &0x7fff,%d2 14749 or.w %d2,%d1 # concat sign,new exp 14750 mov.w %d1,(%sp) # insert new exponent 14751 14752 fmovm.x (%sp)+,&0x40 # return EXOP in fp1 14753 bra.b fadd_ovfl_dis 14754 14755fadd_ovfl_ena_sd: 14756 fmovm.x FP_SCR1(%a6),&0x80 # load dst op 14757 14758 mov.l L_SCR3(%a6),%d1 14759 andi.b &0x30,%d1 # keep rnd mode 14760 fmov.l %d1,%fpcr # set FPCR 14761 14762 fadd.x FP_SCR0(%a6),%fp0 # execute add 14763 14764 fmov.l &0x0,%fpcr # clear FPCR 14765 14766 add.l &0xc,%sp 14767 fmovm.x &0x01,-(%sp) 14768 bra.b fadd_ovfl_ena_cont 14769 14770fadd_unfl: 14771 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit 14772 14773 add.l &0xc,%sp 14774 14775 fmovm.x FP_SCR1(%a6),&0x80 # load dst op 14776 14777 fmov.l &rz_mode*0x10,%fpcr # set FPCR 14778 fmov.l &0x0,%fpsr # clear FPSR 14779 14780 fadd.x FP_SCR0(%a6),%fp0 # execute add 14781 14782 fmov.l &0x0,%fpcr # clear FPCR 14783 fmov.l %fpsr,%d1 # save status 14784 14785 or.l %d1,USER_FPSR(%a6) # save INEX,N 14786 14787 mov.b FPCR_ENABLE(%a6),%d1 14788 andi.b &0x0b,%d1 # is UNFL or INEX enabled? 14789 bne.b fadd_unfl_ena # yes 14790 14791fadd_unfl_dis: 14792 fmovm.x &0x80,FP_SCR0(%a6) # store out result 14793 14794 lea FP_SCR0(%a6),%a0 # pass: result addr 14795 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode 14796 bsr.l unf_res # calculate default result 14797 or.b %d0,FPSR_CC(%a6) # 'Z' bit may have been set 14798 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0 14799 mov.l (%sp)+,%d2 # restore d2 14800 rts 14801 14802fadd_unfl_ena: 14803 fmovm.x FP_SCR1(%a6),&0x40 # load dst op 14804 14805 mov.l L_SCR3(%a6),%d1 14806 andi.b &0xc0,%d1 # is precision extended? 
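# (the two precision bits are 00 for extended, so a zero here means
# extended precision is selected.)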
14807 bne.b fadd_unfl_ena_sd # no; sgl or dbl
14808
14809 fmov.l L_SCR3(%a6),%fpcr # set FPCR
14810
14811fadd_unfl_ena_cont:
14812 fmov.l &0x0,%fpsr # clear FPSR
14813
14814 fadd.x FP_SCR0(%a6),%fp1 # execute add
14815
14816 fmov.l &0x0,%fpcr # clear FPCR
14817
14818 fmovm.x &0x40,FP_SCR0(%a6) # save result to stack
14819 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
14820 mov.l %d1,%d2 # make a copy
14821 andi.l &0x7fff,%d1 # strip sign
14822 andi.w &0x8000,%d2 # keep old sign
14823 sub.l %d0,%d1 # add scale factor
14824 addi.l &0x6000,%d1 # add new bias
14825 andi.w &0x7fff,%d1 # clear top bit
14826 or.w %d2,%d1 # concat sign,new exp
14827 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
14828 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
14829 bra.w fadd_unfl_dis
14830
14831fadd_unfl_ena_sd:
14832 mov.l L_SCR3(%a6),%d1
14833 andi.b &0x30,%d1 # use only rnd mode
14834 fmov.l %d1,%fpcr # set FPCR
14835
14836 bra.b fadd_unfl_ena_cont
14837
14838#
14839# result is equal to the smallest normalized number in the selected precision.
14840# If the precision is extended, this result could not have come from an
14841# underflow that rounded up.
14842#
14843fadd_may_unfl:
14844 mov.l L_SCR3(%a6),%d1
14845 andi.b &0xc0,%d1 # is precision extended?
14846 beq.w fadd_normal # yes; no underflow occurred
14847
14848 mov.l 0x4(%sp),%d1 # extract hi(man)
14849 cmpi.l %d1,&0x80000000 # is hi(man) = 0x80000000?
14850 bne.w fadd_normal # no; no underflow occurred
14851
14852 tst.l 0x8(%sp) # is lo(man) = 0x0?
14853 bne.w fadd_normal # no; no underflow occurred
14854
14855 btst &inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
14856 beq.w fadd_normal # no; no underflow occurred
14857
14858#
14859# ok, so now the result has an exponent equal to the smallest normalized
14860# exponent for the selected precision. also, the mantissa is equal to
14861# 0x8000000000000000 and this mantissa is the result of rounding non-zero
14862# g,r,s.
14863# now, we must determine whether the pre-rounded result was an underflow
14864# rounded "up" or a normalized number rounded "down".
14865# so, we do this by re-executing the add using RZ as the rounding mode and
14866# seeing if the new result is smaller or equal to the current result.
14867#
14868 fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1
14869
14870 mov.l L_SCR3(%a6),%d1
14871 andi.b &0xc0,%d1 # keep rnd prec
14872 ori.b &rz_mode*0x10,%d1 # insert rnd mode
14873 fmov.l %d1,%fpcr # set FPCR
14874 fmov.l &0x0,%fpsr # clear FPSR
14875
14876 fadd.x FP_SCR0(%a6),%fp1 # execute add
14877
14878 fmov.l &0x0,%fpcr # clear FPCR
14879
14880 fabs.x %fp0 # compare absolute values
14881 fabs.x %fp1
14882 fcmp.x %fp0,%fp1 # is first result > second?
14883
14884 fbgt.w fadd_unfl # yes; it's an underflow
14885 bra.w fadd_normal # no; it's not an underflow
14886
14887##########################################################################
14888
14889#
14890# Add: inputs are not both normalized; what are they?
14891# 14892fadd_not_norm: 14893 mov.w (tbl_fadd_op.b,%pc,%d1.w*2),%d1 14894 jmp (tbl_fadd_op.b,%pc,%d1.w*1) 14895 14896 swbeg &48 14897tbl_fadd_op: 14898 short fadd_norm - tbl_fadd_op # NORM + NORM 14899 short fadd_zero_src - tbl_fadd_op # NORM + ZERO 14900 short fadd_inf_src - tbl_fadd_op # NORM + INF 14901 short fadd_res_qnan - tbl_fadd_op # NORM + QNAN 14902 short fadd_norm - tbl_fadd_op # NORM + DENORM 14903 short fadd_res_snan - tbl_fadd_op # NORM + SNAN 14904 short tbl_fadd_op - tbl_fadd_op # 14905 short tbl_fadd_op - tbl_fadd_op # 14906 14907 short fadd_zero_dst - tbl_fadd_op # ZERO + NORM 14908 short fadd_zero_2 - tbl_fadd_op # ZERO + ZERO 14909 short fadd_inf_src - tbl_fadd_op # ZERO + INF 14910 short fadd_res_qnan - tbl_fadd_op # NORM + QNAN 14911 short fadd_zero_dst - tbl_fadd_op # ZERO + DENORM 14912 short fadd_res_snan - tbl_fadd_op # NORM + SNAN 14913 short tbl_fadd_op - tbl_fadd_op # 14914 short tbl_fadd_op - tbl_fadd_op # 14915 14916 short fadd_inf_dst - tbl_fadd_op # INF + NORM 14917 short fadd_inf_dst - tbl_fadd_op # INF + ZERO 14918 short fadd_inf_2 - tbl_fadd_op # INF + INF 14919 short fadd_res_qnan - tbl_fadd_op # NORM + QNAN 14920 short fadd_inf_dst - tbl_fadd_op # INF + DENORM 14921 short fadd_res_snan - tbl_fadd_op # NORM + SNAN 14922 short tbl_fadd_op - tbl_fadd_op # 14923 short tbl_fadd_op - tbl_fadd_op # 14924 14925 short fadd_res_qnan - tbl_fadd_op # QNAN + NORM 14926 short fadd_res_qnan - tbl_fadd_op # QNAN + ZERO 14927 short fadd_res_qnan - tbl_fadd_op # QNAN + INF 14928 short fadd_res_qnan - tbl_fadd_op # QNAN + QNAN 14929 short fadd_res_qnan - tbl_fadd_op # QNAN + DENORM 14930 short fadd_res_snan - tbl_fadd_op # QNAN + SNAN 14931 short tbl_fadd_op - tbl_fadd_op # 14932 short tbl_fadd_op - tbl_fadd_op # 14933 14934 short fadd_norm - tbl_fadd_op # DENORM + NORM 14935 short fadd_zero_src - tbl_fadd_op # DENORM + ZERO 14936 short fadd_inf_src - tbl_fadd_op # DENORM + INF 14937 short fadd_res_qnan - tbl_fadd_op # NORM + QNAN 14938 short fadd_norm - tbl_fadd_op # DENORM + DENORM 14939 short fadd_res_snan - tbl_fadd_op # NORM + SNAN 14940 short tbl_fadd_op - tbl_fadd_op # 14941 short tbl_fadd_op - tbl_fadd_op # 14942 14943 short fadd_res_snan - tbl_fadd_op # SNAN + NORM 14944 short fadd_res_snan - tbl_fadd_op # SNAN + ZERO 14945 short fadd_res_snan - tbl_fadd_op # SNAN + INF 14946 short fadd_res_snan - tbl_fadd_op # SNAN + QNAN 14947 short fadd_res_snan - tbl_fadd_op # SNAN + DENORM 14948 short fadd_res_snan - tbl_fadd_op # SNAN + SNAN 14949 short tbl_fadd_op - tbl_fadd_op # 14950 short tbl_fadd_op - tbl_fadd_op # 14951 14952fadd_res_qnan: 14953 bra.l res_qnan 14954fadd_res_snan: 14955 bra.l res_snan 14956 14957# 14958# both operands are ZEROes 14959# 14960fadd_zero_2: 14961 mov.b SRC_EX(%a0),%d0 # are the signs opposite 14962 mov.b DST_EX(%a1),%d1 14963 eor.b %d0,%d1 14964 bmi.w fadd_zero_2_chk_rm # weed out (-ZERO)+(+ZERO) 14965 14966# the signs are the same. so determine whether they are positive or negative 14967# and return the appropriately signed zero. 14968 tst.b %d0 # are ZEROes positive or negative? 14969 bmi.b fadd_zero_rm # negative 14970 fmov.s &0x00000000,%fp0 # return +ZERO 14971 mov.b &z_bmask,FPSR_CC(%a6) # set Z 14972 rts 14973 14974# 14975# the ZEROes have opposite signs: 14976# - Therefore, we return +ZERO if the rounding modes are RN,RZ, or RP. 14977# - -ZERO is returned in the case of RM. 14978# 14979fadd_zero_2_chk_rm: 14980 mov.b 3+L_SCR3(%a6),%d1 14981 andi.b &0x30,%d1 # extract rnd mode 14982 cmpi.b %d1,&rm_mode*0x10 # is rnd mode == RM? 
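# (the rounding mode sits in bits <5:4>, so rm_mode*0x10 forms the RM
# pattern in the same position for the compare.)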
14983 beq.b fadd_zero_rm # yes 14984 fmov.s &0x00000000,%fp0 # return +ZERO 14985 mov.b &z_bmask,FPSR_CC(%a6) # set Z 14986 rts 14987 14988fadd_zero_rm: 14989 fmov.s &0x80000000,%fp0 # return -ZERO 14990 mov.b &neg_bmask+z_bmask,FPSR_CC(%a6) # set NEG/Z 14991 rts 14992 14993# 14994# one operand is a ZERO and the other is a DENORM or NORM. scale 14995# the DENORM or NORM and jump to the regular fadd routine. 14996# 14997fadd_zero_dst: 14998 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) 14999 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) 15000 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) 15001 bsr.l scale_to_zero_src # scale the operand 15002 clr.w FP_SCR1_EX(%a6) 15003 clr.l FP_SCR1_HI(%a6) 15004 clr.l FP_SCR1_LO(%a6) 15005 bra.w fadd_zero_entry # go execute fadd 15006 15007fadd_zero_src: 15008 mov.w DST_EX(%a1),FP_SCR1_EX(%a6) 15009 mov.l DST_HI(%a1),FP_SCR1_HI(%a6) 15010 mov.l DST_LO(%a1),FP_SCR1_LO(%a6) 15011 bsr.l scale_to_zero_dst # scale the operand 15012 clr.w FP_SCR0_EX(%a6) 15013 clr.l FP_SCR0_HI(%a6) 15014 clr.l FP_SCR0_LO(%a6) 15015 bra.w fadd_zero_entry # go execute fadd 15016 15017# 15018# both operands are INFs. an OPERR will result if the INFs have 15019# different signs. else, an INF of the same sign is returned 15020# 15021fadd_inf_2: 15022 mov.b SRC_EX(%a0),%d0 # exclusive or the signs 15023 mov.b DST_EX(%a1),%d1 15024 eor.b %d1,%d0 15025 bmi.l res_operr # weed out (-INF)+(+INF) 15026 15027# ok, so it's not an OPERR. but, we do have to remember to return the 15028# src INF since that's where the 881/882 gets the j-bit from... 15029 15030# 15031# operands are INF and one of {ZERO, INF, DENORM, NORM} 15032# 15033fadd_inf_src: 15034 fmovm.x SRC(%a0),&0x80 # return src INF 15035 tst.b SRC_EX(%a0) # is INF positive? 15036 bpl.b fadd_inf_done # yes; we're done 15037 mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG 15038 rts 15039 15040# 15041# operands are INF and one of {ZERO, INF, DENORM, NORM} 15042# 15043fadd_inf_dst: 15044 fmovm.x DST(%a1),&0x80 # return dst INF 15045 tst.b DST_EX(%a1) # is INF positive? 
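# (the sign is the msb of the EX word, so tst.b of its first byte sets
# the N flag for a negative INF.)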
15046 bpl.b fadd_inf_done # yes; we're done 15047 mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG 15048 rts 15049 15050fadd_inf_done: 15051 mov.b &inf_bmask,FPSR_CC(%a6) # set INF 15052 rts 15053 15054######################################################################### 15055# XDEF **************************************************************** # 15056# fsub(): emulates the fsub instruction # 15057# fssub(): emulates the fssub instruction # 15058# fdsub(): emulates the fdsub instruction # 15059# # 15060# XREF **************************************************************** # 15061# addsub_scaler2() - scale the operands so they won't take exc # 15062# ovf_res() - return default overflow result # 15063# unf_res() - return default underflow result # 15064# res_qnan() - set QNAN result # 15065# res_snan() - set SNAN result # 15066# res_operr() - set OPERR result # 15067# scale_to_zero_src() - set src operand exponent equal to zero # 15068# scale_to_zero_dst() - set dst operand exponent equal to zero # 15069# # 15070# INPUT *************************************************************** # 15071# a0 = pointer to extended precision source operand # 15072# a1 = pointer to extended precision destination operand # 15073# # 15074# OUTPUT ************************************************************** # 15075# fp0 = result # 15076# fp1 = EXOP (if exception occurred) # 15077# # 15078# ALGORITHM *********************************************************** # 15079# Handle NANs, infinities, and zeroes as special cases. Divide # 15080# norms into extended, single, and double precision. # 15081# Do subtraction after scaling exponents such that exception won't# 15082# occur. Then, check result exponent to see if exception would have # 15083# occurred. If so, return default result and maybe EXOP. Else, insert # 15084# the correct result exponent and return. Set FPSR bits as appropriate. # 15085# # 15086######################################################################### 15087 15088 global fssub 15089fssub: 15090 andi.b &0x30,%d0 # clear rnd prec 15091 ori.b &s_mode*0x10,%d0 # insert sgl prec 15092 bra.b fsub 15093 15094 global fdsub 15095fdsub: 15096 andi.b &0x30,%d0 # clear rnd prec 15097 ori.b &d_mode*0x10,%d0 # insert dbl prec 15098 15099 global fsub 15100fsub: 15101 mov.l %d0,L_SCR3(%a6) # store rnd info 15102 15103 clr.w %d1 15104 mov.b DTAG(%a6),%d1 15105 lsl.b &0x3,%d1 15106 or.b STAG(%a6),%d1 # combine src tags 15107 15108 bne.w fsub_not_norm # optimize on non-norm input 15109 15110# 15111# SUB: norms and denorms 15112# 15113fsub_norm: 15114 bsr.l addsub_scaler2 # scale exponents 15115 15116fsub_zero_entry: 15117 fmovm.x FP_SCR1(%a6),&0x80 # load dst op 15118 15119 fmov.l &0x0,%fpsr # clear FPSR 15120 fmov.l L_SCR3(%a6),%fpcr # set FPCR 15121 15122 fsub.x FP_SCR0(%a6),%fp0 # execute subtract 15123 15124 fmov.l &0x0,%fpcr # clear FPCR 15125 fmov.l %fpsr,%d1 # fetch INEX2, N, Z 15126 15127 or.l %d1,USER_FPSR(%a6) # save exc and ccode bits 15128 15129 fbeq.w fsub_zero_exit # if result zero, end now 15130 15131 mov.l %d2,-(%sp) # save d2 15132 15133 fmovm.x &0x01,-(%sp) # save result to stack 15134 15135 mov.w 2+L_SCR3(%a6),%d1 15136 lsr.b &0x6,%d1 15137 15138 mov.w (%sp),%d2 # fetch new exponent 15139 andi.l &0x7fff,%d2 # strip sign 15140 sub.l %d0,%d2 # add scale factor 15141 15142 cmp.l %d2,(tbl_fsub_ovfl.b,%pc,%d1.w*4) # is it an overflow? 15143 bge.b fsub_ovfl # yes 15144 15145 cmp.l %d2,(tbl_fsub_unfl.b,%pc,%d1.w*4) # is it an underflow? 
15146 blt.w fsub_unfl # yes 15147 beq.w fsub_may_unfl # maybe; go find out 15148 15149fsub_normal: 15150 mov.w (%sp),%d1 15151 andi.w &0x8000,%d1 # keep sign 15152 or.w %d2,%d1 # insert new exponent 15153 mov.w %d1,(%sp) # insert new exponent 15154 15155 fmovm.x (%sp)+,&0x80 # return result in fp0 15156 15157 mov.l (%sp)+,%d2 # restore d2 15158 rts 15159 15160fsub_zero_exit: 15161# fmov.s &0x00000000,%fp0 # return zero in fp0 15162 rts 15163 15164tbl_fsub_ovfl: 15165 long 0x7fff # ext ovfl 15166 long 0x407f # sgl ovfl 15167 long 0x43ff # dbl ovfl 15168 15169tbl_fsub_unfl: 15170 long 0x0000 # ext unfl 15171 long 0x3f81 # sgl unfl 15172 long 0x3c01 # dbl unfl 15173 15174fsub_ovfl: 15175 or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex 15176 15177 mov.b FPCR_ENABLE(%a6),%d1 15178 andi.b &0x13,%d1 # is OVFL or INEX enabled? 15179 bne.b fsub_ovfl_ena # yes 15180 15181 add.l &0xc,%sp 15182fsub_ovfl_dis: 15183 btst &neg_bit,FPSR_CC(%a6) # is result negative? 15184 sne %d1 # set sign param accordingly 15185 mov.l L_SCR3(%a6),%d0 # pass prec:rnd 15186 bsr.l ovf_res # calculate default result 15187 or.b %d0,FPSR_CC(%a6) # set INF,N if applicable 15188 fmovm.x (%a0),&0x80 # return default result in fp0 15189 mov.l (%sp)+,%d2 # restore d2 15190 rts 15191 15192fsub_ovfl_ena: 15193 mov.b L_SCR3(%a6),%d1 15194 andi.b &0xc0,%d1 # is precision extended? 15195 bne.b fsub_ovfl_ena_sd # no 15196 15197fsub_ovfl_ena_cont: 15198 mov.w (%sp),%d1 # fetch {sgn,exp} 15199 andi.w &0x8000,%d1 # keep sign 15200 subi.l &0x6000,%d2 # subtract new bias 15201 andi.w &0x7fff,%d2 # clear top bit 15202 or.w %d2,%d1 # concat sign,exp 15203 mov.w %d1,(%sp) # insert new exponent 15204 15205 fmovm.x (%sp)+,&0x40 # return EXOP in fp1 15206 bra.b fsub_ovfl_dis 15207 15208fsub_ovfl_ena_sd: 15209 fmovm.x FP_SCR1(%a6),&0x80 # load dst op 15210 15211 mov.l L_SCR3(%a6),%d1 15212 andi.b &0x30,%d1 # clear rnd prec 15213 fmov.l %d1,%fpcr # set FPCR 15214 15215 fsub.x FP_SCR0(%a6),%fp0 # execute subtract 15216 15217 fmov.l &0x0,%fpcr # clear FPCR 15218 15219 add.l &0xc,%sp 15220 fmovm.x &0x01,-(%sp) 15221 bra.b fsub_ovfl_ena_cont 15222 15223fsub_unfl: 15224 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit 15225 15226 add.l &0xc,%sp 15227 15228 fmovm.x FP_SCR1(%a6),&0x80 # load dst op 15229 15230 fmov.l &rz_mode*0x10,%fpcr # set FPCR 15231 fmov.l &0x0,%fpsr # clear FPSR 15232 15233 fsub.x FP_SCR0(%a6),%fp0 # execute subtract 15234 15235 fmov.l &0x0,%fpcr # clear FPCR 15236 fmov.l %fpsr,%d1 # save status 15237 15238 or.l %d1,USER_FPSR(%a6) 15239 15240 mov.b FPCR_ENABLE(%a6),%d1 15241 andi.b &0x0b,%d1 # is UNFL or INEX enabled? 15242 bne.b fsub_unfl_ena # yes 15243 15244fsub_unfl_dis: 15245 fmovm.x &0x80,FP_SCR0(%a6) # store out result 15246 15247 lea FP_SCR0(%a6),%a0 # pass: result addr 15248 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode 15249 bsr.l unf_res # calculate default result 15250 or.b %d0,FPSR_CC(%a6) # 'Z' may have been set 15251 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0 15252 mov.l (%sp)+,%d2 # restore d2 15253 rts 15254 15255fsub_unfl_ena: 15256 fmovm.x FP_SCR1(%a6),&0x40 15257 15258 mov.l L_SCR3(%a6),%d1 15259 andi.b &0xc0,%d1 # is precision extended? 
15260 bne.b fsub_unfl_ena_sd # no 15261 15262 fmov.l L_SCR3(%a6),%fpcr # set FPCR 15263 15264fsub_unfl_ena_cont: 15265 fmov.l &0x0,%fpsr # clear FPSR 15266 15267 fsub.x FP_SCR0(%a6),%fp1 # execute subtract 15268 15269 fmov.l &0x0,%fpcr # clear FPCR 15270 15271 fmovm.x &0x40,FP_SCR0(%a6) # store result to stack 15272 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp} 15273 mov.l %d1,%d2 # make a copy 15274 andi.l &0x7fff,%d1 # strip sign 15275 andi.w &0x8000,%d2 # keep old sign 15276 sub.l %d0,%d1 # add scale factor 15277 addi.l &0x6000,%d1 # subtract new bias 15278 andi.w &0x7fff,%d1 # clear top bit 15279 or.w %d2,%d1 # concat sgn,exp 15280 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent 15281 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1 15282 bra.w fsub_unfl_dis 15283 15284fsub_unfl_ena_sd: 15285 mov.l L_SCR3(%a6),%d1 15286 andi.b &0x30,%d1 # clear rnd prec 15287 fmov.l %d1,%fpcr # set FPCR 15288 15289 bra.b fsub_unfl_ena_cont 15290 15291# 15292# result is equal to the smallest normalized number in the selected precision 15293# if the precision is extended, this result could not have come from an 15294# underflow that rounded up. 15295# 15296fsub_may_unfl: 15297 mov.l L_SCR3(%a6),%d1 15298 andi.b &0xc0,%d1 # fetch rnd prec 15299 beq.w fsub_normal # yes; no underflow occurred 15300 15301 mov.l 0x4(%sp),%d1 15302 cmpi.l %d1,&0x80000000 # is hi(man) = 0x80000000? 15303 bne.w fsub_normal # no; no underflow occurred 15304 15305 tst.l 0x8(%sp) # is lo(man) = 0x0? 15306 bne.w fsub_normal # no; no underflow occurred 15307 15308 btst &inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set? 15309 beq.w fsub_normal # no; no underflow occurred 15310 15311# 15312# ok, so now the result has a exponent equal to the smallest normalized 15313# exponent for the selected precision. also, the mantissa is equal to 15314# 0x8000000000000000 and this mantissa is the result of rounding non-zero 15315# g,r,s. 15316# now, we must determine whether the pre-rounded result was an underflow 15317# rounded "up" or a normalized number rounded "down". 15318# so, we do this be re-executing the add using RZ as the rounding mode and 15319# seeing if the new result is smaller or equal to the current result. 15320# 15321 fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1 15322 15323 mov.l L_SCR3(%a6),%d1 15324 andi.b &0xc0,%d1 # keep rnd prec 15325 ori.b &rz_mode*0x10,%d1 # insert rnd mode 15326 fmov.l %d1,%fpcr # set FPCR 15327 fmov.l &0x0,%fpsr # clear FPSR 15328 15329 fsub.x FP_SCR0(%a6),%fp1 # execute subtract 15330 15331 fmov.l &0x0,%fpcr # clear FPCR 15332 15333 fabs.x %fp0 # compare absolute values 15334 fabs.x %fp1 15335 fcmp.x %fp0,%fp1 # is first result > second? 15336 15337 fbgt.w fsub_unfl # yes; it's an underflow 15338 bra.w fsub_normal # no; it's not an underflow 15339 15340########################################################################## 15341 15342# 15343# Sub: inputs are not both normalized; what are they? 
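# (The dispatch below uses the tag pair built at the fsub entry point:
#  d1 = (DTAG << 3) | STAG, so the destination tag selects one of six
#  groups of eight entries -- NORM, ZERO, INF, QNAN, DENORM, SNAN plus two
#  unused slots -- and the source tag selects the entry within the group.
#  The word fetched from tbl_fsub_op is a displacement back to the handler
#  for that dst/src combination.)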
15344# 15345fsub_not_norm: 15346 mov.w (tbl_fsub_op.b,%pc,%d1.w*2),%d1 15347 jmp (tbl_fsub_op.b,%pc,%d1.w*1) 15348 15349 swbeg &48 15350tbl_fsub_op: 15351 short fsub_norm - tbl_fsub_op # NORM - NORM 15352 short fsub_zero_src - tbl_fsub_op # NORM - ZERO 15353 short fsub_inf_src - tbl_fsub_op # NORM - INF 15354 short fsub_res_qnan - tbl_fsub_op # NORM - QNAN 15355 short fsub_norm - tbl_fsub_op # NORM - DENORM 15356 short fsub_res_snan - tbl_fsub_op # NORM - SNAN 15357 short tbl_fsub_op - tbl_fsub_op # 15358 short tbl_fsub_op - tbl_fsub_op # 15359 15360 short fsub_zero_dst - tbl_fsub_op # ZERO - NORM 15361 short fsub_zero_2 - tbl_fsub_op # ZERO - ZERO 15362 short fsub_inf_src - tbl_fsub_op # ZERO - INF 15363 short fsub_res_qnan - tbl_fsub_op # NORM - QNAN 15364 short fsub_zero_dst - tbl_fsub_op # ZERO - DENORM 15365 short fsub_res_snan - tbl_fsub_op # NORM - SNAN 15366 short tbl_fsub_op - tbl_fsub_op # 15367 short tbl_fsub_op - tbl_fsub_op # 15368 15369 short fsub_inf_dst - tbl_fsub_op # INF - NORM 15370 short fsub_inf_dst - tbl_fsub_op # INF - ZERO 15371 short fsub_inf_2 - tbl_fsub_op # INF - INF 15372 short fsub_res_qnan - tbl_fsub_op # NORM - QNAN 15373 short fsub_inf_dst - tbl_fsub_op # INF - DENORM 15374 short fsub_res_snan - tbl_fsub_op # NORM - SNAN 15375 short tbl_fsub_op - tbl_fsub_op # 15376 short tbl_fsub_op - tbl_fsub_op # 15377 15378 short fsub_res_qnan - tbl_fsub_op # QNAN - NORM 15379 short fsub_res_qnan - tbl_fsub_op # QNAN - ZERO 15380 short fsub_res_qnan - tbl_fsub_op # QNAN - INF 15381 short fsub_res_qnan - tbl_fsub_op # QNAN - QNAN 15382 short fsub_res_qnan - tbl_fsub_op # QNAN - DENORM 15383 short fsub_res_snan - tbl_fsub_op # QNAN - SNAN 15384 short tbl_fsub_op - tbl_fsub_op # 15385 short tbl_fsub_op - tbl_fsub_op # 15386 15387 short fsub_norm - tbl_fsub_op # DENORM - NORM 15388 short fsub_zero_src - tbl_fsub_op # DENORM - ZERO 15389 short fsub_inf_src - tbl_fsub_op # DENORM - INF 15390 short fsub_res_qnan - tbl_fsub_op # NORM - QNAN 15391 short fsub_norm - tbl_fsub_op # DENORM - DENORM 15392 short fsub_res_snan - tbl_fsub_op # NORM - SNAN 15393 short tbl_fsub_op - tbl_fsub_op # 15394 short tbl_fsub_op - tbl_fsub_op # 15395 15396 short fsub_res_snan - tbl_fsub_op # SNAN - NORM 15397 short fsub_res_snan - tbl_fsub_op # SNAN - ZERO 15398 short fsub_res_snan - tbl_fsub_op # SNAN - INF 15399 short fsub_res_snan - tbl_fsub_op # SNAN - QNAN 15400 short fsub_res_snan - tbl_fsub_op # SNAN - DENORM 15401 short fsub_res_snan - tbl_fsub_op # SNAN - SNAN 15402 short tbl_fsub_op - tbl_fsub_op # 15403 short tbl_fsub_op - tbl_fsub_op # 15404 15405fsub_res_qnan: 15406 bra.l res_qnan 15407fsub_res_snan: 15408 bra.l res_snan 15409 15410# 15411# both operands are ZEROes 15412# 15413fsub_zero_2: 15414 mov.b SRC_EX(%a0),%d0 15415 mov.b DST_EX(%a1),%d1 15416 eor.b %d1,%d0 15417 bpl.b fsub_zero_2_chk_rm 15418 15419# the signs are opposite, so, return a ZERO w/ the sign of the dst ZERO 15420 tst.b %d0 # is dst negative? 15421 bmi.b fsub_zero_2_rm # yes 15422 fmov.s &0x00000000,%fp0 # no; return +ZERO 15423 mov.b &z_bmask,FPSR_CC(%a6) # set Z 15424 rts 15425 15426# 15427# the ZEROes have the same signs: 15428# - Therefore, we return +ZERO if the rounding mode is RN,RZ, or RP 15429# - -ZERO is returned in the case of RM. 15430# 15431fsub_zero_2_chk_rm: 15432 mov.b 3+L_SCR3(%a6),%d1 15433 andi.b &0x30,%d1 # extract rnd mode 15434 cmpi.b %d1,&rm_mode*0x10 # is rnd mode = RM? 
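# (This follows the IEEE 754 rule for an exact zero difference: with
#  like-signed zero operands the result is +0 under RN, RZ, and RP and -0
#  only under RM, so the compare above checks the mode field in bits 5:4 of
#  3+L_SCR3 against the RM encoding.)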
15435 beq.b fsub_zero_2_rm # yes 15436 fmov.s &0x00000000,%fp0 # no; return +ZERO 15437 mov.b &z_bmask,FPSR_CC(%a6) # set Z 15438 rts 15439 15440fsub_zero_2_rm: 15441 fmov.s &0x80000000,%fp0 # return -ZERO 15442 mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/NEG 15443 rts 15444 15445# 15446# one operand is a ZERO and the other is a DENORM or a NORM. 15447# scale the DENORM or NORM and jump to the regular fsub routine. 15448# 15449fsub_zero_dst: 15450 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) 15451 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) 15452 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) 15453 bsr.l scale_to_zero_src # scale the operand 15454 clr.w FP_SCR1_EX(%a6) 15455 clr.l FP_SCR1_HI(%a6) 15456 clr.l FP_SCR1_LO(%a6) 15457 bra.w fsub_zero_entry # go execute fsub 15458 15459fsub_zero_src: 15460 mov.w DST_EX(%a1),FP_SCR1_EX(%a6) 15461 mov.l DST_HI(%a1),FP_SCR1_HI(%a6) 15462 mov.l DST_LO(%a1),FP_SCR1_LO(%a6) 15463 bsr.l scale_to_zero_dst # scale the operand 15464 clr.w FP_SCR0_EX(%a6) 15465 clr.l FP_SCR0_HI(%a6) 15466 clr.l FP_SCR0_LO(%a6) 15467 bra.w fsub_zero_entry # go execute fsub 15468 15469# 15470# both operands are INFs. an OPERR will result if the INFs have the 15471# same signs. else, 15472# 15473fsub_inf_2: 15474 mov.b SRC_EX(%a0),%d0 # exclusive or the signs 15475 mov.b DST_EX(%a1),%d1 15476 eor.b %d1,%d0 15477 bpl.l res_operr # weed out (-INF)+(+INF) 15478 15479# ok, so it's not an OPERR. but we do have to remember to return 15480# the src INF since that's where the 881/882 gets the j-bit. 15481 15482fsub_inf_src: 15483 fmovm.x SRC(%a0),&0x80 # return src INF 15484 fneg.x %fp0 # invert sign 15485 fbge.w fsub_inf_done # sign is now positive 15486 mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG 15487 rts 15488 15489fsub_inf_dst: 15490 fmovm.x DST(%a1),&0x80 # return dst INF 15491 tst.b DST_EX(%a1) # is INF negative? 15492 bpl.b fsub_inf_done # no 15493 mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG 15494 rts 15495 15496fsub_inf_done: 15497 mov.b &inf_bmask,FPSR_CC(%a6) # set INF 15498 rts 15499 15500######################################################################### 15501# XDEF **************************************************************** # 15502# fsqrt(): emulates the fsqrt instruction # 15503# fssqrt(): emulates the fssqrt instruction # 15504# fdsqrt(): emulates the fdsqrt instruction # 15505# # 15506# XREF **************************************************************** # 15507# scale_sqrt() - scale the source operand # 15508# unf_res() - return default underflow result # 15509# ovf_res() - return default overflow result # 15510# res_qnan_1op() - return QNAN result # 15511# res_snan_1op() - return SNAN result # 15512# # 15513# INPUT *************************************************************** # 15514# a0 = pointer to extended precision source operand # 15515# d0 rnd prec,mode # 15516# # 15517# OUTPUT ************************************************************** # 15518# fp0 = result # 15519# fp1 = EXOP (if exception occurred) # 15520# # 15521# ALGORITHM *********************************************************** # 15522# Handle NANs, infinities, and zeroes as special cases. Divide # 15523# norms/denorms into ext/sgl/dbl precision. # 15524# For norms/denorms, scale the exponents such that a sqrt # 15525# instruction won't cause an exception. Use the regular fsqrt to # 15526# compute a result. Check if the regular operands would have taken # 15527# an exception. If so, return the default overflow/underflow result # 15528# and return the EXOP if exceptions are enabled. 
Else, scale the # 15529# result operand to the proper exponent. # 15530# # 15531######################################################################### 15532 15533 global fssqrt 15534fssqrt: 15535 andi.b &0x30,%d0 # clear rnd prec 15536 ori.b &s_mode*0x10,%d0 # insert sgl precision 15537 bra.b fsqrt 15538 15539 global fdsqrt 15540fdsqrt: 15541 andi.b &0x30,%d0 # clear rnd prec 15542 ori.b &d_mode*0x10,%d0 # insert dbl precision 15543 15544 global fsqrt 15545fsqrt: 15546 mov.l %d0,L_SCR3(%a6) # store rnd info 15547 clr.w %d1 15548 mov.b STAG(%a6),%d1 15549 bne.w fsqrt_not_norm # optimize on non-norm input 15550 15551# 15552# SQUARE ROOT: norms and denorms ONLY! 15553# 15554fsqrt_norm: 15555 tst.b SRC_EX(%a0) # is operand negative? 15556 bmi.l res_operr # yes 15557 15558 andi.b &0xc0,%d0 # is precision extended? 15559 bne.b fsqrt_not_ext # no; go handle sgl or dbl 15560 15561 fmov.l L_SCR3(%a6),%fpcr # set FPCR 15562 fmov.l &0x0,%fpsr # clear FPSR 15563 15564 fsqrt.x (%a0),%fp0 # execute square root 15565 15566 fmov.l %fpsr,%d1 15567 or.l %d1,USER_FPSR(%a6) # set N,INEX 15568 15569 rts 15570 15571fsqrt_denorm: 15572 tst.b SRC_EX(%a0) # is operand negative? 15573 bmi.l res_operr # yes 15574 15575 andi.b &0xc0,%d0 # is precision extended? 15576 bne.b fsqrt_not_ext # no; go handle sgl or dbl 15577 15578 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) 15579 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) 15580 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) 15581 15582 bsr.l scale_sqrt # calculate scale factor 15583 15584 bra.w fsqrt_sd_normal 15585 15586# 15587# operand is either single or double 15588# 15589fsqrt_not_ext: 15590 cmpi.b %d0,&s_mode*0x10 # separate sgl/dbl prec 15591 bne.w fsqrt_dbl 15592 15593# 15594# operand is to be rounded to single precision 15595# 15596fsqrt_sgl: 15597 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) 15598 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) 15599 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) 15600 15601 bsr.l scale_sqrt # calculate scale factor 15602 15603 cmpi.l %d0,&0x3fff-0x3f81 # will move in underflow? 15604 beq.w fsqrt_sd_may_unfl 15605 bgt.w fsqrt_sd_unfl # yes; go handle underflow 15606 cmpi.l %d0,&0x3fff-0x407f # will move in overflow? 15607 beq.w fsqrt_sd_may_ovfl # maybe; go check 15608 blt.w fsqrt_sd_ovfl # yes; go handle overflow 15609 15610# 15611# operand will NOT overflow or underflow when moved in to the fp reg file 15612# 15613fsqrt_sd_normal: 15614 fmov.l &0x0,%fpsr # clear FPSR 15615 fmov.l L_SCR3(%a6),%fpcr # set FPCR 15616 15617 fsqrt.x FP_SCR0(%a6),%fp0 # perform absolute 15618 15619 fmov.l %fpsr,%d1 # save FPSR 15620 fmov.l &0x0,%fpcr # clear FPCR 15621 15622 or.l %d1,USER_FPSR(%a6) # save INEX2,N 15623 15624fsqrt_sd_normal_exit: 15625 mov.l %d2,-(%sp) # save d2 15626 fmovm.x &0x80,FP_SCR0(%a6) # store out result 15627 mov.w FP_SCR0_EX(%a6),%d1 # load sgn,exp 15628 mov.l %d1,%d2 # make a copy 15629 andi.l &0x7fff,%d1 # strip sign 15630 sub.l %d0,%d1 # add scale factor 15631 andi.w &0x8000,%d2 # keep old sign 15632 or.w %d1,%d2 # concat old sign,new exp 15633 mov.w %d2,FP_SCR0_EX(%a6) # insert new exponent 15634 mov.l (%sp)+,%d2 # restore d2 15635 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0 15636 rts 15637 15638# 15639# operand is to be rounded to double precision 15640# 15641fsqrt_dbl: 15642 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) 15643 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) 15644 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) 15645 15646 bsr.l scale_sqrt # calculate scale factor 15647 15648 cmpi.l %d0,&0x3fff-0x3c01 # will move in underflow? 
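#
# Note on the compare above: scale_sqrt returns the scale factor in d0, and
# the final exponent is rebuilt later as (scaled exponent - d0). With the
# scaled exponent forced to 0x3fff or 0x3ffe, a scale factor greater than
# 0x3fff-0x3c01 drops the exponent below 0x3c01 (the smallest double
# precision normal exponent), so the result must underflow; equality is the
# borderline case that fsqrt_sd_may_unfl resolves from the low exponent bit.
#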
15649 beq.w fsqrt_sd_may_unfl 15650 bgt.b fsqrt_sd_unfl # yes; go handle underflow 15651 cmpi.l %d0,&0x3fff-0x43ff # will move in overflow? 15652 beq.w fsqrt_sd_may_ovfl # maybe; go check 15653 blt.w fsqrt_sd_ovfl # yes; go handle overflow 15654 bra.w fsqrt_sd_normal # no; ho handle normalized op 15655 15656# we're on the line here and the distinguising characteristic is whether 15657# the exponent is 3fff or 3ffe. if it's 3ffe, then it's a safe number 15658# elsewise fall through to underflow. 15659fsqrt_sd_may_unfl: 15660 btst &0x0,1+FP_SCR0_EX(%a6) # is exponent 0x3fff? 15661 bne.w fsqrt_sd_normal # yes, so no underflow 15662 15663# 15664# operand WILL underflow when moved in to the fp register file 15665# 15666fsqrt_sd_unfl: 15667 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit 15668 15669 fmov.l &rz_mode*0x10,%fpcr # set FPCR 15670 fmov.l &0x0,%fpsr # clear FPSR 15671 15672 fsqrt.x FP_SCR0(%a6),%fp0 # execute square root 15673 15674 fmov.l %fpsr,%d1 # save status 15675 fmov.l &0x0,%fpcr # clear FPCR 15676 15677 or.l %d1,USER_FPSR(%a6) # save INEX2,N 15678 15679# if underflow or inexact is enabled, go calculate EXOP first. 15680 mov.b FPCR_ENABLE(%a6),%d1 15681 andi.b &0x0b,%d1 # is UNFL or INEX enabled? 15682 bne.b fsqrt_sd_unfl_ena # yes 15683 15684fsqrt_sd_unfl_dis: 15685 fmovm.x &0x80,FP_SCR0(%a6) # store out result 15686 15687 lea FP_SCR0(%a6),%a0 # pass: result addr 15688 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode 15689 bsr.l unf_res # calculate default result 15690 or.b %d0,FPSR_CC(%a6) # set possible 'Z' ccode 15691 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0 15692 rts 15693 15694# 15695# operand will underflow AND underflow is enabled. 15696# Therefore, we must return the result rounded to extended precision. 15697# 15698fsqrt_sd_unfl_ena: 15699 mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6) 15700 mov.l FP_SCR0_LO(%a6),FP_SCR1_LO(%a6) 15701 mov.w FP_SCR0_EX(%a6),%d1 # load current exponent 15702 15703 mov.l %d2,-(%sp) # save d2 15704 mov.l %d1,%d2 # make a copy 15705 andi.l &0x7fff,%d1 # strip sign 15706 andi.w &0x8000,%d2 # keep old sign 15707 sub.l %d0,%d1 # subtract scale factor 15708 addi.l &0x6000,%d1 # add new bias 15709 andi.w &0x7fff,%d1 15710 or.w %d2,%d1 # concat new sign,new exp 15711 mov.w %d1,FP_SCR1_EX(%a6) # insert new exp 15712 fmovm.x FP_SCR1(%a6),&0x40 # return EXOP in fp1 15713 mov.l (%sp)+,%d2 # restore d2 15714 bra.b fsqrt_sd_unfl_dis 15715 15716# 15717# operand WILL overflow. 15718# 15719fsqrt_sd_ovfl: 15720 fmov.l &0x0,%fpsr # clear FPSR 15721 fmov.l L_SCR3(%a6),%fpcr # set FPCR 15722 15723 fsqrt.x FP_SCR0(%a6),%fp0 # perform square root 15724 15725 fmov.l &0x0,%fpcr # clear FPCR 15726 fmov.l %fpsr,%d1 # save FPSR 15727 15728 or.l %d1,USER_FPSR(%a6) # save INEX2,N 15729 15730fsqrt_sd_ovfl_tst: 15731 or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex 15732 15733 mov.b FPCR_ENABLE(%a6),%d1 15734 andi.b &0x13,%d1 # is OVFL or INEX enabled? 15735 bne.b fsqrt_sd_ovfl_ena # yes 15736 15737# 15738# OVFL is not enabled; therefore, we must create the default result by 15739# calling ovf_res(). 15740# 15741fsqrt_sd_ovfl_dis: 15742 btst &neg_bit,FPSR_CC(%a6) # is result negative? 15743 sne %d1 # set sign param accordingly 15744 mov.l L_SCR3(%a6),%d0 # pass: prec,mode 15745 bsr.l ovf_res # calculate default result 15746 or.b %d0,FPSR_CC(%a6) # set INF,N if applicable 15747 fmovm.x (%a0),&0x80 # return default result in fp0 15748 rts 15749 15750# 15751# OVFL is enabled. 
15752# the INEX2 bit has already been updated by the round to the correct precision. 15753# now, round to extended(and don't alter the FPSR). 15754# 15755fsqrt_sd_ovfl_ena: 15756 mov.l %d2,-(%sp) # save d2 15757 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp} 15758 mov.l %d1,%d2 # make a copy 15759 andi.l &0x7fff,%d1 # strip sign 15760 andi.w &0x8000,%d2 # keep old sign 15761 sub.l %d0,%d1 # add scale factor 15762 subi.l &0x6000,%d1 # subtract bias 15763 andi.w &0x7fff,%d1 15764 or.w %d2,%d1 # concat sign,exp 15765 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent 15766 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1 15767 mov.l (%sp)+,%d2 # restore d2 15768 bra.b fsqrt_sd_ovfl_dis 15769 15770# 15771# the move in MAY underflow. so... 15772# 15773fsqrt_sd_may_ovfl: 15774 btst &0x0,1+FP_SCR0_EX(%a6) # is exponent 0x3fff? 15775 bne.w fsqrt_sd_ovfl # yes, so overflow 15776 15777 fmov.l &0x0,%fpsr # clear FPSR 15778 fmov.l L_SCR3(%a6),%fpcr # set FPCR 15779 15780 fsqrt.x FP_SCR0(%a6),%fp0 # perform absolute 15781 15782 fmov.l %fpsr,%d1 # save status 15783 fmov.l &0x0,%fpcr # clear FPCR 15784 15785 or.l %d1,USER_FPSR(%a6) # save INEX2,N 15786 15787 fmov.x %fp0,%fp1 # make a copy of result 15788 fcmp.b %fp1,&0x1 # is |result| >= 1.b? 15789 fbge.w fsqrt_sd_ovfl_tst # yes; overflow has occurred 15790 15791# no, it didn't overflow; we have correct result 15792 bra.w fsqrt_sd_normal_exit 15793 15794########################################################################## 15795 15796# 15797# input is not normalized; what is it? 15798# 15799fsqrt_not_norm: 15800 cmpi.b %d1,&DENORM # weed out DENORM 15801 beq.w fsqrt_denorm 15802 cmpi.b %d1,&ZERO # weed out ZERO 15803 beq.b fsqrt_zero 15804 cmpi.b %d1,&INF # weed out INF 15805 beq.b fsqrt_inf 15806 cmpi.b %d1,&SNAN # weed out SNAN 15807 beq.l res_snan_1op 15808 bra.l res_qnan_1op 15809 15810# 15811# fsqrt(+0) = +0 15812# fsqrt(-0) = -0 15813# fsqrt(+INF) = +INF 15814# fsqrt(-INF) = OPERR 15815# 15816fsqrt_zero: 15817 tst.b SRC_EX(%a0) # is ZERO positive or negative? 15818 bmi.b fsqrt_zero_m # negative 15819fsqrt_zero_p: 15820 fmov.s &0x00000000,%fp0 # return +ZERO 15821 mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit 15822 rts 15823fsqrt_zero_m: 15824 fmov.s &0x80000000,%fp0 # return -ZERO 15825 mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits 15826 rts 15827 15828fsqrt_inf: 15829 tst.b SRC_EX(%a0) # is INF positive or negative? 
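# (As elsewhere in this package, the operand's sign is bit 15 of the
#  extended-precision exponent word, so testing the upper byte of SRC_EX
#  with tst.b/bmi reads the sign directly.)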
15830 bmi.l res_operr # negative 15831fsqrt_inf_p: 15832 fmovm.x SRC(%a0),&0x80 # return +INF in fp0 15833 mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit 15834 rts 15835 15836########################################################################## 15837 15838######################################################################### 15839# XDEF **************************************************************** # 15840# addsub_scaler2(): scale inputs to fadd/fsub such that no # 15841# OVFL/UNFL exceptions will result # 15842# # 15843# XREF **************************************************************** # 15844# norm() - normalize mantissa after adjusting exponent # 15845# # 15846# INPUT *************************************************************** # 15847# FP_SRC(a6) = fp op1(src) # 15848# FP_DST(a6) = fp op2(dst) # 15849# # 15850# OUTPUT ************************************************************** # 15851# FP_SRC(a6) = fp op1 scaled(src) # 15852# FP_DST(a6) = fp op2 scaled(dst) # 15853# d0 = scale amount # 15854# # 15855# ALGORITHM *********************************************************** # 15856# If the DST exponent is > the SRC exponent, set the DST exponent # 15857# equal to 0x3fff and scale the SRC exponent by the value that the # 15858# DST exponent was scaled by. If the SRC exponent is greater or equal, # 15859# do the opposite. Return this scale factor in d0. # 15860# If the two exponents differ by > the number of mantissa bits # 15861# plus two, then set the smallest exponent to a very small value as a # 15862# quick shortcut. # 15863# # 15864######################################################################### 15865 15866 global addsub_scaler2 15867addsub_scaler2: 15868 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) 15869 mov.l DST_HI(%a1),FP_SCR1_HI(%a6) 15870 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) 15871 mov.l DST_LO(%a1),FP_SCR1_LO(%a6) 15872 mov.w SRC_EX(%a0),%d0 15873 mov.w DST_EX(%a1),%d1 15874 mov.w %d0,FP_SCR0_EX(%a6) 15875 mov.w %d1,FP_SCR1_EX(%a6) 15876 15877 andi.w &0x7fff,%d0 15878 andi.w &0x7fff,%d1 15879 mov.w %d0,L_SCR1(%a6) # store src exponent 15880 mov.w %d1,2+L_SCR1(%a6) # store dst exponent 15881 15882 cmp.w %d0, %d1 # is src exp >= dst exp? 15883 bge.l src_exp_ge2 15884 15885# dst exp is > src exp; scale dst to exp = 0x3fff 15886dst_exp_gt2: 15887 bsr.l scale_to_zero_dst 15888 mov.l %d0,-(%sp) # save scale factor 15889 15890 cmpi.b STAG(%a6),&DENORM # is dst denormalized? 15891 bne.b cmpexp12 15892 15893 lea FP_SCR0(%a6),%a0 15894 bsr.l norm # normalize the denorm; result is new exp 15895 neg.w %d0 # new exp = -(shft val) 15896 mov.w %d0,L_SCR1(%a6) # inset new exp 15897 15898cmpexp12: 15899 mov.w 2+L_SCR1(%a6),%d0 15900 subi.w &mantissalen+2,%d0 # subtract mantissalen+2 from larger exp 15901 15902 cmp.w %d0,L_SCR1(%a6) # is difference >= len(mantissa)+2? 
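#
# Note: once the exponents differ by at least mantissalen+2 bits, the
# smaller (src) operand can affect the result only through the sticky bit,
# so quick_scale12 below just forces its exponent to 1 instead of computing
# an exact scaled exponent; the subsequent fsub gives the same answer.
#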
15903 bge.b quick_scale12 15904 15905 mov.w L_SCR1(%a6),%d0 15906 add.w 0x2(%sp),%d0 # scale src exponent by scale factor 15907 mov.w FP_SCR0_EX(%a6),%d1 15908 and.w &0x8000,%d1 15909 or.w %d1,%d0 # concat {sgn,new exp} 15910 mov.w %d0,FP_SCR0_EX(%a6) # insert new dst exponent 15911 15912 mov.l (%sp)+,%d0 # return SCALE factor 15913 rts 15914 15915quick_scale12: 15916 andi.w &0x8000,FP_SCR0_EX(%a6) # zero src exponent 15917 bset &0x0,1+FP_SCR0_EX(%a6) # set exp = 1 15918 15919 mov.l (%sp)+,%d0 # return SCALE factor 15920 rts 15921 15922# src exp is >= dst exp; scale src to exp = 0x3fff 15923src_exp_ge2: 15924 bsr.l scale_to_zero_src 15925 mov.l %d0,-(%sp) # save scale factor 15926 15927 cmpi.b DTAG(%a6),&DENORM # is dst denormalized? 15928 bne.b cmpexp22 15929 lea FP_SCR1(%a6),%a0 15930 bsr.l norm # normalize the denorm; result is new exp 15931 neg.w %d0 # new exp = -(shft val) 15932 mov.w %d0,2+L_SCR1(%a6) # inset new exp 15933 15934cmpexp22: 15935 mov.w L_SCR1(%a6),%d0 15936 subi.w &mantissalen+2,%d0 # subtract mantissalen+2 from larger exp 15937 15938 cmp.w %d0,2+L_SCR1(%a6) # is difference >= len(mantissa)+2? 15939 bge.b quick_scale22 15940 15941 mov.w 2+L_SCR1(%a6),%d0 15942 add.w 0x2(%sp),%d0 # scale dst exponent by scale factor 15943 mov.w FP_SCR1_EX(%a6),%d1 15944 andi.w &0x8000,%d1 15945 or.w %d1,%d0 # concat {sgn,new exp} 15946 mov.w %d0,FP_SCR1_EX(%a6) # insert new dst exponent 15947 15948 mov.l (%sp)+,%d0 # return SCALE factor 15949 rts 15950 15951quick_scale22: 15952 andi.w &0x8000,FP_SCR1_EX(%a6) # zero dst exponent 15953 bset &0x0,1+FP_SCR1_EX(%a6) # set exp = 1 15954 15955 mov.l (%sp)+,%d0 # return SCALE factor 15956 rts 15957 15958########################################################################## 15959 15960######################################################################### 15961# XDEF **************************************************************** # 15962# scale_to_zero_src(): scale the exponent of extended precision # 15963# value at FP_SCR0(a6). # 15964# # 15965# XREF **************************************************************** # 15966# norm() - normalize the mantissa if the operand was a DENORM # 15967# # 15968# INPUT *************************************************************** # 15969# FP_SCR0(a6) = extended precision operand to be scaled # 15970# # 15971# OUTPUT ************************************************************** # 15972# FP_SCR0(a6) = scaled extended precision operand # 15973# d0 = scale value # 15974# # 15975# ALGORITHM *********************************************************** # 15976# Set the exponent of the input operand to 0x3fff. Save the value # 15977# of the difference between the original and new exponent. Then, # 15978# normalize the operand if it was a DENORM. Add this normalization # 15979# value to the previous value. Return the result. # 15980# # 15981######################################################################### 15982 15983 global scale_to_zero_src 15984scale_to_zero_src: 15985 mov.w FP_SCR0_EX(%a6),%d1 # extract operand's {sgn,exp} 15986 mov.w %d1,%d0 # make a copy 15987 15988 andi.l &0x7fff,%d1 # extract operand's exponent 15989 15990 andi.w &0x8000,%d0 # extract operand's sgn 15991 or.w &0x3fff,%d0 # insert new operand's exponent(=0) 15992 15993 mov.w %d0,FP_SCR0_EX(%a6) # insert biased exponent 15994 15995 cmpi.b STAG(%a6),&DENORM # is operand normalized? 
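#
# Illustrative example (values chosen here for illustration): a normalized
# operand with biased exponent 0x4005 has its exponent replaced with 0x3fff
# above, and stzs_norm returns d0 = 0x3fff - 0x4005 = -0x6; an exponent of
# 0x3f00 yields d0 = +0xff. Callers undo the scaling later by subtracting
# d0 from the exponent of the computed result.
#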
15996 beq.b stzs_denorm # normalize the DENORM 15997 15998stzs_norm: 15999 mov.l &0x3fff,%d0 16000 sub.l %d1,%d0 # scale = BIAS + (-exp) 16001 16002 rts 16003 16004stzs_denorm: 16005 lea FP_SCR0(%a6),%a0 # pass ptr to src op 16006 bsr.l norm # normalize denorm 16007 neg.l %d0 # new exponent = -(shft val) 16008 mov.l %d0,%d1 # prepare for op_norm call 16009 bra.b stzs_norm # finish scaling 16010 16011### 16012 16013######################################################################### 16014# XDEF **************************************************************** # 16015# scale_sqrt(): scale the input operand exponent so a subsequent # 16016# fsqrt operation won't take an exception. # 16017# # 16018# XREF **************************************************************** # 16019# norm() - normalize the mantissa if the operand was a DENORM # 16020# # 16021# INPUT *************************************************************** # 16022# FP_SCR0(a6) = extended precision operand to be scaled # 16023# # 16024# OUTPUT ************************************************************** # 16025# FP_SCR0(a6) = scaled extended precision operand # 16026# d0 = scale value # 16027# # 16028# ALGORITHM *********************************************************** # 16029# If the input operand is a DENORM, normalize it. # 16030# If the exponent of the input operand is even, set the exponent # 16031# to 0x3ffe and return a scale factor of "(exp-0x3ffe)/2". If the # 16032# exponent of the input operand is off, set the exponent to ox3fff and # 16033# return a scale factor of "(exp-0x3fff)/2". # 16034# # 16035######################################################################### 16036 16037 global scale_sqrt 16038scale_sqrt: 16039 cmpi.b STAG(%a6),&DENORM # is operand normalized? 16040 beq.b ss_denorm # normalize the DENORM 16041 16042 mov.w FP_SCR0_EX(%a6),%d1 # extract operand's {sgn,exp} 16043 andi.l &0x7fff,%d1 # extract operand's exponent 16044 16045 andi.w &0x8000,FP_SCR0_EX(%a6) # extract operand's sgn 16046 16047 btst &0x0,%d1 # is exp even or odd? 16048 beq.b ss_norm_even 16049 16050 ori.w &0x3fff,FP_SCR0_EX(%a6) # insert new operand's exponent(=0) 16051 16052 mov.l &0x3fff,%d0 16053 sub.l %d1,%d0 # scale = BIAS + (-exp) 16054 asr.l &0x1,%d0 # divide scale factor by 2 16055 rts 16056 16057ss_norm_even: 16058 ori.w &0x3ffe,FP_SCR0_EX(%a6) # insert new operand's exponent(=0) 16059 16060 mov.l &0x3ffe,%d0 16061 sub.l %d1,%d0 # scale = BIAS + (-exp) 16062 asr.l &0x1,%d0 # divide scale factor by 2 16063 rts 16064 16065ss_denorm: 16066 lea FP_SCR0(%a6),%a0 # pass ptr to src op 16067 bsr.l norm # normalize denorm 16068 16069 btst &0x0,%d0 # is exp even or odd? 16070 beq.b ss_denorm_even 16071 16072 ori.w &0x3fff,FP_SCR0_EX(%a6) # insert new operand's exponent(=0) 16073 16074 add.l &0x3fff,%d0 16075 asr.l &0x1,%d0 # divide scale factor by 2 16076 rts 16077 16078ss_denorm_even: 16079 ori.w &0x3ffe,FP_SCR0_EX(%a6) # insert new operand's exponent(=0) 16080 16081 add.l &0x3ffe,%d0 16082 asr.l &0x1,%d0 # divide scale factor by 2 16083 rts 16084 16085### 16086 16087######################################################################### 16088# XDEF **************************************************************** # 16089# scale_to_zero_dst(): scale the exponent of extended precision # 16090# value at FP_SCR1(a6). 
# 16091# # 16092# XREF **************************************************************** # 16093# norm() - normalize the mantissa if the operand was a DENORM # 16094# # 16095# INPUT *************************************************************** # 16096# FP_SCR1(a6) = extended precision operand to be scaled # 16097# # 16098# OUTPUT ************************************************************** # 16099# FP_SCR1(a6) = scaled extended precision operand # 16100# d0 = scale value # 16101# # 16102# ALGORITHM *********************************************************** # 16103# Set the exponent of the input operand to 0x3fff. Save the value # 16104# of the difference between the original and new exponent. Then, # 16105# normalize the operand if it was a DENORM. Add this normalization # 16106# value to the previous value. Return the result. # 16107# # 16108######################################################################### 16109 16110 global scale_to_zero_dst 16111scale_to_zero_dst: 16112 mov.w FP_SCR1_EX(%a6),%d1 # extract operand's {sgn,exp} 16113 mov.w %d1,%d0 # make a copy 16114 16115 andi.l &0x7fff,%d1 # extract operand's exponent 16116 16117 andi.w &0x8000,%d0 # extract operand's sgn 16118 or.w &0x3fff,%d0 # insert new operand's exponent(=0) 16119 16120 mov.w %d0,FP_SCR1_EX(%a6) # insert biased exponent 16121 16122 cmpi.b DTAG(%a6),&DENORM # is operand normalized? 16123 beq.b stzd_denorm # normalize the DENORM 16124 16125stzd_norm: 16126 mov.l &0x3fff,%d0 16127 sub.l %d1,%d0 # scale = BIAS + (-exp) 16128 rts 16129 16130stzd_denorm: 16131 lea FP_SCR1(%a6),%a0 # pass ptr to dst op 16132 bsr.l norm # normalize denorm 16133 neg.l %d0 # new exponent = -(shft val) 16134 mov.l %d0,%d1 # prepare for op_norm call 16135 bra.b stzd_norm # finish scaling 16136 16137########################################################################## 16138 16139######################################################################### 16140# XDEF **************************************************************** # 16141# res_qnan(): return default result w/ QNAN operand for dyadic # 16142# res_snan(): return default result w/ SNAN operand for dyadic # 16143# res_qnan_1op(): return dflt result w/ QNAN operand for monadic # 16144# res_snan_1op(): return dflt result w/ SNAN operand for monadic # 16145# # 16146# XREF **************************************************************** # 16147# None # 16148# # 16149# INPUT *************************************************************** # 16150# FP_SRC(a6) = pointer to extended precision src operand # 16151# FP_DST(a6) = pointer to extended precision dst operand # 16152# # 16153# OUTPUT ************************************************************** # 16154# fp0 = default result # 16155# # 16156# ALGORITHM *********************************************************** # 16157# If either operand (but not both operands) of an operation is a # 16158# nonsignalling NAN, then that NAN is returned as the result. If both # 16159# operands are nonsignalling NANs, then the destination operand # 16160# nonsignalling NAN is returned as the result. # 16161# If either operand to an operation is a signalling NAN (SNAN), # 16162# then, the SNAN bit is set in the FPSR EXC byte. If the SNAN trap # 16163# enable bit is set in the FPCR, then the trap is taken and the # 16164# destination is not modified. 
If the SNAN trap enable bit is not set, # 16165# then the SNAN is converted to a nonsignalling NAN (by setting the # 16166# SNAN bit in the operand to one), and the operation continues as # 16167# described in the preceding paragraph, for nonsignalling NANs. # 16168# Make sure the appropriate FPSR bits are set before exiting. # 16169# # 16170######################################################################### 16171 16172 global res_qnan 16173 global res_snan 16174res_qnan: 16175res_snan: 16176 cmp.b DTAG(%a6), &SNAN # is the dst an SNAN? 16177 beq.b dst_snan2 16178 cmp.b DTAG(%a6), &QNAN # is the dst a QNAN? 16179 beq.b dst_qnan2 16180src_nan: 16181 cmp.b STAG(%a6), &QNAN 16182 beq.b src_qnan2 16183 global res_snan_1op 16184res_snan_1op: 16185src_snan2: 16186 bset &0x6, FP_SRC_HI(%a6) # set SNAN bit 16187 or.l &nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6) 16188 lea FP_SRC(%a6), %a0 16189 bra.b nan_comp 16190 global res_qnan_1op 16191res_qnan_1op: 16192src_qnan2: 16193 or.l &nan_mask, USER_FPSR(%a6) 16194 lea FP_SRC(%a6), %a0 16195 bra.b nan_comp 16196dst_snan2: 16197 or.l &nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6) 16198 bset &0x6, FP_DST_HI(%a6) # set SNAN bit 16199 lea FP_DST(%a6), %a0 16200 bra.b nan_comp 16201dst_qnan2: 16202 lea FP_DST(%a6), %a0 16203 cmp.b STAG(%a6), &SNAN 16204 bne nan_done 16205 or.l &aiop_mask+snan_mask, USER_FPSR(%a6) 16206nan_done: 16207 or.l &nan_mask, USER_FPSR(%a6) 16208nan_comp: 16209 btst &0x7, FTEMP_EX(%a0) # is NAN neg? 16210 beq.b nan_not_neg 16211 or.l &neg_mask, USER_FPSR(%a6) 16212nan_not_neg: 16213 fmovm.x (%a0), &0x80 16214 rts 16215 16216######################################################################### 16217# XDEF **************************************************************** # 16218# res_operr(): return default result during operand error # 16219# # 16220# XREF **************************************************************** # 16221# None # 16222# # 16223# INPUT *************************************************************** # 16224# None # 16225# # 16226# OUTPUT ************************************************************** # 16227# fp0 = default operand error result # 16228# # 16229# ALGORITHM *********************************************************** # 16230# An nonsignalling NAN is returned as the default result when # 16231# an operand error occurs for the following cases: # 16232# # 16233# Multiply: (Infinity x Zero) # 16234# Divide : (Zero / Zero) || (Infinity / Infinity) # 16235# # 16236######################################################################### 16237 16238 global res_operr 16239res_operr: 16240 or.l &nan_mask+operr_mask+aiop_mask, USER_FPSR(%a6) 16241 fmovm.x nan_return(%pc), &0x80 16242 rts 16243 16244nan_return: 16245 long 0x7fff0000, 0xffffffff, 0xffffffff 16246 16247######################################################################### 16248# fdbcc(): routine to emulate the fdbcc instruction # 16249# # 16250# XDEF **************************************************************** # 16251# _fdbcc() # 16252# # 16253# XREF **************************************************************** # 16254# fetch_dreg() - fetch Dn value # 16255# store_dreg_l() - store updated Dn value # 16256# # 16257# INPUT *************************************************************** # 16258# d0 = displacement # 16259# # 16260# OUTPUT ************************************************************** # 16261# none # 16262# # 16263# ALGORITHM *********************************************************** # 16264# This routine checks 
which conditional predicate is specified by # 16265# the stacked fdbcc instruction opcode and then branches to a routine # 16266# for that predicate. The corresponding fbcc instruction is then used # 16267# to see whether the condition (specified by the stacked FPSR) is true # 16268# or false. # 16269# If a BSUN exception should be indicated, the BSUN and ABSUN # 16270# bits are set in the stacked FPSR. If the BSUN exception is enabled, # 16271# the fbsun_flg is set in the SPCOND_FLG location on the stack. If an # 16272# enabled BSUN should not be flagged and the predicate is true, then # 16273# Dn is fetched and decremented by one. If Dn is not equal to -1, add # 16274# the displacement value to the stacked PC so that when an "rte" is # 16275# finally executed, the branch occurs. # 16276# # 16277######################################################################### 16278 global _fdbcc 16279_fdbcc: 16280 mov.l %d0,L_SCR1(%a6) # save displacement 16281 16282 mov.w EXC_CMDREG(%a6),%d0 # fetch predicate 16283 16284 clr.l %d1 # clear scratch reg 16285 mov.b FPSR_CC(%a6),%d1 # fetch fp ccodes 16286 ror.l &0x8,%d1 # rotate to top byte 16287 fmov.l %d1,%fpsr # insert into FPSR 16288 16289 mov.w (tbl_fdbcc.b,%pc,%d0.w*2),%d1 # load table 16290 jmp (tbl_fdbcc.b,%pc,%d1.w) # jump to fdbcc routine 16291 16292tbl_fdbcc: 16293 short fdbcc_f - tbl_fdbcc # 00 16294 short fdbcc_eq - tbl_fdbcc # 01 16295 short fdbcc_ogt - tbl_fdbcc # 02 16296 short fdbcc_oge - tbl_fdbcc # 03 16297 short fdbcc_olt - tbl_fdbcc # 04 16298 short fdbcc_ole - tbl_fdbcc # 05 16299 short fdbcc_ogl - tbl_fdbcc # 06 16300 short fdbcc_or - tbl_fdbcc # 07 16301 short fdbcc_un - tbl_fdbcc # 08 16302 short fdbcc_ueq - tbl_fdbcc # 09 16303 short fdbcc_ugt - tbl_fdbcc # 10 16304 short fdbcc_uge - tbl_fdbcc # 11 16305 short fdbcc_ult - tbl_fdbcc # 12 16306 short fdbcc_ule - tbl_fdbcc # 13 16307 short fdbcc_neq - tbl_fdbcc # 14 16308 short fdbcc_t - tbl_fdbcc # 15 16309 short fdbcc_sf - tbl_fdbcc # 16 16310 short fdbcc_seq - tbl_fdbcc # 17 16311 short fdbcc_gt - tbl_fdbcc # 18 16312 short fdbcc_ge - tbl_fdbcc # 19 16313 short fdbcc_lt - tbl_fdbcc # 20 16314 short fdbcc_le - tbl_fdbcc # 21 16315 short fdbcc_gl - tbl_fdbcc # 22 16316 short fdbcc_gle - tbl_fdbcc # 23 16317 short fdbcc_ngle - tbl_fdbcc # 24 16318 short fdbcc_ngl - tbl_fdbcc # 25 16319 short fdbcc_nle - tbl_fdbcc # 26 16320 short fdbcc_nlt - tbl_fdbcc # 27 16321 short fdbcc_nge - tbl_fdbcc # 28 16322 short fdbcc_ngt - tbl_fdbcc # 29 16323 short fdbcc_sneq - tbl_fdbcc # 30 16324 short fdbcc_st - tbl_fdbcc # 31 16325 16326######################################################################### 16327# # 16328# IEEE Nonaware tests # 16329# # 16330# For the IEEE nonaware tests, only the false branch changes the # 16331# counter. However, the true branch may set bsun so we check to see # 16332# if the NAN bit is set, in which case BSUN and AIOP will be set. # 16333# # 16334# The cases EQ and NE are shared by the Aware and Nonaware groups # 16335# and are incapable of setting the BSUN exception bit. # 16336# # 16337# Typically, only one of the two possible branch directions could # 16338# have the NAN bit set. # 16339# (This is assuming the mutual exclusiveness of FPSR cc bit groupings # 16340# is preserved.) # 16341# # 16342######################################################################### 16343 16344# 16345# equal: 16346# 16347# Z 16348# 16349fdbcc_eq: 16350 fbeq.w fdbcc_eq_yes # equal? 
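# (The fbeq above, like every fbcc in these predicate routines, tests the
#  real on-chip FPSR: _fdbcc rotated the emulated condition-code byte from
#  FPSR_CC(%a6) into bits 31:24 with "ror.l &0x8" and loaded it with
#  "fmov.l %d1,%fpsr", so the hardware evaluates the emulated ccodes
#  directly.)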
16351fdbcc_eq_no: 16352 bra.w fdbcc_false # no; go handle counter 16353fdbcc_eq_yes: 16354 rts 16355 16356# 16357# not equal: 16358# _ 16359# Z 16360# 16361fdbcc_neq: 16362 fbneq.w fdbcc_neq_yes # not equal? 16363fdbcc_neq_no: 16364 bra.w fdbcc_false # no; go handle counter 16365fdbcc_neq_yes: 16366 rts 16367 16368# 16369# greater than: 16370# _______ 16371# NANvZvN 16372# 16373fdbcc_gt: 16374 fbgt.w fdbcc_gt_yes # greater than? 16375 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc? 16376 beq.w fdbcc_false # no;go handle counter 16377 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit 16378 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled? 16379 bne.w fdbcc_bsun # yes; we have an exception 16380 bra.w fdbcc_false # no; go handle counter 16381fdbcc_gt_yes: 16382 rts # do nothing 16383 16384# 16385# not greater than: 16386# 16387# NANvZvN 16388# 16389fdbcc_ngt: 16390 fbngt.w fdbcc_ngt_yes # not greater than? 16391fdbcc_ngt_no: 16392 bra.w fdbcc_false # no; go handle counter 16393fdbcc_ngt_yes: 16394 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc? 16395 beq.b fdbcc_ngt_done # no;go finish 16396 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit 16397 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled? 16398 bne.w fdbcc_bsun # yes; we have an exception 16399fdbcc_ngt_done: 16400 rts # no; do nothing 16401 16402# 16403# greater than or equal: 16404# _____ 16405# Zv(NANvN) 16406# 16407fdbcc_ge: 16408 fbge.w fdbcc_ge_yes # greater than or equal? 16409fdbcc_ge_no: 16410 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc? 16411 beq.w fdbcc_false # no;go handle counter 16412 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit 16413 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled? 16414 bne.w fdbcc_bsun # yes; we have an exception 16415 bra.w fdbcc_false # no; go handle counter 16416fdbcc_ge_yes: 16417 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc? 16418 beq.b fdbcc_ge_yes_done # no;go do nothing 16419 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit 16420 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled? 16421 bne.w fdbcc_bsun # yes; we have an exception 16422fdbcc_ge_yes_done: 16423 rts # do nothing 16424 16425# 16426# not (greater than or equal): 16427# _ 16428# NANv(N^Z) 16429# 16430fdbcc_nge: 16431 fbnge.w fdbcc_nge_yes # not (greater than or equal)? 16432fdbcc_nge_no: 16433 bra.w fdbcc_false # no; go handle counter 16434fdbcc_nge_yes: 16435 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc? 16436 beq.b fdbcc_nge_done # no;go finish 16437 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit 16438 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled? 16439 bne.w fdbcc_bsun # yes; we have an exception 16440fdbcc_nge_done: 16441 rts # no; do nothing 16442 16443# 16444# less than: 16445# _____ 16446# N^(NANvZ) 16447# 16448fdbcc_lt: 16449 fblt.w fdbcc_lt_yes # less than? 16450fdbcc_lt_no: 16451 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc? 16452 beq.w fdbcc_false # no; go handle counter 16453 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit 16454 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled? 16455 bne.w fdbcc_bsun # yes; we have an exception 16456 bra.w fdbcc_false # no; go handle counter 16457fdbcc_lt_yes: 16458 rts # do nothing 16459 16460# 16461# not less than: 16462# _ 16463# NANv(ZvN) 16464# 16465fdbcc_nlt: 16466 fbnlt.w fdbcc_nlt_yes # not less than? 16467fdbcc_nlt_no: 16468 bra.w fdbcc_false # no; go handle counter 16469fdbcc_nlt_yes: 16470 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc? 
16471 beq.b fdbcc_nlt_done # no;go finish 16472 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit 16473 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled? 16474 bne.w fdbcc_bsun # yes; we have an exception 16475fdbcc_nlt_done: 16476 rts # no; do nothing 16477 16478# 16479# less than or equal: 16480# ___ 16481# Zv(N^NAN) 16482# 16483fdbcc_le: 16484 fble.w fdbcc_le_yes # less than or equal? 16485fdbcc_le_no: 16486 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc? 16487 beq.w fdbcc_false # no; go handle counter 16488 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit 16489 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled? 16490 bne.w fdbcc_bsun # yes; we have an exception 16491 bra.w fdbcc_false # no; go handle counter 16492fdbcc_le_yes: 16493 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc? 16494 beq.b fdbcc_le_yes_done # no; go do nothing 16495 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit 16496 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled? 16497 bne.w fdbcc_bsun # yes; we have an exception 16498fdbcc_le_yes_done: 16499 rts # do nothing 16500 16501# 16502# not (less than or equal): 16503# ___ 16504# NANv(NvZ) 16505# 16506fdbcc_nle: 16507 fbnle.w fdbcc_nle_yes # not (less than or equal)? 16508fdbcc_nle_no: 16509 bra.w fdbcc_false # no; go handle counter 16510fdbcc_nle_yes: 16511 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc? 16512 beq.w fdbcc_nle_done # no; go finish 16513 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit 16514 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled? 16515 bne.w fdbcc_bsun # yes; we have an exception 16516fdbcc_nle_done: 16517 rts # no; do nothing 16518 16519# 16520# greater or less than: 16521# _____ 16522# NANvZ 16523# 16524fdbcc_gl: 16525 fbgl.w fdbcc_gl_yes # greater or less than? 16526fdbcc_gl_no: 16527 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc? 16528 beq.w fdbcc_false # no; handle counter 16529 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit 16530 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled? 16531 bne.w fdbcc_bsun # yes; we have an exception 16532 bra.w fdbcc_false # no; go handle counter 16533fdbcc_gl_yes: 16534 rts # do nothing 16535 16536# 16537# not (greater or less than): 16538# 16539# NANvZ 16540# 16541fdbcc_ngl: 16542 fbngl.w fdbcc_ngl_yes # not (greater or less than)? 16543fdbcc_ngl_no: 16544 bra.w fdbcc_false # no; go handle counter 16545fdbcc_ngl_yes: 16546 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc? 16547 beq.b fdbcc_ngl_done # no; go finish 16548 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit 16549 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled? 16550 bne.w fdbcc_bsun # yes; we have an exception 16551fdbcc_ngl_done: 16552 rts # no; do nothing 16553 16554# 16555# greater, less, or equal: 16556# ___ 16557# NAN 16558# 16559fdbcc_gle: 16560 fbgle.w fdbcc_gle_yes # greater, less, or equal? 16561fdbcc_gle_no: 16562 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit 16563 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled? 16564 bne.w fdbcc_bsun # yes; we have an exception 16565 bra.w fdbcc_false # no; go handle counter 16566fdbcc_gle_yes: 16567 rts # do nothing 16568 16569# 16570# not (greater, less, or equal): 16571# 16572# NAN 16573# 16574fdbcc_ngle: 16575 fbngle.w fdbcc_ngle_yes # not (greater, less, or equal)? 16576fdbcc_ngle_no: 16577 bra.w fdbcc_false # no; go handle counter 16578fdbcc_ngle_yes: 16579 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit 16580 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled? 
16581 bne.w fdbcc_bsun # yes; we have an exception 16582 rts # no; do nothing 16583 16584######################################################################### 16585# # 16586# Miscellaneous tests # 16587# # 16588# For the IEEE miscellaneous tests, all but fdbf and fdbt can set bsun. # 16589# # 16590######################################################################### 16591 16592# 16593# false: 16594# 16595# False 16596# 16597fdbcc_f: # no bsun possible 16598 bra.w fdbcc_false # go handle counter 16599 16600# 16601# true: 16602# 16603# True 16604# 16605fdbcc_t: # no bsun possible 16606 rts # do nothing 16607 16608# 16609# signalling false: 16610# 16611# False 16612# 16613fdbcc_sf: 16614 btst &nan_bit, FPSR_CC(%a6) # is NAN set? 16615 beq.w fdbcc_false # no;go handle counter 16616 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit 16617 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled? 16618 bne.w fdbcc_bsun # yes; we have an exception 16619 bra.w fdbcc_false # go handle counter 16620 16621# 16622# signalling true: 16623# 16624# True 16625# 16626fdbcc_st: 16627 btst &nan_bit, FPSR_CC(%a6) # is NAN set? 16628 beq.b fdbcc_st_done # no;go finish 16629 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit 16630 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled? 16631 bne.w fdbcc_bsun # yes; we have an exception 16632fdbcc_st_done: 16633 rts 16634 16635# 16636# signalling equal: 16637# 16638# Z 16639# 16640fdbcc_seq: 16641 fbseq.w fdbcc_seq_yes # signalling equal? 16642fdbcc_seq_no: 16643 btst &nan_bit, FPSR_CC(%a6) # is NAN set? 16644 beq.w fdbcc_false # no;go handle counter 16645 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit 16646 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled? 16647 bne.w fdbcc_bsun # yes; we have an exception 16648 bra.w fdbcc_false # go handle counter 16649fdbcc_seq_yes: 16650 btst &nan_bit, FPSR_CC(%a6) # is NAN set? 16651 beq.b fdbcc_seq_yes_done # no;go do nothing 16652 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit 16653 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled? 16654 bne.w fdbcc_bsun # yes; we have an exception 16655fdbcc_seq_yes_done: 16656 rts # yes; do nothing 16657 16658# 16659# signalling not equal: 16660# _ 16661# Z 16662# 16663fdbcc_sneq: 16664 fbsneq.w fdbcc_sneq_yes # signalling not equal? 16665fdbcc_sneq_no: 16666 btst &nan_bit, FPSR_CC(%a6) # is NAN set? 16667 beq.w fdbcc_false # no;go handle counter 16668 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit 16669 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled? 16670 bne.w fdbcc_bsun # yes; we have an exception 16671 bra.w fdbcc_false # go handle counter 16672fdbcc_sneq_yes: 16673 btst &nan_bit, FPSR_CC(%a6) # set BSUN exc bit 16674 beq.w fdbcc_sneq_done # no;go finish 16675 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit 16676 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled? 16677 bne.w fdbcc_bsun # yes; we have an exception 16678fdbcc_sneq_done: 16679 rts 16680 16681######################################################################### 16682# # 16683# IEEE Aware tests # 16684# # 16685# For the IEEE aware tests, action is only taken if the result is false.# 16686# Therefore, the opposite branch type is used to jump to the decrement # 16687# routine. # 16688# The BSUN exception will not be set for any of these tests. 
# 16689# # 16690######################################################################### 16691 16692# 16693# ordered greater than: 16694# _______ 16695# NANvZvN 16696# 16697fdbcc_ogt: 16698 fbogt.w fdbcc_ogt_yes # ordered greater than? 16699fdbcc_ogt_no: 16700 bra.w fdbcc_false # no; go handle counter 16701fdbcc_ogt_yes: 16702 rts # yes; do nothing 16703 16704# 16705# unordered or less or equal: 16706# _______ 16707# NANvZvN 16708# 16709fdbcc_ule: 16710 fbule.w fdbcc_ule_yes # unordered or less or equal? 16711fdbcc_ule_no: 16712 bra.w fdbcc_false # no; go handle counter 16713fdbcc_ule_yes: 16714 rts # yes; do nothing 16715 16716# 16717# ordered greater than or equal: 16718# _____ 16719# Zv(NANvN) 16720# 16721fdbcc_oge: 16722 fboge.w fdbcc_oge_yes # ordered greater than or equal? 16723fdbcc_oge_no: 16724 bra.w fdbcc_false # no; go handle counter 16725fdbcc_oge_yes: 16726 rts # yes; do nothing 16727 16728# 16729# unordered or less than: 16730# _ 16731# NANv(N^Z) 16732# 16733fdbcc_ult: 16734 fbult.w fdbcc_ult_yes # unordered or less than? 16735fdbcc_ult_no: 16736 bra.w fdbcc_false # no; go handle counter 16737fdbcc_ult_yes: 16738 rts # yes; do nothing 16739 16740# 16741# ordered less than: 16742# _____ 16743# N^(NANvZ) 16744# 16745fdbcc_olt: 16746 fbolt.w fdbcc_olt_yes # ordered less than? 16747fdbcc_olt_no: 16748 bra.w fdbcc_false # no; go handle counter 16749fdbcc_olt_yes: 16750 rts # yes; do nothing 16751 16752# 16753# unordered or greater or equal: 16754# 16755# NANvZvN 16756# 16757fdbcc_uge: 16758 fbuge.w fdbcc_uge_yes # unordered or greater than? 16759fdbcc_uge_no: 16760 bra.w fdbcc_false # no; go handle counter 16761fdbcc_uge_yes: 16762 rts # yes; do nothing 16763 16764# 16765# ordered less than or equal: 16766# ___ 16767# Zv(N^NAN) 16768# 16769fdbcc_ole: 16770 fbole.w fdbcc_ole_yes # ordered greater or less than? 16771fdbcc_ole_no: 16772 bra.w fdbcc_false # no; go handle counter 16773fdbcc_ole_yes: 16774 rts # yes; do nothing 16775 16776# 16777# unordered or greater than: 16778# ___ 16779# NANv(NvZ) 16780# 16781fdbcc_ugt: 16782 fbugt.w fdbcc_ugt_yes # unordered or greater than? 16783fdbcc_ugt_no: 16784 bra.w fdbcc_false # no; go handle counter 16785fdbcc_ugt_yes: 16786 rts # yes; do nothing 16787 16788# 16789# ordered greater or less than: 16790# _____ 16791# NANvZ 16792# 16793fdbcc_ogl: 16794 fbogl.w fdbcc_ogl_yes # ordered greater or less than? 16795fdbcc_ogl_no: 16796 bra.w fdbcc_false # no; go handle counter 16797fdbcc_ogl_yes: 16798 rts # yes; do nothing 16799 16800# 16801# unordered or equal: 16802# 16803# NANvZ 16804# 16805fdbcc_ueq: 16806 fbueq.w fdbcc_ueq_yes # unordered or equal? 16807fdbcc_ueq_no: 16808 bra.w fdbcc_false # no; go handle counter 16809fdbcc_ueq_yes: 16810 rts # yes; do nothing 16811 16812# 16813# ordered: 16814# ___ 16815# NAN 16816# 16817fdbcc_or: 16818 fbor.w fdbcc_or_yes # ordered? 16819fdbcc_or_no: 16820 bra.w fdbcc_false # no; go handle counter 16821fdbcc_or_yes: 16822 rts # yes; do nothing 16823 16824# 16825# unordered: 16826# 16827# NAN 16828# 16829fdbcc_un: 16830 fbun.w fdbcc_un_yes # unordered? 16831fdbcc_un_no: 16832 bra.w fdbcc_false # no; go handle counter 16833fdbcc_un_yes: 16834 rts # yes; do nothing 16835 16836####################################################################### 16837 16838# 16839# the bsun exception bit was not set. 
16840# 16841# (1) subtract 1 from the count register 16842# (2) if (cr == -1) then 16843# pc = pc of next instruction 16844# else 16845# pc += sign_ext(16-bit displacement) 16846# 16847fdbcc_false: 16848 mov.b 1+EXC_OPWORD(%a6), %d1 # fetch lo opword 16849 andi.w &0x7, %d1 # extract count register 16850 16851 bsr.l fetch_dreg # fetch count value 16852# make sure that d0 isn't corrupted between calls... 16853 16854 subq.w &0x1, %d0 # Dn - 1 -> Dn 16855 16856 bsr.l store_dreg_l # store new count value 16857 16858 cmpi.w %d0, &-0x1 # is (Dn == -1)? 16859 bne.b fdbcc_false_cont # no; 16860 rts 16861 16862fdbcc_false_cont: 16863 mov.l L_SCR1(%a6),%d0 # fetch displacement 16864 add.l USER_FPIAR(%a6),%d0 # add instruction PC 16865 addq.l &0x4,%d0 # add instruction length 16866 mov.l %d0,EXC_PC(%a6) # set new PC 16867 rts 16868 16869# the emulation routine set bsun and BSUN was enabled. have to 16870# fix stack and jump to the bsun handler. 16871# let the caller of this routine shift the stack frame up to 16872# eliminate the effective address field. 16873fdbcc_bsun: 16874 mov.b &fbsun_flg,SPCOND_FLG(%a6) 16875 rts 16876 16877######################################################################### 16878# ftrapcc(): routine to emulate the ftrapcc instruction # 16879# # 16880# XDEF **************************************************************** # 16881# _ftrapcc() # 16882# # 16883# XREF **************************************************************** # 16884# none # 16885# # 16886# INPUT *************************************************************** # 16887# none # 16888# # 16889# OUTPUT ************************************************************** # 16890# none # 16891# # 16892# ALGORITHM *********************************************************** # 16893# This routine checks which conditional predicate is specified by # 16894# the stacked ftrapcc instruction opcode and then branches to a routine # 16895# for that predicate. The corresponding fbcc instruction is then used # 16896# to see whether the condition (specified by the stacked FPSR) is true # 16897# or false. # 16898# If a BSUN exception should be indicated, the BSUN and ABSUN # 16899# bits are set in the stacked FPSR. If the BSUN exception is enabled, # 16900# the fbsun_flg is set in the SPCOND_FLG location on the stack. If an # 16901# enabled BSUN should not be flagged and the predicate is true, then # 16902# the ftrapcc_flg is set in the SPCOND_FLG location. These special # 16903# flags indicate to the calling routine to emulate the exceptional # 16904# condition. 
# 16905# # 16906######################################################################### 16907 16908 global _ftrapcc 16909_ftrapcc: 16910 mov.w EXC_CMDREG(%a6),%d0 # fetch predicate 16911 16912 clr.l %d1 # clear scratch reg 16913 mov.b FPSR_CC(%a6),%d1 # fetch fp ccodes 16914 ror.l &0x8,%d1 # rotate to top byte 16915 fmov.l %d1,%fpsr # insert into FPSR 16916 16917 mov.w (tbl_ftrapcc.b,%pc,%d0.w*2), %d1 # load table 16918 jmp (tbl_ftrapcc.b,%pc,%d1.w) # jump to ftrapcc routine 16919 16920tbl_ftrapcc: 16921 short ftrapcc_f - tbl_ftrapcc # 00 16922 short ftrapcc_eq - tbl_ftrapcc # 01 16923 short ftrapcc_ogt - tbl_ftrapcc # 02 16924 short ftrapcc_oge - tbl_ftrapcc # 03 16925 short ftrapcc_olt - tbl_ftrapcc # 04 16926 short ftrapcc_ole - tbl_ftrapcc # 05 16927 short ftrapcc_ogl - tbl_ftrapcc # 06 16928 short ftrapcc_or - tbl_ftrapcc # 07 16929 short ftrapcc_un - tbl_ftrapcc # 08 16930 short ftrapcc_ueq - tbl_ftrapcc # 09 16931 short ftrapcc_ugt - tbl_ftrapcc # 10 16932 short ftrapcc_uge - tbl_ftrapcc # 11 16933 short ftrapcc_ult - tbl_ftrapcc # 12 16934 short ftrapcc_ule - tbl_ftrapcc # 13 16935 short ftrapcc_neq - tbl_ftrapcc # 14 16936 short ftrapcc_t - tbl_ftrapcc # 15 16937 short ftrapcc_sf - tbl_ftrapcc # 16 16938 short ftrapcc_seq - tbl_ftrapcc # 17 16939 short ftrapcc_gt - tbl_ftrapcc # 18 16940 short ftrapcc_ge - tbl_ftrapcc # 19 16941 short ftrapcc_lt - tbl_ftrapcc # 20 16942 short ftrapcc_le - tbl_ftrapcc # 21 16943 short ftrapcc_gl - tbl_ftrapcc # 22 16944 short ftrapcc_gle - tbl_ftrapcc # 23 16945 short ftrapcc_ngle - tbl_ftrapcc # 24 16946 short ftrapcc_ngl - tbl_ftrapcc # 25 16947 short ftrapcc_nle - tbl_ftrapcc # 26 16948 short ftrapcc_nlt - tbl_ftrapcc # 27 16949 short ftrapcc_nge - tbl_ftrapcc # 28 16950 short ftrapcc_ngt - tbl_ftrapcc # 29 16951 short ftrapcc_sneq - tbl_ftrapcc # 30 16952 short ftrapcc_st - tbl_ftrapcc # 31 16953 16954######################################################################### 16955# # 16956# IEEE Nonaware tests # 16957# # 16958# For the IEEE nonaware tests, we set the result based on the # 16959# floating point condition codes. In addition, we check to see # 16960# if the NAN bit is set, in which case BSUN and AIOP will be set. # 16961# # 16962# The cases EQ and NE are shared by the Aware and Nonaware groups # 16963# and are incapable of setting the BSUN exception bit. # 16964# # 16965# Typically, only one of the two possible branch directions could # 16966# have the NAN bit set. # 16967# # 16968######################################################################### 16969 16970# 16971# equal: 16972# 16973# Z 16974# 16975ftrapcc_eq: 16976 fbeq.w ftrapcc_trap # equal? 16977ftrapcc_eq_no: 16978 rts # do nothing 16979 16980# 16981# not equal: 16982# _ 16983# Z 16984# 16985ftrapcc_neq: 16986 fbneq.w ftrapcc_trap # not equal? 16987ftrapcc_neq_no: 16988 rts # do nothing 16989 16990# 16991# greater than: 16992# _______ 16993# NANvZvN 16994# 16995ftrapcc_gt: 16996 fbgt.w ftrapcc_trap # greater than? 16997ftrapcc_gt_no: 16998 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc? 16999 beq.b ftrapcc_gt_done # no 17000 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit 17001 btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set? 17002 bne.w ftrapcc_bsun # yes 17003ftrapcc_gt_done: 17004 rts # no; do nothing 17005 17006# 17007# not greater than: 17008# 17009# NANvZvN 17010# 17011ftrapcc_ngt: 17012 fbngt.w ftrapcc_ngt_yes # not greater than? 
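# (Unlike _fdbcc, a true predicate here does not touch a counter or the PC;
#  the taken branches funnel into ftrapcc_trap which, per the header above,
#  records ftrapcc_flg in SPCOND_FLG so the calling handler emulates the
#  trap, just as ftrapcc_bsun records fbsun_flg.)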
17013ftrapcc_ngt_no: 17014 rts # do nothing 17015ftrapcc_ngt_yes: 17016 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc? 17017 beq.w ftrapcc_trap # no; go take trap 17018 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit 17019 btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set? 17020 bne.w ftrapcc_bsun # yes 17021 bra.w ftrapcc_trap # no; go take trap 17022 17023# 17024# greater than or equal: 17025# _____ 17026# Zv(NANvN) 17027# 17028ftrapcc_ge: 17029 fbge.w ftrapcc_ge_yes # greater than or equal? 17030ftrapcc_ge_no: 17031 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc? 17032 beq.b ftrapcc_ge_done # no; go finish 17033 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit 17034 btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set? 17035 bne.w ftrapcc_bsun # yes 17036ftrapcc_ge_done: 17037 rts # no; do nothing 17038ftrapcc_ge_yes: 17039 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc? 17040 beq.w ftrapcc_trap # no; go take trap 17041 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit 17042 btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set? 17043 bne.w ftrapcc_bsun # yes 17044 bra.w ftrapcc_trap # no; go take trap 17045 17046# 17047# not (greater than or equal): 17048# _ 17049# NANv(N^Z) 17050# 17051ftrapcc_nge: 17052 fbnge.w ftrapcc_nge_yes # not (greater than or equal)? 17053ftrapcc_nge_no: 17054 rts # do nothing 17055ftrapcc_nge_yes: 17056 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc? 17057 beq.w ftrapcc_trap # no; go take trap 17058 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit 17059 btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set? 17060 bne.w ftrapcc_bsun # yes 17061 bra.w ftrapcc_trap # no; go take trap 17062 17063# 17064# less than: 17065# _____ 17066# N^(NANvZ) 17067# 17068ftrapcc_lt: 17069 fblt.w ftrapcc_trap # less than? 17070ftrapcc_lt_no: 17071 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc? 17072 beq.b ftrapcc_lt_done # no; go finish 17073 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit 17074 btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set? 17075 bne.w ftrapcc_bsun # yes 17076ftrapcc_lt_done: 17077 rts # no; do nothing 17078 17079# 17080# not less than: 17081# _ 17082# NANv(ZvN) 17083# 17084ftrapcc_nlt: 17085 fbnlt.w ftrapcc_nlt_yes # not less than? 17086ftrapcc_nlt_no: 17087 rts # do nothing 17088ftrapcc_nlt_yes: 17089 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc? 17090 beq.w ftrapcc_trap # no; go take trap 17091 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit 17092 btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set? 17093 bne.w ftrapcc_bsun # yes 17094 bra.w ftrapcc_trap # no; go take trap 17095 17096# 17097# less than or equal: 17098# ___ 17099# Zv(N^NAN) 17100# 17101ftrapcc_le: 17102 fble.w ftrapcc_le_yes # less than or equal? 17103ftrapcc_le_no: 17104 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc? 17105 beq.b ftrapcc_le_done # no; go finish 17106 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit 17107 btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set? 17108 bne.w ftrapcc_bsun # yes 17109ftrapcc_le_done: 17110 rts # no; do nothing 17111ftrapcc_le_yes: 17112 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc? 17113 beq.w ftrapcc_trap # no; go take trap 17114 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit 17115 btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set? 17116 bne.w ftrapcc_bsun # yes 17117 bra.w ftrapcc_trap # no; go take trap 17118 17119# 17120# not (less than or equal): 17121# ___ 17122# NANv(NvZ) 17123# 17124ftrapcc_nle: 17125 fbnle.w ftrapcc_nle_yes # not (less than or equal)? 
17126ftrapcc_nle_no: 17127 rts # do nothing 17128ftrapcc_nle_yes: 17129 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc? 17130 beq.w ftrapcc_trap # no; go take trap 17131 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit 17132 btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set? 17133 bne.w ftrapcc_bsun # yes 17134 bra.w ftrapcc_trap # no; go take trap 17135 17136# 17137# greater or less than: 17138# _____ 17139# NANvZ 17140# 17141ftrapcc_gl: 17142 fbgl.w ftrapcc_trap # greater or less than? 17143ftrapcc_gl_no: 17144 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc? 17145 beq.b ftrapcc_gl_done # no; go finish 17146 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit 17147 btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set? 17148 bne.w ftrapcc_bsun # yes 17149ftrapcc_gl_done: 17150 rts # no; do nothing 17151 17152# 17153# not (greater or less than): 17154# 17155# NANvZ 17156# 17157ftrapcc_ngl: 17158 fbngl.w ftrapcc_ngl_yes # not (greater or less than)? 17159ftrapcc_ngl_no: 17160 rts # do nothing 17161ftrapcc_ngl_yes: 17162 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc? 17163 beq.w ftrapcc_trap # no; go take trap 17164 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit 17165 btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set? 17166 bne.w ftrapcc_bsun # yes 17167 bra.w ftrapcc_trap # no; go take trap 17168 17169# 17170# greater, less, or equal: 17171# ___ 17172# NAN 17173# 17174ftrapcc_gle: 17175 fbgle.w ftrapcc_trap # greater, less, or equal? 17176ftrapcc_gle_no: 17177 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit 17178 btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set? 17179 bne.w ftrapcc_bsun # yes 17180 rts # no; do nothing 17181 17182# 17183# not (greater, less, or equal): 17184# 17185# NAN 17186# 17187ftrapcc_ngle: 17188 fbngle.w ftrapcc_ngle_yes # not (greater, less, or equal)? 17189ftrapcc_ngle_no: 17190 rts # do nothing 17191ftrapcc_ngle_yes: 17192 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit 17193 btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set? 17194 bne.w ftrapcc_bsun # yes 17195 bra.w ftrapcc_trap # no; go take trap 17196 17197######################################################################### 17198# # 17199# Miscellaneous tests # 17200# # 17201# For the IEEE aware tests, we only have to set the result based on the # 17202# floating point condition codes. The BSUN exception will not be # 17203# set for any of these tests. # 17204# # 17205######################################################################### 17206 17207# 17208# false: 17209# 17210# False 17211# 17212ftrapcc_f: 17213 rts # do nothing 17214 17215# 17216# true: 17217# 17218# True 17219# 17220ftrapcc_t: 17221 bra.w ftrapcc_trap # go take trap 17222 17223# 17224# signalling false: 17225# 17226# False 17227# 17228ftrapcc_sf: 17229 btst &nan_bit, FPSR_CC(%a6) # set BSUN exc bit 17230 beq.b ftrapcc_sf_done # no; go finish 17231 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit 17232 btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set? 17233 bne.w ftrapcc_bsun # yes 17234ftrapcc_sf_done: 17235 rts # no; do nothing 17236 17237# 17238# signalling true: 17239# 17240# True 17241# 17242ftrapcc_st: 17243 btst &nan_bit, FPSR_CC(%a6) # set BSUN exc bit 17244 beq.w ftrapcc_trap # no; go take trap 17245 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit 17246 btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set? 
17247 bne.w ftrapcc_bsun # yes 17248 bra.w ftrapcc_trap # no; go take trap 17249 17250# 17251# signalling equal: 17252# 17253# Z 17254# 17255ftrapcc_seq: 17256 fbseq.w ftrapcc_seq_yes # signalling equal? 17257ftrapcc_seq_no: 17258 btst &nan_bit, FPSR_CC(%a6) # set BSUN exc bit 17259 beq.w ftrapcc_seq_done # no; go finish 17260 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit 17261 btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set? 17262 bne.w ftrapcc_bsun # yes 17263ftrapcc_seq_done: 17264 rts # no; do nothing 17265ftrapcc_seq_yes: 17266 btst &nan_bit, FPSR_CC(%a6) # set BSUN exc bit 17267 beq.w ftrapcc_trap # no; go take trap 17268 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit 17269 btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set? 17270 bne.w ftrapcc_bsun # yes 17271 bra.w ftrapcc_trap # no; go take trap 17272 17273# 17274# signalling not equal: 17275# _ 17276# Z 17277# 17278ftrapcc_sneq: 17279 fbsneq.w ftrapcc_sneq_yes # signalling equal? 17280ftrapcc_sneq_no: 17281 btst &nan_bit, FPSR_CC(%a6) # set BSUN exc bit 17282 beq.w ftrapcc_sneq_no_done # no; go finish 17283 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit 17284 btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set? 17285 bne.w ftrapcc_bsun # yes 17286ftrapcc_sneq_no_done: 17287 rts # do nothing 17288ftrapcc_sneq_yes: 17289 btst &nan_bit, FPSR_CC(%a6) # set BSUN exc bit 17290 beq.w ftrapcc_trap # no; go take trap 17291 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit 17292 btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set? 17293 bne.w ftrapcc_bsun # yes 17294 bra.w ftrapcc_trap # no; go take trap 17295 17296######################################################################### 17297# # 17298# IEEE Aware tests # 17299# # 17300# For the IEEE aware tests, we only have to set the result based on the # 17301# floating point condition codes. The BSUN exception will not be # 17302# set for any of these tests. # 17303# # 17304######################################################################### 17305 17306# 17307# ordered greater than: 17308# _______ 17309# NANvZvN 17310# 17311ftrapcc_ogt: 17312 fbogt.w ftrapcc_trap # ordered greater than? 17313ftrapcc_ogt_no: 17314 rts # do nothing 17315 17316# 17317# unordered or less or equal: 17318# _______ 17319# NANvZvN 17320# 17321ftrapcc_ule: 17322 fbule.w ftrapcc_trap # unordered or less or equal? 17323ftrapcc_ule_no: 17324 rts # do nothing 17325 17326# 17327# ordered greater than or equal: 17328# _____ 17329# Zv(NANvN) 17330# 17331ftrapcc_oge: 17332 fboge.w ftrapcc_trap # ordered greater than or equal? 17333ftrapcc_oge_no: 17334 rts # do nothing 17335 17336# 17337# unordered or less than: 17338# _ 17339# NANv(N^Z) 17340# 17341ftrapcc_ult: 17342 fbult.w ftrapcc_trap # unordered or less than? 17343ftrapcc_ult_no: 17344 rts # do nothing 17345 17346# 17347# ordered less than: 17348# _____ 17349# N^(NANvZ) 17350# 17351ftrapcc_olt: 17352 fbolt.w ftrapcc_trap # ordered less than? 17353ftrapcc_olt_no: 17354 rts # do nothing 17355 17356# 17357# unordered or greater or equal: 17358# 17359# NANvZvN 17360# 17361ftrapcc_uge: 17362 fbuge.w ftrapcc_trap # unordered or greater than? 17363ftrapcc_uge_no: 17364 rts # do nothing 17365 17366# 17367# ordered less than or equal: 17368# ___ 17369# Zv(N^NAN) 17370# 17371ftrapcc_ole: 17372 fbole.w ftrapcc_trap # ordered greater or less than? 
17373ftrapcc_ole_no: 17374 rts # do nothing 17375 17376# 17377# unordered or greater than: 17378# ___ 17379# NANv(NvZ) 17380# 17381ftrapcc_ugt: 17382 fbugt.w ftrapcc_trap # unordered or greater than? 17383ftrapcc_ugt_no: 17384 rts # do nothing 17385 17386# 17387# ordered greater or less than: 17388# _____ 17389# NANvZ 17390# 17391ftrapcc_ogl: 17392 fbogl.w ftrapcc_trap # ordered greater or less than? 17393ftrapcc_ogl_no: 17394 rts # do nothing 17395 17396# 17397# unordered or equal: 17398# 17399# NANvZ 17400# 17401ftrapcc_ueq: 17402 fbueq.w ftrapcc_trap # unordered or equal? 17403ftrapcc_ueq_no: 17404 rts # do nothing 17405 17406# 17407# ordered: 17408# ___ 17409# NAN 17410# 17411ftrapcc_or: 17412 fbor.w ftrapcc_trap # ordered? 17413ftrapcc_or_no: 17414 rts # do nothing 17415 17416# 17417# unordered: 17418# 17419# NAN 17420# 17421ftrapcc_un: 17422 fbun.w ftrapcc_trap # unordered? 17423ftrapcc_un_no: 17424 rts # do nothing 17425 17426####################################################################### 17427 17428# the bsun exception bit was not set. 17429# we will need to jump to the ftrapcc vector. the stack frame 17430# is the same size as that of the fp unimp instruction. the 17431# only difference is that the <ea> field should hold the PC 17432# of the ftrapcc instruction and the vector offset field 17433# should denote the ftrapcc trap. 17434ftrapcc_trap: 17435 mov.b &ftrapcc_flg,SPCOND_FLG(%a6) 17436 rts 17437 17438# the emulation routine set bsun and BSUN was enabled. have to 17439# fix stack and jump to the bsun handler. 17440# let the caller of this routine shift the stack frame up to 17441# eliminate the effective address field. 17442ftrapcc_bsun: 17443 mov.b &fbsun_flg,SPCOND_FLG(%a6) 17444 rts 17445 17446######################################################################### 17447# fscc(): routine to emulate the fscc instruction # 17448# # 17449# XDEF **************************************************************** # 17450# _fscc() # 17451# # 17452# XREF **************************************************************** # 17453# store_dreg_b() - store result to data register file # 17454# dec_areg() - decrement an areg for -(an) mode # 17455# inc_areg() - increment an areg for (an)+ mode # 17456# _dmem_write_byte() - store result to memory # 17457# # 17458# INPUT *************************************************************** # 17459# none # 17460# # 17461# OUTPUT ************************************************************** # 17462# none # 17463# # 17464# ALGORITHM *********************************************************** # 17465# This routine checks which conditional predicate is specified by # 17466# the stacked fscc instruction opcode and then branches to a routine # 17467# for that predicate. The corresponding fbcc instruction is then used # 17468# to see whether the condition (specified by the stacked FPSR) is true # 17469# or false. # 17470# If a BSUN exception should be indicated, the BSUN and ABSUN # 17471# bits are set in the stacked FPSR. If the BSUN exception is enabled, # 17472# the fbsun_flg is set in the SPCOND_FLG location on the stack. 
If an # 17473# enabled BSUN should not be flagged and the predicate is true, then # 17474# the result is stored to the data register file or memory # 17475# # 17476######################################################################### 17477 17478 global _fscc 17479_fscc: 17480 mov.w EXC_CMDREG(%a6),%d0 # fetch predicate 17481 17482 clr.l %d1 # clear scratch reg 17483 mov.b FPSR_CC(%a6),%d1 # fetch fp ccodes 17484 ror.l &0x8,%d1 # rotate to top byte 17485 fmov.l %d1,%fpsr # insert into FPSR 17486 17487 mov.w (tbl_fscc.b,%pc,%d0.w*2),%d1 # load table 17488 jmp (tbl_fscc.b,%pc,%d1.w) # jump to fscc routine 17489 17490tbl_fscc: 17491 short fscc_f - tbl_fscc # 00 17492 short fscc_eq - tbl_fscc # 01 17493 short fscc_ogt - tbl_fscc # 02 17494 short fscc_oge - tbl_fscc # 03 17495 short fscc_olt - tbl_fscc # 04 17496 short fscc_ole - tbl_fscc # 05 17497 short fscc_ogl - tbl_fscc # 06 17498 short fscc_or - tbl_fscc # 07 17499 short fscc_un - tbl_fscc # 08 17500 short fscc_ueq - tbl_fscc # 09 17501 short fscc_ugt - tbl_fscc # 10 17502 short fscc_uge - tbl_fscc # 11 17503 short fscc_ult - tbl_fscc # 12 17504 short fscc_ule - tbl_fscc # 13 17505 short fscc_neq - tbl_fscc # 14 17506 short fscc_t - tbl_fscc # 15 17507 short fscc_sf - tbl_fscc # 16 17508 short fscc_seq - tbl_fscc # 17 17509 short fscc_gt - tbl_fscc # 18 17510 short fscc_ge - tbl_fscc # 19 17511 short fscc_lt - tbl_fscc # 20 17512 short fscc_le - tbl_fscc # 21 17513 short fscc_gl - tbl_fscc # 22 17514 short fscc_gle - tbl_fscc # 23 17515 short fscc_ngle - tbl_fscc # 24 17516 short fscc_ngl - tbl_fscc # 25 17517 short fscc_nle - tbl_fscc # 26 17518 short fscc_nlt - tbl_fscc # 27 17519 short fscc_nge - tbl_fscc # 28 17520 short fscc_ngt - tbl_fscc # 29 17521 short fscc_sneq - tbl_fscc # 30 17522 short fscc_st - tbl_fscc # 31 17523 17524######################################################################### 17525# # 17526# IEEE Nonaware tests # 17527# # 17528# For the IEEE nonaware tests, we set the result based on the # 17529# floating point condition codes. In addition, we check to see # 17530# if the NAN bit is set, in which case BSUN and AIOP will be set. # 17531# # 17532# The cases EQ and NE are shared by the Aware and Nonaware groups # 17533# and are incapable of setting the BSUN exception bit. # 17534# # 17535# Typically, only one of the two possible branch directions could # 17536# have the NAN bit set. # 17537# # 17538######################################################################### 17539 17540# 17541# equal: 17542# 17543# Z 17544# 17545fscc_eq: 17546 fbeq.w fscc_eq_yes # equal? 17547fscc_eq_no: 17548 clr.b %d0 # set false 17549 bra.w fscc_done # go finish 17550fscc_eq_yes: 17551 st %d0 # set true 17552 bra.w fscc_done # go finish 17553 17554# 17555# not equal: 17556# _ 17557# Z 17558# 17559fscc_neq: 17560 fbneq.w fscc_neq_yes # not equal? 17561fscc_neq_no: 17562 clr.b %d0 # set false 17563 bra.w fscc_done # go finish 17564fscc_neq_yes: 17565 st %d0 # set true 17566 bra.w fscc_done # go finish 17567 17568# 17569# greater than: 17570# _______ 17571# NANvZvN 17572# 17573fscc_gt: 17574 fbgt.w fscc_gt_yes # greater than? 17575fscc_gt_no: 17576 clr.b %d0 # set false 17577 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc? 
17578 beq.w fscc_done # no;go finish 17579 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit 17580 bra.w fscc_chk_bsun # go finish 17581fscc_gt_yes: 17582 st %d0 # set true 17583 bra.w fscc_done # go finish 17584 17585# 17586# not greater than: 17587# 17588# NANvZvN 17589# 17590fscc_ngt: 17591 fbngt.w fscc_ngt_yes # not greater than? 17592fscc_ngt_no: 17593 clr.b %d0 # set false 17594 bra.w fscc_done # go finish 17595fscc_ngt_yes: 17596 st %d0 # set true 17597 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc? 17598 beq.w fscc_done # no;go finish 17599 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit 17600 bra.w fscc_chk_bsun # go finish 17601 17602# 17603# greater than or equal: 17604# _____ 17605# Zv(NANvN) 17606# 17607fscc_ge: 17608 fbge.w fscc_ge_yes # greater than or equal? 17609fscc_ge_no: 17610 clr.b %d0 # set false 17611 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc? 17612 beq.w fscc_done # no;go finish 17613 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit 17614 bra.w fscc_chk_bsun # go finish 17615fscc_ge_yes: 17616 st %d0 # set true 17617 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc? 17618 beq.w fscc_done # no;go finish 17619 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit 17620 bra.w fscc_chk_bsun # go finish 17621 17622# 17623# not (greater than or equal): 17624# _ 17625# NANv(N^Z) 17626# 17627fscc_nge: 17628 fbnge.w fscc_nge_yes # not (greater than or equal)? 17629fscc_nge_no: 17630 clr.b %d0 # set false 17631 bra.w fscc_done # go finish 17632fscc_nge_yes: 17633 st %d0 # set true 17634 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc? 17635 beq.w fscc_done # no;go finish 17636 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit 17637 bra.w fscc_chk_bsun # go finish 17638 17639# 17640# less than: 17641# _____ 17642# N^(NANvZ) 17643# 17644fscc_lt: 17645 fblt.w fscc_lt_yes # less than? 17646fscc_lt_no: 17647 clr.b %d0 # set false 17648 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc? 17649 beq.w fscc_done # no;go finish 17650 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit 17651 bra.w fscc_chk_bsun # go finish 17652fscc_lt_yes: 17653 st %d0 # set true 17654 bra.w fscc_done # go finish 17655 17656# 17657# not less than: 17658# _ 17659# NANv(ZvN) 17660# 17661fscc_nlt: 17662 fbnlt.w fscc_nlt_yes # not less than? 17663fscc_nlt_no: 17664 clr.b %d0 # set false 17665 bra.w fscc_done # go finish 17666fscc_nlt_yes: 17667 st %d0 # set true 17668 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc? 17669 beq.w fscc_done # no;go finish 17670 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit 17671 bra.w fscc_chk_bsun # go finish 17672 17673# 17674# less than or equal: 17675# ___ 17676# Zv(N^NAN) 17677# 17678fscc_le: 17679 fble.w fscc_le_yes # less than or equal? 17680fscc_le_no: 17681 clr.b %d0 # set false 17682 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc? 17683 beq.w fscc_done # no;go finish 17684 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit 17685 bra.w fscc_chk_bsun # go finish 17686fscc_le_yes: 17687 st %d0 # set true 17688 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc? 17689 beq.w fscc_done # no;go finish 17690 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit 17691 bra.w fscc_chk_bsun # go finish 17692 17693# 17694# not (less than or equal): 17695# ___ 17696# NANv(NvZ) 17697# 17698fscc_nle: 17699 fbnle.w fscc_nle_yes # not (less than or equal)? 
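#
# The "_no"/"_yes" arms of this and the other nonaware fscc predicates all
# reduce to the same shape.  C-style sketch, illustrative only
# (set_fpsr_bits() is a hypothetical helper; the labels are the real ones):
#
#	result = pred_true ? 0xff : 0x00;       /* st %d0  /  clr.b %d0       */
#	if (cc & NAN) {                         /* only on the arm(s) that    */
#	    set_fpsr_bits(BSUN | AIOP);         /* can be reached with NAN    */
#	    goto fscc_chk_bsun;                 /* store only if BSUN is not  */
#	}                                       /* enabled                    */
#	goto fscc_done;                         /* store result to Dn/memory  */
#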
17700fscc_nle_no: 17701 clr.b %d0 # set false 17702 bra.w fscc_done # go finish 17703fscc_nle_yes: 17704 st %d0 # set true 17705 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc? 17706 beq.w fscc_done # no;go finish 17707 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit 17708 bra.w fscc_chk_bsun # go finish 17709 17710# 17711# greater or less than: 17712# _____ 17713# NANvZ 17714# 17715fscc_gl: 17716 fbgl.w fscc_gl_yes # greater or less than? 17717fscc_gl_no: 17718 clr.b %d0 # set false 17719 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc? 17720 beq.w fscc_done # no;go finish 17721 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit 17722 bra.w fscc_chk_bsun # go finish 17723fscc_gl_yes: 17724 st %d0 # set true 17725 bra.w fscc_done # go finish 17726 17727# 17728# not (greater or less than): 17729# 17730# NANvZ 17731# 17732fscc_ngl: 17733 fbngl.w fscc_ngl_yes # not (greater or less than)? 17734fscc_ngl_no: 17735 clr.b %d0 # set false 17736 bra.w fscc_done # go finish 17737fscc_ngl_yes: 17738 st %d0 # set true 17739 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc? 17740 beq.w fscc_done # no;go finish 17741 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit 17742 bra.w fscc_chk_bsun # go finish 17743 17744# 17745# greater, less, or equal: 17746# ___ 17747# NAN 17748# 17749fscc_gle: 17750 fbgle.w fscc_gle_yes # greater, less, or equal? 17751fscc_gle_no: 17752 clr.b %d0 # set false 17753 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit 17754 bra.w fscc_chk_bsun # go finish 17755fscc_gle_yes: 17756 st %d0 # set true 17757 bra.w fscc_done # go finish 17758 17759# 17760# not (greater, less, or equal): 17761# 17762# NAN 17763# 17764fscc_ngle: 17765 fbngle.w fscc_ngle_yes # not (greater, less, or equal)? 17766fscc_ngle_no: 17767 clr.b %d0 # set false 17768 bra.w fscc_done # go finish 17769fscc_ngle_yes: 17770 st %d0 # set true 17771 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit 17772 bra.w fscc_chk_bsun # go finish 17773 17774######################################################################### 17775# # 17776# Miscellaneous tests # 17777# # 17778# For the IEEE aware tests, we only have to set the result based on the # 17779# floating point condition codes. The BSUN exception will not be # 17780# set for any of these tests. # 17781# # 17782######################################################################### 17783 17784# 17785# false: 17786# 17787# False 17788# 17789fscc_f: 17790 clr.b %d0 # set false 17791 bra.w fscc_done # go finish 17792 17793# 17794# true: 17795# 17796# True 17797# 17798fscc_t: 17799 st %d0 # set true 17800 bra.w fscc_done # go finish 17801 17802# 17803# signalling false: 17804# 17805# False 17806# 17807fscc_sf: 17808 clr.b %d0 # set false 17809 btst &nan_bit, FPSR_CC(%a6) # set BSUN exc bit 17810 beq.w fscc_done # no;go finish 17811 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit 17812 bra.w fscc_chk_bsun # go finish 17813 17814# 17815# signalling true: 17816# 17817# True 17818# 17819fscc_st: 17820 st %d0 # set false 17821 btst &nan_bit, FPSR_CC(%a6) # set BSUN exc bit 17822 beq.w fscc_done # no;go finish 17823 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit 17824 bra.w fscc_chk_bsun # go finish 17825 17826# 17827# signalling equal: 17828# 17829# Z 17830# 17831fscc_seq: 17832 fbseq.w fscc_seq_yes # signalling equal? 
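#
# Note: unlike the plain "eq"/"neq" tests, the signalling forms in this
# group (sf, st, seq, sneq) do test the NAN condition-code bit on both
# arms and, when it is set, raise BSUN/AIOP just like the nonaware
# predicates; only the result value itself ignores NAN.
#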
17833fscc_seq_no: 17834 clr.b %d0 # set false 17835 btst &nan_bit, FPSR_CC(%a6) # set BSUN exc bit 17836 beq.w fscc_done # no;go finish 17837 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit 17838 bra.w fscc_chk_bsun # go finish 17839fscc_seq_yes: 17840 st %d0 # set true 17841 btst &nan_bit, FPSR_CC(%a6) # set BSUN exc bit 17842 beq.w fscc_done # no;go finish 17843 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit 17844 bra.w fscc_chk_bsun # go finish 17845 17846# 17847# signalling not equal: 17848# _ 17849# Z 17850# 17851fscc_sneq: 17852 fbsneq.w fscc_sneq_yes # signalling equal? 17853fscc_sneq_no: 17854 clr.b %d0 # set false 17855 btst &nan_bit, FPSR_CC(%a6) # set BSUN exc bit 17856 beq.w fscc_done # no;go finish 17857 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit 17858 bra.w fscc_chk_bsun # go finish 17859fscc_sneq_yes: 17860 st %d0 # set true 17861 btst &nan_bit, FPSR_CC(%a6) # set BSUN exc bit 17862 beq.w fscc_done # no;go finish 17863 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit 17864 bra.w fscc_chk_bsun # go finish 17865 17866######################################################################### 17867# # 17868# IEEE Aware tests # 17869# # 17870# For the IEEE aware tests, we only have to set the result based on the # 17871# floating point condition codes. The BSUN exception will not be # 17872# set for any of these tests. # 17873# # 17874######################################################################### 17875 17876# 17877# ordered greater than: 17878# _______ 17879# NANvZvN 17880# 17881fscc_ogt: 17882 fbogt.w fscc_ogt_yes # ordered greater than? 17883fscc_ogt_no: 17884 clr.b %d0 # set false 17885 bra.w fscc_done # go finish 17886fscc_ogt_yes: 17887 st %d0 # set true 17888 bra.w fscc_done # go finish 17889 17890# 17891# unordered or less or equal: 17892# _______ 17893# NANvZvN 17894# 17895fscc_ule: 17896 fbule.w fscc_ule_yes # unordered or less or equal? 17897fscc_ule_no: 17898 clr.b %d0 # set false 17899 bra.w fscc_done # go finish 17900fscc_ule_yes: 17901 st %d0 # set true 17902 bra.w fscc_done # go finish 17903 17904# 17905# ordered greater than or equal: 17906# _____ 17907# Zv(NANvN) 17908# 17909fscc_oge: 17910 fboge.w fscc_oge_yes # ordered greater than or equal? 17911fscc_oge_no: 17912 clr.b %d0 # set false 17913 bra.w fscc_done # go finish 17914fscc_oge_yes: 17915 st %d0 # set true 17916 bra.w fscc_done # go finish 17917 17918# 17919# unordered or less than: 17920# _ 17921# NANv(N^Z) 17922# 17923fscc_ult: 17924 fbult.w fscc_ult_yes # unordered or less than? 17925fscc_ult_no: 17926 clr.b %d0 # set false 17927 bra.w fscc_done # go finish 17928fscc_ult_yes: 17929 st %d0 # set true 17930 bra.w fscc_done # go finish 17931 17932# 17933# ordered less than: 17934# _____ 17935# N^(NANvZ) 17936# 17937fscc_olt: 17938 fbolt.w fscc_olt_yes # ordered less than? 17939fscc_olt_no: 17940 clr.b %d0 # set false 17941 bra.w fscc_done # go finish 17942fscc_olt_yes: 17943 st %d0 # set true 17944 bra.w fscc_done # go finish 17945 17946# 17947# unordered or greater or equal: 17948# 17949# NANvZvN 17950# 17951fscc_uge: 17952 fbuge.w fscc_uge_yes # unordered or greater than? 17953fscc_uge_no: 17954 clr.b %d0 # set false 17955 bra.w fscc_done # go finish 17956fscc_uge_yes: 17957 st %d0 # set true 17958 bra.w fscc_done # go finish 17959 17960# 17961# ordered less than or equal: 17962# ___ 17963# Zv(N^NAN) 17964# 17965fscc_ole: 17966 fbole.w fscc_ole_yes # ordered greater or less than? 
17967fscc_ole_no: 17968 clr.b %d0 # set false 17969 bra.w fscc_done # go finish 17970fscc_ole_yes: 17971 st %d0 # set true 17972 bra.w fscc_done # go finish 17973 17974# 17975# unordered or greater than: 17976# ___ 17977# NANv(NvZ) 17978# 17979fscc_ugt: 17980 fbugt.w fscc_ugt_yes # unordered or greater than? 17981fscc_ugt_no: 17982 clr.b %d0 # set false 17983 bra.w fscc_done # go finish 17984fscc_ugt_yes: 17985 st %d0 # set true 17986 bra.w fscc_done # go finish 17987 17988# 17989# ordered greater or less than: 17990# _____ 17991# NANvZ 17992# 17993fscc_ogl: 17994 fbogl.w fscc_ogl_yes # ordered greater or less than? 17995fscc_ogl_no: 17996 clr.b %d0 # set false 17997 bra.w fscc_done # go finish 17998fscc_ogl_yes: 17999 st %d0 # set true 18000 bra.w fscc_done # go finish 18001 18002# 18003# unordered or equal: 18004# 18005# NANvZ 18006# 18007fscc_ueq: 18008 fbueq.w fscc_ueq_yes # unordered or equal? 18009fscc_ueq_no: 18010 clr.b %d0 # set false 18011 bra.w fscc_done # go finish 18012fscc_ueq_yes: 18013 st %d0 # set true 18014 bra.w fscc_done # go finish 18015 18016# 18017# ordered: 18018# ___ 18019# NAN 18020# 18021fscc_or: 18022 fbor.w fscc_or_yes # ordered? 18023fscc_or_no: 18024 clr.b %d0 # set false 18025 bra.w fscc_done # go finish 18026fscc_or_yes: 18027 st %d0 # set true 18028 bra.w fscc_done # go finish 18029 18030# 18031# unordered: 18032# 18033# NAN 18034# 18035fscc_un: 18036 fbun.w fscc_un_yes # unordered? 18037fscc_un_no: 18038 clr.b %d0 # set false 18039 bra.w fscc_done # go finish 18040fscc_un_yes: 18041 st %d0 # set true 18042 bra.w fscc_done # go finish 18043 18044####################################################################### 18045 18046# 18047# the bsun exception bit was set. now, check to see is BSUN 18048# is enabled. if so, don't store result and correct stack frame 18049# for a bsun exception. 18050# 18051fscc_chk_bsun: 18052 btst &bsun_bit,FPCR_ENABLE(%a6) # was BSUN set? 18053 bne.w fscc_bsun 18054 18055# 18056# the bsun exception bit was not set. 18057# the result has been selected. 18058# now, check to see if the result is to be stored in the data register 18059# file or in memory. 18060# 18061fscc_done: 18062 mov.l %d0,%a0 # save result for a moment 18063 18064 mov.b 1+EXC_OPWORD(%a6),%d1 # fetch lo opword 18065 mov.l %d1,%d0 # make a copy 18066 andi.b &0x38,%d1 # extract src mode 18067 18068 bne.b fscc_mem_op # it's a memory operation 18069 18070 mov.l %d0,%d1 18071 andi.w &0x7,%d1 # pass index in d1 18072 mov.l %a0,%d0 # pass result in d0 18073 bsr.l store_dreg_b # save result in regfile 18074 rts 18075 18076# 18077# the stacked <ea> is correct with the exception of: 18078# -> Dn : <ea> is garbage 18079# 18080# if the addressing mode is post-increment or pre-decrement, 18081# then the address registers have not been updated. 18082# 18083fscc_mem_op: 18084 cmpi.b %d1,&0x18 # is <ea> (An)+ ? 18085 beq.b fscc_mem_inc # yes 18086 cmpi.b %d1,&0x20 # is <ea> -(An) ? 18087 beq.b fscc_mem_dec # yes 18088 18089 mov.l %a0,%d0 # pass result in d0 18090 mov.l EXC_EA(%a6),%a0 # fetch <ea> 18091 bsr.l _dmem_write_byte # write result byte 18092 18093 tst.l %d1 # did dstore fail? 18094 bne.w fscc_err # yes 18095 18096 rts 18097 18098# addressing mode is post-increment. write the result byte. if the write 18099# fails then don't update the address register. if write passes then 18100# call inc_areg() to update the address register. 
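#
# In outline, the (An)+ and -(An) cases below both follow this order.
# C-style sketch, illustrative only: _dmem_write_byte(), inc_areg() and
# dec_areg() are the routines actually called; write_failed() is a
# hypothetical stand-in for the real "tst.l %d1" status check.
#
#	_dmem_write_byte(ea, result);           /* write the byte first       */
#	if (write_failed())                     /* real code: tst.l %d1       */
#	    goto fscc_err;                      /* fault -> An left untouched */
#	inc_areg(n, 1);                         /* (An)+ ; the -(An) case     */
#	                                        /* calls dec_areg(n, 1)       */
#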
18101fscc_mem_inc: 18102 mov.l %a0,%d0 # pass result in d0 18103 mov.l EXC_EA(%a6),%a0 # fetch <ea> 18104 bsr.l _dmem_write_byte # write result byte 18105 18106 tst.l %d1 # did dstore fail? 18107 bne.w fscc_err # yes 18108 18109 mov.b 0x1+EXC_OPWORD(%a6),%d1 # fetch opword 18110 andi.w &0x7,%d1 # pass index in d1 18111 movq.l &0x1,%d0 # pass amt to inc by 18112 bsr.l inc_areg # increment address register 18113 18114 rts 18115 18116# addressing mode is pre-decrement. write the result byte. if the write 18117# fails then don't update the address register. if the write passes then 18118# call dec_areg() to update the address register. 18119fscc_mem_dec: 18120 mov.l %a0,%d0 # pass result in d0 18121 mov.l EXC_EA(%a6),%a0 # fetch <ea> 18122 bsr.l _dmem_write_byte # write result byte 18123 18124 tst.l %d1 # did dstore fail? 18125 bne.w fscc_err # yes 18126 18127 mov.b 0x1+EXC_OPWORD(%a6),%d1 # fetch opword 18128 andi.w &0x7,%d1 # pass index in d1 18129 movq.l &0x1,%d0 # pass amt to dec by 18130 bsr.l dec_areg # decrement address register 18131 18132 rts 18133 18134# the emulation routine set bsun and BSUN was enabled. have to 18135# fix stack and jump to the bsun handler. 18136# let the caller of this routine shift the stack frame up to 18137# eliminate the effective address field. 18138fscc_bsun: 18139 mov.b &fbsun_flg,SPCOND_FLG(%a6) 18140 rts 18141 18142# the byte write to memory has failed. pass the failing effective address 18143# and a FSLW to funimp_dacc(). 18144fscc_err: 18145 mov.w &0x00a1,EXC_VOFF(%a6) 18146 bra.l facc_finish 18147 18148######################################################################### 18149# XDEF **************************************************************** # 18150# fmovm_dynamic(): emulate "fmovm" dynamic instruction # 18151# # 18152# XREF **************************************************************** # 18153# fetch_dreg() - fetch data register # 18154# {i,d,}mem_read() - fetch data from memory # 18155# _mem_write() - write data to memory # 18156# iea_iacc() - instruction memory access error occurred # 18157# iea_dacc() - data memory access error occurred # 18158# restore() - restore An index regs if access error occurred # 18159# # 18160# INPUT *************************************************************** # 18161# None # 18162# # 18163# OUTPUT ************************************************************** # 18164# If instr is "fmovm Dn,-(A7)" from supervisor mode, # 18165# d0 = size of dump # 18166# d1 = Dn # 18167# Else if instruction access error, # 18168# d0 = FSLW # 18169# Else if data access error, # 18170# d0 = FSLW # 18171# a0 = address of fault # 18172# Else # 18173# none. # 18174# # 18175# ALGORITHM *********************************************************** # 18176# The effective address must be calculated since this is entered # 18177# from an "Unimplemented Effective Address" exception handler. So, we # 18178# have our own fcalc_ea() routine here. If an access error is flagged # 18179# by a _{i,d,}mem_read() call, we must exit through the special # 18180# handler. # 18181# The data register is determined and its value loaded to get the # 18182# string of FP registers affected. This value is used as an index into # 18183# a lookup table such that we can determine the number of bytes # 18184# involved. # 18185# If the instruction is "fmovm.x <ea>,Dn", a _mem_read() is used # 18186# to read in all FP values. Again, _mem_read() may fail and require a # 18187# special exit. 
# 18188# If the instruction is "fmovm.x DN,<ea>", a _mem_write() is used # 18189# to write all FP values. _mem_write() may also fail. # 18190# If the instruction is "fmovm.x DN,-(a7)" from supervisor mode, # 18191# then we return the size of the dump and the string to the caller # 18192# so that the move can occur outside of this routine. This special # 18193# case is required so that moves to the system stack are handled # 18194# correctly. # 18195# # 18196# DYNAMIC: # 18197# fmovm.x dn, <ea> # 18198# fmovm.x <ea>, dn # 18199# # 18200# <WORD 1> <WORD2> # 18201# 1111 0010 00 |<ea>| 11@& 1000 0$$$ 0000 # 18202# # 18203# & = (0): predecrement addressing mode # 18204# (1): postincrement or control addressing mode # 18205# @ = (0): move listed regs from memory to the FPU # 18206# (1): move listed regs from the FPU to memory # 18207# $$$ : index of data register holding reg select mask # 18208# # 18209# NOTES: # 18210# If the data register holds a zero, then the # 18211# instruction is a nop. # 18212# # 18213######################################################################### 18214 18215 global fmovm_dynamic 18216fmovm_dynamic: 18217 18218# extract the data register in which the bit string resides... 18219 mov.b 1+EXC_EXTWORD(%a6),%d1 # fetch extword 18220 andi.w &0x70,%d1 # extract reg bits 18221 lsr.b &0x4,%d1 # shift into lo bits 18222 18223# fetch the bit string into d0... 18224 bsr.l fetch_dreg # fetch reg string 18225 18226 andi.l &0x000000ff,%d0 # keep only lo byte 18227 18228 mov.l %d0,-(%sp) # save strg 18229 mov.b (tbl_fmovm_size.w,%pc,%d0),%d0 18230 mov.l %d0,-(%sp) # save size 18231 bsr.l fmovm_calc_ea # calculate <ea> 18232 mov.l (%sp)+,%d0 # restore size 18233 mov.l (%sp)+,%d1 # restore strg 18234 18235# if the bit string is a zero, then the operation is a no-op 18236# but, make sure that we've calculated ea and advanced the opword pointer 18237 beq.w fmovm_data_done 18238 18239# separate move ins from move outs... 18240 btst &0x5,EXC_EXTWORD(%a6) # is it a move in or out? 18241 beq.w fmovm_data_in # it's a move out 18242 18243############# 18244# MOVE OUT: # 18245############# 18246fmovm_data_out: 18247 btst &0x4,EXC_EXTWORD(%a6) # control or predecrement? 18248 bne.w fmovm_out_ctrl # control 18249 18250############################ 18251fmovm_out_predec: 18252# for predecrement mode, the bit string is the opposite of both control 18253# operations and postincrement mode. (bit7 = FP7 ... bit0 = FP0) 18254# here, we convert it to be just like the others... 18255 mov.b (tbl_fmovm_convert.w,%pc,%d1.w*1),%d1 18256 18257 btst &0x5,EXC_SR(%a6) # user or supervisor mode? 18258 beq.b fmovm_out_ctrl # user 18259 18260fmovm_out_predec_s: 18261 cmpi.b SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)? 18262 bne.b fmovm_out_ctrl 18263 18264# the operation was unfortunately an: fmovm.x dn,-(sp) 18265# called from supervisor mode. 18266# we're also passing "size" and "strg" back to the calling routine 18267 rts 18268 18269############################ 18270fmovm_out_ctrl: 18271 mov.l %a0,%a1 # move <ea> to a1 18272 18273 sub.l %d0,%sp # subtract size of dump 18274 lea (%sp),%a0 18275 18276 tst.b %d1 # should FP0 be moved? 18277 bpl.b fmovm_out_ctrl_fp1 # no 18278 18279 mov.l 0x0+EXC_FP0(%a6),(%a0)+ # yes 18280 mov.l 0x4+EXC_FP0(%a6),(%a0)+ 18281 mov.l 0x8+EXC_FP0(%a6),(%a0)+ 18282 18283fmovm_out_ctrl_fp1: 18284 lsl.b &0x1,%d1 # should FP1 be moved? 
18285 bpl.b fmovm_out_ctrl_fp2 # no 18286 18287 mov.l 0x0+EXC_FP1(%a6),(%a0)+ # yes 18288 mov.l 0x4+EXC_FP1(%a6),(%a0)+ 18289 mov.l 0x8+EXC_FP1(%a6),(%a0)+ 18290 18291fmovm_out_ctrl_fp2: 18292 lsl.b &0x1,%d1 # should FP2 be moved? 18293 bpl.b fmovm_out_ctrl_fp3 # no 18294 18295 fmovm.x &0x20,(%a0) # yes 18296 add.l &0xc,%a0 18297 18298fmovm_out_ctrl_fp3: 18299 lsl.b &0x1,%d1 # should FP3 be moved? 18300 bpl.b fmovm_out_ctrl_fp4 # no 18301 18302 fmovm.x &0x10,(%a0) # yes 18303 add.l &0xc,%a0 18304 18305fmovm_out_ctrl_fp4: 18306 lsl.b &0x1,%d1 # should FP4 be moved? 18307 bpl.b fmovm_out_ctrl_fp5 # no 18308 18309 fmovm.x &0x08,(%a0) # yes 18310 add.l &0xc,%a0 18311 18312fmovm_out_ctrl_fp5: 18313 lsl.b &0x1,%d1 # should FP5 be moved? 18314 bpl.b fmovm_out_ctrl_fp6 # no 18315 18316 fmovm.x &0x04,(%a0) # yes 18317 add.l &0xc,%a0 18318 18319fmovm_out_ctrl_fp6: 18320 lsl.b &0x1,%d1 # should FP6 be moved? 18321 bpl.b fmovm_out_ctrl_fp7 # no 18322 18323 fmovm.x &0x02,(%a0) # yes 18324 add.l &0xc,%a0 18325 18326fmovm_out_ctrl_fp7: 18327 lsl.b &0x1,%d1 # should FP7 be moved? 18328 bpl.b fmovm_out_ctrl_done # no 18329 18330 fmovm.x &0x01,(%a0) # yes 18331 add.l &0xc,%a0 18332 18333fmovm_out_ctrl_done: 18334 mov.l %a1,L_SCR1(%a6) 18335 18336 lea (%sp),%a0 # pass: supervisor src 18337 mov.l %d0,-(%sp) # save size 18338 bsr.l _dmem_write # copy data to user mem 18339 18340 mov.l (%sp)+,%d0 18341 add.l %d0,%sp # clear fpreg data from stack 18342 18343 tst.l %d1 # did dstore err? 18344 bne.w fmovm_out_err # yes 18345 18346 rts 18347 18348############ 18349# MOVE IN: # 18350############ 18351fmovm_data_in: 18352 mov.l %a0,L_SCR1(%a6) 18353 18354 sub.l %d0,%sp # make room for fpregs 18355 lea (%sp),%a1 18356 18357 mov.l %d1,-(%sp) # save bit string for later 18358 mov.l %d0,-(%sp) # save # of bytes 18359 18360 bsr.l _dmem_read # copy data from user mem 18361 18362 mov.l (%sp)+,%d0 # retrieve # of bytes 18363 18364 tst.l %d1 # did dfetch fail? 18365 bne.w fmovm_in_err # yes 18366 18367 mov.l (%sp)+,%d1 # load bit string 18368 18369 lea (%sp),%a0 # addr of stack 18370 18371 tst.b %d1 # should FP0 be moved? 18372 bpl.b fmovm_data_in_fp1 # no 18373 18374 mov.l (%a0)+,0x0+EXC_FP0(%a6) # yes 18375 mov.l (%a0)+,0x4+EXC_FP0(%a6) 18376 mov.l (%a0)+,0x8+EXC_FP0(%a6) 18377 18378fmovm_data_in_fp1: 18379 lsl.b &0x1,%d1 # should FP1 be moved? 18380 bpl.b fmovm_data_in_fp2 # no 18381 18382 mov.l (%a0)+,0x0+EXC_FP1(%a6) # yes 18383 mov.l (%a0)+,0x4+EXC_FP1(%a6) 18384 mov.l (%a0)+,0x8+EXC_FP1(%a6) 18385 18386fmovm_data_in_fp2: 18387 lsl.b &0x1,%d1 # should FP2 be moved? 18388 bpl.b fmovm_data_in_fp3 # no 18389 18390 fmovm.x (%a0)+,&0x20 # yes 18391 18392fmovm_data_in_fp3: 18393 lsl.b &0x1,%d1 # should FP3 be moved? 18394 bpl.b fmovm_data_in_fp4 # no 18395 18396 fmovm.x (%a0)+,&0x10 # yes 18397 18398fmovm_data_in_fp4: 18399 lsl.b &0x1,%d1 # should FP4 be moved? 18400 bpl.b fmovm_data_in_fp5 # no 18401 18402 fmovm.x (%a0)+,&0x08 # yes 18403 18404fmovm_data_in_fp5: 18405 lsl.b &0x1,%d1 # should FP5 be moved? 18406 bpl.b fmovm_data_in_fp6 # no 18407 18408 fmovm.x (%a0)+,&0x04 # yes 18409 18410fmovm_data_in_fp6: 18411 lsl.b &0x1,%d1 # should FP6 be moved? 18412 bpl.b fmovm_data_in_fp7 # no 18413 18414 fmovm.x (%a0)+,&0x02 # yes 18415 18416fmovm_data_in_fp7: 18417 lsl.b &0x1,%d1 # should FP7 be moved? 
18418 bpl.b fmovm_data_in_done # no 18419 18420 fmovm.x (%a0)+,&0x01 # yes 18421 18422fmovm_data_in_done: 18423 add.l %d0,%sp # remove fpregs from stack 18424 rts 18425 18426##################################### 18427 18428fmovm_data_done: 18429 rts 18430 18431############################################################################## 18432 18433# 18434# table indexed by the operation's bit string that gives the number 18435# of bytes that will be moved. 18436# 18437# number of bytes = (# of 1's in bit string) * 12(bytes/fpreg) 18438# 18439tbl_fmovm_size: 18440 byte 0x00,0x0c,0x0c,0x18,0x0c,0x18,0x18,0x24 18441 byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30 18442 byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30 18443 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c 18444 byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30 18445 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c 18446 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c 18447 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48 18448 byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30 18449 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c 18450 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c 18451 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48 18452 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c 18453 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48 18454 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48 18455 byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54 18456 byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30 18457 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c 18458 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c 18459 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48 18460 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c 18461 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48 18462 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48 18463 byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54 18464 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c 18465 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48 18466 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48 18467 byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54 18468 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48 18469 byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54 18470 byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54 18471 byte 0x3c,0x48,0x48,0x54,0x48,0x54,0x54,0x60 18472 18473# 18474# table to convert a pre-decrement bit string into a post-increment 18475# or control bit string. 18476# ex: 0x00 ==> 0x00 18477# 0x01 ==> 0x80 18478# 0x02 ==> 0x40 18479# . 18480# . 
18481# 0xfd ==> 0xbf 18482# 0xfe ==> 0x7f 18483# 0xff ==> 0xff 18484# 18485tbl_fmovm_convert: 18486 byte 0x00,0x80,0x40,0xc0,0x20,0xa0,0x60,0xe0 18487 byte 0x10,0x90,0x50,0xd0,0x30,0xb0,0x70,0xf0 18488 byte 0x08,0x88,0x48,0xc8,0x28,0xa8,0x68,0xe8 18489 byte 0x18,0x98,0x58,0xd8,0x38,0xb8,0x78,0xf8 18490 byte 0x04,0x84,0x44,0xc4,0x24,0xa4,0x64,0xe4 18491 byte 0x14,0x94,0x54,0xd4,0x34,0xb4,0x74,0xf4 18492 byte 0x0c,0x8c,0x4c,0xcc,0x2c,0xac,0x6c,0xec 18493 byte 0x1c,0x9c,0x5c,0xdc,0x3c,0xbc,0x7c,0xfc 18494 byte 0x02,0x82,0x42,0xc2,0x22,0xa2,0x62,0xe2 18495 byte 0x12,0x92,0x52,0xd2,0x32,0xb2,0x72,0xf2 18496 byte 0x0a,0x8a,0x4a,0xca,0x2a,0xaa,0x6a,0xea 18497 byte 0x1a,0x9a,0x5a,0xda,0x3a,0xba,0x7a,0xfa 18498 byte 0x06,0x86,0x46,0xc6,0x26,0xa6,0x66,0xe6 18499 byte 0x16,0x96,0x56,0xd6,0x36,0xb6,0x76,0xf6 18500 byte 0x0e,0x8e,0x4e,0xce,0x2e,0xae,0x6e,0xee 18501 byte 0x1e,0x9e,0x5e,0xde,0x3e,0xbe,0x7e,0xfe 18502 byte 0x01,0x81,0x41,0xc1,0x21,0xa1,0x61,0xe1 18503 byte 0x11,0x91,0x51,0xd1,0x31,0xb1,0x71,0xf1 18504 byte 0x09,0x89,0x49,0xc9,0x29,0xa9,0x69,0xe9 18505 byte 0x19,0x99,0x59,0xd9,0x39,0xb9,0x79,0xf9 18506 byte 0x05,0x85,0x45,0xc5,0x25,0xa5,0x65,0xe5 18507 byte 0x15,0x95,0x55,0xd5,0x35,0xb5,0x75,0xf5 18508 byte 0x0d,0x8d,0x4d,0xcd,0x2d,0xad,0x6d,0xed 18509 byte 0x1d,0x9d,0x5d,0xdd,0x3d,0xbd,0x7d,0xfd 18510 byte 0x03,0x83,0x43,0xc3,0x23,0xa3,0x63,0xe3 18511 byte 0x13,0x93,0x53,0xd3,0x33,0xb3,0x73,0xf3 18512 byte 0x0b,0x8b,0x4b,0xcb,0x2b,0xab,0x6b,0xeb 18513 byte 0x1b,0x9b,0x5b,0xdb,0x3b,0xbb,0x7b,0xfb 18514 byte 0x07,0x87,0x47,0xc7,0x27,0xa7,0x67,0xe7 18515 byte 0x17,0x97,0x57,0xd7,0x37,0xb7,0x77,0xf7 18516 byte 0x0f,0x8f,0x4f,0xcf,0x2f,0xaf,0x6f,0xef 18517 byte 0x1f,0x9f,0x5f,0xdf,0x3f,0xbf,0x7f,0xff 18518 18519 global fmovm_calc_ea 18520############################################### 18521# _fmovm_calc_ea: calculate effective address # 18522############################################### 18523fmovm_calc_ea: 18524 mov.l %d0,%a0 # move # bytes to a0 18525 18526# currently, MODE and REG are taken from the EXC_OPWORD. this could be 18527# easily changed if they were inputs passed in registers. 18528 mov.w EXC_OPWORD(%a6),%d0 # fetch opcode word 18529 mov.w %d0,%d1 # make a copy 18530 18531 andi.w &0x3f,%d0 # extract mode field 18532 andi.l &0x7,%d1 # extract reg field 18533 18534# jump to the corresponding function for each {MODE,REG} pair. 
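#
# In other words (C-style sketch, illustrative only; the table really
# holds 16-bit offsets relative to tbl_fea_mode and is entered with the
# pc-relative jmp that follows):
#
#	short off  = tbl_fea_mode[opword & 0x3f];   /* low 6 bits = {mode,reg} */
#	ea_handler = (char *)tbl_fea_mode + off;    /* entries left at offset  */
#	reg        = opword & 0x7;                  /* 0 are modes not used    */
#	                                            /* by fmovm; reg stays in  */
#	                                            /* d1 for the handlers     */
#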
18535 mov.w (tbl_fea_mode.b,%pc,%d0.w*2),%d0 # fetch jmp distance 18536 jmp (tbl_fea_mode.b,%pc,%d0.w*1) # jmp to correct ea mode 18537 18538 swbeg &64 18539tbl_fea_mode: 18540 short tbl_fea_mode - tbl_fea_mode 18541 short tbl_fea_mode - tbl_fea_mode 18542 short tbl_fea_mode - tbl_fea_mode 18543 short tbl_fea_mode - tbl_fea_mode 18544 short tbl_fea_mode - tbl_fea_mode 18545 short tbl_fea_mode - tbl_fea_mode 18546 short tbl_fea_mode - tbl_fea_mode 18547 short tbl_fea_mode - tbl_fea_mode 18548 18549 short tbl_fea_mode - tbl_fea_mode 18550 short tbl_fea_mode - tbl_fea_mode 18551 short tbl_fea_mode - tbl_fea_mode 18552 short tbl_fea_mode - tbl_fea_mode 18553 short tbl_fea_mode - tbl_fea_mode 18554 short tbl_fea_mode - tbl_fea_mode 18555 short tbl_fea_mode - tbl_fea_mode 18556 short tbl_fea_mode - tbl_fea_mode 18557 18558 short faddr_ind_a0 - tbl_fea_mode 18559 short faddr_ind_a1 - tbl_fea_mode 18560 short faddr_ind_a2 - tbl_fea_mode 18561 short faddr_ind_a3 - tbl_fea_mode 18562 short faddr_ind_a4 - tbl_fea_mode 18563 short faddr_ind_a5 - tbl_fea_mode 18564 short faddr_ind_a6 - tbl_fea_mode 18565 short faddr_ind_a7 - tbl_fea_mode 18566 18567 short faddr_ind_p_a0 - tbl_fea_mode 18568 short faddr_ind_p_a1 - tbl_fea_mode 18569 short faddr_ind_p_a2 - tbl_fea_mode 18570 short faddr_ind_p_a3 - tbl_fea_mode 18571 short faddr_ind_p_a4 - tbl_fea_mode 18572 short faddr_ind_p_a5 - tbl_fea_mode 18573 short faddr_ind_p_a6 - tbl_fea_mode 18574 short faddr_ind_p_a7 - tbl_fea_mode 18575 18576 short faddr_ind_m_a0 - tbl_fea_mode 18577 short faddr_ind_m_a1 - tbl_fea_mode 18578 short faddr_ind_m_a2 - tbl_fea_mode 18579 short faddr_ind_m_a3 - tbl_fea_mode 18580 short faddr_ind_m_a4 - tbl_fea_mode 18581 short faddr_ind_m_a5 - tbl_fea_mode 18582 short faddr_ind_m_a6 - tbl_fea_mode 18583 short faddr_ind_m_a7 - tbl_fea_mode 18584 18585 short faddr_ind_disp_a0 - tbl_fea_mode 18586 short faddr_ind_disp_a1 - tbl_fea_mode 18587 short faddr_ind_disp_a2 - tbl_fea_mode 18588 short faddr_ind_disp_a3 - tbl_fea_mode 18589 short faddr_ind_disp_a4 - tbl_fea_mode 18590 short faddr_ind_disp_a5 - tbl_fea_mode 18591 short faddr_ind_disp_a6 - tbl_fea_mode 18592 short faddr_ind_disp_a7 - tbl_fea_mode 18593 18594 short faddr_ind_ext - tbl_fea_mode 18595 short faddr_ind_ext - tbl_fea_mode 18596 short faddr_ind_ext - tbl_fea_mode 18597 short faddr_ind_ext - tbl_fea_mode 18598 short faddr_ind_ext - tbl_fea_mode 18599 short faddr_ind_ext - tbl_fea_mode 18600 short faddr_ind_ext - tbl_fea_mode 18601 short faddr_ind_ext - tbl_fea_mode 18602 18603 short fabs_short - tbl_fea_mode 18604 short fabs_long - tbl_fea_mode 18605 short fpc_ind - tbl_fea_mode 18606 short fpc_ind_ext - tbl_fea_mode 18607 short tbl_fea_mode - tbl_fea_mode 18608 short tbl_fea_mode - tbl_fea_mode 18609 short tbl_fea_mode - tbl_fea_mode 18610 short tbl_fea_mode - tbl_fea_mode 18611 18612################################### 18613# Address register indirect: (An) # 18614################################### 18615faddr_ind_a0: 18616 mov.l EXC_DREGS+0x8(%a6),%a0 # Get current a0 18617 rts 18618 18619faddr_ind_a1: 18620 mov.l EXC_DREGS+0xc(%a6),%a0 # Get current a1 18621 rts 18622 18623faddr_ind_a2: 18624 mov.l %a2,%a0 # Get current a2 18625 rts 18626 18627faddr_ind_a3: 18628 mov.l %a3,%a0 # Get current a3 18629 rts 18630 18631faddr_ind_a4: 18632 mov.l %a4,%a0 # Get current a4 18633 rts 18634 18635faddr_ind_a5: 18636 mov.l %a5,%a0 # Get current a5 18637 rts 18638 18639faddr_ind_a6: 18640 mov.l (%a6),%a0 # Get current a6 18641 rts 18642 18643faddr_ind_a7: 18644 mov.l EXC_A7(%a6),%a0 
# Get current a7 18645 rts 18646 18647##################################################### 18648# Address register indirect w/ postincrement: (An)+ # 18649##################################################### 18650faddr_ind_p_a0: 18651 mov.l EXC_DREGS+0x8(%a6),%d0 # Get current a0 18652 mov.l %d0,%d1 18653 add.l %a0,%d1 # Increment 18654 mov.l %d1,EXC_DREGS+0x8(%a6) # Save incr value 18655 mov.l %d0,%a0 18656 rts 18657 18658faddr_ind_p_a1: 18659 mov.l EXC_DREGS+0xc(%a6),%d0 # Get current a1 18660 mov.l %d0,%d1 18661 add.l %a0,%d1 # Increment 18662 mov.l %d1,EXC_DREGS+0xc(%a6) # Save incr value 18663 mov.l %d0,%a0 18664 rts 18665 18666faddr_ind_p_a2: 18667 mov.l %a2,%d0 # Get current a2 18668 mov.l %d0,%d1 18669 add.l %a0,%d1 # Increment 18670 mov.l %d1,%a2 # Save incr value 18671 mov.l %d0,%a0 18672 rts 18673 18674faddr_ind_p_a3: 18675 mov.l %a3,%d0 # Get current a3 18676 mov.l %d0,%d1 18677 add.l %a0,%d1 # Increment 18678 mov.l %d1,%a3 # Save incr value 18679 mov.l %d0,%a0 18680 rts 18681 18682faddr_ind_p_a4: 18683 mov.l %a4,%d0 # Get current a4 18684 mov.l %d0,%d1 18685 add.l %a0,%d1 # Increment 18686 mov.l %d1,%a4 # Save incr value 18687 mov.l %d0,%a0 18688 rts 18689 18690faddr_ind_p_a5: 18691 mov.l %a5,%d0 # Get current a5 18692 mov.l %d0,%d1 18693 add.l %a0,%d1 # Increment 18694 mov.l %d1,%a5 # Save incr value 18695 mov.l %d0,%a0 18696 rts 18697 18698faddr_ind_p_a6: 18699 mov.l (%a6),%d0 # Get current a6 18700 mov.l %d0,%d1 18701 add.l %a0,%d1 # Increment 18702 mov.l %d1,(%a6) # Save incr value 18703 mov.l %d0,%a0 18704 rts 18705 18706faddr_ind_p_a7: 18707 mov.b &mia7_flg,SPCOND_FLG(%a6) # set "special case" flag 18708 18709 mov.l EXC_A7(%a6),%d0 # Get current a7 18710 mov.l %d0,%d1 18711 add.l %a0,%d1 # Increment 18712 mov.l %d1,EXC_A7(%a6) # Save incr value 18713 mov.l %d0,%a0 18714 rts 18715 18716#################################################### 18717# Address register indirect w/ predecrement: -(An) # 18718#################################################### 18719faddr_ind_m_a0: 18720 mov.l EXC_DREGS+0x8(%a6),%d0 # Get current a0 18721 sub.l %a0,%d0 # Decrement 18722 mov.l %d0,EXC_DREGS+0x8(%a6) # Save decr value 18723 mov.l %d0,%a0 18724 rts 18725 18726faddr_ind_m_a1: 18727 mov.l EXC_DREGS+0xc(%a6),%d0 # Get current a1 18728 sub.l %a0,%d0 # Decrement 18729 mov.l %d0,EXC_DREGS+0xc(%a6) # Save decr value 18730 mov.l %d0,%a0 18731 rts 18732 18733faddr_ind_m_a2: 18734 mov.l %a2,%d0 # Get current a2 18735 sub.l %a0,%d0 # Decrement 18736 mov.l %d0,%a2 # Save decr value 18737 mov.l %d0,%a0 18738 rts 18739 18740faddr_ind_m_a3: 18741 mov.l %a3,%d0 # Get current a3 18742 sub.l %a0,%d0 # Decrement 18743 mov.l %d0,%a3 # Save decr value 18744 mov.l %d0,%a0 18745 rts 18746 18747faddr_ind_m_a4: 18748 mov.l %a4,%d0 # Get current a4 18749 sub.l %a0,%d0 # Decrement 18750 mov.l %d0,%a4 # Save decr value 18751 mov.l %d0,%a0 18752 rts 18753 18754faddr_ind_m_a5: 18755 mov.l %a5,%d0 # Get current a5 18756 sub.l %a0,%d0 # Decrement 18757 mov.l %d0,%a5 # Save decr value 18758 mov.l %d0,%a0 18759 rts 18760 18761faddr_ind_m_a6: 18762 mov.l (%a6),%d0 # Get current a6 18763 sub.l %a0,%d0 # Decrement 18764 mov.l %d0,(%a6) # Save decr value 18765 mov.l %d0,%a0 18766 rts 18767 18768faddr_ind_m_a7: 18769 mov.b &mda7_flg,SPCOND_FLG(%a6) # set "special case" flag 18770 18771 mov.l EXC_A7(%a6),%d0 # Get current a7 18772 sub.l %a0,%d0 # Decrement 18773 mov.l %d0,EXC_A7(%a6) # Save decr value 18774 mov.l %d0,%a0 18775 rts 18776 18777######################################################## 18778# Address register 
indirect w/ displacement: (d16, An) # 18779######################################################## 18780faddr_ind_disp_a0: 18781 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 18782 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr 18783 bsr.l _imem_read_word 18784 18785 tst.l %d1 # did ifetch fail? 18786 bne.l iea_iacc # yes 18787 18788 mov.w %d0,%a0 # sign extend displacement 18789 18790 add.l EXC_DREGS+0x8(%a6),%a0 # a0 + d16 18791 rts 18792 18793faddr_ind_disp_a1: 18794 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 18795 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr 18796 bsr.l _imem_read_word 18797 18798 tst.l %d1 # did ifetch fail? 18799 bne.l iea_iacc # yes 18800 18801 mov.w %d0,%a0 # sign extend displacement 18802 18803 add.l EXC_DREGS+0xc(%a6),%a0 # a1 + d16 18804 rts 18805 18806faddr_ind_disp_a2: 18807 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 18808 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr 18809 bsr.l _imem_read_word 18810 18811 tst.l %d1 # did ifetch fail? 18812 bne.l iea_iacc # yes 18813 18814 mov.w %d0,%a0 # sign extend displacement 18815 18816 add.l %a2,%a0 # a2 + d16 18817 rts 18818 18819faddr_ind_disp_a3: 18820 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 18821 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr 18822 bsr.l _imem_read_word 18823 18824 tst.l %d1 # did ifetch fail? 18825 bne.l iea_iacc # yes 18826 18827 mov.w %d0,%a0 # sign extend displacement 18828 18829 add.l %a3,%a0 # a3 + d16 18830 rts 18831 18832faddr_ind_disp_a4: 18833 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 18834 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr 18835 bsr.l _imem_read_word 18836 18837 tst.l %d1 # did ifetch fail? 18838 bne.l iea_iacc # yes 18839 18840 mov.w %d0,%a0 # sign extend displacement 18841 18842 add.l %a4,%a0 # a4 + d16 18843 rts 18844 18845faddr_ind_disp_a5: 18846 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 18847 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr 18848 bsr.l _imem_read_word 18849 18850 tst.l %d1 # did ifetch fail? 18851 bne.l iea_iacc # yes 18852 18853 mov.w %d0,%a0 # sign extend displacement 18854 18855 add.l %a5,%a0 # a5 + d16 18856 rts 18857 18858faddr_ind_disp_a6: 18859 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 18860 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr 18861 bsr.l _imem_read_word 18862 18863 tst.l %d1 # did ifetch fail? 18864 bne.l iea_iacc # yes 18865 18866 mov.w %d0,%a0 # sign extend displacement 18867 18868 add.l (%a6),%a0 # a6 + d16 18869 rts 18870 18871faddr_ind_disp_a7: 18872 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 18873 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr 18874 bsr.l _imem_read_word 18875 18876 tst.l %d1 # did ifetch fail? 
18877 bne.l iea_iacc # yes 18878 18879 mov.w %d0,%a0 # sign extend displacement 18880 18881 add.l EXC_A7(%a6),%a0 # a7 + d16 18882 rts 18883 18884######################################################################## 18885# Address register indirect w/ index(8-bit displacement): (d8, An, Xn) # 18886# " " " w/ " (base displacement): (bd, An, Xn) # 18887# Memory indirect postindexed: ([bd, An], Xn, od) # 18888# Memory indirect preindexed: ([bd, An, Xn], od) # 18889######################################################################## 18890faddr_ind_ext: 18891 addq.l &0x8,%d1 18892 bsr.l fetch_dreg # fetch base areg 18893 mov.l %d0,-(%sp) 18894 18895 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 18896 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr 18897 bsr.l _imem_read_word # fetch extword in d0 18898 18899 tst.l %d1 # did ifetch fail? 18900 bne.l iea_iacc # yes 18901 18902 mov.l (%sp)+,%a0 18903 18904 btst &0x8,%d0 18905 bne.w fcalc_mem_ind 18906 18907 mov.l %d0,L_SCR1(%a6) # hold opword 18908 18909 mov.l %d0,%d1 18910 rol.w &0x4,%d1 18911 andi.w &0xf,%d1 # extract index regno 18912 18913# count on fetch_dreg() not to alter a0... 18914 bsr.l fetch_dreg # fetch index 18915 18916 mov.l %d2,-(%sp) # save d2 18917 mov.l L_SCR1(%a6),%d2 # fetch opword 18918 18919 btst &0xb,%d2 # is it word or long? 18920 bne.b faii8_long 18921 ext.l %d0 # sign extend word index 18922faii8_long: 18923 mov.l %d2,%d1 18924 rol.w &0x7,%d1 18925 andi.l &0x3,%d1 # extract scale value 18926 18927 lsl.l %d1,%d0 # shift index by scale 18928 18929 extb.l %d2 # sign extend displacement 18930 add.l %d2,%d0 # index + disp 18931 add.l %d0,%a0 # An + (index + disp) 18932 18933 mov.l (%sp)+,%d2 # restore old d2 18934 rts 18935 18936########################### 18937# Absolute short: (XXX).W # 18938########################### 18939fabs_short: 18940 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 18941 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr 18942 bsr.l _imem_read_word # fetch short address 18943 18944 tst.l %d1 # did ifetch fail? 18945 bne.l iea_iacc # yes 18946 18947 mov.w %d0,%a0 # return <ea> in a0 18948 rts 18949 18950########################## 18951# Absolute long: (XXX).L # 18952########################## 18953fabs_long: 18954 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 18955 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr 18956 bsr.l _imem_read_long # fetch long address 18957 18958 tst.l %d1 # did ifetch fail? 18959 bne.l iea_iacc # yes 18960 18961 mov.l %d0,%a0 # return <ea> in a0 18962 rts 18963 18964####################################################### 18965# Program counter indirect w/ displacement: (d16, PC) # 18966####################################################### 18967fpc_ind: 18968 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 18969 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr 18970 bsr.l _imem_read_word # fetch word displacement 18971 18972 tst.l %d1 # did ifetch fail? 18973 bne.l iea_iacc # yes 18974 18975 mov.w %d0,%a0 # sign extend displacement 18976 18977 add.l EXC_EXTWPTR(%a6),%a0 # pc + d16 18978 18979# _imem_read_word() increased the extwptr by 2. need to adjust here. 
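#
# i.e. the effective address being formed is (illustrative sketch):
#
#	ea = sign_ext16(d16) + (EXC_EXTWPTR - 2)
#
# where (EXC_EXTWPTR - 2) is the address of the displacement word just
# fetched, which is the PC value a (d16,PC) mode is defined to use; the
# subq.l below performs that -2 adjustment.
#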
18980 subq.l &0x2,%a0 # adjust <ea> 18981 rts 18982 18983########################################################## 18984# PC indirect w/ index(8-bit displacement): (d8, PC, An) # 18985# " " w/ " (base displacement): (bd, PC, An) # 18986# PC memory indirect postindexed: ([bd, PC], Xn, od) # 18987# PC memory indirect preindexed: ([bd, PC, Xn], od) # 18988########################################################## 18989fpc_ind_ext: 18990 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 18991 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr 18992 bsr.l _imem_read_word # fetch ext word 18993 18994 tst.l %d1 # did ifetch fail? 18995 bne.l iea_iacc # yes 18996 18997 mov.l EXC_EXTWPTR(%a6),%a0 # put base in a0 18998 subq.l &0x2,%a0 # adjust base 18999 19000 btst &0x8,%d0 # is disp only 8 bits? 19001 bne.w fcalc_mem_ind # calc memory indirect 19002 19003 mov.l %d0,L_SCR1(%a6) # store opword 19004 19005 mov.l %d0,%d1 # make extword copy 19006 rol.w &0x4,%d1 # rotate reg num into place 19007 andi.w &0xf,%d1 # extract register number 19008 19009# count on fetch_dreg() not to alter a0... 19010 bsr.l fetch_dreg # fetch index 19011 19012 mov.l %d2,-(%sp) # save d2 19013 mov.l L_SCR1(%a6),%d2 # fetch opword 19014 19015 btst &0xb,%d2 # is index word or long? 19016 bne.b fpii8_long # long 19017 ext.l %d0 # sign extend word index 19018fpii8_long: 19019 mov.l %d2,%d1 19020 rol.w &0x7,%d1 # rotate scale value into place 19021 andi.l &0x3,%d1 # extract scale value 19022 19023 lsl.l %d1,%d0 # shift index by scale 19024 19025 extb.l %d2 # sign extend displacement 19026 add.l %d2,%d0 # disp + index 19027 add.l %d0,%a0 # An + (index + disp) 19028 19029 mov.l (%sp)+,%d2 # restore temp register 19030 rts 19031 19032# d2 = index 19033# d3 = base 19034# d4 = od 19035# d5 = extword 19036fcalc_mem_ind: 19037 btst &0x6,%d0 # is the index suppressed? 19038 beq.b fcalc_index 19039 19040 movm.l &0x3c00,-(%sp) # save d2-d5 19041 19042 mov.l %d0,%d5 # put extword in d5 19043 mov.l %a0,%d3 # put base in d3 19044 19045 clr.l %d2 # yes, so index = 0 19046 bra.b fbase_supp_ck 19047 19048# index: 19049fcalc_index: 19050 mov.l %d0,L_SCR1(%a6) # save d0 (opword) 19051 bfextu %d0{&16:&4},%d1 # fetch dreg index 19052 bsr.l fetch_dreg 19053 19054 movm.l &0x3c00,-(%sp) # save d2-d5 19055 mov.l %d0,%d2 # put index in d2 19056 mov.l L_SCR1(%a6),%d5 19057 mov.l %a0,%d3 19058 19059 btst &0xb,%d5 # is index word or long? 19060 bne.b fno_ext 19061 ext.l %d2 19062 19063fno_ext: 19064 bfextu %d5{&21:&2},%d0 19065 lsl.l %d0,%d2 19066 19067# base address (passed as parameter in d3): 19068# we clear the value here if it should actually be suppressed. 19069fbase_supp_ck: 19070 btst &0x7,%d5 # is the bd suppressed? 19071 beq.b fno_base_sup 19072 clr.l %d3 19073 19074# base displacement: 19075fno_base_sup: 19076 bfextu %d5{&26:&2},%d0 # get bd size 19077# beq.l fmovm_error # if (size == 0) it's reserved 19078 19079 cmpi.b %d0,&0x2 19080 blt.b fno_bd 19081 beq.b fget_word_bd 19082 19083 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 19084 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr 19085 bsr.l _imem_read_long 19086 19087 tst.l %d1 # did ifetch fail? 19088 bne.l fcea_iacc # yes 19089 19090 bra.b fchk_ind 19091 19092fget_word_bd: 19093 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 19094 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr 19095 bsr.l _imem_read_word 19096 19097 tst.l %d1 # did ifetch fail? 
19098 bne.l fcea_iacc # yes 19099 19100 ext.l %d0 # sign extend bd 19101 19102fchk_ind: 19103 add.l %d0,%d3 # base += bd 19104 19105# outer displacement: 19106fno_bd: 19107 bfextu %d5{&30:&2},%d0 # is od suppressed? 19108 beq.w faii_bd 19109 19110 cmpi.b %d0,&0x2 19111 blt.b fnull_od 19112 beq.b fword_od 19113 19114 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 19115 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr 19116 bsr.l _imem_read_long 19117 19118 tst.l %d1 # did ifetch fail? 19119 bne.l fcea_iacc # yes 19120 19121 bra.b fadd_them 19122 19123fword_od: 19124 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 19125 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr 19126 bsr.l _imem_read_word 19127 19128 tst.l %d1 # did ifetch fail? 19129 bne.l fcea_iacc # yes 19130 19131 ext.l %d0 # sign extend od 19132 bra.b fadd_them 19133 19134fnull_od: 19135 clr.l %d0 19136 19137fadd_them: 19138 mov.l %d0,%d4 19139 19140 btst &0x2,%d5 # pre or post indexing? 19141 beq.b fpre_indexed 19142 19143 mov.l %d3,%a0 19144 bsr.l _dmem_read_long 19145 19146 tst.l %d1 # did dfetch fail? 19147 bne.w fcea_err # yes 19148 19149 add.l %d2,%d0 # <ea> += index 19150 add.l %d4,%d0 # <ea> += od 19151 bra.b fdone_ea 19152 19153fpre_indexed: 19154 add.l %d2,%d3 # preindexing 19155 mov.l %d3,%a0 19156 bsr.l _dmem_read_long 19157 19158 tst.l %d1 # did dfetch fail? 19159 bne.w fcea_err # yes 19160 19161 add.l %d4,%d0 # ea += od 19162 bra.b fdone_ea 19163 19164faii_bd: 19165 add.l %d2,%d3 # ea = (base + bd) + index 19166 mov.l %d3,%d0 19167fdone_ea: 19168 mov.l %d0,%a0 19169 19170 movm.l (%sp)+,&0x003c # restore d2-d5 19171 rts 19172 19173######################################################### 19174fcea_err: 19175 mov.l %d3,%a0 19176 19177 movm.l (%sp)+,&0x003c # restore d2-d5 19178 mov.w &0x0101,%d0 19179 bra.l iea_dacc 19180 19181fcea_iacc: 19182 movm.l (%sp)+,&0x003c # restore d2-d5 19183 bra.l iea_iacc 19184 19185fmovm_out_err: 19186 bsr.l restore 19187 mov.w &0x00e1,%d0 19188 bra.b fmovm_err 19189 19190fmovm_in_err: 19191 bsr.l restore 19192 mov.w &0x0161,%d0 19193 19194fmovm_err: 19195 mov.l L_SCR1(%a6),%a0 19196 bra.l iea_dacc 19197 19198######################################################################### 19199# XDEF **************************************************************** # 19200# fmovm_ctrl(): emulate fmovm.l of control registers instr # 19201# # 19202# XREF **************************************************************** # 19203# _imem_read_long() - read longword from memory # 19204# iea_iacc() - _imem_read_long() failed; error recovery # 19205# # 19206# INPUT *************************************************************** # 19207# None # 19208# # 19209# OUTPUT ************************************************************** # 19210# If _imem_read_long() doesn't fail: # 19211# USER_FPCR(a6) = new FPCR value # 19212# USER_FPSR(a6) = new FPSR value # 19213# USER_FPIAR(a6) = new FPIAR value # 19214# # 19215# ALGORITHM *********************************************************** # 19216# Decode the instruction type by looking at the extension word # 19217# in order to see how many control registers to fetch from memory. # 19218# Fetch them using _imem_read_long(). If this fetch fails, exit through # 19219# the special access error exit handler iea_iacc(). 
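#	(for example, an extension word of 0x9800 selects FPCR and FPSR;	#
#	 the code below compares its high byte against 0x9c, 0x98, and		#
#	 0x94 to pick out the multi-register cases.)				#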
# 19220# # 19221# Instruction word decoding: # 19222# # 19223# fmovem.l #<data>, {FPIAR&|FPCR&|FPSR} # 19224# # 19225# WORD1 WORD2 # 19226# 1111 0010 00 111100 100$ $$00 0000 0000 # 19227# # 19228# $$$ (100): FPCR # 19229# (010): FPSR # 19230# (001): FPIAR # 19231# (000): FPIAR # 19232# # 19233######################################################################### 19234 19235 global fmovm_ctrl 19236fmovm_ctrl: 19237 mov.b EXC_EXTWORD(%a6),%d0 # fetch reg select bits 19238 cmpi.b %d0,&0x9c # fpcr & fpsr & fpiar ? 19239 beq.w fctrl_in_7 # yes 19240 cmpi.b %d0,&0x98 # fpcr & fpsr ? 19241 beq.w fctrl_in_6 # yes 19242 cmpi.b %d0,&0x94 # fpcr & fpiar ? 19243 beq.b fctrl_in_5 # yes 19244 19245# fmovem.l #<data>, fpsr/fpiar 19246fctrl_in_3: 19247 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 19248 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr 19249 bsr.l _imem_read_long # fetch FPSR from mem 19250 19251 tst.l %d1 # did ifetch fail? 19252 bne.l iea_iacc # yes 19253 19254 mov.l %d0,USER_FPSR(%a6) # store new FPSR to stack 19255 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 19256 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr 19257 bsr.l _imem_read_long # fetch FPIAR from mem 19258 19259 tst.l %d1 # did ifetch fail? 19260 bne.l iea_iacc # yes 19261 19262 mov.l %d0,USER_FPIAR(%a6) # store new FPIAR to stack 19263 rts 19264 19265# fmovem.l #<data>, fpcr/fpiar 19266fctrl_in_5: 19267 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 19268 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr 19269 bsr.l _imem_read_long # fetch FPCR from mem 19270 19271 tst.l %d1 # did ifetch fail? 19272 bne.l iea_iacc # yes 19273 19274 mov.l %d0,USER_FPCR(%a6) # store new FPCR to stack 19275 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 19276 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr 19277 bsr.l _imem_read_long # fetch FPIAR from mem 19278 19279 tst.l %d1 # did ifetch fail? 19280 bne.l iea_iacc # yes 19281 19282 mov.l %d0,USER_FPIAR(%a6) # store new FPIAR to stack 19283 rts 19284 19285# fmovem.l #<data>, fpcr/fpsr 19286fctrl_in_6: 19287 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 19288 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr 19289 bsr.l _imem_read_long # fetch FPCR from mem 19290 19291 tst.l %d1 # did ifetch fail? 19292 bne.l iea_iacc # yes 19293 19294 mov.l %d0,USER_FPCR(%a6) # store new FPCR to mem 19295 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 19296 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr 19297 bsr.l _imem_read_long # fetch FPSR from mem 19298 19299 tst.l %d1 # did ifetch fail? 19300 bne.l iea_iacc # yes 19301 19302 mov.l %d0,USER_FPSR(%a6) # store new FPSR to mem 19303 rts 19304 19305# fmovem.l #<data>, fpcr/fpsr/fpiar 19306fctrl_in_7: 19307 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 19308 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr 19309 bsr.l _imem_read_long # fetch FPCR from mem 19310 19311 tst.l %d1 # did ifetch fail? 19312 bne.l iea_iacc # yes 19313 19314 mov.l %d0,USER_FPCR(%a6) # store new FPCR to mem 19315 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 19316 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr 19317 bsr.l _imem_read_long # fetch FPSR from mem 19318 19319 tst.l %d1 # did ifetch fail? 19320 bne.l iea_iacc # yes 19321 19322 mov.l %d0,USER_FPSR(%a6) # store new FPSR to mem 19323 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 19324 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr 19325 bsr.l _imem_read_long # fetch FPIAR from mem 19326 19327 tst.l %d1 # did ifetch fail? 
19328 bne.l iea_iacc # yes 19329 19330 mov.l %d0,USER_FPIAR(%a6) # store new FPIAR to mem 19331 rts 19332 19333######################################################################### 19334# XDEF **************************************************************** # 19335# _dcalc_ea(): calc correct <ea> from <ea> stacked on exception # 19336# # 19337# XREF **************************************************************** # 19338# inc_areg() - increment an address register # 19339# dec_areg() - decrement an address register # 19340# # 19341# INPUT *************************************************************** # 19342# d0 = number of bytes to adjust <ea> by # 19343# # 19344# OUTPUT ************************************************************** # 19345# None # 19346# # 19347# ALGORITHM *********************************************************** # 19348# "Dummy" CALCulate Effective Address: # 19349# The stacked <ea> for FP unimplemented instructions and opclass # 19350# two packed instructions is correct with the exception of... # 19351# # 19352# 1) -(An) : The register is not updated regardless of size. # 19353# Also, for extended precision and packed, the # 19354# stacked <ea> value is 8 bytes too big # 19355# 2) (An)+ : The register is not updated. # 19356# 3) #<data> : The upper longword of the immediate operand is # 19357# stacked b,w,l and s sizes are completely stacked. # 19358# d,x, and p are not. # 19359# # 19360######################################################################### 19361 19362 global _dcalc_ea 19363_dcalc_ea: 19364 mov.l %d0, %a0 # move # bytes to %a0 19365 19366 mov.b 1+EXC_OPWORD(%a6), %d0 # fetch opcode word 19367 mov.l %d0, %d1 # make a copy 19368 19369 andi.w &0x38, %d0 # extract mode field 19370 andi.l &0x7, %d1 # extract reg field 19371 19372 cmpi.b %d0,&0x18 # is mode (An)+ ? 19373 beq.b dcea_pi # yes 19374 19375 cmpi.b %d0,&0x20 # is mode -(An) ? 19376 beq.b dcea_pd # yes 19377 19378 or.w %d1,%d0 # concat mode,reg 19379 cmpi.b %d0,&0x3c # is mode #<data>? 19380 19381 beq.b dcea_imm # yes 19382 19383 mov.l EXC_EA(%a6),%a0 # return <ea> 19384 rts 19385 19386# need to set immediate data flag here since we'll need to do 19387# an imem_read to fetch this later. 19388dcea_imm: 19389 mov.b &immed_flg,SPCOND_FLG(%a6) 19390 lea ([USER_FPIAR,%a6],0x4),%a0 # no; return <ea> 19391 rts 19392 19393# here, the <ea> is stacked correctly. however, we must update the 19394# address register... 19395dcea_pi: 19396 mov.l %a0,%d0 # pass amt to inc by 19397 bsr.l inc_areg # inc addr register 19398 19399 mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct 19400 rts 19401 19402# the <ea> is stacked correctly for all but extended and packed which 19403# the <ea>s are 8 bytes too large. 19404# it would make no sense to have a pre-decrement to a7 in supervisor 19405# mode so we don't even worry about this tricky case here : ) 19406dcea_pd: 19407 mov.l %a0,%d0 # pass amt to dec by 19408 bsr.l dec_areg # dec addr register 19409 19410 mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct 19411 19412 cmpi.b %d0,&0xc # is opsize ext or packed? 19413 beq.b dcea_pd2 # yes 19414 rts 19415dcea_pd2: 19416 sub.l &0x8,%a0 # correct <ea> 19417 mov.l %a0,EXC_EA(%a6) # put correct <ea> on stack 19418 rts 19419 19420######################################################################### 19421# XDEF **************************************************************** # 19422# _calc_ea_fout(): calculate correct stacked <ea> for extended # 19423# and packed data opclass 3 operations. 
# 19424# # 19425# XREF **************************************************************** # 19426# None # 19427# # 19428# INPUT *************************************************************** # 19429# None # 19430# # 19431# OUTPUT ************************************************************** # 19432# a0 = return correct effective address # 19433# # 19434# ALGORITHM *********************************************************** # 19435# For opclass 3 extended and packed data operations, the <ea> # 19436# stacked for the exception is incorrect for -(an) and (an)+ addressing # 19437# modes. Also, while we're at it, the index register itself must get # 19438# updated. # 19439# So, for -(an), we must subtract 8 off of the stacked <ea> value # 19440# and return that value as the correct <ea> and store that value in An. # 19441# For (an)+, the stacked <ea> is correct but we must adjust An by +12. # 19442# # 19443######################################################################### 19444 19445# This calc_ea is currently used to retrieve the correct <ea> 19446# for fmove outs of type extended and packed. 19447 global _calc_ea_fout 19448_calc_ea_fout: 19449 mov.b 1+EXC_OPWORD(%a6),%d0 # fetch opcode word 19450 mov.l %d0,%d1 # make a copy 19451 19452 andi.w &0x38,%d0 # extract mode field 19453 andi.l &0x7,%d1 # extract reg field 19454 19455 cmpi.b %d0,&0x18 # is mode (An)+ ? 19456 beq.b ceaf_pi # yes 19457 19458 cmpi.b %d0,&0x20 # is mode -(An) ? 19459 beq.w ceaf_pd # yes 19460 19461 mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct 19462 rts 19463 19464# (An)+ : extended and packed fmove out 19465# : stacked <ea> is correct 19466# : "An" not updated 19467ceaf_pi: 19468 mov.w (tbl_ceaf_pi.b,%pc,%d1.w*2),%d1 19469 mov.l EXC_EA(%a6),%a0 19470 jmp (tbl_ceaf_pi.b,%pc,%d1.w*1) 19471 19472 swbeg &0x8 19473tbl_ceaf_pi: 19474 short ceaf_pi0 - tbl_ceaf_pi 19475 short ceaf_pi1 - tbl_ceaf_pi 19476 short ceaf_pi2 - tbl_ceaf_pi 19477 short ceaf_pi3 - tbl_ceaf_pi 19478 short ceaf_pi4 - tbl_ceaf_pi 19479 short ceaf_pi5 - tbl_ceaf_pi 19480 short ceaf_pi6 - tbl_ceaf_pi 19481 short ceaf_pi7 - tbl_ceaf_pi 19482 19483ceaf_pi0: 19484 addi.l &0xc,EXC_DREGS+0x8(%a6) 19485 rts 19486ceaf_pi1: 19487 addi.l &0xc,EXC_DREGS+0xc(%a6) 19488 rts 19489ceaf_pi2: 19490 add.l &0xc,%a2 19491 rts 19492ceaf_pi3: 19493 add.l &0xc,%a3 19494 rts 19495ceaf_pi4: 19496 add.l &0xc,%a4 19497 rts 19498ceaf_pi5: 19499 add.l &0xc,%a5 19500 rts 19501ceaf_pi6: 19502 addi.l &0xc,EXC_A6(%a6) 19503 rts 19504ceaf_pi7: 19505 mov.b &mia7_flg,SPCOND_FLG(%a6) 19506 addi.l &0xc,EXC_A7(%a6) 19507 rts 19508 19509# -(An) : extended and packed fmove out 19510# : stacked <ea> = actual <ea> + 8 19511# : "An" not updated 19512ceaf_pd: 19513 mov.w (tbl_ceaf_pd.b,%pc,%d1.w*2),%d1 19514 mov.l EXC_EA(%a6),%a0 19515 sub.l &0x8,%a0 19516 sub.l &0x8,EXC_EA(%a6) 19517 jmp (tbl_ceaf_pd.b,%pc,%d1.w*1) 19518 19519 swbeg &0x8 19520tbl_ceaf_pd: 19521 short ceaf_pd0 - tbl_ceaf_pd 19522 short ceaf_pd1 - tbl_ceaf_pd 19523 short ceaf_pd2 - tbl_ceaf_pd 19524 short ceaf_pd3 - tbl_ceaf_pd 19525 short ceaf_pd4 - tbl_ceaf_pd 19526 short ceaf_pd5 - tbl_ceaf_pd 19527 short ceaf_pd6 - tbl_ceaf_pd 19528 short ceaf_pd7 - tbl_ceaf_pd 19529 19530ceaf_pd0: 19531 mov.l %a0,EXC_DREGS+0x8(%a6) 19532 rts 19533ceaf_pd1: 19534 mov.l %a0,EXC_DREGS+0xc(%a6) 19535 rts 19536ceaf_pd2: 19537 mov.l %a0,%a2 19538 rts 19539ceaf_pd3: 19540 mov.l %a0,%a3 19541 rts 19542ceaf_pd4: 19543 mov.l %a0,%a4 19544 rts 19545ceaf_pd5: 19546 mov.l %a0,%a5 19547 rts 19548ceaf_pd6: 19549 mov.l %a0,EXC_A6(%a6) 19550 rts 
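# a7 as the -(an) destination is the tricky case: besides fixing the
# stacked <ea>, mda7_flg is set so that the caller (see fout_ext) routes
# the actual write through _mem_write2(), since a supervisor mode -(a7)
# store done here would land on top of the exception stack frame.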
19551ceaf_pd7: 19552 mov.l %a0,EXC_A7(%a6) 19553 mov.b &mda7_flg,SPCOND_FLG(%a6) 19554 rts 19555 19556######################################################################### 19557# XDEF **************************************************************** # 19558# _load_fop(): load operand for unimplemented FP exception # 19559# # 19560# XREF **************************************************************** # 19561# set_tag_x() - determine ext prec optype tag # 19562# set_tag_s() - determine sgl prec optype tag # 19563# set_tag_d() - determine dbl prec optype tag # 19564# unnorm_fix() - convert normalized number to denorm or zero # 19565# norm() - normalize a denormalized number # 19566# get_packed() - fetch a packed operand from memory # 19567# _dcalc_ea() - calculate <ea>, fixing An in process # 19568# # 19569# _imem_read_{word,long}() - read from instruction memory # 19570# _dmem_read() - read from data memory # 19571# _dmem_read_{byte,word,long}() - read from data memory # 19572# # 19573# facc_in_{b,w,l,d,x}() - mem read failed; special exit point # 19574# # 19575# INPUT *************************************************************** # 19576# None # 19577# # 19578# OUTPUT ************************************************************** # 19579# If memory access doesn't fail: # 19580# FP_SRC(a6) = source operand in extended precision # 19581# FP_DST(a6) = destination operand in extended precision # 19582# # 19583# ALGORITHM *********************************************************** # 19584# This is called from the Unimplemented FP exception handler in # 19585# order to load the source and maybe destination operand into # 19586# FP_SRC(a6) and FP_DST(a6). If the instruction was opclass zero, load # 19587# the source and destination from the FP register file. Set the optype # 19588# tags for both if dyadic, one for monadic. If a number is an UNNORM, # 19589# convert it to a DENORM or a ZERO. # 19590# If the instruction is opclass two (memory->reg), then fetch # 19591# the destination from the register file and the source operand from # 19592# memory. Tag and fix both as above w/ opclass zero instructions. # 19593# If the source operand is byte,word,long, or single, it may be # 19594# in the data register file. If it's actually out in memory, use one of # 19595# the mem_read() routines to fetch it. If the mem_read() access returns # 19596# a failing value, exit through the special facc_in() routine which # 19597# will create an access error exception frame from the current exception # 19598# frame. # 19599# Immediate data and regular data accesses are separated because # 19600# if an immediate data access fails, the resulting fault status # 19601# longword stacked for the access error exception must have the # 19602# instruction bit set. # 19603# # 19604######################################################################### 19605 19606 global _load_fop 19607_load_fop: 19608 19609# 15 13 12 10 9 7 6 0 19610# / \ / \ / \ / \ 19611# --------------------------------- 19612# | opclass | RX | RY | EXTENSION | (2nd word of general FP instruction) 19613# --------------------------------- 19614# 19615 19616# bfextu EXC_CMDREG(%a6){&0:&3}, %d0 # extract opclass 19617# cmpi.b %d0, &0x2 # which class is it? ('000,'010,'011) 19618# beq.w op010 # handle <ea> -> fpn 19619# bgt.w op011 # handle fpn -> <ea> 19620 19621# we're not using op011 for now... 
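# (the btst below checks bit 14 of the command word -- the middle bit of
#  the opclass field pictured above: 0 -> opclass '000 (reg -> reg),
#  1 -> opclass '010 (<ea> -> reg). opclass '011 is not expected here,
#  per the note above.)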
19622 btst &0x6,EXC_CMDREG(%a6) 19623 bne.b op010 19624 19625############################ 19626# OPCLASS '000: reg -> reg # 19627############################ 19628op000: 19629 mov.b 1+EXC_CMDREG(%a6),%d0 # fetch extension word lo 19630 btst &0x5,%d0 # testing extension bits 19631 beq.b op000_src # (bit 5 == 0) => monadic 19632 btst &0x4,%d0 # (bit 5 == 1) 19633 beq.b op000_dst # (bit 4 == 0) => dyadic 19634 and.w &0x007f,%d0 # extract extension bits {6:0} 19635 cmpi.w %d0,&0x0038 # is it an fcmp (dyadic) ? 19636 bne.b op000_src # it's an fcmp 19637 19638op000_dst: 19639 bfextu EXC_CMDREG(%a6){&6:&3}, %d0 # extract dst field 19640 bsr.l load_fpn2 # fetch dst fpreg into FP_DST 19641 19642 bsr.l set_tag_x # get dst optype tag 19643 19644 cmpi.b %d0, &UNNORM # is dst fpreg an UNNORM? 19645 beq.b op000_dst_unnorm # yes 19646op000_dst_cont: 19647 mov.b %d0, DTAG(%a6) # store the dst optype tag 19648 19649op000_src: 19650 bfextu EXC_CMDREG(%a6){&3:&3}, %d0 # extract src field 19651 bsr.l load_fpn1 # fetch src fpreg into FP_SRC 19652 19653 bsr.l set_tag_x # get src optype tag 19654 19655 cmpi.b %d0, &UNNORM # is src fpreg an UNNORM? 19656 beq.b op000_src_unnorm # yes 19657op000_src_cont: 19658 mov.b %d0, STAG(%a6) # store the src optype tag 19659 rts 19660 19661op000_dst_unnorm: 19662 bsr.l unnorm_fix # fix the dst UNNORM 19663 bra.b op000_dst_cont 19664op000_src_unnorm: 19665 bsr.l unnorm_fix # fix the src UNNORM 19666 bra.b op000_src_cont 19667 19668############################# 19669# OPCLASS '010: <ea> -> reg # 19670############################# 19671op010: 19672 mov.w EXC_CMDREG(%a6),%d0 # fetch extension word 19673 btst &0x5,%d0 # testing extension bits 19674 beq.b op010_src # (bit 5 == 0) => monadic 19675 btst &0x4,%d0 # (bit 5 == 1) 19676 beq.b op010_dst # (bit 4 == 0) => dyadic 19677 and.w &0x007f,%d0 # extract extension bits {6:0} 19678 cmpi.w %d0,&0x0038 # is it an fcmp (dyadic) ? 19679 bne.b op010_src # it's an fcmp 19680 19681op010_dst: 19682 bfextu EXC_CMDREG(%a6){&6:&3}, %d0 # extract dst field 19683 bsr.l load_fpn2 # fetch dst fpreg ptr 19684 19685 bsr.l set_tag_x # get dst type tag 19686 19687 cmpi.b %d0, &UNNORM # is dst fpreg an UNNORM? 19688 beq.b op010_dst_unnorm # yes 19689op010_dst_cont: 19690 mov.b %d0, DTAG(%a6) # store the dst optype tag 19691 19692op010_src: 19693 bfextu EXC_CMDREG(%a6){&3:&3}, %d0 # extract src type field 19694 19695 bfextu EXC_OPWORD(%a6){&10:&3}, %d1 # extract <ea> mode field 19696 bne.w fetch_from_mem # src op is in memory 19697 19698op010_dreg: 19699 clr.b STAG(%a6) # either NORM or ZERO 19700 bfextu EXC_OPWORD(%a6){&13:&3}, %d1 # extract src reg field 19701 19702 mov.w (tbl_op010_dreg.b,%pc,%d0.w*2), %d0 # jmp based on optype 19703 jmp (tbl_op010_dreg.b,%pc,%d0.w*1) # fetch src from dreg 19704 19705op010_dst_unnorm: 19706 bsr.l unnorm_fix # fix the dst UNNORM 19707 bra.b op010_dst_cont 19708 19709 swbeg &0x8 19710tbl_op010_dreg: 19711 short opd_long - tbl_op010_dreg 19712 short opd_sgl - tbl_op010_dreg 19713 short tbl_op010_dreg - tbl_op010_dreg 19714 short tbl_op010_dreg - tbl_op010_dreg 19715 short opd_word - tbl_op010_dreg 19716 short tbl_op010_dreg - tbl_op010_dreg 19717 short opd_byte - tbl_op010_dreg 19718 short tbl_op010_dreg - tbl_op010_dreg 19719 19720# 19721# LONG: can be either NORM or ZERO... 
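# (a data register holds a 32-bit two's complement integer, so after the
#  fmov.l below converts it to extended precision the result can only be
#  a NORM or a ZERO; the fbeq catches the ZERO case and fixes STAG.)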
19722# 19723opd_long: 19724 bsr.l fetch_dreg # fetch long in d0 19725 fmov.l %d0, %fp0 # load a long 19726 fmovm.x &0x80, FP_SRC(%a6) # return src op in FP_SRC 19727 fbeq.w opd_long_zero # long is a ZERO 19728 rts 19729opd_long_zero: 19730 mov.b &ZERO, STAG(%a6) # set ZERO optype flag 19731 rts 19732 19733# 19734# WORD: can be either NORM or ZERO... 19735# 19736opd_word: 19737 bsr.l fetch_dreg # fetch word in d0 19738 fmov.w %d0, %fp0 # load a word 19739 fmovm.x &0x80, FP_SRC(%a6) # return src op in FP_SRC 19740 fbeq.w opd_word_zero # WORD is a ZERO 19741 rts 19742opd_word_zero: 19743 mov.b &ZERO, STAG(%a6) # set ZERO optype flag 19744 rts 19745 19746# 19747# BYTE: can be either NORM or ZERO... 19748# 19749opd_byte: 19750 bsr.l fetch_dreg # fetch word in d0 19751 fmov.b %d0, %fp0 # load a byte 19752 fmovm.x &0x80, FP_SRC(%a6) # return src op in FP_SRC 19753 fbeq.w opd_byte_zero # byte is a ZERO 19754 rts 19755opd_byte_zero: 19756 mov.b &ZERO, STAG(%a6) # set ZERO optype flag 19757 rts 19758 19759# 19760# SGL: can be either NORM, DENORM, ZERO, INF, QNAN or SNAN but not UNNORM 19761# 19762# separate SNANs and DENORMs so they can be loaded w/ special care. 19763# all others can simply be moved "in" using fmove. 19764# 19765opd_sgl: 19766 bsr.l fetch_dreg # fetch sgl in d0 19767 mov.l %d0,L_SCR1(%a6) 19768 19769 lea L_SCR1(%a6), %a0 # pass: ptr to the sgl 19770 bsr.l set_tag_s # determine sgl type 19771 mov.b %d0, STAG(%a6) # save the src tag 19772 19773 cmpi.b %d0, &SNAN # is it an SNAN? 19774 beq.w get_sgl_snan # yes 19775 19776 cmpi.b %d0, &DENORM # is it a DENORM? 19777 beq.w get_sgl_denorm # yes 19778 19779 fmov.s (%a0), %fp0 # no, so can load it regular 19780 fmovm.x &0x80, FP_SRC(%a6) # return src op in FP_SRC 19781 rts 19782 19783############################################################################## 19784 19785######################################################################### 19786# fetch_from_mem(): # 19787# - src is out in memory. must: # 19788# (1) calc ea - must read AFTER you know the src type since # 19789# if the ea is -() or ()+, need to know # of bytes. # 19790# (2) read it in from either user or supervisor space # 19791# (3) if (b || w || l) then simply read in # 19792# if (s || d || x) then check for SNAN,UNNORM,DENORM # 19793# if (packed) then punt for now # 19794# INPUT: # 19795# %d0 : src type field # 19796######################################################################### 19797fetch_from_mem: 19798 clr.b STAG(%a6) # either NORM or ZERO 19799 19800 mov.w (tbl_fp_type.b,%pc,%d0.w*2), %d0 # index by src type field 19801 jmp (tbl_fp_type.b,%pc,%d0.w*1) 19802 19803 swbeg &0x8 19804tbl_fp_type: 19805 short load_long - tbl_fp_type 19806 short load_sgl - tbl_fp_type 19807 short load_ext - tbl_fp_type 19808 short load_packed - tbl_fp_type 19809 short load_word - tbl_fp_type 19810 short load_dbl - tbl_fp_type 19811 short load_byte - tbl_fp_type 19812 short tbl_fp_type - tbl_fp_type 19813 19814######################################### 19815# load a LONG into %fp0: # 19816# -number can't fault # 19817# (1) calc ea # 19818# (2) read 4 bytes into L_SCR1 # 19819# (3) fmov.l into %fp0 # 19820######################################### 19821load_long: 19822 movq.l &0x4, %d0 # pass: 4 (bytes) 19823 bsr.l _dcalc_ea # calc <ea>; <ea> in %a0 19824 19825 cmpi.b SPCOND_FLG(%a6),&immed_flg 19826 beq.b load_long_immed 19827 19828 bsr.l _dmem_read_long # fetch src operand from memory 19829 19830 tst.l %d1 # did dfetch fail? 
19831 bne.l facc_in_l # yes 19832 19833load_long_cont: 19834 fmov.l %d0, %fp0 # read into %fp0;convert to xprec 19835 fmovm.x &0x80, FP_SRC(%a6) # return src op in FP_SRC 19836 19837 fbeq.w load_long_zero # src op is a ZERO 19838 rts 19839load_long_zero: 19840 mov.b &ZERO, STAG(%a6) # set optype tag to ZERO 19841 rts 19842 19843load_long_immed: 19844 bsr.l _imem_read_long # fetch src operand immed data 19845 19846 tst.l %d1 # did ifetch fail? 19847 bne.l funimp_iacc # yes 19848 bra.b load_long_cont 19849 19850######################################### 19851# load a WORD into %fp0: # 19852# -number can't fault # 19853# (1) calc ea # 19854# (2) read 2 bytes into L_SCR1 # 19855# (3) fmov.w into %fp0 # 19856######################################### 19857load_word: 19858 movq.l &0x2, %d0 # pass: 2 (bytes) 19859 bsr.l _dcalc_ea # calc <ea>; <ea> in %a0 19860 19861 cmpi.b SPCOND_FLG(%a6),&immed_flg 19862 beq.b load_word_immed 19863 19864 bsr.l _dmem_read_word # fetch src operand from memory 19865 19866 tst.l %d1 # did dfetch fail? 19867 bne.l facc_in_w # yes 19868 19869load_word_cont: 19870 fmov.w %d0, %fp0 # read into %fp0;convert to xprec 19871 fmovm.x &0x80, FP_SRC(%a6) # return src op in FP_SRC 19872 19873 fbeq.w load_word_zero # src op is a ZERO 19874 rts 19875load_word_zero: 19876 mov.b &ZERO, STAG(%a6) # set optype tag to ZERO 19877 rts 19878 19879load_word_immed: 19880 bsr.l _imem_read_word # fetch src operand immed data 19881 19882 tst.l %d1 # did ifetch fail? 19883 bne.l funimp_iacc # yes 19884 bra.b load_word_cont 19885 19886######################################### 19887# load a BYTE into %fp0: # 19888# -number can't fault # 19889# (1) calc ea # 19890# (2) read 1 byte into L_SCR1 # 19891# (3) fmov.b into %fp0 # 19892######################################### 19893load_byte: 19894 movq.l &0x1, %d0 # pass: 1 (byte) 19895 bsr.l _dcalc_ea # calc <ea>; <ea> in %a0 19896 19897 cmpi.b SPCOND_FLG(%a6),&immed_flg 19898 beq.b load_byte_immed 19899 19900 bsr.l _dmem_read_byte # fetch src operand from memory 19901 19902 tst.l %d1 # did dfetch fail? 19903 bne.l facc_in_b # yes 19904 19905load_byte_cont: 19906 fmov.b %d0, %fp0 # read into %fp0;convert to xprec 19907 fmovm.x &0x80, FP_SRC(%a6) # return src op in FP_SRC 19908 19909 fbeq.w load_byte_zero # src op is a ZERO 19910 rts 19911load_byte_zero: 19912 mov.b &ZERO, STAG(%a6) # set optype tag to ZERO 19913 rts 19914 19915load_byte_immed: 19916 bsr.l _imem_read_word # fetch src operand immed data 19917 19918 tst.l %d1 # did ifetch fail? 19919 bne.l funimp_iacc # yes 19920 bra.b load_byte_cont 19921 19922######################################### 19923# load a SGL into %fp0: # 19924# -number can't fault # 19925# (1) calc ea # 19926# (2) read 4 bytes into L_SCR1 # 19927# (3) fmov.s into %fp0 # 19928######################################### 19929load_sgl: 19930 movq.l &0x4, %d0 # pass: 4 (bytes) 19931 bsr.l _dcalc_ea # calc <ea>; <ea> in %a0 19932 19933 cmpi.b SPCOND_FLG(%a6),&immed_flg 19934 beq.b load_sgl_immed 19935 19936 bsr.l _dmem_read_long # fetch src operand from memory 19937 mov.l %d0, L_SCR1(%a6) # store src op on stack 19938 19939 tst.l %d1 # did dfetch fail? 19940 bne.l facc_in_l # yes 19941 19942load_sgl_cont: 19943 lea L_SCR1(%a6), %a0 # pass: ptr to sgl src op 19944 bsr.l set_tag_s # determine src type tag 19945 mov.b %d0, STAG(%a6) # save src optype tag on stack 19946 19947 cmpi.b %d0, &DENORM # is it a sgl DENORM? 19948 beq.w get_sgl_denorm # yes 19949 19950 cmpi.b %d0, &SNAN # is it a sgl SNAN? 
19951 beq.w get_sgl_snan # yes 19952 19953 fmov.s L_SCR1(%a6), %fp0 # read into %fp0;convert to xprec 19954 fmovm.x &0x80, FP_SRC(%a6) # return src op in FP_SRC 19955 rts 19956 19957load_sgl_immed: 19958 bsr.l _imem_read_long # fetch src operand immed data 19959 19960 tst.l %d1 # did ifetch fail? 19961 bne.l funimp_iacc # yes 19962 bra.b load_sgl_cont 19963 19964# must convert sgl denorm format to an Xprec denorm fmt suitable for 19965# normalization... 19966# %a0 : points to sgl denorm 19967get_sgl_denorm: 19968 clr.w FP_SRC_EX(%a6) 19969 bfextu (%a0){&9:&23}, %d0 # fetch sgl hi(_mantissa) 19970 lsl.l &0x8, %d0 19971 mov.l %d0, FP_SRC_HI(%a6) # set ext hi(_mantissa) 19972 clr.l FP_SRC_LO(%a6) # set ext lo(_mantissa) 19973 19974 clr.w FP_SRC_EX(%a6) 19975 btst &0x7, (%a0) # is sgn bit set? 19976 beq.b sgl_dnrm_norm 19977 bset &0x7, FP_SRC_EX(%a6) # set sgn of xprec value 19978 19979sgl_dnrm_norm: 19980 lea FP_SRC(%a6), %a0 19981 bsr.l norm # normalize number 19982 mov.w &0x3f81, %d1 # xprec exp = 0x3f81 19983 sub.w %d0, %d1 # exp = 0x3f81 - shft amt. 19984 or.w %d1, FP_SRC_EX(%a6) # {sgn,exp} 19985 19986 mov.b &NORM, STAG(%a6) # fix src type tag 19987 rts 19988 19989# convert sgl to ext SNAN 19990# %a0 : points to sgl SNAN 19991get_sgl_snan: 19992 mov.w &0x7fff, FP_SRC_EX(%a6) # set exp of SNAN 19993 bfextu (%a0){&9:&23}, %d0 19994 lsl.l &0x8, %d0 # extract and insert hi(man) 19995 mov.l %d0, FP_SRC_HI(%a6) 19996 clr.l FP_SRC_LO(%a6) 19997 19998 btst &0x7, (%a0) # see if sign of SNAN is set 19999 beq.b no_sgl_snan_sgn 20000 bset &0x7, FP_SRC_EX(%a6) 20001no_sgl_snan_sgn: 20002 rts 20003 20004######################################### 20005# load a DBL into %fp0: # 20006# -number can't fault # 20007# (1) calc ea # 20008# (2) read 8 bytes into L_SCR(1,2)# 20009# (3) fmov.d into %fp0 # 20010######################################### 20011load_dbl: 20012 movq.l &0x8, %d0 # pass: 8 (bytes) 20013 bsr.l _dcalc_ea # calc <ea>; <ea> in %a0 20014 20015 cmpi.b SPCOND_FLG(%a6),&immed_flg 20016 beq.b load_dbl_immed 20017 20018 lea L_SCR1(%a6), %a1 # pass: ptr to input dbl tmp space 20019 movq.l &0x8, %d0 # pass: # bytes to read 20020 bsr.l _dmem_read # fetch src operand from memory 20021 20022 tst.l %d1 # did dfetch fail? 20023 bne.l facc_in_d # yes 20024 20025load_dbl_cont: 20026 lea L_SCR1(%a6), %a0 # pass: ptr to input dbl 20027 bsr.l set_tag_d # determine src type tag 20028 mov.b %d0, STAG(%a6) # set src optype tag 20029 20030 cmpi.b %d0, &DENORM # is it a dbl DENORM? 20031 beq.w get_dbl_denorm # yes 20032 20033 cmpi.b %d0, &SNAN # is it a dbl SNAN? 20034 beq.w get_dbl_snan # yes 20035 20036 fmov.d L_SCR1(%a6), %fp0 # read into %fp0;convert to xprec 20037 fmovm.x &0x80, FP_SRC(%a6) # return src op in FP_SRC 20038 rts 20039 20040load_dbl_immed: 20041 lea L_SCR1(%a6), %a1 # pass: ptr to input dbl tmp space 20042 movq.l &0x8, %d0 # pass: # bytes to read 20043 bsr.l _imem_read # fetch src operand from memory 20044 20045 tst.l %d1 # did ifetch fail? 20046 bne.l funimp_iacc # yes 20047 bra.b load_dbl_cont 20048 20049# must convert dbl denorm format to an Xprec denorm fmt suitable for 20050# normalization... 20051# %a0 : loc. of dbl denorm 20052get_dbl_denorm: 20053 clr.w FP_SRC_EX(%a6) 20054 bfextu (%a0){&12:&31}, %d0 # fetch hi(_mantissa) 20055 mov.l %d0, FP_SRC_HI(%a6) 20056 bfextu 4(%a0){&11:&21}, %d0 # fetch lo(_mantissa) 20057 mov.l &0xb, %d1 20058 lsl.l %d1, %d0 20059 mov.l %d0, FP_SRC_LO(%a6) 20060 20061 btst &0x7, (%a0) # is sgn bit set? 
20062 beq.b dbl_dnrm_norm 20063 bset &0x7, FP_SRC_EX(%a6) # set sgn of xprec value 20064 20065dbl_dnrm_norm: 20066 lea FP_SRC(%a6), %a0 20067 bsr.l norm # normalize number 20068 mov.w &0x3c01, %d1 # xprec exp = 0x3c01 20069 sub.w %d0, %d1 # exp = 0x3c01 - shft amt. 20070 or.w %d1, FP_SRC_EX(%a6) # {sgn,exp} 20071 20072 mov.b &NORM, STAG(%a6) # fix src type tag 20073 rts 20074 20075# convert dbl to ext SNAN 20076# %a0 : points to dbl SNAN 20077get_dbl_snan: 20078 mov.w &0x7fff, FP_SRC_EX(%a6) # set exp of SNAN 20079 20080 bfextu (%a0){&12:&31}, %d0 # fetch hi(_mantissa) 20081 mov.l %d0, FP_SRC_HI(%a6) 20082 bfextu 4(%a0){&11:&21}, %d0 # fetch lo(_mantissa) 20083 mov.l &0xb, %d1 20084 lsl.l %d1, %d0 20085 mov.l %d0, FP_SRC_LO(%a6) 20086 20087 btst &0x7, (%a0) # see if sign of SNAN is set 20088 beq.b no_dbl_snan_sgn 20089 bset &0x7, FP_SRC_EX(%a6) 20090no_dbl_snan_sgn: 20091 rts 20092 20093################################################# 20094# load a Xprec into %fp0: # 20095# -number can't fault # 20096# (1) calc ea # 20097# (2) read 12 bytes into L_SCR(1,2) # 20098# (3) fmov.x into %fp0 # 20099################################################# 20100load_ext: 20101 mov.l &0xc, %d0 # pass: 12 (bytes) 20102 bsr.l _dcalc_ea # calc <ea> 20103 20104 lea FP_SRC(%a6), %a1 # pass: ptr to input ext tmp space 20105 mov.l &0xc, %d0 # pass: # of bytes to read 20106 bsr.l _dmem_read # fetch src operand from memory 20107 20108 tst.l %d1 # did dfetch fail? 20109 bne.l facc_in_x # yes 20110 20111 lea FP_SRC(%a6), %a0 # pass: ptr to src op 20112 bsr.l set_tag_x # determine src type tag 20113 20114 cmpi.b %d0, &UNNORM # is the src op an UNNORM? 20115 beq.b load_ext_unnorm # yes 20116 20117 mov.b %d0, STAG(%a6) # store the src optype tag 20118 rts 20119 20120load_ext_unnorm: 20121 bsr.l unnorm_fix # fix the src UNNORM 20122 mov.b %d0, STAG(%a6) # store the src optype tag 20123 rts 20124 20125################################################# 20126# load a packed into %fp0: # 20127# -number can't fault # 20128# (1) calc ea # 20129# (2) read 12 bytes into L_SCR(1,2,3) # 20130# (3) fmov.x into %fp0 # 20131################################################# 20132load_packed: 20133 bsr.l get_packed 20134 20135 lea FP_SRC(%a6),%a0 # pass ptr to src op 20136 bsr.l set_tag_x # determine src type tag 20137 cmpi.b %d0,&UNNORM # is the src op an UNNORM ZERO? 20138 beq.b load_packed_unnorm # yes 20139 20140 mov.b %d0,STAG(%a6) # store the src optype tag 20141 rts 20142 20143load_packed_unnorm: 20144 bsr.l unnorm_fix # fix the UNNORM ZERO 20145 mov.b %d0,STAG(%a6) # store the src optype tag 20146 rts 20147 20148######################################################################### 20149# XDEF **************************************************************** # 20150# fout(): move from fp register to memory or data register # 20151# # 20152# XREF **************************************************************** # 20153# _round() - needed to create EXOP for sgl/dbl precision # 20154# norm() - needed to create EXOP for extended precision # 20155# ovf_res() - create default overflow result for sgl/dbl precision# 20156# unf_res() - create default underflow result for sgl/dbl prec. # 20157# dst_dbl() - create rounded dbl precision result. # 20158# dst_sgl() - create rounded sgl precision result. # 20159# fetch_dreg() - fetch dynamic k-factor reg for packed. # 20160# bindec() - convert FP binary number to packed number. # 20161# _mem_write() - write data to memory. 
#	_mem_write2() - write data to memory unless supv mode -(a7) exc. #
#	_dmem_write_{byte,word,long}() - write data to memory.		#
#	store_dreg_{b,w,l}() - store data to data register file.	#
#	facc_out_{b,w,l,d,x}() - data access error occurred.		#
#									#
# INPUT *************************************************************** #
#	a0 = pointer to extended precision source operand		#
#	d0 = round prec,mode						#
#									#
# OUTPUT ************************************************************** #
#	fp0 : intermediate underflow or overflow result if		#
#	      OVFL/UNFL occurred for a sgl or dbl operand		#
#									#
# ALGORITHM *********************************************************** #
#	This routine is accessed by many handlers that need to do an	#
# opclass three move of an operand out to memory.			#
#	Decode an fmove out (opclass 3) instruction to determine if	#
# it's b,w,l,s,d,x, or p in size. b,w,l can be stored to either a data	#
# register or memory. The algorithm uses a standard "fmove" to create	#
# the rounded result. Also, since exceptions are disabled, this also	#
# creates the correct OPERR default result if appropriate.		#
#	For sgl or dbl precision, overflow or underflow can occur. If	#
# either occurs and is enabled, an EXOP must be created.		#
#	For extended precision, the stacked <ea> must be fixed along	#
# w/ the address register as appropriate w/ _calc_ea_fout(). If	#
# the source is a denorm and if underflow is enabled, an EXOP must be	#
# created.								#
#	For packed, the k-factor must be fetched from the instruction	#
# word or a data register. The <ea> must be fixed as w/ extended	#
# precision. Then, bindec() is called to create the appropriate	#
# packed result.							#
#	If at any time an access error is flagged by one of the move-	#
# to-memory routines, then a special exit must be made so that the	#
# access error can be handled properly.				#
#									#
#########################################################################

	global		fout
fout:
	bfextu		EXC_CMDREG(%a6){&3:&3},%d1 # extract dst fmt
	mov.w		(tbl_fout.b,%pc,%d1.w*2),%a1 # use as index
	jmp		(tbl_fout.b,%pc,%a1)	# jump to routine

	swbeg		&0x8
tbl_fout:
	short		fout_long - tbl_fout
	short		fout_sgl - tbl_fout
	short		fout_ext - tbl_fout
	short		fout_pack - tbl_fout
	short		fout_word - tbl_fout
	short		fout_dbl - tbl_fout
	short		fout_byte - tbl_fout
	short		fout_pack - tbl_fout

#################################################################
# fmove.b out ###################################################
#################################################################

# Only "Unimplemented Data Type" exceptions enter here. The operand
# is either a DENORM or a NORM.
fout_byte:
	tst.b		STAG(%a6)		# is operand normalized?
	bne.b		fout_byte_denorm	# no

	fmovm.x		SRC(%a0),&0x80		# load value

fout_byte_norm:
	fmov.l		%d0,%fpcr		# insert rnd prec,mode

	fmov.b		%fp0,%d0		# exec move out w/ correct rnd mode

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# fetch FPSR
	or.w		%d1,2+USER_FPSR(%a6)	# save new exc,accrued bits

	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
	andi.b		&0x38,%d1		# is mode == 0?
(Dreg dst) 20239 beq.b fout_byte_dn # must save to integer regfile 20240 20241 mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct 20242 bsr.l _dmem_write_byte # write byte 20243 20244 tst.l %d1 # did dstore fail? 20245 bne.l facc_out_b # yes 20246 20247 rts 20248 20249fout_byte_dn: 20250 mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn 20251 andi.w &0x7,%d1 20252 bsr.l store_dreg_b 20253 rts 20254 20255fout_byte_denorm: 20256 mov.l SRC_EX(%a0),%d1 20257 andi.l &0x80000000,%d1 # keep DENORM sign 20258 ori.l &0x00800000,%d1 # make smallest sgl 20259 fmov.s %d1,%fp0 20260 bra.b fout_byte_norm 20261 20262################################################################# 20263# fmove.w out ################################################### 20264################################################################# 20265 20266# Only "Unimplemented Data Type" exceptions enter here. The operand 20267# is either a DENORM or a NORM. 20268fout_word: 20269 tst.b STAG(%a6) # is operand normalized? 20270 bne.b fout_word_denorm # no 20271 20272 fmovm.x SRC(%a0),&0x80 # load value 20273 20274fout_word_norm: 20275 fmov.l %d0,%fpcr # insert rnd prec:mode 20276 20277 fmov.w %fp0,%d0 # exec move out w/ correct rnd mode 20278 20279 fmov.l &0x0,%fpcr # clear FPCR 20280 fmov.l %fpsr,%d1 # fetch FPSR 20281 or.w %d1,2+USER_FPSR(%a6) # save new exc,accrued bits 20282 20283 mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode 20284 andi.b &0x38,%d1 # is mode == 0? (Dreg dst) 20285 beq.b fout_word_dn # must save to integer regfile 20286 20287 mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct 20288 bsr.l _dmem_write_word # write word 20289 20290 tst.l %d1 # did dstore fail? 20291 bne.l facc_out_w # yes 20292 20293 rts 20294 20295fout_word_dn: 20296 mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn 20297 andi.w &0x7,%d1 20298 bsr.l store_dreg_w 20299 rts 20300 20301fout_word_denorm: 20302 mov.l SRC_EX(%a0),%d1 20303 andi.l &0x80000000,%d1 # keep DENORM sign 20304 ori.l &0x00800000,%d1 # make smallest sgl 20305 fmov.s %d1,%fp0 20306 bra.b fout_word_norm 20307 20308################################################################# 20309# fmove.l out ################################################### 20310################################################################# 20311 20312# Only "Unimplemented Data Type" exceptions enter here. The operand 20313# is either a DENORM or a NORM. 20314fout_long: 20315 tst.b STAG(%a6) # is operand normalized? 20316 bne.b fout_long_denorm # no 20317 20318 fmovm.x SRC(%a0),&0x80 # load value 20319 20320fout_long_norm: 20321 fmov.l %d0,%fpcr # insert rnd prec:mode 20322 20323 fmov.l %fp0,%d0 # exec move out w/ correct rnd mode 20324 20325 fmov.l &0x0,%fpcr # clear FPCR 20326 fmov.l %fpsr,%d1 # fetch FPSR 20327 or.w %d1,2+USER_FPSR(%a6) # save new exc,accrued bits 20328 20329fout_long_write: 20330 mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode 20331 andi.b &0x38,%d1 # is mode == 0? (Dreg dst) 20332 beq.b fout_long_dn # must save to integer regfile 20333 20334 mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct 20335 bsr.l _dmem_write_long # write long 20336 20337 tst.l %d1 # did dstore fail? 
20338 bne.l facc_out_l # yes 20339 20340 rts 20341 20342fout_long_dn: 20343 mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn 20344 andi.w &0x7,%d1 20345 bsr.l store_dreg_l 20346 rts 20347 20348fout_long_denorm: 20349 mov.l SRC_EX(%a0),%d1 20350 andi.l &0x80000000,%d1 # keep DENORM sign 20351 ori.l &0x00800000,%d1 # make smallest sgl 20352 fmov.s %d1,%fp0 20353 bra.b fout_long_norm 20354 20355################################################################# 20356# fmove.x out ################################################### 20357################################################################# 20358 20359# Only "Unimplemented Data Type" exceptions enter here. The operand 20360# is either a DENORM or a NORM. 20361# The DENORM causes an Underflow exception. 20362fout_ext: 20363 20364# we copy the extended precision result to FP_SCR0 so that the reserved 20365# 16-bit field gets zeroed. we do this since we promise not to disturb 20366# what's at SRC(a0). 20367 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) 20368 clr.w 2+FP_SCR0_EX(%a6) # clear reserved field 20369 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) 20370 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) 20371 20372 fmovm.x SRC(%a0),&0x80 # return result 20373 20374 bsr.l _calc_ea_fout # fix stacked <ea> 20375 20376 mov.l %a0,%a1 # pass: dst addr 20377 lea FP_SCR0(%a6),%a0 # pass: src addr 20378 mov.l &0xc,%d0 # pass: opsize is 12 bytes 20379 20380# we must not yet write the extended precision data to the stack 20381# in the pre-decrement case from supervisor mode or else we'll corrupt 20382# the stack frame. so, leave it in FP_SRC for now and deal with it later... 20383 cmpi.b SPCOND_FLG(%a6),&mda7_flg 20384 beq.b fout_ext_a7 20385 20386 bsr.l _dmem_write # write ext prec number to memory 20387 20388 tst.l %d1 # did dstore fail? 20389 bne.w fout_ext_err # yes 20390 20391 tst.b STAG(%a6) # is operand normalized? 20392 bne.b fout_ext_denorm # no 20393 rts 20394 20395# the number is a DENORM. must set the underflow exception bit 20396fout_ext_denorm: 20397 bset &unfl_bit,FPSR_EXCEPT(%a6) # set underflow exc bit 20398 20399 mov.b FPCR_ENABLE(%a6),%d0 20400 andi.b &0x0a,%d0 # is UNFL or INEX enabled? 20401 bne.b fout_ext_exc # yes 20402 rts 20403 20404# we don't want to do the write if the exception occurred in supervisor mode 20405# so _mem_write2() handles this for us. 20406fout_ext_a7: 20407 bsr.l _mem_write2 # write ext prec number to memory 20408 20409 tst.l %d1 # did dstore fail? 20410 bne.w fout_ext_err # yes 20411 20412 tst.b STAG(%a6) # is operand normalized? 20413 bne.b fout_ext_denorm # no 20414 rts 20415 20416fout_ext_exc: 20417 lea FP_SCR0(%a6),%a0 20418 bsr.l norm # normalize the mantissa 20419 neg.w %d0 # new exp = -(shft amt) 20420 andi.w &0x7fff,%d0 20421 andi.w &0x8000,FP_SCR0_EX(%a6) # keep only old sign 20422 or.w %d0,FP_SCR0_EX(%a6) # insert new exponent 20423 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1 20424 rts 20425 20426fout_ext_err: 20427 mov.l EXC_A6(%a6),(%a6) # fix stacked a6 20428 bra.l facc_out_x 20429 20430######################################################################### 20431# fmove.s out ########################################################### 20432######################################################################### 20433fout_sgl: 20434 andi.b &0x30,%d0 # clear rnd prec 20435 ori.b &s_mode*0x10,%d0 # insert sgl prec 20436 mov.l %d0,L_SCR3(%a6) # save rnd prec,mode on stack 20437 20438# 20439# operand is a normalized number. first, we check to see if the move out 20440# would cause either an underflow or overflow. 
these cases are handled 20441# separately. otherwise, set the FPCR to the proper rounding mode and 20442# execute the move. 20443# 20444 mov.w SRC_EX(%a0),%d0 # extract exponent 20445 andi.w &0x7fff,%d0 # strip sign 20446 20447 cmpi.w %d0,&SGL_HI # will operand overflow? 20448 bgt.w fout_sgl_ovfl # yes; go handle OVFL 20449 beq.w fout_sgl_may_ovfl # maybe; go handle possible OVFL 20450 cmpi.w %d0,&SGL_LO # will operand underflow? 20451 blt.w fout_sgl_unfl # yes; go handle underflow 20452 20453# 20454# NORMs(in range) can be stored out by a simple "fmov.s" 20455# Unnormalized inputs can come through this point. 20456# 20457fout_sgl_exg: 20458 fmovm.x SRC(%a0),&0x80 # fetch fop from stack 20459 20460 fmov.l L_SCR3(%a6),%fpcr # set FPCR 20461 fmov.l &0x0,%fpsr # clear FPSR 20462 20463 fmov.s %fp0,%d0 # store does convert and round 20464 20465 fmov.l &0x0,%fpcr # clear FPCR 20466 fmov.l %fpsr,%d1 # save FPSR 20467 20468 or.w %d1,2+USER_FPSR(%a6) # set possible inex2/ainex 20469 20470fout_sgl_exg_write: 20471 mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode 20472 andi.b &0x38,%d1 # is mode == 0? (Dreg dst) 20473 beq.b fout_sgl_exg_write_dn # must save to integer regfile 20474 20475 mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct 20476 bsr.l _dmem_write_long # write long 20477 20478 tst.l %d1 # did dstore fail? 20479 bne.l facc_out_l # yes 20480 20481 rts 20482 20483fout_sgl_exg_write_dn: 20484 mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn 20485 andi.w &0x7,%d1 20486 bsr.l store_dreg_l 20487 rts 20488 20489# 20490# here, we know that the operand would UNFL if moved out to single prec, 20491# so, denorm and round and then use generic store single routine to 20492# write the value to memory. 20493# 20494fout_sgl_unfl: 20495 bset &unfl_bit,FPSR_EXCEPT(%a6) # set UNFL 20496 20497 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) 20498 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) 20499 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) 20500 mov.l %a0,-(%sp) 20501 20502 clr.l %d0 # pass: S.F. = 0 20503 20504 cmpi.b STAG(%a6),&DENORM # fetch src optype tag 20505 bne.b fout_sgl_unfl_cont # let DENORMs fall through 20506 20507 lea FP_SCR0(%a6),%a0 20508 bsr.l norm # normalize the DENORM 20509 20510fout_sgl_unfl_cont: 20511 lea FP_SCR0(%a6),%a0 # pass: ptr to operand 20512 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode 20513 bsr.l unf_res # calc default underflow result 20514 20515 lea FP_SCR0(%a6),%a0 # pass: ptr to fop 20516 bsr.l dst_sgl # convert to single prec 20517 20518 mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode 20519 andi.b &0x38,%d1 # is mode == 0? (Dreg dst) 20520 beq.b fout_sgl_unfl_dn # must save to integer regfile 20521 20522 mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct 20523 bsr.l _dmem_write_long # write long 20524 20525 tst.l %d1 # did dstore fail? 20526 bne.l facc_out_l # yes 20527 20528 bra.b fout_sgl_unfl_chkexc 20529 20530fout_sgl_unfl_dn: 20531 mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn 20532 andi.w &0x7,%d1 20533 bsr.l store_dreg_l 20534 20535fout_sgl_unfl_chkexc: 20536 mov.b FPCR_ENABLE(%a6),%d1 20537 andi.b &0x0a,%d1 # is UNFL or INEX enabled? 20538 bne.w fout_sd_exc_unfl # yes 20539 addq.l &0x4,%sp 20540 rts 20541 20542# 20543# it's definitely an overflow so call ovf_res to get the correct answer 20544# 20545fout_sgl_ovfl: 20546 tst.b 3+SRC_HI(%a0) # is result inexact? 20547 bne.b fout_sgl_ovfl_inex2 20548 tst.l SRC_LO(%a0) # is result inexact? 
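# (the two tst's above cover the 40 low-order mantissa bits that a single
#  precision result drops -- the low byte of SRC_HI plus all of SRC_LO;
#  if any of them is set, the overflowed move out is inexact as well.)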
20549 bne.b fout_sgl_ovfl_inex2 20550 ori.w &ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex 20551 bra.b fout_sgl_ovfl_cont 20552fout_sgl_ovfl_inex2: 20553 ori.w &ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2 20554 20555fout_sgl_ovfl_cont: 20556 mov.l %a0,-(%sp) 20557 20558# call ovf_res() w/ sgl prec and the correct rnd mode to create the default 20559# overflow result. DON'T save the returned ccodes from ovf_res() since 20560# fmove out doesn't alter them. 20561 tst.b SRC_EX(%a0) # is operand negative? 20562 smi %d1 # set if so 20563 mov.l L_SCR3(%a6),%d0 # pass: sgl prec,rnd mode 20564 bsr.l ovf_res # calc OVFL result 20565 fmovm.x (%a0),&0x80 # load default overflow result 20566 fmov.s %fp0,%d0 # store to single 20567 20568 mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode 20569 andi.b &0x38,%d1 # is mode == 0? (Dreg dst) 20570 beq.b fout_sgl_ovfl_dn # must save to integer regfile 20571 20572 mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct 20573 bsr.l _dmem_write_long # write long 20574 20575 tst.l %d1 # did dstore fail? 20576 bne.l facc_out_l # yes 20577 20578 bra.b fout_sgl_ovfl_chkexc 20579 20580fout_sgl_ovfl_dn: 20581 mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn 20582 andi.w &0x7,%d1 20583 bsr.l store_dreg_l 20584 20585fout_sgl_ovfl_chkexc: 20586 mov.b FPCR_ENABLE(%a6),%d1 20587 andi.b &0x0a,%d1 # is UNFL or INEX enabled? 20588 bne.w fout_sd_exc_ovfl # yes 20589 addq.l &0x4,%sp 20590 rts 20591 20592# 20593# move out MAY overflow: 20594# (1) force the exp to 0x3fff 20595# (2) do a move w/ appropriate rnd mode 20596# (3) if exp still equals zero, then insert original exponent 20597# for the correct result. 20598# if exp now equals one, then it overflowed so call ovf_res. 20599# 20600fout_sgl_may_ovfl: 20601 mov.w SRC_EX(%a0),%d1 # fetch current sign 20602 andi.w &0x8000,%d1 # keep it,clear exp 20603 ori.w &0x3fff,%d1 # insert exp = 0 20604 mov.w %d1,FP_SCR0_EX(%a6) # insert scaled exp 20605 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man) 20606 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man) 20607 20608 fmov.l L_SCR3(%a6),%fpcr # set FPCR 20609 20610 fmov.x FP_SCR0(%a6),%fp0 # force fop to be rounded 20611 fmov.l &0x0,%fpcr # clear FPCR 20612 20613 fabs.x %fp0 # need absolute value 20614 fcmp.b %fp0,&0x2 # did exponent increase? 20615 fblt.w fout_sgl_exg # no; go finish NORM 20616 bra.w fout_sgl_ovfl # yes; go handle overflow 20617 20618################ 20619 20620fout_sd_exc_unfl: 20621 mov.l (%sp)+,%a0 20622 20623 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) 20624 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) 20625 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) 20626 20627 cmpi.b STAG(%a6),&DENORM # was src a DENORM? 20628 bne.b fout_sd_exc_cont # no 20629 20630 lea FP_SCR0(%a6),%a0 20631 bsr.l norm 20632 neg.l %d0 20633 andi.w &0x7fff,%d0 20634 bfins %d0,FP_SCR0_EX(%a6){&1:&15} 20635 bra.b fout_sd_exc_cont 20636 20637fout_sd_exc: 20638fout_sd_exc_ovfl: 20639 mov.l (%sp)+,%a0 # restore a0 20640 20641 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) 20642 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) 20643 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) 20644 20645fout_sd_exc_cont: 20646 bclr &0x7,FP_SCR0_EX(%a6) # clear sign bit 20647 sne.b 2+FP_SCR0_EX(%a6) # set internal sign bit 20648 lea FP_SCR0(%a6),%a0 # pass: ptr to DENORM 20649 20650 mov.b 3+L_SCR3(%a6),%d1 20651 lsr.b &0x4,%d1 20652 andi.w &0x0c,%d1 20653 swap %d1 20654 mov.b 3+L_SCR3(%a6),%d1 20655 lsr.b &0x4,%d1 20656 andi.w &0x03,%d1 20657 clr.l %d0 # pass: zero g,r,s 20658 bsr.l _round # round the DENORM 20659 20660 tst.b 2+FP_SCR0_EX(%a6) # is EXOP negative? 
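# (2+FP_SCR0_EX is the byte where the sne.b above stashed the operand's
#  sign before _round() was run on the sign-cleared value; put the sign
#  back if it was set.)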
20661 beq.b fout_sd_exc_done # no 20662 bset &0x7,FP_SCR0_EX(%a6) # yes 20663 20664fout_sd_exc_done: 20665 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1 20666 rts 20667 20668################################################################# 20669# fmove.d out ################################################### 20670################################################################# 20671fout_dbl: 20672 andi.b &0x30,%d0 # clear rnd prec 20673 ori.b &d_mode*0x10,%d0 # insert dbl prec 20674 mov.l %d0,L_SCR3(%a6) # save rnd prec,mode on stack 20675 20676# 20677# operand is a normalized number. first, we check to see if the move out 20678# would cause either an underflow or overflow. these cases are handled 20679# separately. otherwise, set the FPCR to the proper rounding mode and 20680# execute the move. 20681# 20682 mov.w SRC_EX(%a0),%d0 # extract exponent 20683 andi.w &0x7fff,%d0 # strip sign 20684 20685 cmpi.w %d0,&DBL_HI # will operand overflow? 20686 bgt.w fout_dbl_ovfl # yes; go handle OVFL 20687 beq.w fout_dbl_may_ovfl # maybe; go handle possible OVFL 20688 cmpi.w %d0,&DBL_LO # will operand underflow? 20689 blt.w fout_dbl_unfl # yes; go handle underflow 20690 20691# 20692# NORMs(in range) can be stored out by a simple "fmov.d" 20693# Unnormalized inputs can come through this point. 20694# 20695fout_dbl_exg: 20696 fmovm.x SRC(%a0),&0x80 # fetch fop from stack 20697 20698 fmov.l L_SCR3(%a6),%fpcr # set FPCR 20699 fmov.l &0x0,%fpsr # clear FPSR 20700 20701 fmov.d %fp0,L_SCR1(%a6) # store does convert and round 20702 20703 fmov.l &0x0,%fpcr # clear FPCR 20704 fmov.l %fpsr,%d0 # save FPSR 20705 20706 or.w %d0,2+USER_FPSR(%a6) # set possible inex2/ainex 20707 20708 mov.l EXC_EA(%a6),%a1 # pass: dst addr 20709 lea L_SCR1(%a6),%a0 # pass: src addr 20710 movq.l &0x8,%d0 # pass: opsize is 8 bytes 20711 bsr.l _dmem_write # store dbl fop to memory 20712 20713 tst.l %d1 # did dstore fail? 20714 bne.l facc_out_d # yes 20715 20716 rts # no; so we're finished 20717 20718# 20719# here, we know that the operand would UNFL if moved out to double prec, 20720# so, denorm and round and then use generic store double routine to 20721# write the value to memory. 20722# 20723fout_dbl_unfl: 20724 bset &unfl_bit,FPSR_EXCEPT(%a6) # set UNFL 20725 20726 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) 20727 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) 20728 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) 20729 mov.l %a0,-(%sp) 20730 20731 clr.l %d0 # pass: S.F. = 0 20732 20733 cmpi.b STAG(%a6),&DENORM # fetch src optype tag 20734 bne.b fout_dbl_unfl_cont # let DENORMs fall through 20735 20736 lea FP_SCR0(%a6),%a0 20737 bsr.l norm # normalize the DENORM 20738 20739fout_dbl_unfl_cont: 20740 lea FP_SCR0(%a6),%a0 # pass: ptr to operand 20741 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode 20742 bsr.l unf_res # calc default underflow result 20743 20744 lea FP_SCR0(%a6),%a0 # pass: ptr to fop 20745 bsr.l dst_dbl # convert to single prec 20746 mov.l %d0,L_SCR1(%a6) 20747 mov.l %d1,L_SCR2(%a6) 20748 20749 mov.l EXC_EA(%a6),%a1 # pass: dst addr 20750 lea L_SCR1(%a6),%a0 # pass: src addr 20751 movq.l &0x8,%d0 # pass: opsize is 8 bytes 20752 bsr.l _dmem_write # store dbl fop to memory 20753 20754 tst.l %d1 # did dstore fail? 20755 bne.l facc_out_d # yes 20756 20757 mov.b FPCR_ENABLE(%a6),%d1 20758 andi.b &0x0a,%d1 # is UNFL or INEX enabled? 
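# (0x0a masks the UNFL (bit 3) and INEX2 (bit 1) enable bits of the FPCR
#  enable byte; if either trap is enabled, an EXOP still has to be built,
#  which fout_sd_exc_unfl does for both the sgl and dbl paths.)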
20759 bne.w fout_sd_exc_unfl # yes 20760 addq.l &0x4,%sp 20761 rts 20762 20763# 20764# it's definitely an overflow so call ovf_res to get the correct answer 20765# 20766fout_dbl_ovfl: 20767 mov.w 2+SRC_LO(%a0),%d0 20768 andi.w &0x7ff,%d0 20769 bne.b fout_dbl_ovfl_inex2 20770 20771 ori.w &ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex 20772 bra.b fout_dbl_ovfl_cont 20773fout_dbl_ovfl_inex2: 20774 ori.w &ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2 20775 20776fout_dbl_ovfl_cont: 20777 mov.l %a0,-(%sp) 20778 20779# call ovf_res() w/ dbl prec and the correct rnd mode to create the default 20780# overflow result. DON'T save the returned ccodes from ovf_res() since 20781# fmove out doesn't alter them. 20782 tst.b SRC_EX(%a0) # is operand negative? 20783 smi %d1 # set if so 20784 mov.l L_SCR3(%a6),%d0 # pass: dbl prec,rnd mode 20785 bsr.l ovf_res # calc OVFL result 20786 fmovm.x (%a0),&0x80 # load default overflow result 20787 fmov.d %fp0,L_SCR1(%a6) # store to double 20788 20789 mov.l EXC_EA(%a6),%a1 # pass: dst addr 20790 lea L_SCR1(%a6),%a0 # pass: src addr 20791 movq.l &0x8,%d0 # pass: opsize is 8 bytes 20792 bsr.l _dmem_write # store dbl fop to memory 20793 20794 tst.l %d1 # did dstore fail? 20795 bne.l facc_out_d # yes 20796 20797 mov.b FPCR_ENABLE(%a6),%d1 20798 andi.b &0x0a,%d1 # is UNFL or INEX enabled? 20799 bne.w fout_sd_exc_ovfl # yes 20800 addq.l &0x4,%sp 20801 rts 20802 20803# 20804# move out MAY overflow: 20805# (1) force the exp to 0x3fff 20806# (2) do a move w/ appropriate rnd mode 20807# (3) if exp still equals zero, then insert original exponent 20808# for the correct result. 20809# if exp now equals one, then it overflowed so call ovf_res. 20810# 20811fout_dbl_may_ovfl: 20812 mov.w SRC_EX(%a0),%d1 # fetch current sign 20813 andi.w &0x8000,%d1 # keep it,clear exp 20814 ori.w &0x3fff,%d1 # insert exp = 0 20815 mov.w %d1,FP_SCR0_EX(%a6) # insert scaled exp 20816 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man) 20817 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man) 20818 20819 fmov.l L_SCR3(%a6),%fpcr # set FPCR 20820 20821 fmov.x FP_SCR0(%a6),%fp0 # force fop to be rounded 20822 fmov.l &0x0,%fpcr # clear FPCR 20823 20824 fabs.x %fp0 # need absolute value 20825 fcmp.b %fp0,&0x2 # did exponent increase? 20826 fblt.w fout_dbl_exg # no; go finish NORM 20827 bra.w fout_dbl_ovfl # yes; go handle overflow 20828 20829######################################################################### 20830# XDEF **************************************************************** # 20831# dst_dbl(): create double precision value from extended prec. # 20832# # 20833# XREF **************************************************************** # 20834# None # 20835# # 20836# INPUT *************************************************************** # 20837# a0 = pointer to source operand in extended precision # 20838# # 20839# OUTPUT ************************************************************** # 20840# d0 = hi(double precision result) # 20841# d1 = lo(double precision result) # 20842# # 20843# ALGORITHM *********************************************************** # 20844# # 20845# Changes extended precision to double precision. # 20846# Note: no attempt is made to round the extended value to double. 
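#
# For reference, the truncating repack performed by dst_dbl() can be
# sketched in C as below. Illustrative only (not part of the package); the
# argument layout is a simplification (sign, 15-bit biased exponent and
# 64-bit mantissa with the explicit integer bit in bit 63 passed separately
# instead of in the FTEMP image), the name is made up for the example, and
# the operand is assumed to already be within double range, as it is when
# dst_dbl() is called. dst_sgl() below is analogous with a 0x7f bias and
# the top 23 fraction bits.
#
#     #include <stdint.h>
#
#     static uint64_t ext_to_dbl_bits(int sign, uint16_t ext_exp, uint64_t man)
#     {
#         uint64_t exp = (uint64_t)ext_exp - 0x3fff + 0x3ff;   /* rebias        */
#         if (!(man >> 63))                                    /* denorm input  */
#             exp -= 1;
#         uint64_t frac = (man >> 11) & 0x000fffffffffffffULL; /* 52 frac bits  */
#         return ((uint64_t)(sign != 0) << 63) | (exp << 52) | frac;
#     }
#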
# 20847# dbl_sign = ext_sign # 20848# dbl_exp = ext_exp - $3fff(ext bias) + $7ff(dbl bias) # 20849# get rid of ext integer bit # 20850# dbl_mant = ext_mant{62:12} # 20851# # 20852# --------------- --------------- --------------- # 20853# extended -> |s| exp | |1| ms mant | | ls mant | # 20854# --------------- --------------- --------------- # 20855# 95 64 63 62 32 31 11 0 # 20856# | | # 20857# | | # 20858# | | # 20859# v v # 20860# --------------- --------------- # 20861# double -> |s|exp| mant | | mant | # 20862# --------------- --------------- # 20863# 63 51 32 31 0 # 20864# # 20865######################################################################### 20866 20867dst_dbl: 20868 clr.l %d0 # clear d0 20869 mov.w FTEMP_EX(%a0),%d0 # get exponent 20870 subi.w &EXT_BIAS,%d0 # subtract extended precision bias 20871 addi.w &DBL_BIAS,%d0 # add double precision bias 20872 tst.b FTEMP_HI(%a0) # is number a denorm? 20873 bmi.b dst_get_dupper # no 20874 subq.w &0x1,%d0 # yes; denorm bias = DBL_BIAS - 1 20875dst_get_dupper: 20876 swap %d0 # d0 now in upper word 20877 lsl.l &0x4,%d0 # d0 in proper place for dbl prec exp 20878 tst.b FTEMP_EX(%a0) # test sign 20879 bpl.b dst_get_dman # if positive, go process mantissa 20880 bset &0x1f,%d0 # if negative, set sign 20881dst_get_dman: 20882 mov.l FTEMP_HI(%a0),%d1 # get ms mantissa 20883 bfextu %d1{&1:&20},%d1 # get upper 20 bits of ms 20884 or.l %d1,%d0 # put these bits in ms word of double 20885 mov.l %d0,L_SCR1(%a6) # put the new exp back on the stack 20886 mov.l FTEMP_HI(%a0),%d1 # get ms mantissa 20887 mov.l &21,%d0 # load shift count 20888 lsl.l %d0,%d1 # put lower 11 bits in upper bits 20889 mov.l %d1,L_SCR2(%a6) # build lower lword in memory 20890 mov.l FTEMP_LO(%a0),%d1 # get ls mantissa 20891 bfextu %d1{&0:&21},%d0 # get ls 21 bits of double 20892 mov.l L_SCR2(%a6),%d1 20893 or.l %d0,%d1 # put them in double result 20894 mov.l L_SCR1(%a6),%d0 20895 rts 20896 20897######################################################################### 20898# XDEF **************************************************************** # 20899# dst_sgl(): create single precision value from extended prec # 20900# # 20901# XREF **************************************************************** # 20902# # 20903# INPUT *************************************************************** # 20904# a0 = pointer to source operand in extended precision # 20905# # 20906# OUTPUT ************************************************************** # 20907# d0 = single precision result # 20908# # 20909# ALGORITHM *********************************************************** # 20910# # 20911# Changes extended precision to single precision. # 20912# sgl_sign = ext_sign # 20913# sgl_exp = ext_exp - $3fff(ext bias) + $7f(sgl bias) # 20914# get rid of ext integer bit # 20915# sgl_mant = ext_mant{62:12} # 20916# # 20917# --------------- --------------- --------------- # 20918# extended -> |s| exp | |1| ms mant | | ls mant | # 20919# --------------- --------------- --------------- # 20920# 95 64 63 62 40 32 31 12 0 # 20921# | | # 20922# | | # 20923# | | # 20924# v v # 20925# --------------- # 20926# single -> |s|exp| mant | # 20927# --------------- # 20928# 31 22 0 # 20929# # 20930######################################################################### 20931 20932dst_sgl: 20933 clr.l %d0 20934 mov.w FTEMP_EX(%a0),%d0 # get exponent 20935 subi.w &EXT_BIAS,%d0 # subtract extended precision bias 20936 addi.w &SGL_BIAS,%d0 # add single precision bias 20937 tst.b FTEMP_HI(%a0) # is number a denorm? 
20938 bmi.b dst_get_supper # no 20939 subq.w &0x1,%d0 # yes; denorm bias = SGL_BIAS - 1 20940dst_get_supper: 20941 swap %d0 # put exp in upper word of d0 20942 lsl.l &0x7,%d0 # shift it into single exp bits 20943 tst.b FTEMP_EX(%a0) # test sign 20944 bpl.b dst_get_sman # if positive, continue 20945 bset &0x1f,%d0 # if negative, put in sign first 20946dst_get_sman: 20947 mov.l FTEMP_HI(%a0),%d1 # get ms mantissa 20948 andi.l &0x7fffff00,%d1 # get upper 23 bits of ms 20949 lsr.l &0x8,%d1 # and put them flush right 20950 or.l %d1,%d0 # put these bits in ms word of single 20951 rts 20952 20953############################################################################## 20954fout_pack: 20955 bsr.l _calc_ea_fout # fetch the <ea> 20956 mov.l %a0,-(%sp) 20957 20958 mov.b STAG(%a6),%d0 # fetch input type 20959 bne.w fout_pack_not_norm # input is not NORM 20960 20961fout_pack_norm: 20962 btst &0x4,EXC_CMDREG(%a6) # static or dynamic? 20963 beq.b fout_pack_s # static 20964 20965fout_pack_d: 20966 mov.b 1+EXC_CMDREG(%a6),%d1 # fetch dynamic reg 20967 lsr.b &0x4,%d1 20968 andi.w &0x7,%d1 20969 20970 bsr.l fetch_dreg # fetch Dn w/ k-factor 20971 20972 bra.b fout_pack_type 20973fout_pack_s: 20974 mov.b 1+EXC_CMDREG(%a6),%d0 # fetch static field 20975 20976fout_pack_type: 20977 bfexts %d0{&25:&7},%d0 # extract k-factor 20978 mov.l %d0,-(%sp) 20979 20980 lea FP_SRC(%a6),%a0 # pass: ptr to input 20981 20982# bindec is currently scrambling FP_SRC for denorm inputs. 20983# we'll have to change this, but for now, tough luck!!! 20984 bsr.l bindec # convert xprec to packed 20985 20986# andi.l &0xcfff000f,FP_SCR0(%a6) # clear unused fields 20987 andi.l &0xcffff00f,FP_SCR0(%a6) # clear unused fields 20988 20989 mov.l (%sp)+,%d0 20990 20991 tst.b 3+FP_SCR0_EX(%a6) 20992 bne.b fout_pack_set 20993 tst.l FP_SCR0_HI(%a6) 20994 bne.b fout_pack_set 20995 tst.l FP_SCR0_LO(%a6) 20996 bne.b fout_pack_set 20997 20998# add the extra condition that only if the k-factor was zero, too, should 20999# we zero the exponent 21000 tst.l %d0 21001 bne.b fout_pack_set 21002# "mantissa" is all zero which means that the answer is zero. but, the '040 21003# algorithm allows the exponent to be non-zero. the 881/2 do not. Therefore, 21004# if the mantissa is zero, I will zero the exponent, too. 21005# the question now is whether the exponents sign bit is allowed to be non-zero 21006# for a zero, also... 21007 andi.w &0xf000,FP_SCR0(%a6) 21008 21009fout_pack_set: 21010 21011 lea FP_SCR0(%a6),%a0 # pass: src addr 21012 21013fout_pack_write: 21014 mov.l (%sp)+,%a1 # pass: dst addr 21015 mov.l &0xc,%d0 # pass: opsize is 12 bytes 21016 21017 cmpi.b SPCOND_FLG(%a6),&mda7_flg 21018 beq.b fout_pack_a7 21019 21020 bsr.l _dmem_write # write ext prec number to memory 21021 21022 tst.l %d1 # did dstore fail? 21023 bne.w fout_ext_err # yes 21024 21025 rts 21026 21027# we don't want to do the write if the exception occurred in supervisor mode 21028# so _mem_write2() handles this for us. 21029fout_pack_a7: 21030 bsr.l _mem_write2 # write ext prec number to memory 21031 21032 tst.l %d1 # did dstore fail? 21033 bne.w fout_ext_err # yes 21034 21035 rts 21036 21037fout_pack_not_norm: 21038 cmpi.b %d0,&DENORM # is it a DENORM? 21039 beq.w fout_pack_norm # yes 21040 lea FP_SRC(%a6),%a0 21041 clr.w 2+FP_SRC_EX(%a6) 21042 cmpi.b %d0,&SNAN # is it an SNAN? 
21043 beq.b fout_pack_snan # yes 21044 bra.b fout_pack_write # no 21045 21046fout_pack_snan: 21047 ori.w &snaniop2_mask,FPSR_EXCEPT(%a6) # set SNAN/AIOP 21048 bset &0x6,FP_SRC_HI(%a6) # set snan bit 21049 bra.b fout_pack_write 21050 21051######################################################################### 21052# XDEF **************************************************************** # 21053# fetch_dreg(): fetch register according to index in d1 # 21054# # 21055# XREF **************************************************************** # 21056# None # 21057# # 21058# INPUT *************************************************************** # 21059# d1 = index of register to fetch from # 21060# # 21061# OUTPUT ************************************************************** # 21062# d0 = value of register fetched # 21063# # 21064# ALGORITHM *********************************************************** # 21065# According to the index value in d1 which can range from zero # 21066# to fifteen, load the corresponding register file value (where # 21067# address register indexes start at 8). D0/D1/A0/A1/A6/A7 are on the # 21068# stack. The rest should still be in their original places. # 21069# # 21070######################################################################### 21071 21072# this routine leaves d1 intact for subsequent store_dreg calls. 21073 global fetch_dreg 21074fetch_dreg: 21075 mov.w (tbl_fdreg.b,%pc,%d1.w*2),%d0 21076 jmp (tbl_fdreg.b,%pc,%d0.w*1) 21077 21078tbl_fdreg: 21079 short fdreg0 - tbl_fdreg 21080 short fdreg1 - tbl_fdreg 21081 short fdreg2 - tbl_fdreg 21082 short fdreg3 - tbl_fdreg 21083 short fdreg4 - tbl_fdreg 21084 short fdreg5 - tbl_fdreg 21085 short fdreg6 - tbl_fdreg 21086 short fdreg7 - tbl_fdreg 21087 short fdreg8 - tbl_fdreg 21088 short fdreg9 - tbl_fdreg 21089 short fdrega - tbl_fdreg 21090 short fdregb - tbl_fdreg 21091 short fdregc - tbl_fdreg 21092 short fdregd - tbl_fdreg 21093 short fdrege - tbl_fdreg 21094 short fdregf - tbl_fdreg 21095 21096fdreg0: 21097 mov.l EXC_DREGS+0x0(%a6),%d0 21098 rts 21099fdreg1: 21100 mov.l EXC_DREGS+0x4(%a6),%d0 21101 rts 21102fdreg2: 21103 mov.l %d2,%d0 21104 rts 21105fdreg3: 21106 mov.l %d3,%d0 21107 rts 21108fdreg4: 21109 mov.l %d4,%d0 21110 rts 21111fdreg5: 21112 mov.l %d5,%d0 21113 rts 21114fdreg6: 21115 mov.l %d6,%d0 21116 rts 21117fdreg7: 21118 mov.l %d7,%d0 21119 rts 21120fdreg8: 21121 mov.l EXC_DREGS+0x8(%a6),%d0 21122 rts 21123fdreg9: 21124 mov.l EXC_DREGS+0xc(%a6),%d0 21125 rts 21126fdrega: 21127 mov.l %a2,%d0 21128 rts 21129fdregb: 21130 mov.l %a3,%d0 21131 rts 21132fdregc: 21133 mov.l %a4,%d0 21134 rts 21135fdregd: 21136 mov.l %a5,%d0 21137 rts 21138fdrege: 21139 mov.l (%a6),%d0 21140 rts 21141fdregf: 21142 mov.l EXC_A7(%a6),%d0 21143 rts 21144 21145######################################################################### 21146# XDEF **************************************************************** # 21147# store_dreg_l(): store longword to data register specified by d1 # 21148# # 21149# XREF **************************************************************** # 21150# None # 21151# # 21152# INPUT *************************************************************** # 21153# d0 = longowrd value to store # 21154# d1 = index of register to fetch from # 21155# # 21156# OUTPUT ************************************************************** # 21157# (data register is updated) # 21158# # 21159# ALGORITHM *********************************************************** # 21160# According to the index value in d1, store the longword value # 21161# 
in d0 to the corresponding data register. D0/D1 are on the stack # 21162# while the rest are in their initial places. # 21163# # 21164######################################################################### 21165 21166 global store_dreg_l 21167store_dreg_l: 21168 mov.w (tbl_sdregl.b,%pc,%d1.w*2),%d1 21169 jmp (tbl_sdregl.b,%pc,%d1.w*1) 21170 21171tbl_sdregl: 21172 short sdregl0 - tbl_sdregl 21173 short sdregl1 - tbl_sdregl 21174 short sdregl2 - tbl_sdregl 21175 short sdregl3 - tbl_sdregl 21176 short sdregl4 - tbl_sdregl 21177 short sdregl5 - tbl_sdregl 21178 short sdregl6 - tbl_sdregl 21179 short sdregl7 - tbl_sdregl 21180 21181sdregl0: 21182 mov.l %d0,EXC_DREGS+0x0(%a6) 21183 rts 21184sdregl1: 21185 mov.l %d0,EXC_DREGS+0x4(%a6) 21186 rts 21187sdregl2: 21188 mov.l %d0,%d2 21189 rts 21190sdregl3: 21191 mov.l %d0,%d3 21192 rts 21193sdregl4: 21194 mov.l %d0,%d4 21195 rts 21196sdregl5: 21197 mov.l %d0,%d5 21198 rts 21199sdregl6: 21200 mov.l %d0,%d6 21201 rts 21202sdregl7: 21203 mov.l %d0,%d7 21204 rts 21205 21206######################################################################### 21207# XDEF **************************************************************** # 21208# store_dreg_w(): store word to data register specified by d1 # 21209# # 21210# XREF **************************************************************** # 21211# None # 21212# # 21213# INPUT *************************************************************** # 21214# d0 = word value to store # 21215# d1 = index of register to fetch from # 21216# # 21217# OUTPUT ************************************************************** # 21218# (data register is updated) # 21219# # 21220# ALGORITHM *********************************************************** # 21221# According to the index value in d1, store the word value # 21222# in d0 to the corresponding data register. D0/D1 are on the stack # 21223# while the rest are in their initial places. 
# 21224# # 21225######################################################################### 21226 21227 global store_dreg_w 21228store_dreg_w: 21229 mov.w (tbl_sdregw.b,%pc,%d1.w*2),%d1 21230 jmp (tbl_sdregw.b,%pc,%d1.w*1) 21231 21232tbl_sdregw: 21233 short sdregw0 - tbl_sdregw 21234 short sdregw1 - tbl_sdregw 21235 short sdregw2 - tbl_sdregw 21236 short sdregw3 - tbl_sdregw 21237 short sdregw4 - tbl_sdregw 21238 short sdregw5 - tbl_sdregw 21239 short sdregw6 - tbl_sdregw 21240 short sdregw7 - tbl_sdregw 21241 21242sdregw0: 21243 mov.w %d0,2+EXC_DREGS+0x0(%a6) 21244 rts 21245sdregw1: 21246 mov.w %d0,2+EXC_DREGS+0x4(%a6) 21247 rts 21248sdregw2: 21249 mov.w %d0,%d2 21250 rts 21251sdregw3: 21252 mov.w %d0,%d3 21253 rts 21254sdregw4: 21255 mov.w %d0,%d4 21256 rts 21257sdregw5: 21258 mov.w %d0,%d5 21259 rts 21260sdregw6: 21261 mov.w %d0,%d6 21262 rts 21263sdregw7: 21264 mov.w %d0,%d7 21265 rts 21266 21267######################################################################### 21268# XDEF **************************************************************** # 21269# store_dreg_b(): store byte to data register specified by d1 # 21270# # 21271# XREF **************************************************************** # 21272# None # 21273# # 21274# INPUT *************************************************************** # 21275# d0 = byte value to store # 21276# d1 = index of register to fetch from # 21277# # 21278# OUTPUT ************************************************************** # 21279# (data register is updated) # 21280# # 21281# ALGORITHM *********************************************************** # 21282# According to the index value in d1, store the byte value # 21283# in d0 to the corresponding data register. D0/D1 are on the stack # 21284# while the rest are in their initial places. 
# 21285# # 21286######################################################################### 21287 21288 global store_dreg_b 21289store_dreg_b: 21290 mov.w (tbl_sdregb.b,%pc,%d1.w*2),%d1 21291 jmp (tbl_sdregb.b,%pc,%d1.w*1) 21292 21293tbl_sdregb: 21294 short sdregb0 - tbl_sdregb 21295 short sdregb1 - tbl_sdregb 21296 short sdregb2 - tbl_sdregb 21297 short sdregb3 - tbl_sdregb 21298 short sdregb4 - tbl_sdregb 21299 short sdregb5 - tbl_sdregb 21300 short sdregb6 - tbl_sdregb 21301 short sdregb7 - tbl_sdregb 21302 21303sdregb0: 21304 mov.b %d0,3+EXC_DREGS+0x0(%a6) 21305 rts 21306sdregb1: 21307 mov.b %d0,3+EXC_DREGS+0x4(%a6) 21308 rts 21309sdregb2: 21310 mov.b %d0,%d2 21311 rts 21312sdregb3: 21313 mov.b %d0,%d3 21314 rts 21315sdregb4: 21316 mov.b %d0,%d4 21317 rts 21318sdregb5: 21319 mov.b %d0,%d5 21320 rts 21321sdregb6: 21322 mov.b %d0,%d6 21323 rts 21324sdregb7: 21325 mov.b %d0,%d7 21326 rts 21327 21328######################################################################### 21329# XDEF **************************************************************** # 21330# inc_areg(): increment an address register by the value in d0 # 21331# # 21332# XREF **************************************************************** # 21333# None # 21334# # 21335# INPUT *************************************************************** # 21336# d0 = amount to increment by # 21337# d1 = index of address register to increment # 21338# # 21339# OUTPUT ************************************************************** # 21340# (address register is updated) # 21341# # 21342# ALGORITHM *********************************************************** # 21343# Typically used for an instruction w/ a post-increment <ea>, # 21344# this routine adds the increment value in d0 to the address register # 21345# specified by d1. A0/A1/A6/A7 reside on the stack. The rest reside # 21346# in their original places. # 21347# For a7, if the increment amount is one, then we have to # 21348# increment by two. For any a7 update, set the mia7_flag so that if # 21349# an access error exception occurs later in emulation, this address # 21350# register update can be undone. 
# 21351# # 21352######################################################################### 21353 21354 global inc_areg 21355inc_areg: 21356 mov.w (tbl_iareg.b,%pc,%d1.w*2),%d1 21357 jmp (tbl_iareg.b,%pc,%d1.w*1) 21358 21359tbl_iareg: 21360 short iareg0 - tbl_iareg 21361 short iareg1 - tbl_iareg 21362 short iareg2 - tbl_iareg 21363 short iareg3 - tbl_iareg 21364 short iareg4 - tbl_iareg 21365 short iareg5 - tbl_iareg 21366 short iareg6 - tbl_iareg 21367 short iareg7 - tbl_iareg 21368 21369iareg0: add.l %d0,EXC_DREGS+0x8(%a6) 21370 rts 21371iareg1: add.l %d0,EXC_DREGS+0xc(%a6) 21372 rts 21373iareg2: add.l %d0,%a2 21374 rts 21375iareg3: add.l %d0,%a3 21376 rts 21377iareg4: add.l %d0,%a4 21378 rts 21379iareg5: add.l %d0,%a5 21380 rts 21381iareg6: add.l %d0,(%a6) 21382 rts 21383iareg7: mov.b &mia7_flg,SPCOND_FLG(%a6) 21384 cmpi.b %d0,&0x1 21385 beq.b iareg7b 21386 add.l %d0,EXC_A7(%a6) 21387 rts 21388iareg7b: 21389 addq.l &0x2,EXC_A7(%a6) 21390 rts 21391 21392######################################################################### 21393# XDEF **************************************************************** # 21394# dec_areg(): decrement an address register by the value in d0 # 21395# # 21396# XREF **************************************************************** # 21397# None # 21398# # 21399# INPUT *************************************************************** # 21400# d0 = amount to decrement by # 21401# d1 = index of address register to decrement # 21402# # 21403# OUTPUT ************************************************************** # 21404# (address register is updated) # 21405# # 21406# ALGORITHM *********************************************************** # 21407# Typically used for an instruction w/ a pre-decrement <ea>, # 21408# this routine adds the decrement value in d0 to the address register # 21409# specified by d1. A0/A1/A6/A7 reside on the stack. The rest reside # 21410# in their original places. # 21411# For a7, if the decrement amount is one, then we have to # 21412# decrement by two. For any a7 update, set the mda7_flag so that if # 21413# an access error exception occurs later in emulation, this address # 21414# register update can be undone. # 21415# # 21416######################################################################### 21417 21418 global dec_areg 21419dec_areg: 21420 mov.w (tbl_dareg.b,%pc,%d1.w*2),%d1 21421 jmp (tbl_dareg.b,%pc,%d1.w*1) 21422 21423tbl_dareg: 21424 short dareg0 - tbl_dareg 21425 short dareg1 - tbl_dareg 21426 short dareg2 - tbl_dareg 21427 short dareg3 - tbl_dareg 21428 short dareg4 - tbl_dareg 21429 short dareg5 - tbl_dareg 21430 short dareg6 - tbl_dareg 21431 short dareg7 - tbl_dareg 21432 21433dareg0: sub.l %d0,EXC_DREGS+0x8(%a6) 21434 rts 21435dareg1: sub.l %d0,EXC_DREGS+0xc(%a6) 21436 rts 21437dareg2: sub.l %d0,%a2 21438 rts 21439dareg3: sub.l %d0,%a3 21440 rts 21441dareg4: sub.l %d0,%a4 21442 rts 21443dareg5: sub.l %d0,%a5 21444 rts 21445dareg6: sub.l %d0,(%a6) 21446 rts 21447dareg7: mov.b &mda7_flg,SPCOND_FLG(%a6) 21448 cmpi.b %d0,&0x1 21449 beq.b dareg7b 21450 sub.l %d0,EXC_A7(%a6) 21451 rts 21452dareg7b: 21453 subq.l &0x2,EXC_A7(%a6) 21454 rts 21455 21456############################################################################## 21457 21458######################################################################### 21459# XDEF **************************************************************** # 21460# load_fpn1(): load FP register value into FP_SRC(a6). 
# 21461# # 21462# XREF **************************************************************** # 21463# None # 21464# # 21465# INPUT *************************************************************** # 21466# d0 = index of FP register to load # 21467# # 21468# OUTPUT ************************************************************** # 21469# FP_SRC(a6) = value loaded from FP register file # 21470# # 21471# ALGORITHM *********************************************************** # 21472# Using the index in d0, load FP_SRC(a6) with a number from the # 21473# FP register file. # 21474# # 21475######################################################################### 21476 21477 global load_fpn1 21478load_fpn1: 21479 mov.w (tbl_load_fpn1.b,%pc,%d0.w*2), %d0 21480 jmp (tbl_load_fpn1.b,%pc,%d0.w*1) 21481 21482tbl_load_fpn1: 21483 short load_fpn1_0 - tbl_load_fpn1 21484 short load_fpn1_1 - tbl_load_fpn1 21485 short load_fpn1_2 - tbl_load_fpn1 21486 short load_fpn1_3 - tbl_load_fpn1 21487 short load_fpn1_4 - tbl_load_fpn1 21488 short load_fpn1_5 - tbl_load_fpn1 21489 short load_fpn1_6 - tbl_load_fpn1 21490 short load_fpn1_7 - tbl_load_fpn1 21491 21492load_fpn1_0: 21493 mov.l 0+EXC_FP0(%a6), 0+FP_SRC(%a6) 21494 mov.l 4+EXC_FP0(%a6), 4+FP_SRC(%a6) 21495 mov.l 8+EXC_FP0(%a6), 8+FP_SRC(%a6) 21496 lea FP_SRC(%a6), %a0 21497 rts 21498load_fpn1_1: 21499 mov.l 0+EXC_FP1(%a6), 0+FP_SRC(%a6) 21500 mov.l 4+EXC_FP1(%a6), 4+FP_SRC(%a6) 21501 mov.l 8+EXC_FP1(%a6), 8+FP_SRC(%a6) 21502 lea FP_SRC(%a6), %a0 21503 rts 21504load_fpn1_2: 21505 fmovm.x &0x20, FP_SRC(%a6) 21506 lea FP_SRC(%a6), %a0 21507 rts 21508load_fpn1_3: 21509 fmovm.x &0x10, FP_SRC(%a6) 21510 lea FP_SRC(%a6), %a0 21511 rts 21512load_fpn1_4: 21513 fmovm.x &0x08, FP_SRC(%a6) 21514 lea FP_SRC(%a6), %a0 21515 rts 21516load_fpn1_5: 21517 fmovm.x &0x04, FP_SRC(%a6) 21518 lea FP_SRC(%a6), %a0 21519 rts 21520load_fpn1_6: 21521 fmovm.x &0x02, FP_SRC(%a6) 21522 lea FP_SRC(%a6), %a0 21523 rts 21524load_fpn1_7: 21525 fmovm.x &0x01, FP_SRC(%a6) 21526 lea FP_SRC(%a6), %a0 21527 rts 21528 21529############################################################################# 21530 21531######################################################################### 21532# XDEF **************************************************************** # 21533# load_fpn2(): load FP register value into FP_DST(a6). # 21534# # 21535# XREF **************************************************************** # 21536# None # 21537# # 21538# INPUT *************************************************************** # 21539# d0 = index of FP register to load # 21540# # 21541# OUTPUT ************************************************************** # 21542# FP_DST(a6) = value loaded from FP register file # 21543# # 21544# ALGORITHM *********************************************************** # 21545# Using the index in d0, load FP_DST(a6) with a number from the # 21546# FP register file. 
# 21547# # 21548######################################################################### 21549 21550 global load_fpn2 21551load_fpn2: 21552 mov.w (tbl_load_fpn2.b,%pc,%d0.w*2), %d0 21553 jmp (tbl_load_fpn2.b,%pc,%d0.w*1) 21554 21555tbl_load_fpn2: 21556 short load_fpn2_0 - tbl_load_fpn2 21557 short load_fpn2_1 - tbl_load_fpn2 21558 short load_fpn2_2 - tbl_load_fpn2 21559 short load_fpn2_3 - tbl_load_fpn2 21560 short load_fpn2_4 - tbl_load_fpn2 21561 short load_fpn2_5 - tbl_load_fpn2 21562 short load_fpn2_6 - tbl_load_fpn2 21563 short load_fpn2_7 - tbl_load_fpn2 21564 21565load_fpn2_0: 21566 mov.l 0+EXC_FP0(%a6), 0+FP_DST(%a6) 21567 mov.l 4+EXC_FP0(%a6), 4+FP_DST(%a6) 21568 mov.l 8+EXC_FP0(%a6), 8+FP_DST(%a6) 21569 lea FP_DST(%a6), %a0 21570 rts 21571load_fpn2_1: 21572 mov.l 0+EXC_FP1(%a6), 0+FP_DST(%a6) 21573 mov.l 4+EXC_FP1(%a6), 4+FP_DST(%a6) 21574 mov.l 8+EXC_FP1(%a6), 8+FP_DST(%a6) 21575 lea FP_DST(%a6), %a0 21576 rts 21577load_fpn2_2: 21578 fmovm.x &0x20, FP_DST(%a6) 21579 lea FP_DST(%a6), %a0 21580 rts 21581load_fpn2_3: 21582 fmovm.x &0x10, FP_DST(%a6) 21583 lea FP_DST(%a6), %a0 21584 rts 21585load_fpn2_4: 21586 fmovm.x &0x08, FP_DST(%a6) 21587 lea FP_DST(%a6), %a0 21588 rts 21589load_fpn2_5: 21590 fmovm.x &0x04, FP_DST(%a6) 21591 lea FP_DST(%a6), %a0 21592 rts 21593load_fpn2_6: 21594 fmovm.x &0x02, FP_DST(%a6) 21595 lea FP_DST(%a6), %a0 21596 rts 21597load_fpn2_7: 21598 fmovm.x &0x01, FP_DST(%a6) 21599 lea FP_DST(%a6), %a0 21600 rts 21601 21602############################################################################# 21603 21604######################################################################### 21605# XDEF **************************************************************** # 21606# store_fpreg(): store an fp value to the fpreg designated d0. # 21607# # 21608# XREF **************************************************************** # 21609# None # 21610# # 21611# INPUT *************************************************************** # 21612# fp0 = extended precision value to store # 21613# d0 = index of floating-point register # 21614# # 21615# OUTPUT ************************************************************** # 21616# None # 21617# # 21618# ALGORITHM *********************************************************** # 21619# Store the value in fp0 to the FP register designated by the # 21620# value in d0. The FP number can be DENORM or SNAN so we have to be # 21621# careful that we don't take an exception here. 
# 21622# # 21623######################################################################### 21624 21625 global store_fpreg 21626store_fpreg: 21627 mov.w (tbl_store_fpreg.b,%pc,%d0.w*2), %d0 21628 jmp (tbl_store_fpreg.b,%pc,%d0.w*1) 21629 21630tbl_store_fpreg: 21631 short store_fpreg_0 - tbl_store_fpreg 21632 short store_fpreg_1 - tbl_store_fpreg 21633 short store_fpreg_2 - tbl_store_fpreg 21634 short store_fpreg_3 - tbl_store_fpreg 21635 short store_fpreg_4 - tbl_store_fpreg 21636 short store_fpreg_5 - tbl_store_fpreg 21637 short store_fpreg_6 - tbl_store_fpreg 21638 short store_fpreg_7 - tbl_store_fpreg 21639 21640store_fpreg_0: 21641 fmovm.x &0x80, EXC_FP0(%a6) 21642 rts 21643store_fpreg_1: 21644 fmovm.x &0x80, EXC_FP1(%a6) 21645 rts 21646store_fpreg_2: 21647 fmovm.x &0x01, -(%sp) 21648 fmovm.x (%sp)+, &0x20 21649 rts 21650store_fpreg_3: 21651 fmovm.x &0x01, -(%sp) 21652 fmovm.x (%sp)+, &0x10 21653 rts 21654store_fpreg_4: 21655 fmovm.x &0x01, -(%sp) 21656 fmovm.x (%sp)+, &0x08 21657 rts 21658store_fpreg_5: 21659 fmovm.x &0x01, -(%sp) 21660 fmovm.x (%sp)+, &0x04 21661 rts 21662store_fpreg_6: 21663 fmovm.x &0x01, -(%sp) 21664 fmovm.x (%sp)+, &0x02 21665 rts 21666store_fpreg_7: 21667 fmovm.x &0x01, -(%sp) 21668 fmovm.x (%sp)+, &0x01 21669 rts 21670 21671######################################################################### 21672# XDEF **************************************************************** # 21673# _denorm(): denormalize an intermediate result # 21674# # 21675# XREF **************************************************************** # 21676# None # 21677# # 21678# INPUT *************************************************************** # 21679# a0 = points to the operand to be denormalized # 21680# (in the internal extended format) # 21681# # 21682# d0 = rounding precision # 21683# # 21684# OUTPUT ************************************************************** # 21685# a0 = pointer to the denormalized result # 21686# (in the internal extended format) # 21687# # 21688# d0 = guard,round,sticky # 21689# # 21690# ALGORITHM *********************************************************** # 21691# According to the exponent underflow threshold for the given # 21692# precision, shift the mantissa bits to the right in order raise the # 21693# exponent of the operand to the threshold value. While shifting the # 21694# mantissa bits right, maintain the value of the guard, round, and # 21695# sticky bits. # 21696# other notes: # 21697# (1) _denorm() is called by the underflow routines # 21698# (2) _denorm() does NOT affect the status register # 21699# # 21700######################################################################### 21701 21702# 21703# table of exponent threshold values for each precision 21704# 21705tbl_thresh: 21706 short 0x0 21707 short sgl_thresh 21708 short dbl_thresh 21709 21710 global _denorm 21711_denorm: 21712# 21713# Load the exponent threshold for the precision selected and check 21714# to see if (threshold - exponent) is > 65 in which case we can 21715# simply calculate the sticky bit and zero the mantissa. otherwise 21716# we have to call the denormalization routine. 21717# 21718 lsr.b &0x2, %d0 # shift prec to lo bits 21719 mov.w (tbl_thresh.b,%pc,%d0.w*2), %d1 # load prec threshold 21720 mov.w %d1, %d0 # copy d1 into d0 21721 sub.w FTEMP_EX(%a0), %d0 # diff = threshold - exp 21722 cmpi.w %d0, &66 # is diff > 65? 
(mant + g,r bits) 21723 bpl.b denorm_set_stky # yes; just calc sticky 21724 21725 clr.l %d0 # clear g,r,s 21726 btst &inex2_bit, FPSR_EXCEPT(%a6) # yes; was INEX2 set? 21727 beq.b denorm_call # no; don't change anything 21728 bset &29, %d0 # yes; set sticky bit 21729 21730denorm_call: 21731 bsr.l dnrm_lp # denormalize the number 21732 rts 21733 21734# 21735# all bit would have been shifted off during the denorm so simply 21736# calculate if the sticky should be set and clear the entire mantissa. 21737# 21738denorm_set_stky: 21739 mov.l &0x20000000, %d0 # set sticky bit in return value 21740 mov.w %d1, FTEMP_EX(%a0) # load exp with threshold 21741 clr.l FTEMP_HI(%a0) # set d1 = 0 (ms mantissa) 21742 clr.l FTEMP_LO(%a0) # set d2 = 0 (ms mantissa) 21743 rts 21744 21745# # 21746# dnrm_lp(): normalize exponent/mantissa to specified threshold # 21747# # 21748# INPUT: # 21749# %a0 : points to the operand to be denormalized # 21750# %d0{31:29} : initial guard,round,sticky # 21751# %d1{15:0} : denormalization threshold # 21752# OUTPUT: # 21753# %a0 : points to the denormalized operand # 21754# %d0{31:29} : final guard,round,sticky # 21755# # 21756 21757# *** Local Equates *** # 21758set GRS, L_SCR2 # g,r,s temp storage 21759set FTEMP_LO2, L_SCR1 # FTEMP_LO copy 21760 21761 global dnrm_lp 21762dnrm_lp: 21763 21764# 21765# make a copy of FTEMP_LO and place the g,r,s bits directly after it 21766# in memory so as to make the bitfield extraction for denormalization easier. 21767# 21768 mov.l FTEMP_LO(%a0), FTEMP_LO2(%a6) # make FTEMP_LO copy 21769 mov.l %d0, GRS(%a6) # place g,r,s after it 21770 21771# 21772# check to see how much less than the underflow threshold the operand 21773# exponent is. 21774# 21775 mov.l %d1, %d0 # copy the denorm threshold 21776 sub.w FTEMP_EX(%a0), %d1 # d1 = threshold - uns exponent 21777 ble.b dnrm_no_lp # d1 <= 0 21778 cmpi.w %d1, &0x20 # is ( 0 <= d1 < 32) ? 21779 blt.b case_1 # yes 21780 cmpi.w %d1, &0x40 # is (32 <= d1 < 64) ? 
21781 blt.b case_2 # yes 21782 bra.w case_3 # (d1 >= 64) 21783 21784# 21785# No normalization necessary 21786# 21787dnrm_no_lp: 21788 mov.l GRS(%a6), %d0 # restore original g,r,s 21789 rts 21790 21791# 21792# case (0<d1<32) 21793# 21794# %d0 = denorm threshold 21795# %d1 = "n" = amt to shift 21796# 21797# --------------------------------------------------------- 21798# | FTEMP_HI | FTEMP_LO |grs000.........000| 21799# --------------------------------------------------------- 21800# <-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)-> 21801# \ \ \ \ 21802# \ \ \ \ 21803# \ \ \ \ 21804# \ \ \ \ 21805# \ \ \ \ 21806# \ \ \ \ 21807# \ \ \ \ 21808# \ \ \ \ 21809# <-(n)-><-(32 - n)-><------(32)-------><------(32)-------> 21810# --------------------------------------------------------- 21811# |0.....0| NEW_HI | NEW_FTEMP_LO |grs | 21812# --------------------------------------------------------- 21813# 21814case_1: 21815 mov.l %d2, -(%sp) # create temp storage 21816 21817 mov.w %d0, FTEMP_EX(%a0) # exponent = denorm threshold 21818 mov.l &32, %d0 21819 sub.w %d1, %d0 # %d0 = 32 - %d1 21820 21821 cmpi.w %d1, &29 # is shft amt >= 29 21822 blt.b case1_extract # no; no fix needed 21823 mov.b GRS(%a6), %d2 21824 or.b %d2, 3+FTEMP_LO2(%a6) 21825 21826case1_extract: 21827 bfextu FTEMP_HI(%a0){&0:%d0}, %d2 # %d2 = new FTEMP_HI 21828 bfextu FTEMP_HI(%a0){%d0:&32}, %d1 # %d1 = new FTEMP_LO 21829 bfextu FTEMP_LO2(%a6){%d0:&32}, %d0 # %d0 = new G,R,S 21830 21831 mov.l %d2, FTEMP_HI(%a0) # store new FTEMP_HI 21832 mov.l %d1, FTEMP_LO(%a0) # store new FTEMP_LO 21833 21834 bftst %d0{&2:&30} # were bits shifted off? 21835 beq.b case1_sticky_clear # no; go finish 21836 bset &rnd_stky_bit, %d0 # yes; set sticky bit 21837 21838case1_sticky_clear: 21839 and.l &0xe0000000, %d0 # clear all but G,R,S 21840 mov.l (%sp)+, %d2 # restore temp register 21841 rts 21842 21843# 21844# case (32<=d1<64) 21845# 21846# %d0 = denorm threshold 21847# %d1 = "n" = amt to shift 21848# 21849# --------------------------------------------------------- 21850# | FTEMP_HI | FTEMP_LO |grs000.........000| 21851# --------------------------------------------------------- 21852# <-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)-> 21853# \ \ \ 21854# \ \ \ 21855# \ \ ------------------- 21856# \ -------------------- \ 21857# ------------------- \ \ 21858# \ \ \ 21859# \ \ \ 21860# \ \ \ 21861# <-------(32)------><-(n)-><-(32 - n)-><------(32)-------> 21862# --------------------------------------------------------- 21863# |0...............0|0....0| NEW_LO |grs | 21864# --------------------------------------------------------- 21865# 21866case_2: 21867 mov.l %d2, -(%sp) # create temp storage 21868 21869 mov.w %d0, FTEMP_EX(%a0) # exponent = denorm threshold 21870 subi.w &0x20, %d1 # %d1 now between 0 and 32 21871 mov.l &0x20, %d0 21872 sub.w %d1, %d0 # %d0 = 32 - %d1 21873 21874# subtle step here; or in the g,r,s at the bottom of FTEMP_LO to minimize 21875# the number of bits to check for the sticky detect. 21876# it only plays a role in shift amounts of 61-63. 21877 mov.b GRS(%a6), %d2 21878 or.b %d2, 3+FTEMP_LO2(%a6) 21879 21880 bfextu FTEMP_HI(%a0){&0:%d0}, %d2 # %d2 = new FTEMP_LO 21881 bfextu FTEMP_HI(%a0){%d0:&32}, %d1 # %d1 = new G,R,S 21882 21883 bftst %d1{&2:&30} # were any bits shifted off? 21884 bne.b case2_set_sticky # yes; set sticky bit 21885 bftst FTEMP_LO2(%a6){%d0:&31} # were any bits shifted off? 
21886 bne.b case2_set_sticky # yes; set sticky bit 21887 21888 mov.l %d1, %d0 # move new G,R,S to %d0 21889 bra.b case2_end 21890 21891case2_set_sticky: 21892 mov.l %d1, %d0 # move new G,R,S to %d0 21893 bset &rnd_stky_bit, %d0 # set sticky bit 21894 21895case2_end: 21896 clr.l FTEMP_HI(%a0) # store FTEMP_HI = 0 21897 mov.l %d2, FTEMP_LO(%a0) # store FTEMP_LO 21898 and.l &0xe0000000, %d0 # clear all but G,R,S 21899 21900 mov.l (%sp)+,%d2 # restore temp register 21901 rts 21902 21903# 21904# case (d1>=64) 21905# 21906# %d0 = denorm threshold 21907# %d1 = amt to shift 21908# 21909case_3: 21910 mov.w %d0, FTEMP_EX(%a0) # insert denorm threshold 21911 21912 cmpi.w %d1, &65 # is shift amt > 65? 21913 blt.b case3_64 # no; it's == 64 21914 beq.b case3_65 # no; it's == 65 21915 21916# 21917# case (d1>65) 21918# 21919# Shift value is > 65 and out of range. All bits are shifted off. 21920# Return a zero mantissa with the sticky bit set 21921# 21922 clr.l FTEMP_HI(%a0) # clear hi(mantissa) 21923 clr.l FTEMP_LO(%a0) # clear lo(mantissa) 21924 mov.l &0x20000000, %d0 # set sticky bit 21925 rts 21926 21927# 21928# case (d1 == 64) 21929# 21930# --------------------------------------------------------- 21931# | FTEMP_HI | FTEMP_LO |grs000.........000| 21932# --------------------------------------------------------- 21933# <-------(32)------> 21934# \ \ 21935# \ \ 21936# \ \ 21937# \ ------------------------------ 21938# ------------------------------- \ 21939# \ \ 21940# \ \ 21941# \ \ 21942# <-------(32)------> 21943# --------------------------------------------------------- 21944# |0...............0|0................0|grs | 21945# --------------------------------------------------------- 21946# 21947case3_64: 21948 mov.l FTEMP_HI(%a0), %d0 # fetch hi(mantissa) 21949 mov.l %d0, %d1 # make a copy 21950 and.l &0xc0000000, %d0 # extract G,R 21951 and.l &0x3fffffff, %d1 # extract other bits 21952 21953 bra.b case3_complete 21954 21955# 21956# case (d1 == 65) 21957# 21958# --------------------------------------------------------- 21959# | FTEMP_HI | FTEMP_LO |grs000.........000| 21960# --------------------------------------------------------- 21961# <-------(32)------> 21962# \ \ 21963# \ \ 21964# \ \ 21965# \ ------------------------------ 21966# -------------------------------- \ 21967# \ \ 21968# \ \ 21969# \ \ 21970# <-------(31)-----> 21971# --------------------------------------------------------- 21972# |0...............0|0................0|0rs | 21973# --------------------------------------------------------- 21974# 21975case3_65: 21976 mov.l FTEMP_HI(%a0), %d0 # fetch hi(mantissa) 21977 and.l &0x80000000, %d0 # extract R bit 21978 lsr.l &0x1, %d0 # shift high bit into R bit 21979 and.l &0x7fffffff, %d1 # extract other bits 21980 21981case3_complete: 21982# last operation done was an "and" of the bits shifted off so the condition 21983# codes are already set so branch accordingly. 21984 bne.b case3_set_sticky # yes; go set new sticky 21985 tst.l FTEMP_LO(%a0) # were any bits shifted off? 21986 bne.b case3_set_sticky # yes; go set new sticky 21987 tst.b GRS(%a6) # were any bits shifted off? 21988 bne.b case3_set_sticky # yes; go set new sticky 21989 21990# 21991# no bits were shifted off so don't set the sticky bit. 21992# the guard and 21993# the entire mantissa is zero. 21994# 21995 clr.l FTEMP_HI(%a0) # clear hi(mantissa) 21996 clr.l FTEMP_LO(%a0) # clear lo(mantissa) 21997 rts 21998 21999# 22000# some bits were shifted off so set the sticky bit. 22001# the entire mantissa is zero. 
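#
# All of the shift cases above implement the same idea: shift the mantissa
# right by (threshold - exponent) and fold every bit that falls off into
# the sticky bit. A bit-serial C sketch of that idea (illustrative only,
# not part of the package; the name and the g,r,s packing in bits 2:0 are
# made up for the example -- the assembly keeps g,r,s in d0{31:29}):
#
#     #include <stdint.h>
#
#     static uint64_t denorm_man(uint64_t man, unsigned cnt, unsigned *grs)
#     {
#         unsigned g = (*grs >> 2) & 1, r = (*grs >> 1) & 1, s = *grs & 1;
#         while (cnt--) {
#             s |= r;                        /* round bit falls into sticky   */
#             r  = g;                        /* guard becomes round           */
#             g  = (unsigned)(man & 1);      /* bit shifted off becomes guard */
#             man >>= 1;
#         }
#         *grs = (g << 2) | (r << 1) | s;
#         return man;
#     }
#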
22002# 22003case3_set_sticky: 22004 bset &rnd_stky_bit,%d0 # set new sticky bit 22005 clr.l FTEMP_HI(%a0) # clear hi(mantissa) 22006 clr.l FTEMP_LO(%a0) # clear lo(mantissa) 22007 rts 22008 22009######################################################################### 22010# XDEF **************************************************************** # 22011# _round(): round result according to precision/mode # 22012# # 22013# XREF **************************************************************** # 22014# None # 22015# # 22016# INPUT *************************************************************** # 22017# a0 = ptr to input operand in internal extended format # 22018# d1(hi) = contains rounding precision: # 22019# ext = $0000xxxx # 22020# sgl = $0004xxxx # 22021# dbl = $0008xxxx # 22022# d1(lo) = contains rounding mode: # 22023# RN = $xxxx0000 # 22024# RZ = $xxxx0001 # 22025# RM = $xxxx0002 # 22026# RP = $xxxx0003 # 22027# d0{31:29} = contains the g,r,s bits (extended) # 22028# # 22029# OUTPUT ************************************************************** # 22030# a0 = pointer to rounded result # 22031# # 22032# ALGORITHM *********************************************************** # 22033# On return the value pointed to by a0 is correctly rounded, # 22034# a0 is preserved and the g-r-s bits in d0 are cleared. # 22035# The result is not typed - the tag field is invalid. The # 22036# result is still in the internal extended format. # 22037# # 22038# The INEX bit of USER_FPSR will be set if the rounded result was # 22039# inexact (i.e. if any of the g-r-s bits were set). # 22040# # 22041######################################################################### 22042 22043 global _round 22044_round: 22045# 22046# ext_grs() looks at the rounding precision and sets the appropriate 22047# G,R,S bits. 22048# If (G,R,S == 0) then result is exact and round is done, else set 22049# the inex flag in status reg and continue. 22050# 22051 bsr.l ext_grs # extract G,R,S 22052 22053 tst.l %d0 # are G,R,S zero? 22054 beq.w truncate # yes; round is complete 22055 22056 or.w &inx2a_mask, 2+USER_FPSR(%a6) # set inex2/ainex 22057 22058# 22059# Use rounding mode as an index into a jump table for these modes. 22060# All of the following assumes grs != 0. 22061# 22062 mov.w (tbl_mode.b,%pc,%d1.w*2), %a1 # load jump offset 22063 jmp (tbl_mode.b,%pc,%a1) # jmp to rnd mode handler 22064 22065tbl_mode: 22066 short rnd_near - tbl_mode 22067 short truncate - tbl_mode # RZ always truncates 22068 short rnd_mnus - tbl_mode 22069 short rnd_plus - tbl_mode 22070 22071################################################################# 22072# ROUND PLUS INFINITY # 22073# # 22074# If sign of fp number = 0 (positive), then add 1 to l. # 22075################################################################# 22076rnd_plus: 22077 tst.b FTEMP_SGN(%a0) # check for sign 22078 bmi.w truncate # if positive then truncate 22079 22080 mov.l &0xffffffff, %d0 # force g,r,s to be all f's 22081 swap %d1 # set up d1 for round prec. 22082 22083 cmpi.b %d1, &s_mode # is prec = sgl? 22084 beq.w add_sgl # yes 22085 bgt.w add_dbl # no; it's dbl 22086 bra.w add_ext # no; it's ext 22087 22088################################################################# 22089# ROUND MINUS INFINITY # 22090# # 22091# If sign of fp number = 1 (negative), then add 1 to l. 
# 22092################################################################# 22093rnd_mnus: 22094 tst.b FTEMP_SGN(%a0) # check for sign 22095 bpl.w truncate # if negative then truncate 22096 22097 mov.l &0xffffffff, %d0 # force g,r,s to be all f's 22098 swap %d1 # set up d1 for round prec. 22099 22100 cmpi.b %d1, &s_mode # is prec = sgl? 22101 beq.w add_sgl # yes 22102 bgt.w add_dbl # no; it's dbl 22103 bra.w add_ext # no; it's ext 22104 22105################################################################# 22106# ROUND NEAREST # 22107# # 22108# If (g=1), then add 1 to l and if (r=s=0), then clear l # 22109# Note that this will round to even in case of a tie. # 22110################################################################# 22111rnd_near: 22112 asl.l &0x1, %d0 # shift g-bit to c-bit 22113 bcc.w truncate # if (g=1) then 22114 22115 swap %d1 # set up d1 for round prec. 22116 22117 cmpi.b %d1, &s_mode # is prec = sgl? 22118 beq.w add_sgl # yes 22119 bgt.w add_dbl # no; it's dbl 22120 bra.w add_ext # no; it's ext 22121 22122# *** LOCAL EQUATES *** 22123set ad_1_sgl, 0x00000100 # constant to add 1 to l-bit in sgl prec 22124set ad_1_dbl, 0x00000800 # constant to add 1 to l-bit in dbl prec 22125 22126######################### 22127# ADD SINGLE # 22128######################### 22129add_sgl: 22130 add.l &ad_1_sgl, FTEMP_HI(%a0) 22131 bcc.b scc_clr # no mantissa overflow 22132 roxr.w FTEMP_HI(%a0) # shift v-bit back in 22133 roxr.w FTEMP_HI+2(%a0) # shift v-bit back in 22134 add.w &0x1, FTEMP_EX(%a0) # and incr exponent 22135scc_clr: 22136 tst.l %d0 # test for rs = 0 22137 bne.b sgl_done 22138 and.w &0xfe00, FTEMP_HI+2(%a0) # clear the l-bit 22139sgl_done: 22140 and.l &0xffffff00, FTEMP_HI(%a0) # truncate bits beyond sgl limit 22141 clr.l FTEMP_LO(%a0) # clear d2 22142 rts 22143 22144######################### 22145# ADD EXTENDED # 22146######################### 22147add_ext: 22148 addq.l &1,FTEMP_LO(%a0) # add 1 to l-bit 22149 bcc.b xcc_clr # test for carry out 22150 addq.l &1,FTEMP_HI(%a0) # propagate carry 22151 bcc.b xcc_clr 22152 roxr.w FTEMP_HI(%a0) # mant is 0 so restore v-bit 22153 roxr.w FTEMP_HI+2(%a0) # mant is 0 so restore v-bit 22154 roxr.w FTEMP_LO(%a0) 22155 roxr.w FTEMP_LO+2(%a0) 22156 add.w &0x1,FTEMP_EX(%a0) # and inc exp 22157xcc_clr: 22158 tst.l %d0 # test rs = 0 22159 bne.b add_ext_done 22160 and.b &0xfe,FTEMP_LO+3(%a0) # clear the l bit 22161add_ext_done: 22162 rts 22163 22164######################### 22165# ADD DOUBLE # 22166######################### 22167add_dbl: 22168 add.l &ad_1_dbl, FTEMP_LO(%a0) # add 1 to lsb 22169 bcc.b dcc_clr # no carry 22170 addq.l &0x1, FTEMP_HI(%a0) # propagate carry 22171 bcc.b dcc_clr # no carry 22172 22173 roxr.w FTEMP_HI(%a0) # mant is 0 so restore v-bit 22174 roxr.w FTEMP_HI+2(%a0) # mant is 0 so restore v-bit 22175 roxr.w FTEMP_LO(%a0) 22176 roxr.w FTEMP_LO+2(%a0) 22177 addq.w &0x1, FTEMP_EX(%a0) # incr exponent 22178dcc_clr: 22179 tst.l %d0 # test for rs = 0 22180 bne.b dbl_done 22181 and.w &0xf000, FTEMP_LO+2(%a0) # clear the l-bit 22182 22183dbl_done: 22184 and.l &0xfffff800,FTEMP_LO(%a0) # truncate bits beyond dbl limit 22185 rts 22186 22187########################### 22188# Truncate all other bits # 22189########################### 22190truncate: 22191 swap %d1 # select rnd prec 22192 22193 cmpi.b %d1, &s_mode # is prec sgl? 22194 beq.w sgl_done # yes 22195 bgt.b dbl_done # no; it's dbl 22196 rts # no; it's ext 22197 22198 22199# 22200# ext_grs(): extract guard, round and sticky bits according to 22201# rounding precision. 
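#
# Once the guard, round and sticky bits have been extracted for the target
# precision (ext_grs(), below), the rounding decision made by _round()
# above boils down to the following. Illustrative C sketch only (not part
# of the package); the names are made up for the example, and the mantissa
# carry-out/exponent bump and the INEX2 bookkeeping handled by the code
# above are omitted.
#
#     enum rmode { RN, RZ, RM, RP };
#
#     static unsigned long long round_man(unsigned long long man, int neg,
#                                         unsigned g, unsigned r, unsigned s,
#                                         enum rmode mode)
#     {
#         if (!(g | r | s))                       /* exact: leave it alone  */
#             return man;
#         switch (mode) {
#         case RZ: return man;                    /* truncate               */
#         case RM: return neg ? man + 1 : man;    /* toward minus infinity  */
#         case RP: return neg ? man : man + 1;    /* toward plus infinity   */
#         case RN:
#             if (!g)     return man;             /* below halfway          */
#             if (r | s)  return man + 1;         /* above halfway          */
#             return (man & 1) ? man + 1 : man;   /* tie: round to even     */
#         }
#         return man;
#     }
#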
22202# 22203# INPUT 22204# d0 = extended precision g,r,s (in d0{31:29}) 22205# d1 = {PREC,ROUND} 22206# OUTPUT 22207# d0{31:29} = guard, round, sticky 22208# 22209# The ext_grs extract the guard/round/sticky bits according to the 22210# selected rounding precision. It is called by the round subroutine 22211# only. All registers except d0 are kept intact. d0 becomes an 22212# updated guard,round,sticky in d0{31:29} 22213# 22214# Notes: the ext_grs uses the round PREC, and therefore has to swap d1 22215# prior to usage, and needs to restore d1 to original. this 22216# routine is tightly tied to the round routine and not meant to 22217# uphold standard subroutine calling practices. 22218# 22219 22220ext_grs: 22221 swap %d1 # have d1.w point to round precision 22222 tst.b %d1 # is rnd prec = extended? 22223 bne.b ext_grs_not_ext # no; go handle sgl or dbl 22224 22225# 22226# %d0 actually already hold g,r,s since _round() had it before calling 22227# this function. so, as long as we don't disturb it, we are "returning" it. 22228# 22229ext_grs_ext: 22230 swap %d1 # yes; return to correct positions 22231 rts 22232 22233ext_grs_not_ext: 22234 movm.l &0x3000, -(%sp) # make some temp registers {d2/d3} 22235 22236 cmpi.b %d1, &s_mode # is rnd prec = sgl? 22237 bne.b ext_grs_dbl # no; go handle dbl 22238 22239# 22240# sgl: 22241# 96 64 40 32 0 22242# ----------------------------------------------------- 22243# | EXP |XXXXXXX| |xx | |grs| 22244# ----------------------------------------------------- 22245# <--(24)--->nn\ / 22246# ee --------------------- 22247# ww | 22248# v 22249# gr new sticky 22250# 22251ext_grs_sgl: 22252 bfextu FTEMP_HI(%a0){&24:&2}, %d3 # sgl prec. g-r are 2 bits right 22253 mov.l &30, %d2 # of the sgl prec. limits 22254 lsl.l %d2, %d3 # shift g-r bits to MSB of d3 22255 mov.l FTEMP_HI(%a0), %d2 # get word 2 for s-bit test 22256 and.l &0x0000003f, %d2 # s bit is the or of all other 22257 bne.b ext_grs_st_stky # bits to the right of g-r 22258 tst.l FTEMP_LO(%a0) # test lower mantissa 22259 bne.b ext_grs_st_stky # if any are set, set sticky 22260 tst.l %d0 # test original g,r,s 22261 bne.b ext_grs_st_stky # if any are set, set sticky 22262 bra.b ext_grs_end_sd # if words 3 and 4 are clr, exit 22263 22264# 22265# dbl: 22266# 96 64 32 11 0 22267# ----------------------------------------------------- 22268# | EXP |XXXXXXX| | |xx |grs| 22269# ----------------------------------------------------- 22270# nn\ / 22271# ee ------- 22272# ww | 22273# v 22274# gr new sticky 22275# 22276ext_grs_dbl: 22277 bfextu FTEMP_LO(%a0){&21:&2}, %d3 # dbl-prec. g-r are 2 bits right 22278 mov.l &30, %d2 # of the dbl prec. limits 22279 lsl.l %d2, %d3 # shift g-r bits to the MSB of d3 22280 mov.l FTEMP_LO(%a0), %d2 # get lower mantissa for s-bit test 22281 and.l &0x000001ff, %d2 # s bit is the or-ing of all 22282 bne.b ext_grs_st_stky # other bits to the right of g-r 22283 tst.l %d0 # test word original g,r,s 22284 bne.b ext_grs_st_stky # if any are set, set sticky 22285 bra.b ext_grs_end_sd # if clear, exit 22286 22287ext_grs_st_stky: 22288 bset &rnd_stky_bit, %d3 # set sticky bit 22289ext_grs_end_sd: 22290 mov.l %d3, %d0 # return grs to d0 22291 22292 movm.l (%sp)+, &0xc # restore scratch registers {d2/d3} 22293 22294 swap %d1 # restore d1 to original 22295 rts 22296 22297######################################################################### 22298# norm(): normalize the mantissa of an extended precision input. the # 22299# input operand should not be normalized already. 
# 22300# # 22301# XDEF **************************************************************** # 22302# norm() # 22303# # 22304# XREF **************************************************************** # 22305# none # 22306# # 22307# INPUT *************************************************************** # 22308# a0 = pointer fp extended precision operand to normalize # 22309# # 22310# OUTPUT ************************************************************** # 22311# d0 = number of bit positions the mantissa was shifted # 22312# a0 = the input operand's mantissa is normalized; the exponent # 22313# is unchanged. # 22314# # 22315######################################################################### 22316 global norm 22317norm: 22318 mov.l %d2, -(%sp) # create some temp regs 22319 mov.l %d3, -(%sp) 22320 22321 mov.l FTEMP_HI(%a0), %d0 # load hi(mantissa) 22322 mov.l FTEMP_LO(%a0), %d1 # load lo(mantissa) 22323 22324 bfffo %d0{&0:&32}, %d2 # how many places to shift? 22325 beq.b norm_lo # hi(man) is all zeroes! 22326 22327norm_hi: 22328 lsl.l %d2, %d0 # left shift hi(man) 22329 bfextu %d1{&0:%d2}, %d3 # extract lo bits 22330 22331 or.l %d3, %d0 # create hi(man) 22332 lsl.l %d2, %d1 # create lo(man) 22333 22334 mov.l %d0, FTEMP_HI(%a0) # store new hi(man) 22335 mov.l %d1, FTEMP_LO(%a0) # store new lo(man) 22336 22337 mov.l %d2, %d0 # return shift amount 22338 22339 mov.l (%sp)+, %d3 # restore temp regs 22340 mov.l (%sp)+, %d2 22341 22342 rts 22343 22344norm_lo: 22345 bfffo %d1{&0:&32}, %d2 # how many places to shift? 22346 lsl.l %d2, %d1 # shift lo(man) 22347 add.l &32, %d2 # add 32 to shft amount 22348 22349 mov.l %d1, FTEMP_HI(%a0) # store hi(man) 22350 clr.l FTEMP_LO(%a0) # lo(man) is now zero 22351 22352 mov.l %d2, %d0 # return shift amount 22353 22354 mov.l (%sp)+, %d3 # restore temp regs 22355 mov.l (%sp)+, %d2 22356 22357 rts 22358 22359######################################################################### 22360# unnorm_fix(): - changes an UNNORM to one of NORM, DENORM, or ZERO # 22361# - returns corresponding optype tag # 22362# # 22363# XDEF **************************************************************** # 22364# unnorm_fix() # 22365# # 22366# XREF **************************************************************** # 22367# norm() - normalize the mantissa # 22368# # 22369# INPUT *************************************************************** # 22370# a0 = pointer to unnormalized extended precision number # 22371# # 22372# OUTPUT ************************************************************** # 22373# d0 = optype tag - is corrected to one of NORM, DENORM, or ZERO # 22374# a0 = input operand has been converted to a norm, denorm, or # 22375# zero; both the exponent and mantissa are changed. # 22376# # 22377######################################################################### 22378 22379 global unnorm_fix 22380unnorm_fix: 22381 bfffo FTEMP_HI(%a0){&0:&32}, %d0 # how many shifts are needed? 22382 bne.b unnorm_shift # hi(man) is not all zeroes 22383 22384# 22385# hi(man) is all zeroes so see if any bits in lo(man) are set 22386# 22387unnorm_chk_lo: 22388 bfffo FTEMP_LO(%a0){&0:&32}, %d0 # is operand really a zero? 22389 beq.w unnorm_zero # yes 22390 22391 add.w &32, %d0 # no; fix shift distance 22392 22393# 22394# d0 = # shifts needed for complete normalization 22395# 22396unnorm_shift: 22397 clr.l %d1 # clear top word 22398 mov.w FTEMP_EX(%a0), %d1 # extract exponent 22399 and.w &0x7fff, %d1 # strip off sgn 22400 22401 cmp.w %d0, %d1 # will denorm push exp < 0? 
22402 bgt.b unnorm_nrm_zero # yes; denorm only until exp = 0 22403 22404# 22405# exponent would not go < 0. Therefore, number stays normalized 22406# 22407 sub.w %d0, %d1 # shift exponent value 22408 mov.w FTEMP_EX(%a0), %d0 # load old exponent 22409 and.w &0x8000, %d0 # save old sign 22410 or.w %d0, %d1 # {sgn,new exp} 22411 mov.w %d1, FTEMP_EX(%a0) # insert new exponent 22412 22413 bsr.l norm # normalize UNNORM 22414 22415 mov.b &NORM, %d0 # return new optype tag 22416 rts 22417 22418# 22419# exponent would go < 0, so only denormalize until exp = 0 22420# 22421unnorm_nrm_zero: 22422 cmp.b %d1, &32 # is exp <= 32? 22423 bgt.b unnorm_nrm_zero_lrg # no; go handle large exponent 22424 22425 bfextu FTEMP_HI(%a0){%d1:&32}, %d0 # extract new hi(man) 22426 mov.l %d0, FTEMP_HI(%a0) # save new hi(man) 22427 22428 mov.l FTEMP_LO(%a0), %d0 # fetch old lo(man) 22429 lsl.l %d1, %d0 # extract new lo(man) 22430 mov.l %d0, FTEMP_LO(%a0) # save new lo(man) 22431 22432 and.w &0x8000, FTEMP_EX(%a0) # set exp = 0 22433 22434 mov.b &DENORM, %d0 # return new optype tag 22435 rts 22436 22437# 22438# only mantissa bits set are in lo(man) 22439# 22440unnorm_nrm_zero_lrg: 22441 sub.w &32, %d1 # adjust shft amt by 32 22442 22443 mov.l FTEMP_LO(%a0), %d0 # fetch old lo(man) 22444 lsl.l %d1, %d0 # left shift lo(man) 22445 22446 mov.l %d0, FTEMP_HI(%a0) # store new hi(man) 22447 clr.l FTEMP_LO(%a0) # lo(man) = 0 22448 22449 and.w &0x8000, FTEMP_EX(%a0) # set exp = 0 22450 22451 mov.b &DENORM, %d0 # return new optype tag 22452 rts 22453 22454# 22455# whole mantissa is zero so this UNNORM is actually a zero 22456# 22457unnorm_zero: 22458 and.w &0x8000, FTEMP_EX(%a0) # force exponent to zero 22459 22460 mov.b &ZERO, %d0 # fix optype tag 22461 rts 22462 22463######################################################################### 22464# XDEF **************************************************************** # 22465# set_tag_x(): return the optype of the input ext fp number # 22466# # 22467# XREF **************************************************************** # 22468# None # 22469# # 22470# INPUT *************************************************************** # 22471# a0 = pointer to extended precision operand # 22472# # 22473# OUTPUT ************************************************************** # 22474# d0 = value of type tag # 22475# one of: NORM, INF, QNAN, SNAN, DENORM, UNNORM, ZERO # 22476# # 22477# ALGORITHM *********************************************************** # 22478# Simply test the exponent, j-bit, and mantissa values to # 22479# determine the type of operand. # 22480# If it's an unnormalized zero, alter the operand and force it # 22481# to be a normal zero. # 22482# # 22483######################################################################### 22484 22485 global set_tag_x 22486set_tag_x: 22487 mov.w FTEMP_EX(%a0), %d0 # extract exponent 22488 andi.w &0x7fff, %d0 # strip off sign 22489 cmpi.w %d0, &0x7fff # is (EXP == MAX)? 22490 beq.b inf_or_nan_x 22491not_inf_or_nan_x: 22492 btst &0x7,FTEMP_HI(%a0) 22493 beq.b not_norm_x 22494is_norm_x: 22495 mov.b &NORM, %d0 22496 rts 22497not_norm_x: 22498 tst.w %d0 # is exponent = 0? 22499 bne.b is_unnorm_x 22500not_unnorm_x: 22501 tst.l FTEMP_HI(%a0) 22502 bne.b is_denorm_x 22503 tst.l FTEMP_LO(%a0) 22504 bne.b is_denorm_x 22505is_zero_x: 22506 mov.b &ZERO, %d0 22507 rts 22508is_denorm_x: 22509 mov.b &DENORM, %d0 22510 rts 22511# must distinguish now "Unnormalized zeroes" which we 22512# must convert to zero. 
22513is_unnorm_x: 22514 tst.l FTEMP_HI(%a0) 22515 bne.b is_unnorm_reg_x 22516 tst.l FTEMP_LO(%a0) 22517 bne.b is_unnorm_reg_x 22518# it's an "unnormalized zero". let's convert it to an actual zero... 22519 andi.w &0x8000,FTEMP_EX(%a0) # clear exponent 22520 mov.b &ZERO, %d0 22521 rts 22522is_unnorm_reg_x: 22523 mov.b &UNNORM, %d0 22524 rts 22525inf_or_nan_x: 22526 tst.l FTEMP_LO(%a0) 22527 bne.b is_nan_x 22528 mov.l FTEMP_HI(%a0), %d0 22529 and.l &0x7fffffff, %d0 # msb is a don't care! 22530 bne.b is_nan_x 22531is_inf_x: 22532 mov.b &INF, %d0 22533 rts 22534is_nan_x: 22535 btst &0x6, FTEMP_HI(%a0) 22536 beq.b is_snan_x 22537 mov.b &QNAN, %d0 22538 rts 22539is_snan_x: 22540 mov.b &SNAN, %d0 22541 rts 22542 22543######################################################################### 22544# XDEF **************************************************************** # 22545# set_tag_d(): return the optype of the input dbl fp number # 22546# # 22547# XREF **************************************************************** # 22548# None # 22549# # 22550# INPUT *************************************************************** # 22551# a0 = points to double precision operand # 22552# # 22553# OUTPUT ************************************************************** # 22554# d0 = value of type tag # 22555# one of: NORM, INF, QNAN, SNAN, DENORM, ZERO # 22556# # 22557# ALGORITHM *********************************************************** # 22558# Simply test the exponent, j-bit, and mantissa values to # 22559# determine the type of operand. # 22560# # 22561######################################################################### 22562 22563 global set_tag_d 22564set_tag_d: 22565 mov.l FTEMP(%a0), %d0 22566 mov.l %d0, %d1 22567 22568 andi.l &0x7ff00000, %d0 22569 beq.b zero_or_denorm_d 22570 22571 cmpi.l %d0, &0x7ff00000 22572 beq.b inf_or_nan_d 22573 22574is_norm_d: 22575 mov.b &NORM, %d0 22576 rts 22577zero_or_denorm_d: 22578 and.l &0x000fffff, %d1 22579 bne is_denorm_d 22580 tst.l 4+FTEMP(%a0) 22581 bne is_denorm_d 22582is_zero_d: 22583 mov.b &ZERO, %d0 22584 rts 22585is_denorm_d: 22586 mov.b &DENORM, %d0 22587 rts 22588inf_or_nan_d: 22589 and.l &0x000fffff, %d1 22590 bne is_nan_d 22591 tst.l 4+FTEMP(%a0) 22592 bne is_nan_d 22593is_inf_d: 22594 mov.b &INF, %d0 22595 rts 22596is_nan_d: 22597 btst &19, %d1 22598 bne is_qnan_d 22599is_snan_d: 22600 mov.b &SNAN, %d0 22601 rts 22602is_qnan_d: 22603 mov.b &QNAN, %d0 22604 rts 22605 22606######################################################################### 22607# XDEF **************************************************************** # 22608# set_tag_s(): return the optype of the input sgl fp number # 22609# # 22610# XREF **************************************************************** # 22611# None # 22612# # 22613# INPUT *************************************************************** # 22614# a0 = pointer to single precision operand # 22615# # 22616# OUTPUT ************************************************************** # 22617# d0 = value of type tag # 22618# one of: NORM, INF, QNAN, SNAN, DENORM, ZERO # 22619# # 22620# ALGORITHM *********************************************************** # 22621# Simply test the exponent, j-bit, and mantissa values to # 22622# determine the type of operand. 
# 22623# # 22624######################################################################### 22625 22626 global set_tag_s 22627set_tag_s: 22628 mov.l FTEMP(%a0), %d0 22629 mov.l %d0, %d1 22630 22631 andi.l &0x7f800000, %d0 22632 beq.b zero_or_denorm_s 22633 22634 cmpi.l %d0, &0x7f800000 22635 beq.b inf_or_nan_s 22636 22637is_norm_s: 22638 mov.b &NORM, %d0 22639 rts 22640zero_or_denorm_s: 22641 and.l &0x007fffff, %d1 22642 bne is_denorm_s 22643is_zero_s: 22644 mov.b &ZERO, %d0 22645 rts 22646is_denorm_s: 22647 mov.b &DENORM, %d0 22648 rts 22649inf_or_nan_s: 22650 and.l &0x007fffff, %d1 22651 bne is_nan_s 22652is_inf_s: 22653 mov.b &INF, %d0 22654 rts 22655is_nan_s: 22656 btst &22, %d1 22657 bne is_qnan_s 22658is_snan_s: 22659 mov.b &SNAN, %d0 22660 rts 22661is_qnan_s: 22662 mov.b &QNAN, %d0 22663 rts 22664 22665######################################################################### 22666# XDEF **************************************************************** # 22667# unf_res(): routine to produce default underflow result of a # 22668# scaled extended precision number; this is used by # 22669# fadd/fdiv/fmul/etc. emulation routines. # 22670# unf_res4(): same as above but for fsglmul/fsgldiv which use # 22671# single round prec and extended prec mode. # 22672# # 22673# XREF **************************************************************** # 22674# _denorm() - denormalize according to scale factor # 22675# _round() - round denormalized number according to rnd prec # 22676# # 22677# INPUT *************************************************************** # 22678# a0 = pointer to extended precison operand # 22679# d0 = scale factor # 22680# d1 = rounding precision/mode # 22681# # 22682# OUTPUT ************************************************************** # 22683# a0 = pointer to default underflow result in extended precision # 22684# d0.b = result FPSR_cc which caller may or may not want to save # 22685# # 22686# ALGORITHM *********************************************************** # 22687# Convert the input operand to "internal format" which means the # 22688# exponent is extended to 16 bits and the sign is stored in the unused # 22689# portion of the extended precison operand. Denormalize the number # 22690# according to the scale factor passed in d0. Then, round the # 22691# denormalized result. # 22692# Set the FPSR_exc bits as appropriate but return the cc bits in # 22693# d0 in case the caller doesn't want to save them (as is the case for # 22694# fmove out). # 22695# unf_res4() for fsglmul/fsgldiv forces the denorm to extended # 22696# precision and the rounding mode to single. # 22697# # 22698######################################################################### 22699 global unf_res 22700unf_res: 22701 mov.l %d1, -(%sp) # save rnd prec,mode on stack 22702 22703 btst &0x7, FTEMP_EX(%a0) # make "internal" format 22704 sne FTEMP_SGN(%a0) 22705 22706 mov.w FTEMP_EX(%a0), %d1 # extract exponent 22707 and.w &0x7fff, %d1 22708 sub.w %d0, %d1 22709 mov.w %d1, FTEMP_EX(%a0) # insert 16 bit exponent 22710 22711 mov.l %a0, -(%sp) # save operand ptr during calls 22712 22713 mov.l 0x4(%sp),%d0 # pass rnd prec. 
22714 andi.w &0x00c0,%d0 22715 lsr.w &0x4,%d0 22716 bsr.l _denorm # denorm result 22717 22718 mov.l (%sp),%a0 22719 mov.w 0x6(%sp),%d1 # load prec:mode into %d1 22720 andi.w &0xc0,%d1 # extract rnd prec 22721 lsr.w &0x4,%d1 22722 swap %d1 22723 mov.w 0x6(%sp),%d1 22724 andi.w &0x30,%d1 22725 lsr.w &0x4,%d1 22726 bsr.l _round # round the denorm 22727 22728 mov.l (%sp)+, %a0 22729 22730# result is now rounded properly. convert back to normal format 22731 bclr &0x7, FTEMP_EX(%a0) # clear sgn first; may have residue 22732 tst.b FTEMP_SGN(%a0) # is "internal result" sign set? 22733 beq.b unf_res_chkifzero # no; result is positive 22734 bset &0x7, FTEMP_EX(%a0) # set result sgn 22735 clr.b FTEMP_SGN(%a0) # clear temp sign 22736 22737# the number may have become zero after rounding. set ccodes accordingly. 22738unf_res_chkifzero: 22739 clr.l %d0 22740 tst.l FTEMP_HI(%a0) # is value now a zero? 22741 bne.b unf_res_cont # no 22742 tst.l FTEMP_LO(%a0) 22743 bne.b unf_res_cont # no 22744# bset &z_bit, FPSR_CC(%a6) # yes; set zero ccode bit 22745 bset &z_bit, %d0 # yes; set zero ccode bit 22746 22747unf_res_cont: 22748 22749# 22750# can inex1 also be set along with unfl and inex2??? 22751# 22752# we know that underflow has occurred. aunfl should be set if INEX2 is also set. 22753# 22754 btst &inex2_bit, FPSR_EXCEPT(%a6) # is INEX2 set? 22755 beq.b unf_res_end # no 22756 bset &aunfl_bit, FPSR_AEXCEPT(%a6) # yes; set aunfl 22757 22758unf_res_end: 22759 add.l &0x4, %sp # clear stack 22760 rts 22761 22762# unf_res() for fsglmul() and fsgldiv(). 22763 global unf_res4 22764unf_res4: 22765 mov.l %d1,-(%sp) # save rnd prec,mode on stack 22766 22767 btst &0x7,FTEMP_EX(%a0) # make "internal" format 22768 sne FTEMP_SGN(%a0) 22769 22770 mov.w FTEMP_EX(%a0),%d1 # extract exponent 22771 and.w &0x7fff,%d1 22772 sub.w %d0,%d1 22773 mov.w %d1,FTEMP_EX(%a0) # insert 16 bit exponent 22774 22775 mov.l %a0,-(%sp) # save operand ptr during calls 22776 22777 clr.l %d0 # force rnd prec = ext 22778 bsr.l _denorm # denorm result 22779 22780 mov.l (%sp),%a0 22781 mov.w &s_mode,%d1 # force rnd prec = sgl 22782 swap %d1 22783 mov.w 0x6(%sp),%d1 # load rnd mode 22784 andi.w &0x30,%d1 # extract rnd prec 22785 lsr.w &0x4,%d1 22786 bsr.l _round # round the denorm 22787 22788 mov.l (%sp)+,%a0 22789 22790# result is now rounded properly. convert back to normal format 22791 bclr &0x7,FTEMP_EX(%a0) # clear sgn first; may have residue 22792 tst.b FTEMP_SGN(%a0) # is "internal result" sign set? 22793 beq.b unf_res4_chkifzero # no; result is positive 22794 bset &0x7,FTEMP_EX(%a0) # set result sgn 22795 clr.b FTEMP_SGN(%a0) # clear temp sign 22796 22797# the number may have become zero after rounding. set ccodes accordingly. 22798unf_res4_chkifzero: 22799 clr.l %d0 22800 tst.l FTEMP_HI(%a0) # is value now a zero? 22801 bne.b unf_res4_cont # no 22802 tst.l FTEMP_LO(%a0) 22803 bne.b unf_res4_cont # no 22804# bset &z_bit,FPSR_CC(%a6) # yes; set zero ccode bit 22805 bset &z_bit,%d0 # yes; set zero ccode bit 22806 22807unf_res4_cont: 22808 22809# 22810# can inex1 also be set along with unfl and inex2??? 22811# 22812# we know that underflow has occurred. aunfl should be set if INEX2 is also set. 22813# 22814 btst &inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set? 
22815 beq.b unf_res4_end # no 22816 bset &aunfl_bit,FPSR_AEXCEPT(%a6) # yes; set aunfl 22817 22818unf_res4_end: 22819 add.l &0x4,%sp # clear stack 22820 rts 22821 22822######################################################################### 22823# XDEF **************************************************************** # 22824# ovf_res(): routine to produce the default overflow result of # 22825# an overflowing number. # 22826# ovf_res2(): same as above but the rnd mode/prec are passed # 22827# differently. # 22828# # 22829# XREF **************************************************************** # 22830# none # 22831# # 22832# INPUT *************************************************************** # 22833# d1.b = '-1' => (-); '0' => (+) # 22834# ovf_res(): # 22835# d0 = rnd mode/prec # 22836# ovf_res2(): # 22837# hi(d0) = rnd prec # 22838# lo(d0) = rnd mode # 22839# # 22840# OUTPUT ************************************************************** # 22841# a0 = points to extended precision result # 22842# d0.b = condition code bits # 22843# # 22844# ALGORITHM *********************************************************** # 22845# The default overflow result can be determined by the sign of # 22846# the result and the rounding mode/prec in effect. These bits are # 22847# concatenated together to create an index into the default result # 22848# table. A pointer to the correct result is returned in a0. The # 22849# resulting condition codes are returned in d0 in case the caller # 22850# doesn't want FPSR_cc altered (as is the case for fmove out). # 22851# # 22852######################################################################### 22853 22854 global ovf_res 22855ovf_res: 22856 andi.w &0x10,%d1 # keep result sign 22857 lsr.b &0x4,%d0 # shift prec/mode 22858 or.b %d0,%d1 # concat the two 22859 mov.w %d1,%d0 # make a copy 22860 lsl.b &0x1,%d1 # multiply d1 by 2 22861 bra.b ovf_res_load 22862 22863 global ovf_res2 22864ovf_res2: 22865 and.w &0x10, %d1 # keep result sign 22866 or.b %d0, %d1 # insert rnd mode 22867 swap %d0 22868 or.b %d0, %d1 # insert rnd prec 22869 mov.w %d1, %d0 # make a copy 22870 lsl.b &0x1, %d1 # shift left by 1 22871 22872# 22873# use the rounding mode, precision, and result sign as in index into the 22874# two tables below to fetch the default result and the result ccodes. 
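#
# In C terms the lookup below is roughly (an illustrative sketch, not
# part of the package; 'sign', 'prec' and 'mode' name the bits built up
# above: sign = 1 for a negative result, prec = 0/1/2 for ext/sgl/dbl,
# mode = 0/1/2/3 for RN/RZ/RM/RP):
#
#       int idx = (sign << 4) | (prec << 2) | mode;     /* 0 - 27 */
#       cc  = tbl_ovfl_cc[idx];            /* ccodes: inf and/or neg bits */
#       res = &tbl_ovfl_result[idx * 16];  /* one 16-byte extended value */
#
# so each rounding precision owns a block of four entries, selected by
# rounding mode, with the negative-sign blocks starting at entry 16.
#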
22875# 22876ovf_res_load: 22877 mov.b (tbl_ovfl_cc.b,%pc,%d0.w*1), %d0 # fetch result ccodes 22878 lea (tbl_ovfl_result.b,%pc,%d1.w*8), %a0 # return result ptr 22879 22880 rts 22881 22882tbl_ovfl_cc: 22883 byte 0x2, 0x0, 0x0, 0x2 22884 byte 0x2, 0x0, 0x0, 0x2 22885 byte 0x2, 0x0, 0x0, 0x2 22886 byte 0x0, 0x0, 0x0, 0x0 22887 byte 0x2+0x8, 0x8, 0x2+0x8, 0x8 22888 byte 0x2+0x8, 0x8, 0x2+0x8, 0x8 22889 byte 0x2+0x8, 0x8, 0x2+0x8, 0x8 22890 22891tbl_ovfl_result: 22892 long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN 22893 long 0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RZ 22894 long 0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RM 22895 long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP 22896 22897 long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN 22898 long 0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RZ 22899 long 0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RM 22900 long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP 22901 22902 long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN 22903 long 0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RZ 22904 long 0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RM 22905 long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP 22906 22907 long 0x00000000,0x00000000,0x00000000,0x00000000 22908 long 0x00000000,0x00000000,0x00000000,0x00000000 22909 long 0x00000000,0x00000000,0x00000000,0x00000000 22910 long 0x00000000,0x00000000,0x00000000,0x00000000 22911 22912 long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN 22913 long 0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RZ 22914 long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM 22915 long 0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RP 22916 22917 long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN 22918 long 0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RZ 22919 long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM 22920 long 0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RP 22921 22922 long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN 22923 long 0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RZ 22924 long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM 22925 long 0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RP 22926 22927######################################################################### 22928# XDEF **************************************************************** # 22929# get_packed(): fetch a packed operand from memory and then # 22930# convert it to a floating-point binary number. # 22931# # 22932# XREF **************************************************************** # 22933# _dcalc_ea() - calculate the correct <ea> # 22934# _mem_read() - fetch the packed operand from memory # 22935# facc_in_x() - the fetch failed so jump to special exit code # 22936# decbin() - convert packed to binary extended precision # 22937# # 22938# INPUT *************************************************************** # 22939# None # 22940# # 22941# OUTPUT ************************************************************** # 22942# If no failure on _mem_read(): # 22943# FP_SRC(a6) = packed operand now as a binary FP number # 22944# # 22945# ALGORITHM *********************************************************** # 22946# Get the correct <ea> which is the value on the exception stack # 22947# frame w/ maybe a correction factor if the <ea> is -(an) or (an)+. # 22948# Then, fetch the operand from memory. If the fetch fails, exit # 22949# through facc_in_x(). 
# 22950# If the packed operand is a ZERO,NAN, or INF, convert it to # 22951# its binary representation here. Else, call decbin() which will # 22952# convert the packed value to an extended precision binary value. # 22953# # 22954######################################################################### 22955 22956# the stacked <ea> for packed is correct except for -(An). 22957# the base reg must be updated for both -(An) and (An)+. 22958 global get_packed 22959get_packed: 22960 mov.l &0xc,%d0 # packed is 12 bytes 22961 bsr.l _dcalc_ea # fetch <ea>; correct An 22962 22963 lea FP_SRC(%a6),%a1 # pass: ptr to super dst 22964 mov.l &0xc,%d0 # pass: 12 bytes 22965 bsr.l _dmem_read # read packed operand 22966 22967 tst.l %d1 # did dfetch fail? 22968 bne.l facc_in_x # yes 22969 22970# The packed operand is an INF or a NAN if the exponent field is all ones. 22971 bfextu FP_SRC(%a6){&1:&15},%d0 # get exp 22972 cmpi.w %d0,&0x7fff # INF or NAN? 22973 bne.b gp_try_zero # no 22974 rts # operand is an INF or NAN 22975 22976# The packed operand is a zero if the mantissa is all zero, else it's 22977# a normal packed op. 22978gp_try_zero: 22979 mov.b 3+FP_SRC(%a6),%d0 # get byte 4 22980 andi.b &0x0f,%d0 # clear all but last nybble 22981 bne.b gp_not_spec # not a zero 22982 tst.l FP_SRC_HI(%a6) # is lw 2 zero? 22983 bne.b gp_not_spec # not a zero 22984 tst.l FP_SRC_LO(%a6) # is lw 3 zero? 22985 bne.b gp_not_spec # not a zero 22986 rts # operand is a ZERO 22987gp_not_spec: 22988 lea FP_SRC(%a6),%a0 # pass: ptr to packed op 22989 bsr.l decbin # convert to extended 22990 fmovm.x &0x80,FP_SRC(%a6) # make this the srcop 22991 rts 22992 22993######################################################################### 22994# decbin(): Converts normalized packed bcd value pointed to by register # 22995# a0 to extended-precision value in fp0. # 22996# # 22997# INPUT *************************************************************** # 22998# a0 = pointer to normalized packed bcd value # 22999# # 23000# OUTPUT ************************************************************** # 23001# fp0 = exact fp representation of the packed bcd value. # 23002# # 23003# ALGORITHM *********************************************************** # 23004# Expected is a normal bcd (i.e. non-exceptional; all inf, zero, # 23005# and NaN operands are dispatched without entering this routine) # 23006# value in 68881/882 format at location (a0). # 23007# # 23008# A1. Convert the bcd exponent to binary by successive adds and # 23009# muls. Set the sign according to SE. Subtract 16 to compensate # 23010# for the mantissa which is to be interpreted as 17 integer # 23011# digits, rather than 1 integer and 16 fraction digits. # 23012# Note: this operation can never overflow. # 23013# # 23014# A2. Convert the bcd mantissa to binary by successive # 23015# adds and muls in FP0. Set the sign according to SM. # 23016# The mantissa digits will be converted with the decimal point # 23017# assumed following the least-significant digit. # 23018# Note: this operation can never overflow. # 23019# # 23020# A3. Count the number of leading/trailing zeros in the # 23021# bcd string. If SE is positive, count the leading zeros; # 23022# if negative, count the trailing zeros. Set the adjusted # 23023# exponent equal to the exponent from A1 and the zero count # 23024# added if SM = 1 and subtracted if SM = 0. 
Scale the # 23025# mantissa the equivalent of forcing in the bcd value: # 23026# # 23027# SM = 0 a non-zero digit in the integer position # 23028# SM = 1 a non-zero digit in Mant0, lsd of the fraction # 23029# # 23030# this will insure that any value, regardless of its # 23031# representation (ex. 0.1E2, 1E1, 10E0, 100E-1), is converted # 23032# consistently. # 23033# # 23034# A4. Calculate the factor 10^exp in FP1 using a table of # 23035# 10^(2^n) values. To reduce the error in forming factors # 23036# greater than 10^27, a directed rounding scheme is used with # 23037# tables rounded to RN, RM, and RP, according to the table # 23038# in the comments of the pwrten section. # 23039# # 23040# A5. Form the final binary number by scaling the mantissa by # 23041# the exponent factor. This is done by multiplying the # 23042# mantissa in FP0 by the factor in FP1 if the adjusted # 23043# exponent sign is positive, and dividing FP0 by FP1 if # 23044# it is negative. # 23045# # 23046# Clean up and return. Check if the final mul or div was inexact. # 23047# If so, set INEX1 in USER_FPSR. # 23048# # 23049######################################################################### 23050 23051# 23052# PTENRN, PTENRM, and PTENRP are arrays of powers of 10 rounded 23053# to nearest, minus, and plus, respectively. The tables include 23054# 10**{1,2,4,8,16,32,64,128,256,512,1024,2048,4096}. No rounding 23055# is required until the power is greater than 27, however, all 23056# tables include the first 5 for ease of indexing. 23057# 23058RTABLE: 23059 byte 0,0,0,0 23060 byte 2,3,2,3 23061 byte 2,3,3,2 23062 byte 3,2,2,3 23063 23064 set FNIBS,7 23065 set FSTRT,0 23066 23067 set ESTRT,4 23068 set EDIGITS,2 23069 23070 global decbin 23071decbin: 23072 mov.l 0x0(%a0),FP_SCR0_EX(%a6) # make a copy of input 23073 mov.l 0x4(%a0),FP_SCR0_HI(%a6) # so we don't alter it 23074 mov.l 0x8(%a0),FP_SCR0_LO(%a6) 23075 23076 lea FP_SCR0(%a6),%a0 23077 23078 movm.l &0x3c00,-(%sp) # save d2-d5 23079 fmovm.x &0x1,-(%sp) # save fp1 23080# 23081# Calculate exponent: 23082# 1. Copy bcd value in memory for use as a working copy. 23083# 2. Calculate absolute value of exponent in d1 by mul and add. 23084# 3. Correct for exponent sign. 23085# 4. Subtract 16 to compensate for interpreting the mant as all integer digits. 23086# (i.e., all digits assumed left of the decimal point.) 
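#
# As a C sketch, the exponent calculation described above is roughly
# (illustrative only, not part of the package; w0 stands for the first
# longword of the working bcd copy):
#
#       long exp = 0;
#       for (int shift = 24; shift >= 16; shift -= 4)   /* 3 exp digits */
#               exp = exp * 10 + ((w0 >> shift) & 0xf);
#       if (w0 & 0x40000000) exp = -exp;        /* SE = exponent sign */
#       exp -= 16;              /* mantissa read as 17 integer digits */
#       if (exp < 0) {          /* keep the magnitude; record the sign */
#               exp = -exp;     /* in SE of the working copy */
#               w0 |= 0x40000000;
#       }
#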
23087# 23088# Register usage: 23089# 23090# calc_e: 23091# (*) d0: temp digit storage 23092# (*) d1: accumulator for binary exponent 23093# (*) d2: digit count 23094# (*) d3: offset pointer 23095# ( ) d4: first word of bcd 23096# ( ) a0: pointer to working bcd value 23097# ( ) a6: pointer to original bcd value 23098# (*) FP_SCR1: working copy of original bcd value 23099# (*) L_SCR1: copy of original exponent word 23100# 23101calc_e: 23102 mov.l &EDIGITS,%d2 # # of nibbles (digits) in fraction part 23103 mov.l &ESTRT,%d3 # counter to pick up digits 23104 mov.l (%a0),%d4 # get first word of bcd 23105 clr.l %d1 # zero d1 for accumulator 23106e_gd: 23107 mulu.l &0xa,%d1 # mul partial product by one digit place 23108 bfextu %d4{%d3:&4},%d0 # get the digit and zero extend into d0 23109 add.l %d0,%d1 # d1 = d1 + d0 23110 addq.b &4,%d3 # advance d3 to the next digit 23111 dbf.w %d2,e_gd # if we have used all 3 digits, exit loop 23112 btst &30,%d4 # get SE 23113 beq.b e_pos # don't negate if pos 23114 neg.l %d1 # negate before subtracting 23115e_pos: 23116 sub.l &16,%d1 # sub to compensate for shift of mant 23117 bge.b e_save # if still pos, do not neg 23118 neg.l %d1 # now negative, make pos and set SE 23119 or.l &0x40000000,%d4 # set SE in d4, 23120 or.l &0x40000000,(%a0) # and in working bcd 23121e_save: 23122 mov.l %d1,-(%sp) # save exp on stack 23123# 23124# 23125# Calculate mantissa: 23126# 1. Calculate absolute value of mantissa in fp0 by mul and add. 23127# 2. Correct for mantissa sign. 23128# (i.e., all digits assumed left of the decimal point.) 23129# 23130# Register usage: 23131# 23132# calc_m: 23133# (*) d0: temp digit storage 23134# (*) d1: lword counter 23135# (*) d2: digit count 23136# (*) d3: offset pointer 23137# ( ) d4: words 2 and 3 of bcd 23138# ( ) a0: pointer to working bcd value 23139# ( ) a6: pointer to original bcd value 23140# (*) fp0: mantissa accumulator 23141# ( ) FP_SCR1: working copy of original bcd value 23142# ( ) L_SCR1: copy of original exponent word 23143# 23144calc_m: 23145 mov.l &1,%d1 # word counter, init to 1 23146 fmov.s &0x00000000,%fp0 # accumulator 23147# 23148# 23149# Since the packed number has a long word between the first & second parts, 23150# get the integer digit then skip down & get the rest of the 23151# mantissa. We will unroll the loop once. 23152# 23153 bfextu (%a0){&28:&4},%d0 # integer part is ls digit in long word 23154 fadd.b %d0,%fp0 # add digit to sum in fp0 23155# 23156# 23157# Get the rest of the mantissa. 23158# 23159loadlw: 23160 mov.l (%a0,%d1.L*4),%d4 # load mantissa lonqword into d4 23161 mov.l &FSTRT,%d3 # counter to pick up digits 23162 mov.l &FNIBS,%d2 # reset number of digits per a0 ptr 23163md2b: 23164 fmul.s &0x41200000,%fp0 # fp0 = fp0 * 10 23165 bfextu %d4{%d3:&4},%d0 # get the digit and zero extend 23166 fadd.b %d0,%fp0 # fp0 = fp0 + digit 23167# 23168# 23169# If all the digits (8) in that long word have been converted (d2=0), 23170# then inc d1 (=2) to point to the next long word and reset d3 to 0 23171# to initialize the digit offset, and set d2 to 7 for the digit count; 23172# else continue with this long word. 23173# 23174 addq.b &4,%d3 # advance d3 to the next digit 23175 dbf.w %d2,md2b # check for last digit in this lw 23176nextlw: 23177 addq.l &1,%d1 # inc lw pointer in mantissa 23178 cmp.l %d1,&2 # test for last lw 23179 ble.b loadlw # if not, get last one 23180# 23181# Check the sign of the mant and make the value in fp0 the same sign. 
23182# 23183m_sign: 23184 btst &31,(%a0) # test sign of the mantissa 23185 beq.b ap_st_z # if clear, go to append/strip zeros 23186 fneg.x %fp0 # if set, negate fp0 23187# 23188# Append/strip zeros: 23189# 23190# For adjusted exponents which have an absolute value greater than 27*, 23191# this routine calculates the amount needed to normalize the mantissa 23192# for the adjusted exponent. That number is subtracted from the exp 23193# if the exp was positive, and added if it was negative. The purpose 23194# of this is to reduce the value of the exponent and the possibility 23195# of error in calculation of pwrten. 23196# 23197# 1. Branch on the sign of the adjusted exponent. 23198# 2p.(positive exp) 23199# 2. Check M16 and the digits in lwords 2 and 3 in descending order. 23200# 3. Add one for each zero encountered until a non-zero digit. 23201# 4. Subtract the count from the exp. 23202# 5. Check if the exp has crossed zero in #3 above; make the exp abs 23203# and set SE. 23204# 6. Multiply the mantissa by 10**count. 23205# 2n.(negative exp) 23206# 2. Check the digits in lwords 3 and 2 in descending order. 23207# 3. Add one for each zero encountered until a non-zero digit. 23208# 4. Add the count to the exp. 23209# 5. Check if the exp has crossed zero in #3 above; clear SE. 23210# 6. Divide the mantissa by 10**count. 23211# 23212# *Why 27? If the adjusted exponent is within -28 < expA < 28, than 23213# any adjustment due to append/strip zeros will drive the resultane 23214# exponent towards zero. Since all pwrten constants with a power 23215# of 27 or less are exact, there is no need to use this routine to 23216# attempt to lessen the resultant exponent. 23217# 23218# Register usage: 23219# 23220# ap_st_z: 23221# (*) d0: temp digit storage 23222# (*) d1: zero count 23223# (*) d2: digit count 23224# (*) d3: offset pointer 23225# ( ) d4: first word of bcd 23226# (*) d5: lword counter 23227# ( ) a0: pointer to working bcd value 23228# ( ) FP_SCR1: working copy of original bcd value 23229# ( ) L_SCR1: copy of original exponent word 23230# 23231# 23232# First check the absolute value of the exponent to see if this 23233# routine is necessary. If so, then check the sign of the exponent 23234# and do append (+) or strip (-) zeros accordingly. 23235# This section handles a positive adjusted exponent. 
23236# 23237ap_st_z: 23238 mov.l (%sp),%d1 # load expA for range test 23239 cmp.l %d1,&27 # test is with 27 23240 ble.w pwrten # if abs(expA) <28, skip ap/st zeros 23241 btst &30,(%a0) # check sign of exp 23242 bne.b ap_st_n # if neg, go to neg side 23243 clr.l %d1 # zero count reg 23244 mov.l (%a0),%d4 # load lword 1 to d4 23245 bfextu %d4{&28:&4},%d0 # get M16 in d0 23246 bne.b ap_p_fx # if M16 is non-zero, go fix exp 23247 addq.l &1,%d1 # inc zero count 23248 mov.l &1,%d5 # init lword counter 23249 mov.l (%a0,%d5.L*4),%d4 # get lword 2 to d4 23250 bne.b ap_p_cl # if lw 2 is zero, skip it 23251 addq.l &8,%d1 # and inc count by 8 23252 addq.l &1,%d5 # inc lword counter 23253 mov.l (%a0,%d5.L*4),%d4 # get lword 3 to d4 23254ap_p_cl: 23255 clr.l %d3 # init offset reg 23256 mov.l &7,%d2 # init digit counter 23257ap_p_gd: 23258 bfextu %d4{%d3:&4},%d0 # get digit 23259 bne.b ap_p_fx # if non-zero, go to fix exp 23260 addq.l &4,%d3 # point to next digit 23261 addq.l &1,%d1 # inc digit counter 23262 dbf.w %d2,ap_p_gd # get next digit 23263ap_p_fx: 23264 mov.l %d1,%d0 # copy counter to d2 23265 mov.l (%sp),%d1 # get adjusted exp from memory 23266 sub.l %d0,%d1 # subtract count from exp 23267 bge.b ap_p_fm # if still pos, go to pwrten 23268 neg.l %d1 # now its neg; get abs 23269 mov.l (%a0),%d4 # load lword 1 to d4 23270 or.l &0x40000000,%d4 # and set SE in d4 23271 or.l &0x40000000,(%a0) # and in memory 23272# 23273# Calculate the mantissa multiplier to compensate for the striping of 23274# zeros from the mantissa. 23275# 23276ap_p_fm: 23277 lea.l PTENRN(%pc),%a1 # get address of power-of-ten table 23278 clr.l %d3 # init table index 23279 fmov.s &0x3f800000,%fp1 # init fp1 to 1 23280 mov.l &3,%d2 # init d2 to count bits in counter 23281ap_p_el: 23282 asr.l &1,%d0 # shift lsb into carry 23283 bcc.b ap_p_en # if 1, mul fp1 by pwrten factor 23284 fmul.x (%a1,%d3),%fp1 # mul by 10**(d3_bit_no) 23285ap_p_en: 23286 add.l &12,%d3 # inc d3 to next rtable entry 23287 tst.l %d0 # check if d0 is zero 23288 bne.b ap_p_el # if not, get next bit 23289 fmul.x %fp1,%fp0 # mul mantissa by 10**(no_bits_shifted) 23290 bra.b pwrten # go calc pwrten 23291# 23292# This section handles a negative adjusted exponent. 23293# 23294ap_st_n: 23295 clr.l %d1 # clr counter 23296 mov.l &2,%d5 # set up d5 to point to lword 3 23297 mov.l (%a0,%d5.L*4),%d4 # get lword 3 23298 bne.b ap_n_cl # if not zero, check digits 23299 sub.l &1,%d5 # dec d5 to point to lword 2 23300 addq.l &8,%d1 # inc counter by 8 23301 mov.l (%a0,%d5.L*4),%d4 # get lword 2 23302ap_n_cl: 23303 mov.l &28,%d3 # point to last digit 23304 mov.l &7,%d2 # init digit counter 23305ap_n_gd: 23306 bfextu %d4{%d3:&4},%d0 # get digit 23307 bne.b ap_n_fx # if non-zero, go to exp fix 23308 subq.l &4,%d3 # point to previous digit 23309 addq.l &1,%d1 # inc digit counter 23310 dbf.w %d2,ap_n_gd # get next digit 23311ap_n_fx: 23312 mov.l %d1,%d0 # copy counter to d0 23313 mov.l (%sp),%d1 # get adjusted exp from memory 23314 sub.l %d0,%d1 # subtract count from exp 23315 bgt.b ap_n_fm # if still pos, go fix mantissa 23316 neg.l %d1 # take abs of exp and clr SE 23317 mov.l (%a0),%d4 # load lword 1 to d4 23318 and.l &0xbfffffff,%d4 # and clr SE in d4 23319 and.l &0xbfffffff,(%a0) # and in memory 23320# 23321# Calculate the mantissa multiplier to compensate for the appending of 23322# zeros to the mantissa. 
23323# 23324ap_n_fm: 23325 lea.l PTENRN(%pc),%a1 # get address of power-of-ten table 23326 clr.l %d3 # init table index 23327 fmov.s &0x3f800000,%fp1 # init fp1 to 1 23328 mov.l &3,%d2 # init d2 to count bits in counter 23329ap_n_el: 23330 asr.l &1,%d0 # shift lsb into carry 23331 bcc.b ap_n_en # if 1, mul fp1 by pwrten factor 23332 fmul.x (%a1,%d3),%fp1 # mul by 10**(d3_bit_no) 23333ap_n_en: 23334 add.l &12,%d3 # inc d3 to next rtable entry 23335 tst.l %d0 # check if d0 is zero 23336 bne.b ap_n_el # if not, get next bit 23337 fdiv.x %fp1,%fp0 # div mantissa by 10**(no_bits_shifted) 23338# 23339# 23340# Calculate power-of-ten factor from adjusted and shifted exponent. 23341# 23342# Register usage: 23343# 23344# pwrten: 23345# (*) d0: temp 23346# ( ) d1: exponent 23347# (*) d2: {FPCR[6:5],SM,SE} as index in RTABLE; temp 23348# (*) d3: FPCR work copy 23349# ( ) d4: first word of bcd 23350# (*) a1: RTABLE pointer 23351# calc_p: 23352# (*) d0: temp 23353# ( ) d1: exponent 23354# (*) d3: PWRTxx table index 23355# ( ) a0: pointer to working copy of bcd 23356# (*) a1: PWRTxx pointer 23357# (*) fp1: power-of-ten accumulator 23358# 23359# Pwrten calculates the exponent factor in the selected rounding mode 23360# according to the following table: 23361# 23362# Sign of Mant Sign of Exp Rounding Mode PWRTEN Rounding Mode 23363# 23364# ANY ANY RN RN 23365# 23366# + + RP RP 23367# - + RP RM 23368# + - RP RM 23369# - - RP RP 23370# 23371# + + RM RM 23372# - + RM RP 23373# + - RM RP 23374# - - RM RM 23375# 23376# + + RZ RM 23377# - + RZ RM 23378# + - RZ RP 23379# - - RZ RP 23380# 23381# 23382pwrten: 23383 mov.l USER_FPCR(%a6),%d3 # get user's FPCR 23384 bfextu %d3{&26:&2},%d2 # isolate rounding mode bits 23385 mov.l (%a0),%d4 # reload 1st bcd word to d4 23386 asl.l &2,%d2 # format d2 to be 23387 bfextu %d4{&0:&2},%d0 # {FPCR[6],FPCR[5],SM,SE} 23388 add.l %d0,%d2 # in d2 as index into RTABLE 23389 lea.l RTABLE(%pc),%a1 # load rtable base 23390 mov.b (%a1,%d2),%d0 # load new rounding bits from table 23391 clr.l %d3 # clear d3 to force no exc and extended 23392 bfins %d0,%d3{&26:&2} # stuff new rounding bits in FPCR 23393 fmov.l %d3,%fpcr # write new FPCR 23394 asr.l &1,%d0 # write correct PTENxx table 23395 bcc.b not_rp # to a1 23396 lea.l PTENRP(%pc),%a1 # it is RP 23397 bra.b calc_p # go to init section 23398not_rp: 23399 asr.l &1,%d0 # keep checking 23400 bcc.b not_rm 23401 lea.l PTENRM(%pc),%a1 # it is RM 23402 bra.b calc_p # go to init section 23403not_rm: 23404 lea.l PTENRN(%pc),%a1 # it is RN 23405calc_p: 23406 mov.l %d1,%d0 # copy exp to d0;use d0 23407 bpl.b no_neg # if exp is negative, 23408 neg.l %d0 # invert it 23409 or.l &0x40000000,(%a0) # and set SE bit 23410no_neg: 23411 clr.l %d3 # table index 23412 fmov.s &0x3f800000,%fp1 # init fp1 to 1 23413e_loop: 23414 asr.l &1,%d0 # shift next bit into carry 23415 bcc.b e_next # if zero, skip the mul 23416 fmul.x (%a1,%d3),%fp1 # mul by 10**(d3_bit_no) 23417e_next: 23418 add.l &12,%d3 # inc d3 to next rtable entry 23419 tst.l %d0 # check if d0 is zero 23420 bne.b e_loop # not zero, continue shifting 23421# 23422# 23423# Check the sign of the adjusted exp and make the value in fp0 the 23424# same sign. If the exp was pos then multiply fp1*fp0; 23425# else divide fp0/fp1. 
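#
# The power-of-ten factor and this final scaling step can be sketched
# in C (illustrative only, not part of the package; pten[] stands for
# the selected PTENRN/PTENRM/PTENRP table, whose n-th entry is
# 10^(2^n), and exp is already abs(adjusted exponent)):
#
#       long double factor = 1.0;
#       for (int n = 0; exp != 0; n++, exp >>= 1)
#               if (exp & 1)
#                       factor *= pten[n];      /* 10^(2^n) */
#       mant = se ? mant / factor       /* SE set: exponent was negative */
#                 : mant * factor;
#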
23426# 23427# Register Usage: 23428# norm: 23429# ( ) a0: pointer to working bcd value 23430# (*) fp0: mantissa accumulator 23431# ( ) fp1: scaling factor - 10**(abs(exp)) 23432# 23433pnorm: 23434 btst &30,(%a0) # test the sign of the exponent 23435 beq.b mul # if clear, go to multiply 23436div: 23437 fdiv.x %fp1,%fp0 # exp is negative, so divide mant by exp 23438 bra.b end_dec 23439mul: 23440 fmul.x %fp1,%fp0 # exp is positive, so multiply by exp 23441# 23442# 23443# Clean up and return with result in fp0. 23444# 23445# If the final mul/div in decbin incurred an inex exception, 23446# it will be inex2, but will be reported as inex1 by get_op. 23447# 23448end_dec: 23449 fmov.l %fpsr,%d0 # get status register 23450 bclr &inex2_bit+8,%d0 # test for inex2 and clear it 23451 beq.b no_exc # skip this if no exc 23452 ori.w &inx1a_mask,2+USER_FPSR(%a6) # set INEX1/AINEX 23453no_exc: 23454 add.l &0x4,%sp # clear 1 lw param 23455 fmovm.x (%sp)+,&0x40 # restore fp1 23456 movm.l (%sp)+,&0x3c # restore d2-d5 23457 fmov.l &0x0,%fpcr 23458 fmov.l &0x0,%fpsr 23459 rts 23460 23461######################################################################### 23462# bindec(): Converts an input in extended precision format to bcd format# 23463# # 23464# INPUT *************************************************************** # 23465# a0 = pointer to the input extended precision value in memory. # 23466# the input may be either normalized, unnormalized, or # 23467# denormalized. # 23468# d0 = contains the k-factor sign-extended to 32-bits. # 23469# # 23470# OUTPUT ************************************************************** # 23471# FP_SCR0(a6) = bcd format result on the stack. # 23472# # 23473# ALGORITHM *********************************************************** # 23474# # 23475# A1. Set RM and size ext; Set SIGMA = sign of input. # 23476# The k-factor is saved for use in d7. Clear the # 23477# BINDEC_FLG for separating normalized/denormalized # 23478# input. If input is unnormalized or denormalized, # 23479# normalize it. # 23480# # 23481# A2. Set X = abs(input). # 23482# # 23483# A3. Compute ILOG. # 23484# ILOG is the log base 10 of the input value. It is # 23485# approximated by adding e + 0.f when the original # 23486# value is viewed as 2^^e * 1.f in extended precision. # 23487# This value is stored in d6. # 23488# # 23489# A4. Clr INEX bit. # 23490# The operation in A3 above may have set INEX2. # 23491# # 23492# A5. Set ICTR = 0; # 23493# ICTR is a flag used in A13. It must be set before the # 23494# loop entry A6. # 23495# # 23496# A6. Calculate LEN. # 23497# LEN is the number of digits to be displayed. The # 23498# k-factor can dictate either the total number of digits, # 23499# if it is a positive number, or the number of digits # 23500# after the decimal point which are to be included as # 23501# significant. See the 68882 manual for examples. # 23502# If LEN is computed to be greater than 17, set OPERR in # 23503# USER_FPSR. LEN is stored in d4. # 23504# # 23505# A7. Calculate SCALE. # 23506# SCALE is equal to 10^ISCALE, where ISCALE is the number # 23507# of decimal places needed to insure LEN integer digits # 23508# in the output before conversion to bcd. LAMBDA is the # 23509# sign of ISCALE, used in A9. Fp1 contains # 23510# 10^^(abs(ISCALE)) using a rounding mode which is a # 23511# function of the original rounding mode and the signs # 23512# of ISCALE and X. A table is given in the code. # 23513# # 23514# A8. Clr INEX; Force RZ. # 23515# The operation in A3 above may have set INEX2. 
# 23516# RZ mode is forced for the scaling operation to insure # 23517# only one rounding error. The grs bits are collected in # 23518# the INEX flag for use in A10. # 23519# # 23520# A9. Scale X -> Y. # 23521# The mantissa is scaled to the desired number of # 23522# significant digits. The excess digits are collected # 23523# in INEX2. # 23524# # 23525# A10. Or in INEX. # 23526# If INEX is set, round error occurred. This is # 23527# compensated for by 'or-ing' in the INEX2 flag to # 23528# the lsb of Y. # 23529# # 23530# A11. Restore original FPCR; set size ext. # 23531# Perform FINT operation in the user's rounding mode. # 23532# Keep the size to extended. # 23533# # 23534# A12. Calculate YINT = FINT(Y) according to user's rounding # 23535# mode. The FPSP routine sintd0 is used. The output # 23536# is in fp0. # 23537# # 23538# A13. Check for LEN digits. # 23539# If the int operation results in more than LEN digits, # 23540# or less than LEN -1 digits, adjust ILOG and repeat from # 23541# A6. This test occurs only on the first pass. If the # 23542# result is exactly 10^LEN, decrement ILOG and divide # 23543# the mantissa by 10. # 23544# # 23545# A14. Convert the mantissa to bcd. # 23546# The binstr routine is used to convert the LEN digit # 23547# mantissa to bcd in memory. The input to binstr is # 23548# to be a fraction; i.e. (mantissa)/10^LEN and adjusted # 23549# such that the decimal point is to the left of bit 63. # 23550# The bcd digits are stored in the correct position in # 23551# the final string area in memory. # 23552# # 23553# A15. Convert the exponent to bcd. # 23554# As in A14 above, the exp is converted to bcd and the # 23555# digits are stored in the final string. # 23556# Test the length of the final exponent string. If the # 23557# length is 4, set operr. # 23558# # 23559# A16. Write sign bits to final string. # 23560# # 23561######################################################################### 23562 23563set BINDEC_FLG, EXC_TEMP # DENORM flag 23564 23565# Constants in extended precision 23566PLOG2: 23567 long 0x3FFD0000,0x9A209A84,0xFBCFF798,0x00000000 23568PLOG2UP1: 23569 long 0x3FFD0000,0x9A209A84,0xFBCFF799,0x00000000 23570 23571# Constants in single precision 23572FONE: 23573 long 0x3F800000,0x00000000,0x00000000,0x00000000 23574FTWO: 23575 long 0x40000000,0x00000000,0x00000000,0x00000000 23576FTEN: 23577 long 0x41200000,0x00000000,0x00000000,0x00000000 23578F4933: 23579 long 0x459A2800,0x00000000,0x00000000,0x00000000 23580 23581RBDTBL: 23582 byte 0,0,0,0 23583 byte 3,3,2,2 23584 byte 3,2,2,3 23585 byte 2,3,3,2 23586 23587# Implementation Notes: 23588# 23589# The registers are used as follows: 23590# 23591# d0: scratch; LEN input to binstr 23592# d1: scratch 23593# d2: upper 32-bits of mantissa for binstr 23594# d3: scratch;lower 32-bits of mantissa for binstr 23595# d4: LEN 23596# d5: LAMBDA/ICTR 23597# d6: ILOG 23598# d7: k-factor 23599# a0: ptr for original operand/final result 23600# a1: scratch pointer 23601# a2: pointer to FP_X; abs(original value) in ext 23602# fp0: scratch 23603# fp1: scratch 23604# fp2: scratch 23605# F_SCR1: 23606# F_SCR2: 23607# L_SCR1: 23608# L_SCR2: 23609 23610 global bindec 23611bindec: 23612 movm.l &0x3f20,-(%sp) # {%d2-%d7/%a2} 23613 fmovm.x &0x7,-(%sp) # {%fp0-%fp2} 23614 23615# A1. Set RM and size ext. Set SIGMA = sign input; 23616# The k-factor is saved for use in d7. Clear BINDEC_FLG for 23617# separating normalized/denormalized input. 
If the input 23618# is a denormalized number, set the BINDEC_FLG memory word 23619# to signal denorm. If the input is unnormalized, normalize 23620# the input and test for denormalized result. 23621# 23622 fmov.l &rm_mode*0x10,%fpcr # set RM and ext 23623 mov.l (%a0),L_SCR2(%a6) # save exponent for sign check 23624 mov.l %d0,%d7 # move k-factor to d7 23625 23626 clr.b BINDEC_FLG(%a6) # clr norm/denorm flag 23627 cmpi.b STAG(%a6),&DENORM # is input a DENORM? 23628 bne.w A2_str # no; input is a NORM 23629 23630# 23631# Normalize the denorm 23632# 23633un_de_norm: 23634 mov.w (%a0),%d0 23635 and.w &0x7fff,%d0 # strip sign of normalized exp 23636 mov.l 4(%a0),%d1 23637 mov.l 8(%a0),%d2 23638norm_loop: 23639 sub.w &1,%d0 23640 lsl.l &1,%d2 23641 roxl.l &1,%d1 23642 tst.l %d1 23643 bge.b norm_loop 23644# 23645# Test if the normalized input is denormalized 23646# 23647 tst.w %d0 23648 bgt.b pos_exp # if greater than zero, it is a norm 23649 st BINDEC_FLG(%a6) # set flag for denorm 23650pos_exp: 23651 and.w &0x7fff,%d0 # strip sign of normalized exp 23652 mov.w %d0,(%a0) 23653 mov.l %d1,4(%a0) 23654 mov.l %d2,8(%a0) 23655 23656# A2. Set X = abs(input). 23657# 23658A2_str: 23659 mov.l (%a0),FP_SCR1(%a6) # move input to work space 23660 mov.l 4(%a0),FP_SCR1+4(%a6) # move input to work space 23661 mov.l 8(%a0),FP_SCR1+8(%a6) # move input to work space 23662 and.l &0x7fffffff,FP_SCR1(%a6) # create abs(X) 23663 23664# A3. Compute ILOG. 23665# ILOG is the log base 10 of the input value. It is approx- 23666# imated by adding e + 0.f when the original value is viewed 23667# as 2^^e * 1.f in extended precision. This value is stored 23668# in d6. 23669# 23670# Register usage: 23671# Input/Output 23672# d0: k-factor/exponent 23673# d2: x/x 23674# d3: x/x 23675# d4: x/x 23676# d5: x/x 23677# d6: x/ILOG 23678# d7: k-factor/Unchanged 23679# a0: ptr for original operand/final result 23680# a1: x/x 23681# a2: x/x 23682# fp0: x/float(ILOG) 23683# fp1: x/x 23684# fp2: x/x 23685# F_SCR1:x/x 23686# F_SCR2:Abs(X)/Abs(X) with $3fff exponent 23687# L_SCR1:x/x 23688# L_SCR2:first word of X packed/Unchanged 23689 23690 tst.b BINDEC_FLG(%a6) # check for denorm 23691 beq.b A3_cont # if clr, continue with norm 23692 mov.l &-4933,%d6 # force ILOG = -4933 23693 bra.b A4_str 23694A3_cont: 23695 mov.w FP_SCR1(%a6),%d0 # move exp to d0 23696 mov.w &0x3fff,FP_SCR1(%a6) # replace exponent with 0x3fff 23697 fmov.x FP_SCR1(%a6),%fp0 # now fp0 has 1.f 23698 sub.w &0x3fff,%d0 # strip off bias 23699 fadd.w %d0,%fp0 # add in exp 23700 fsub.s FONE(%pc),%fp0 # subtract off 1.0 23701 fbge.w pos_res # if pos, branch 23702 fmul.x PLOG2UP1(%pc),%fp0 # if neg, mul by LOG2UP1 23703 fmov.l %fp0,%d6 # put ILOG in d6 as a lword 23704 bra.b A4_str # go move out ILOG 23705pos_res: 23706 fmul.x PLOG2(%pc),%fp0 # if pos, mul by LOG2 23707 fmov.l %fp0,%d6 # put ILOG in d6 as a lword 23708 23709 23710# A4. Clr INEX bit. 23711# The operation in A3 above may have set INEX2. 23712 23713A4_str: 23714 fmov.l &0,%fpsr # zero all of fpsr - nothing needed 23715 23716 23717# A5. Set ICTR = 0; 23718# ICTR is a flag used in A13. It must be set before the 23719# loop entry A6. The lower word of d5 is used for ICTR. 23720 23721 clr.w %d5 # clear ICTR 23722 23723# A6. Calculate LEN. 23724# LEN is the number of digits to be displayed. The k-factor 23725# can dictate either the total number of digits, if it is 23726# a positive number, or the number of digits after the 23727# original decimal point which are to be included as 23728# significant. 
See the 68882 manual for examples. 23729# If LEN is computed to be greater than 17, set OPERR in 23730# USER_FPSR. LEN is stored in d4. 23731# 23732# Register usage: 23733# Input/Output 23734# d0: exponent/Unchanged 23735# d2: x/x/scratch 23736# d3: x/x 23737# d4: exc picture/LEN 23738# d5: ICTR/Unchanged 23739# d6: ILOG/Unchanged 23740# d7: k-factor/Unchanged 23741# a0: ptr for original operand/final result 23742# a1: x/x 23743# a2: x/x 23744# fp0: float(ILOG)/Unchanged 23745# fp1: x/x 23746# fp2: x/x 23747# F_SCR1:x/x 23748# F_SCR2:Abs(X) with $3fff exponent/Unchanged 23749# L_SCR1:x/x 23750# L_SCR2:first word of X packed/Unchanged 23751 23752A6_str: 23753 tst.l %d7 # branch on sign of k 23754 ble.b k_neg # if k <= 0, LEN = ILOG + 1 - k 23755 mov.l %d7,%d4 # if k > 0, LEN = k 23756 bra.b len_ck # skip to LEN check 23757k_neg: 23758 mov.l %d6,%d4 # first load ILOG to d4 23759 sub.l %d7,%d4 # subtract off k 23760 addq.l &1,%d4 # add in the 1 23761len_ck: 23762 tst.l %d4 # LEN check: branch on sign of LEN 23763 ble.b LEN_ng # if neg, set LEN = 1 23764 cmp.l %d4,&17 # test if LEN > 17 23765 ble.b A7_str # if not, forget it 23766 mov.l &17,%d4 # set max LEN = 17 23767 tst.l %d7 # if negative, never set OPERR 23768 ble.b A7_str # if positive, continue 23769 or.l &opaop_mask,USER_FPSR(%a6) # set OPERR & AIOP in USER_FPSR 23770 bra.b A7_str # finished here 23771LEN_ng: 23772 mov.l &1,%d4 # min LEN is 1 23773 23774 23775# A7. Calculate SCALE. 23776# SCALE is equal to 10^ISCALE, where ISCALE is the number 23777# of decimal places needed to insure LEN integer digits 23778# in the output before conversion to bcd. LAMBDA is the sign 23779# of ISCALE, used in A9. Fp1 contains 10^^(abs(ISCALE)) using 23780# the rounding mode as given in the following table (see 23781# Coonen, p. 7.23 as ref.; however, the SCALE variable is 23782# of opposite sign in bindec.sa from Coonen). 
23783# 23784# Initial USE 23785# FPCR[6:5] LAMBDA SIGN(X) FPCR[6:5] 23786# ---------------------------------------------- 23787# RN 00 0 0 00/0 RN 23788# RN 00 0 1 00/0 RN 23789# RN 00 1 0 00/0 RN 23790# RN 00 1 1 00/0 RN 23791# RZ 01 0 0 11/3 RP 23792# RZ 01 0 1 11/3 RP 23793# RZ 01 1 0 10/2 RM 23794# RZ 01 1 1 10/2 RM 23795# RM 10 0 0 11/3 RP 23796# RM 10 0 1 10/2 RM 23797# RM 10 1 0 10/2 RM 23798# RM 10 1 1 11/3 RP 23799# RP 11 0 0 10/2 RM 23800# RP 11 0 1 11/3 RP 23801# RP 11 1 0 11/3 RP 23802# RP 11 1 1 10/2 RM 23803# 23804# Register usage: 23805# Input/Output 23806# d0: exponent/scratch - final is 0 23807# d2: x/0 or 24 for A9 23808# d3: x/scratch - offset ptr into PTENRM array 23809# d4: LEN/Unchanged 23810# d5: 0/ICTR:LAMBDA 23811# d6: ILOG/ILOG or k if ((k<=0)&(ILOG<k)) 23812# d7: k-factor/Unchanged 23813# a0: ptr for original operand/final result 23814# a1: x/ptr to PTENRM array 23815# a2: x/x 23816# fp0: float(ILOG)/Unchanged 23817# fp1: x/10^ISCALE 23818# fp2: x/x 23819# F_SCR1:x/x 23820# F_SCR2:Abs(X) with $3fff exponent/Unchanged 23821# L_SCR1:x/x 23822# L_SCR2:first word of X packed/Unchanged 23823 23824A7_str: 23825 tst.l %d7 # test sign of k 23826 bgt.b k_pos # if pos and > 0, skip this 23827 cmp.l %d7,%d6 # test k - ILOG 23828 blt.b k_pos # if ILOG >= k, skip this 23829 mov.l %d7,%d6 # if ((k<0) & (ILOG < k)) ILOG = k 23830k_pos: 23831 mov.l %d6,%d0 # calc ILOG + 1 - LEN in d0 23832 addq.l &1,%d0 # add the 1 23833 sub.l %d4,%d0 # sub off LEN 23834 swap %d5 # use upper word of d5 for LAMBDA 23835 clr.w %d5 # set it zero initially 23836 clr.w %d2 # set up d2 for very small case 23837 tst.l %d0 # test sign of ISCALE 23838 bge.b iscale # if pos, skip next inst 23839 addq.w &1,%d5 # if neg, set LAMBDA true 23840 cmp.l %d0,&0xffffecd4 # test iscale <= -4908 23841 bgt.b no_inf # if false, skip rest 23842 add.l &24,%d0 # add in 24 to iscale 23843 mov.l &24,%d2 # put 24 in d2 for A9 23844no_inf: 23845 neg.l %d0 # and take abs of ISCALE 23846iscale: 23847 fmov.s FONE(%pc),%fp1 # init fp1 to 1 23848 bfextu USER_FPCR(%a6){&26:&2},%d1 # get initial rmode bits 23849 lsl.w &1,%d1 # put them in bits 2:1 23850 add.w %d5,%d1 # add in LAMBDA 23851 lsl.w &1,%d1 # put them in bits 3:1 23852 tst.l L_SCR2(%a6) # test sign of original x 23853 bge.b x_pos # if pos, don't set bit 0 23854 addq.l &1,%d1 # if neg, set bit 0 23855x_pos: 23856 lea.l RBDTBL(%pc),%a2 # load rbdtbl base 23857 mov.b (%a2,%d1),%d3 # load d3 with new rmode 23858 lsl.l &4,%d3 # put bits in proper position 23859 fmov.l %d3,%fpcr # load bits into fpu 23860 lsr.l &4,%d3 # put bits in proper position 23861 tst.b %d3 # decode new rmode for pten table 23862 bne.b not_rn # if zero, it is RN 23863 lea.l PTENRN(%pc),%a1 # load a1 with RN table base 23864 bra.b rmode # exit decode 23865not_rn: 23866 lsr.b &1,%d3 # get lsb in carry 23867 bcc.b not_rp2 # if carry clear, it is RM 23868 lea.l PTENRP(%pc),%a1 # load a1 with RP table base 23869 bra.b rmode # exit decode 23870not_rp2: 23871 lea.l PTENRM(%pc),%a1 # load a1 with RM table base 23872rmode: 23873 clr.l %d3 # clr table index 23874e_loop2: 23875 lsr.l &1,%d0 # shift next bit into carry 23876 bcc.b e_next2 # if zero, skip the mul 23877 fmul.x (%a1,%d3),%fp1 # mul by 10**(d3_bit_no) 23878e_next2: 23879 add.l &12,%d3 # inc d3 to next pwrten table entry 23880 tst.l %d0 # test if ISCALE is zero 23881 bne.b e_loop2 # if not, loop 23882 23883# A8. Clr INEX; Force RZ. 23884# The operation in A3 above may have set INEX2. 
23885# RZ mode is forced for the scaling operation to insure 23886# only one rounding error. The grs bits are collected in 23887# the INEX flag for use in A10. 23888# 23889# Register usage: 23890# Input/Output 23891 23892 fmov.l &0,%fpsr # clr INEX 23893 fmov.l &rz_mode*0x10,%fpcr # set RZ rounding mode 23894 23895# A9. Scale X -> Y. 23896# The mantissa is scaled to the desired number of significant 23897# digits. The excess digits are collected in INEX2. If mul, 23898# Check d2 for excess 10 exponential value. If not zero, 23899# the iscale value would have caused the pwrten calculation 23900# to overflow. Only a negative iscale can cause this, so 23901# multiply by 10^(d2), which is now only allowed to be 24, 23902# with a multiply by 10^8 and 10^16, which is exact since 23903# 10^24 is exact. If the input was denormalized, we must 23904# create a busy stack frame with the mul command and the 23905# two operands, and allow the fpu to complete the multiply. 23906# 23907# Register usage: 23908# Input/Output 23909# d0: FPCR with RZ mode/Unchanged 23910# d2: 0 or 24/unchanged 23911# d3: x/x 23912# d4: LEN/Unchanged 23913# d5: ICTR:LAMBDA 23914# d6: ILOG/Unchanged 23915# d7: k-factor/Unchanged 23916# a0: ptr for original operand/final result 23917# a1: ptr to PTENRM array/Unchanged 23918# a2: x/x 23919# fp0: float(ILOG)/X adjusted for SCALE (Y) 23920# fp1: 10^ISCALE/Unchanged 23921# fp2: x/x 23922# F_SCR1:x/x 23923# F_SCR2:Abs(X) with $3fff exponent/Unchanged 23924# L_SCR1:x/x 23925# L_SCR2:first word of X packed/Unchanged 23926 23927A9_str: 23928 fmov.x (%a0),%fp0 # load X from memory 23929 fabs.x %fp0 # use abs(X) 23930 tst.w %d5 # LAMBDA is in lower word of d5 23931 bne.b sc_mul # if neg (LAMBDA = 1), scale by mul 23932 fdiv.x %fp1,%fp0 # calculate X / SCALE -> Y to fp0 23933 bra.w A10_st # branch to A10 23934 23935sc_mul: 23936 tst.b BINDEC_FLG(%a6) # check for denorm 23937 beq.w A9_norm # if norm, continue with mul 23938 23939# for DENORM, we must calculate: 23940# fp0 = input_op * 10^ISCALE * 10^24 23941# since the input operand is a DENORM, we can't multiply it directly. 23942# so, we do the multiplication of the exponents and mantissas separately. 23943# in this way, we avoid underflow on intermediate stages of the 23944# multiplication and guarantee a result without exception. 23945 fmovm.x &0x2,-(%sp) # save 10^ISCALE to stack 23946 23947 mov.w (%sp),%d3 # grab exponent 23948 andi.w &0x7fff,%d3 # clear sign 23949 ori.w &0x8000,(%a0) # make DENORM exp negative 23950 add.w (%a0),%d3 # add DENORM exp to 10^ISCALE exp 23951 subi.w &0x3fff,%d3 # subtract BIAS 23952 add.w 36(%a1),%d3 23953 subi.w &0x3fff,%d3 # subtract BIAS 23954 add.w 48(%a1),%d3 23955 subi.w &0x3fff,%d3 # subtract BIAS 23956 23957 bmi.w sc_mul_err # is result is DENORM, punt!!! 
23958 23959 andi.w &0x8000,(%sp) # keep sign 23960 or.w %d3,(%sp) # insert new exponent 23961 andi.w &0x7fff,(%a0) # clear sign bit on DENORM again 23962 mov.l 0x8(%a0),-(%sp) # put input op mantissa on stk 23963 mov.l 0x4(%a0),-(%sp) 23964 mov.l &0x3fff0000,-(%sp) # force exp to zero 23965 fmovm.x (%sp)+,&0x80 # load normalized DENORM into fp0 23966 fmul.x (%sp)+,%fp0 23967 23968# fmul.x 36(%a1),%fp0 # multiply fp0 by 10^8 23969# fmul.x 48(%a1),%fp0 # multiply fp0 by 10^16 23970 mov.l 36+8(%a1),-(%sp) # get 10^8 mantissa 23971 mov.l 36+4(%a1),-(%sp) 23972 mov.l &0x3fff0000,-(%sp) # force exp to zero 23973 mov.l 48+8(%a1),-(%sp) # get 10^16 mantissa 23974 mov.l 48+4(%a1),-(%sp) 23975 mov.l &0x3fff0000,-(%sp)# force exp to zero 23976 fmul.x (%sp)+,%fp0 # multiply fp0 by 10^8 23977 fmul.x (%sp)+,%fp0 # multiply fp0 by 10^16 23978 bra.b A10_st 23979 23980sc_mul_err: 23981 bra.b sc_mul_err 23982 23983A9_norm: 23984 tst.w %d2 # test for small exp case 23985 beq.b A9_con # if zero, continue as normal 23986 fmul.x 36(%a1),%fp0 # multiply fp0 by 10^8 23987 fmul.x 48(%a1),%fp0 # multiply fp0 by 10^16 23988A9_con: 23989 fmul.x %fp1,%fp0 # calculate X * SCALE -> Y to fp0 23990 23991# A10. Or in INEX. 23992# If INEX is set, round error occurred. This is compensated 23993# for by 'or-ing' in the INEX2 flag to the lsb of Y. 23994# 23995# Register usage: 23996# Input/Output 23997# d0: FPCR with RZ mode/FPSR with INEX2 isolated 23998# d2: x/x 23999# d3: x/x 24000# d4: LEN/Unchanged 24001# d5: ICTR:LAMBDA 24002# d6: ILOG/Unchanged 24003# d7: k-factor/Unchanged 24004# a0: ptr for original operand/final result 24005# a1: ptr to PTENxx array/Unchanged 24006# a2: x/ptr to FP_SCR1(a6) 24007# fp0: Y/Y with lsb adjusted 24008# fp1: 10^ISCALE/Unchanged 24009# fp2: x/x 24010 24011A10_st: 24012 fmov.l %fpsr,%d0 # get FPSR 24013 fmov.x %fp0,FP_SCR1(%a6) # move Y to memory 24014 lea.l FP_SCR1(%a6),%a2 # load a2 with ptr to FP_SCR1 24015 btst &9,%d0 # check if INEX2 set 24016 beq.b A11_st # if clear, skip rest 24017 or.l &1,8(%a2) # or in 1 to lsb of mantissa 24018 fmov.x FP_SCR1(%a6),%fp0 # write adjusted Y back to fpu 24019 24020 24021# A11. Restore original FPCR; set size ext. 24022# Perform FINT operation in the user's rounding mode. Keep 24023# the size to extended. The sintdo entry point in the sint 24024# routine expects the FPCR value to be in USER_FPCR for 24025# mode and precision. The original FPCR is saved in L_SCR1. 24026 24027A11_st: 24028 mov.l USER_FPCR(%a6),L_SCR1(%a6) # save it for later 24029 and.l &0x00000030,USER_FPCR(%a6) # set size to ext, 24030# ;block exceptions 24031 24032 24033# A12. Calculate YINT = FINT(Y) according to user's rounding mode. 24034# The FPSP routine sintd0 is used. The output is in fp0. 
24035# 24036# Register usage: 24037# Input/Output 24038# d0: FPSR with AINEX cleared/FPCR with size set to ext 24039# d2: x/x/scratch 24040# d3: x/x 24041# d4: LEN/Unchanged 24042# d5: ICTR:LAMBDA/Unchanged 24043# d6: ILOG/Unchanged 24044# d7: k-factor/Unchanged 24045# a0: ptr for original operand/src ptr for sintdo 24046# a1: ptr to PTENxx array/Unchanged 24047# a2: ptr to FP_SCR1(a6)/Unchanged 24048# a6: temp pointer to FP_SCR1(a6) - orig value saved and restored 24049# fp0: Y/YINT 24050# fp1: 10^ISCALE/Unchanged 24051# fp2: x/x 24052# F_SCR1:x/x 24053# F_SCR2:Y adjusted for inex/Y with original exponent 24054# L_SCR1:x/original USER_FPCR 24055# L_SCR2:first word of X packed/Unchanged 24056 24057A12_st: 24058 movm.l &0xc0c0,-(%sp) # save regs used by sintd0 {%d0-%d1/%a0-%a1} 24059 mov.l L_SCR1(%a6),-(%sp) 24060 mov.l L_SCR2(%a6),-(%sp) 24061 24062 lea.l FP_SCR1(%a6),%a0 # a0 is ptr to FP_SCR1(a6) 24063 fmov.x %fp0,(%a0) # move Y to memory at FP_SCR1(a6) 24064 tst.l L_SCR2(%a6) # test sign of original operand 24065 bge.b do_fint12 # if pos, use Y 24066 or.l &0x80000000,(%a0) # if neg, use -Y 24067do_fint12: 24068 mov.l USER_FPSR(%a6),-(%sp) 24069# bsr sintdo # sint routine returns int in fp0 24070 24071 fmov.l USER_FPCR(%a6),%fpcr 24072 fmov.l &0x0,%fpsr # clear the AEXC bits!!! 24073## mov.l USER_FPCR(%a6),%d0 # ext prec/keep rnd mode 24074## andi.l &0x00000030,%d0 24075## fmov.l %d0,%fpcr 24076 fint.x FP_SCR1(%a6),%fp0 # do fint() 24077 fmov.l %fpsr,%d0 24078 or.w %d0,FPSR_EXCEPT(%a6) 24079## fmov.l &0x0,%fpcr 24080## fmov.l %fpsr,%d0 # don't keep ccodes 24081## or.w %d0,FPSR_EXCEPT(%a6) 24082 24083 mov.b (%sp),USER_FPSR(%a6) 24084 add.l &4,%sp 24085 24086 mov.l (%sp)+,L_SCR2(%a6) 24087 mov.l (%sp)+,L_SCR1(%a6) 24088 movm.l (%sp)+,&0x303 # restore regs used by sint {%d0-%d1/%a0-%a1} 24089 24090 mov.l L_SCR2(%a6),FP_SCR1(%a6) # restore original exponent 24091 mov.l L_SCR1(%a6),USER_FPCR(%a6) # restore user's FPCR 24092 24093# A13. Check for LEN digits. 24094# If the int operation results in more than LEN digits, 24095# or less than LEN -1 digits, adjust ILOG and repeat from 24096# A6. This test occurs only on the first pass. If the 24097# result is exactly 10^LEN, decrement ILOG and divide 24098# the mantissa by 10. The calculation of 10^LEN cannot 24099# be inexact, since all powers of ten up to 10^27 are exact 24100# in extended precision, so the use of a previous power-of-ten 24101# table will introduce no error. 
#
#
# Register usage:
#	Input/Output
#	d0: FPCR with size set to ext/scratch final = 0
#	d2: x/x
#	d3: x/scratch final = x
#	d4: LEN/LEN adjusted
#	d5: ICTR:LAMBDA/LAMBDA:ICTR
#	d6: ILOG/ILOG adjusted
#	d7: k-factor/Unchanged
#	a0: pointer into memory for packed bcd string formation
#	a1: ptr to PTENxx array/Unchanged
#	a2: ptr to FP_SCR1(a6)/Unchanged
#	fp0: int portion of Y/abs(YINT) adjusted
#	fp1: 10^ISCALE/Unchanged
#	fp2: x/10^LEN
#	F_SCR1:x/x
#	F_SCR2:Y with original exponent/Unchanged
#	L_SCR1:original USER_FPCR/Unchanged
#	L_SCR2:first word of X packed/Unchanged

A13_st:
	swap		%d5			# put ICTR in lower word of d5
	tst.w		%d5			# check if ICTR = 0
	bne		not_zr			# if non-zero, go to second test
#
# Compute 10^(LEN-1)
#
	fmov.s		FONE(%pc),%fp2		# init fp2 to 1.0
	mov.l		%d4,%d0			# put LEN in d0
	subq.l		&1,%d0			# d0 = LEN-1
	clr.l		%d3			# clr table index
l_loop:
	lsr.l		&1,%d0			# shift next bit into carry
	bcc.b		l_next			# if zero, skip the mul
	fmul.x		(%a1,%d3),%fp2		# mul by 10**(d3_bit_no)
l_next:
	add.l		&12,%d3			# inc d3 to next pwrten table entry
	tst.l		%d0			# test if any bits of LEN-1 remain
	bne.b		l_loop			# if so, loop
#
# 10^(LEN-1) is computed for this test and A14.  If the input was
# denormalized, check only the case in which YINT > 10^LEN.
#
	tst.b		BINDEC_FLG(%a6)		# check if input was norm
	beq.b		A13_con			# if norm, continue with checking
	fabs.x		%fp0			# take abs of YINT
	bra		test_2
#
# Compare abs(YINT) to 10^(LEN-1) and 10^LEN
#
A13_con:
	fabs.x		%fp0			# take abs of YINT
	fcmp.x		%fp0,%fp2		# compare abs(YINT) with 10^(LEN-1)
	fbge.w		test_2			# if greater or equal, do next test
	subq.l		&1,%d6			# subtract 1 from ILOG
	mov.w		&1,%d5			# set ICTR
	fmov.l		&rm_mode*0x10,%fpcr	# set rmode to RM
	fmul.s		FTEN(%pc),%fp2		# compute 10^LEN
	bra.w		A6_str			# return to A6 and recompute YINT
test_2:
	fmul.s		FTEN(%pc),%fp2		# compute 10^LEN
	fcmp.x		%fp0,%fp2		# compare abs(YINT) with 10^LEN
	fblt.w		A14_st			# if less, all is ok, go to A14
	fbgt.w		fix_ex			# if greater, fix and redo
	fdiv.s		FTEN(%pc),%fp0		# if equal, divide by 10
	addq.l		&1,%d6			# and inc ILOG
	bra.b		A14_st			# and continue elsewhere
fix_ex:
	addq.l		&1,%d6			# increment ILOG by 1
	mov.w		&1,%d5			# set ICTR
	fmov.l		&rm_mode*0x10,%fpcr	# set rmode to RM
	bra.w		A6_str			# return to A6 and recompute YINT
#
# Since ICTR <> 0, we have already been through one adjustment and
# shouldn't have another; this checks whether abs(YINT) = 10^LEN.
# 10^LEN is again computed using whatever table is in a1, since the
# value calculated cannot be inexact.
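#
# Both loops build the power of ten by binary exponentiation: the
# exponent in d0 is shifted right one bit at a time and, for each set
# bit, fp2 is multiplied by the next 12-byte table entry (10^1, 10^2,
# 10^4, 10^8, ...).  For example, LEN-1 = 5 (binary 101) multiplies by
# the 10^1 entry at offset 0 and the 10^4 entry at offset 24, giving
# 10^5.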
#
not_zr:
	fmov.s		FONE(%pc),%fp2		# init fp2 to 1.0
	mov.l		%d4,%d0			# put LEN in d0
	clr.l		%d3			# clr table index
z_loop:
	lsr.l		&1,%d0			# shift next bit into carry
	bcc.b		z_next			# if zero, skip the mul
	fmul.x		(%a1,%d3),%fp2		# mul by 10**(d3_bit_no)
z_next:
	add.l		&12,%d3			# inc d3 to next pwrten table entry
	tst.l		%d0			# test if any bits of LEN remain
	bne.b		z_loop			# if so, loop
	fabs.x		%fp0			# get abs(YINT)
	fcmp.x		%fp0,%fp2		# check if abs(YINT) = 10^LEN
	fbneq.w		A14_st			# if not, skip this
	fdiv.s		FTEN(%pc),%fp0		# divide abs(YINT) by 10
	addq.l		&1,%d6			# and inc ILOG by 1
	addq.l		&1,%d4			# and inc LEN
	fmul.s		FTEN(%pc),%fp2		# since LEN++, get 10^LEN

# A14. Convert the mantissa to bcd.
#      The binstr routine is used to convert the LEN digit
#      mantissa to bcd in memory.  The input to binstr is
#      to be a fraction, i.e. (mantissa)/10^LEN, adjusted
#      such that the decimal point is to the left of bit 63.
#      The bcd digits are stored in the correct position in
#      the final string area in memory.
#
#
# Register usage:
#	Input/Output
#	d0: x/LEN call to binstr - final is 0
#	d1: x/0
#	d2: x/ms 32-bits of mant of abs(YINT)
#	d3: x/ls 32-bits of mant of abs(YINT)
#	d4: LEN/Unchanged
#	d5: ICTR:LAMBDA/LAMBDA:ICTR
#	d6: ILOG
#	d7: k-factor/Unchanged
#	a0: pointer into memory for packed bcd string formation
#	    /ptr to first mantissa byte in result string
#	a1: ptr to PTENxx array/Unchanged
#	a2: ptr to FP_SCR1(a6)/Unchanged
#	fp0: int portion of Y/abs(YINT) adjusted
#	fp1: 10^ISCALE/Unchanged
#	fp2: 10^LEN/Unchanged
#	F_SCR1:x/Work area for final result
#	F_SCR2:Y with original exponent/Unchanged
#	L_SCR1:original USER_FPCR/Unchanged
#	L_SCR2:first word of X packed/Unchanged

A14_st:
	fmov.l		&rz_mode*0x10,%fpcr	# force rz for conversion
	fdiv.x		%fp2,%fp0		# divide abs(YINT) by 10^LEN
	lea.l		FP_SCR0(%a6),%a0
	fmov.x		%fp0,(%a0)		# move abs(YINT)/10^LEN to memory
	mov.l		4(%a0),%d2		# move 2nd word of FP_RES to d2
	mov.l		8(%a0),%d3		# move 3rd word of FP_RES to d3
	clr.l		4(%a0)			# zero word 2 of FP_RES
	clr.l		8(%a0)			# zero word 3 of FP_RES
	mov.l		(%a0),%d0		# move exponent to d0
	swap		%d0			# put exponent in lower word
	beq.b		no_sft			# if zero, don't shift
	sub.l		&0x3ffd,%d0		# sub bias less 2 to make fract
	tst.l		%d0			# check if > 1
	bgt.b		no_sft			# if so, don't shift
	neg.l		%d0			# make exp positive
m_loop:
	lsr.l		&1,%d2			# shift d2:d3 right, add 0s
	roxr.l		&1,%d3			# the number of places
	dbf.w		%d0,m_loop		# given in d0
no_sft:
	tst.l		%d2			# check for mantissa of zero
	bne.b		no_zr			# if not, go on
	tst.l		%d3			# continue zero check
	beq.b		zer_m			# if zero, go directly to binstr
no_zr:
	clr.l		%d1			# put zero in d1 for addx
	add.l		&0x00000080,%d3		# inc at bit 7
	addx.l		%d1,%d2			# continue inc
	and.l		&0xffffff80,%d3		# strip off lsb not used by 882
zer_m:
	mov.l		%d4,%d0			# put LEN in d0 for binstr call
	addq.l		&3,%a0			# a0 points to M16 byte in result
	bsr		binstr			# call binstr to convert mant

# A15. Convert the exponent to bcd.
#      As in A14 above, the exp is converted to bcd and the
#      digits are stored in the final string.
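#
#      The value converted is abs(ILOG), or a forced constant for the
#      zero and denormalized special cases handled at A15_st below; it
#      is divided by 10^4 at convrt so that binstr sees it as a 4-digit
#      fraction.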
#
#	Digits are stored in L_SCR1(a6) on return from binstr as:
#
#	 32               16 15                0
#	-----------------------------------------
#	|  0 | e3 | e2 | e1 | e4 |  X |  X |  X |
#	-----------------------------------------
#
# And are moved into their proper places in FP_SCR0.  If digit e4
# is non-zero, OPERR is signaled.  In all cases, all 4 digits are
# written as specified in the 881/882 manual for packed decimal.
#
# Register usage:
#	Input/Output
#	d0: x/LEN call to binstr - final is 0
#	d1: x/scratch (0);shift count for final exponent packing
#	d2: x/ms 32-bits of exp fraction/scratch
#	d3: x/ls 32-bits of exp fraction
#	d4: LEN/Unchanged
#	d5: ICTR:LAMBDA/LAMBDA:ICTR
#	d6: ILOG
#	d7: k-factor/Unchanged
#	a0: ptr to result string/ptr to L_SCR1(a6)
#	a1: ptr to PTENxx array/Unchanged
#	a2: ptr to FP_SCR1(a6)/Unchanged
#	fp0: abs(YINT) adjusted/float(ILOG)
#	fp1: 10^ISCALE/Unchanged
#	fp2: 10^LEN/Unchanged
#	F_SCR1:Work area for final result/BCD result
#	F_SCR2:Y with original exponent/ILOG/10^4
#	L_SCR1:original USER_FPCR/Exponent digits on return from binstr
#	L_SCR2:first word of X packed/Unchanged

A15_st:
	tst.b		BINDEC_FLG(%a6)		# check for denorm
	beq.b		not_denorm
	ftest.x		%fp0			# test for zero
	fbeq.w		den_zero		# if zero, use k-factor or 4933
	fmov.l		%d6,%fp0		# float ILOG
	fabs.x		%fp0			# get abs of ILOG
	bra.b		convrt
den_zero:
	tst.l		%d7			# check sign of the k-factor
	blt.b		use_ilog		# if negative, use ILOG
	fmov.s		F4933(%pc),%fp0		# force exponent to 4933
	bra.b		convrt			# do it
use_ilog:
	fmov.l		%d6,%fp0		# float ILOG
	fabs.x		%fp0			# get abs of ILOG
	bra.b		convrt
not_denorm:
	ftest.x		%fp0			# test for zero
	fbneq.w		not_zero		# if non-zero, use ILOG
	fmov.s		FONE(%pc),%fp0		# if zero, force exponent to 1
	bra.b		convrt			# do it
not_zero:
	fmov.l		%d6,%fp0		# float ILOG
	fabs.x		%fp0			# get abs of ILOG
convrt:
	fdiv.x		24(%a1),%fp0		# compute ILOG/10^4
	fmov.x		%fp0,FP_SCR1(%a6)	# store fp0 in memory
	mov.l		4(%a2),%d2		# move word 2 to d2
	mov.l		8(%a2),%d3		# move word 3 to d3
	mov.w		(%a2),%d0		# move exp to d0
	beq.b		x_loop_fin		# if zero, skip the shift
	sub.w		&0x3ffd,%d0		# subtract off bias
	neg.w		%d0			# make exp positive
x_loop:
	lsr.l		&1,%d2			# shift d2:d3 right
	roxr.l		&1,%d3			# the number of places
	dbf.w		%d0,x_loop		# given in d0
x_loop_fin:
	clr.l		%d1			# put zero in d1 for addx
	add.l		&0x00000080,%d3		# inc at bit 7
	addx.l		%d1,%d2			# continue inc
	and.l		&0xffffff80,%d3		# strip off lsb not used by 882
	mov.l		&4,%d0			# put 4 in d0 for binstr call
	lea.l		L_SCR1(%a6),%a0		# a0 is ptr to L_SCR1 for exp digits
	bsr		binstr			# call binstr to convert exp
	mov.l		L_SCR1(%a6),%d0		# load L_SCR1 lword to d0
	mov.l		&12,%d1			# use d1 for shift count
	lsr.l		%d1,%d0			# shift d0 right by 12
	bfins		%d0,FP_SCR0(%a6){&4:&12} # put e3:e2:e1 in FP_SCR0
	lsr.l		%d1,%d0			# shift d0 right by 12
	bfins		%d0,FP_SCR0(%a6){&16:&4} # put e4 in FP_SCR0
	tst.b		%d0			# check if e4 is zero
	beq.b		A16_st			# if zero, skip rest
	or.l		&opaop_mask,USER_FPSR(%a6) # set OPERR & AIOP in USER_FPSR

# A16. Write sign bits to final string.
#      Sigma is bit 31 of initial value; RHO is bit 31 of d6 (ILOG).
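#
#      d0 collects the two sign bits: SM (bit 1) is set when the
#      original mantissa is negative and SE (bit 0) when ILOG is
#      negative; the bfins below writes the pair into the two most
#      significant bits of FP_SCR0.  A negative mantissa with a
#      non-negative exponent, for instance, stores SM = 1, SE = 0.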
#
# Register usage:
#	Input/Output
#	d0: x/scratch - final is x
#	d2: x/x
#	d3: x/x
#	d4: LEN/Unchanged
#	d5: ICTR:LAMBDA/LAMBDA:ICTR
#	d6: ILOG/ILOG adjusted
#	d7: k-factor/Unchanged
#	a0: ptr to L_SCR1(a6)/Unchanged
#	a1: ptr to PTENxx array/Unchanged
#	a2: ptr to FP_SCR1(a6)/Unchanged
#	fp0: float(ILOG)/Unchanged
#	fp1: 10^ISCALE/Unchanged
#	fp2: 10^LEN/Unchanged
#	F_SCR1:BCD result with correct signs
#	F_SCR2:ILOG/10^4
#	L_SCR1:Exponent digits on return from binstr
#	L_SCR2:first word of X packed/Unchanged

A16_st:
	clr.l		%d0			# clr d0 for collection of signs
	and.b		&0x0f,FP_SCR0(%a6)	# clear first nibble of FP_SCR0
	tst.l		L_SCR2(%a6)		# check sign of original mantissa
	bge.b		mant_p			# if pos, don't set SM
	mov.l		&2,%d0			# move 2 in to d0 for SM
mant_p:
	tst.l		%d6			# check sign of ILOG
	bge.b		wr_sgn			# if pos, don't set SE
	addq.l		&1,%d0			# set bit 0 in d0 for SE
wr_sgn:
	bfins		%d0,FP_SCR0(%a6){&0:&2}	# insert SM and SE into FP_SCR0

# Clean up and restore all registers used.

	fmov.l		&0,%fpsr		# clear possible inex2/ainex bits
	fmovm.x		(%sp)+,&0xe0		# {%fp0-%fp2}
	movm.l		(%sp)+,&0x4fc		# {%d2-%d7/%a2}
	rts

	global		PTENRN
PTENRN:
	long		0x40020000,0xA0000000,0x00000000	# 10 ^ 1
	long		0x40050000,0xC8000000,0x00000000	# 10 ^ 2
	long		0x400C0000,0x9C400000,0x00000000	# 10 ^ 4
	long		0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8
	long		0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16
	long		0x40690000,0x9DC5ADA8,0x2B70B59E	# 10 ^ 32
	long		0x40D30000,0xC2781F49,0xFFCFA6D5	# 10 ^ 64
	long		0x41A80000,0x93BA47C9,0x80E98CE0	# 10 ^ 128
	long		0x43510000,0xAA7EEBFB,0x9DF9DE8E	# 10 ^ 256
	long		0x46A30000,0xE319A0AE,0xA60E91C7	# 10 ^ 512
	long		0x4D480000,0xC9767586,0x81750C17	# 10 ^ 1024
	long		0x5A920000,0x9E8B3B5D,0xC53D5DE5	# 10 ^ 2048
	long		0x75250000,0xC4605202,0x8A20979B	# 10 ^ 4096

	global		PTENRP
PTENRP:
	long		0x40020000,0xA0000000,0x00000000	# 10 ^ 1
	long		0x40050000,0xC8000000,0x00000000	# 10 ^ 2
	long		0x400C0000,0x9C400000,0x00000000	# 10 ^ 4
	long		0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8
	long		0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16
	long		0x40690000,0x9DC5ADA8,0x2B70B59E	# 10 ^ 32
	long		0x40D30000,0xC2781F49,0xFFCFA6D6	# 10 ^ 64
	long		0x41A80000,0x93BA47C9,0x80E98CE0	# 10 ^ 128
	long		0x43510000,0xAA7EEBFB,0x9DF9DE8E	# 10 ^ 256
	long		0x46A30000,0xE319A0AE,0xA60E91C7	# 10 ^ 512
	long		0x4D480000,0xC9767586,0x81750C18	# 10 ^ 1024
	long		0x5A920000,0x9E8B3B5D,0xC53D5DE5	# 10 ^ 2048
	long		0x75250000,0xC4605202,0x8A20979B	# 10 ^ 4096

	global		PTENRM
PTENRM:
	long		0x40020000,0xA0000000,0x00000000	# 10 ^ 1
	long		0x40050000,0xC8000000,0x00000000	# 10 ^ 2
	long		0x400C0000,0x9C400000,0x00000000	# 10 ^ 4
	long		0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8
	long		0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16
	long		0x40690000,0x9DC5ADA8,0x2B70B59D	# 10 ^ 32
	long		0x40D30000,0xC2781F49,0xFFCFA6D5	# 10 ^ 64
	long		0x41A80000,0x93BA47C9,0x80E98CDF	# 10 ^ 128
	long		0x43510000,0xAA7EEBFB,0x9DF9DE8D	# 10 ^ 256
	long		0x46A30000,0xE319A0AE,0xA60E91C6	# 10 ^ 512
	long		0x4D480000,0xC9767586,0x81750C17	# 10 ^ 1024
	long		0x5A920000,0x9E8B3B5D,0xC53D5DE4	# 10 ^ 2048
	long		0x75250000,0xC4605202,0x8A20979A	# 10 ^ 4096
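# All three tables hold the same sequence 10^1, 10^2, 10^4, ..., 10^4096
# (each entry is the square of the one before it) as 12-byte extended-
# precision values, which is why the l_loop/z_loop table index advances
# by 12.  PTENRN holds the values rounded to nearest; PTENRP and PTENRM
# hold them rounded toward plus and minus infinity, respectively, so the
# caller can bound the scaling error in whichever direction it needs.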

#########################################################################
# binstr(): Converts a 64-bit binary integer to bcd.			#
#									#
# INPUT *************************************************************** #
#	d2:d3 = 64-bit binary integer					#
#	d0    = desired length (LEN)					#
#	a0    = pointer to start in memory for bcd characters		#
#		(This pointer must point to byte 4 of the first		#
#		 lword of the packed decimal memory string.)		#
#									#
# OUTPUT ************************************************************** #
#	a0 = pointer to LEN bcd digits representing the 64-bit integer.	#
#									#
# ALGORITHM *********************************************************** #
#	The 64-bit binary is assumed to have a decimal point before	#
#	bit 63.  The fraction is multiplied by 10 using a mul by 2	#
#	shift and a mul by 8 shift.  The bits shifted out of the	#
#	msb form a decimal digit.  This process is iterated until	#
#	LEN digits are formed.						#
#									#
# A1. Init d7 to 1.  D7 is the byte digit counter, and if 1, the	#
#     digit formed will be assumed the least significant.  This is	#
#     to force the first byte formed to have a 0 in the upper 4 bits.	#
#									#
# A2. Beginning of the loop:						#
#     Copy the fraction in d2:d3 to d4:d5.				#
#									#
# A3. Multiply the fraction in d2:d3 by 8 using bit-field		#
#     extracts and shifts.  The three msbs from d2 will go into d1.	#
#									#
# A4. Multiply the fraction in d4:d5 by 2 using shifts.  The msb	#
#     will be collected by the carry.					#
#									#
# A5. Add using the carry the 64-bit quantities in d2:d3 and d4:d5	#
#     into d2:d3.  D1 will contain the bcd digit formed.		#
#									#
# A6. Test d7.  If zero, the digit formed is the ms digit.  If non-	#
#     zero, it is the ls digit.  Put the digit in its place in the	#
#     upper word of d7.  If it is the ls digit, write the byte		#
#     from d7 to memory.						#
#									#
# A7. Decrement d0 (LEN counter) and repeat the loop until zero.	#
#									#
#########################################################################

# Implementation Notes:
#
# The registers are used as follows:
#
#	d0: LEN counter
#	d1: temp used to form the digit
#	d2: upper 32-bits of fraction for mul by 8
#	d3: lower 32-bits of fraction for mul by 8
#	d4: upper 32-bits of fraction for mul by 2
#	d5: lower 32-bits of fraction for mul by 2
#	d6: temp for bit-field extracts
#	d7: byte digit formation word; digit count {0,1}
#	a0: pointer into memory for packed bcd string formation
#

	global		binstr
binstr:
	movm.l		&0xff00,-(%sp)		# {%d0-%d7}

#
# A1: Init d7
#
	mov.l		&1,%d7			# init d7 for second digit
	subq.l		&1,%d0			# otherwise dbf would make LEN+1 passes
#
# A2. Copy d2:d3 to d4:d5.  Start loop.
#
loop:
	mov.l		%d2,%d4			# copy the fraction before muls
	mov.l		%d3,%d5			# to d4:d5
#
# A3. Multiply d2:d3 by 8; extract msbs into d1.
#
	bfextu		%d2{&0:&3},%d1		# copy 3 msbs of d2 into d1
	asl.l		&3,%d2			# shift d2 left by 3 places
	bfextu		%d3{&0:&3},%d6		# copy 3 msbs of d3 into d6
	asl.l		&3,%d3			# shift d3 left by 3 places
	or.l		%d6,%d2			# or in msbs from d3 into d2
#
# A4. Multiply d4:d5 by 2; add carry out to d1.
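#
#     Taken together, A3-A5 compute fraction*10 as fraction*8 +
#     fraction*2; the bits that carry out past bit 63 (collected in d1)
#     are the next decimal digit.  For example, a fraction of 0.75
#     (d2:d3 = 0.75 * 2^64) yields the digit 7 and leaves 0.5 * 2^64,
#     which yields 5 on the following pass.  A rough C sketch of the
#     same idea, using a 128-bit intermediate instead of the shift/add
#     sequence (illustrative only, not part of the package):
#
#	#include <stdint.h>
#
#	static void bin_to_dec(uint64_t frac, int len, uint8_t *dig)
#	{
#	    for (int i = 0; i < len; i++) {
#	        unsigned __int128 p = (unsigned __int128)frac * 10;
#	        dig[i] = (uint8_t)(p >> 64); /* bits carried past bit 63 */
#	        frac = (uint64_t)p;          /* remaining fraction */
#	    }
#	}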
#
	asl.l		&1,%d5			# mul d5 by 2
	roxl.l		&1,%d4			# mul d4 by 2
	swap		%d6			# put 0 in d6 lower word
	addx.w		%d6,%d1			# add in extend from mul by 2
#
# A5. Add mul by 8 to mul by 2.  D1 contains the digit formed.
#
	add.l		%d5,%d3			# add lower 32 bits
	nop					# ERRATA FIX #13 (Rev. 1.2 6/6/90)
	addx.l		%d4,%d2			# add with extend upper 32 bits
	nop					# ERRATA FIX #13 (Rev. 1.2 6/6/90)
	addx.w		%d6,%d1			# add in extend from add to d1
	swap		%d6			# with d6 = 0; put 0 in upper word
#
# A6. Test d7 and branch.
#
	tst.w		%d7			# if non-zero, form byte & write
	beq.b		first_d			# if zero, store digit & loop
sec_d:
	swap		%d7			# bring first digit to word d7b
	asl.w		&4,%d7			# first digit in upper 4 bits d7b
	add.w		%d1,%d7			# add in ls digit to d7b
	mov.b		%d7,(%a0)+		# store d7b byte in memory
	swap		%d7			# put LEN counter in word d7a
	clr.w		%d7			# set d7a to signal no digits done
	dbf.w		%d0,loop		# do loop some more!
	bra.b		end_bstr		# finished, so exit
first_d:
	swap		%d7			# put digit word in d7b
	mov.w		%d1,%d7			# put new digit in d7b
	swap		%d7			# put LEN counter in word d7a
	addq.w		&1,%d7			# set d7a to signal first digit done
	dbf.w		%d0,loop		# do loop some more!
	swap		%d7			# put last digit in string
	lsl.w		&4,%d7			# move it to upper 4 bits
	mov.b		%d7,(%a0)+		# store it in memory string
#
# Clean up and return; the converted digits are in the memory string.
#
end_bstr:
	movm.l		(%sp)+,&0xff		# {%d0-%d7}
	rts

#########################################################################
# XDEF **************************************************************** #
#	facc_in_b(): dmem_read_byte failed				#
#	facc_in_w(): dmem_read_word failed				#
#	facc_in_l(): dmem_read_long failed				#
#	facc_in_d(): dmem_read of dbl prec failed			#
#	facc_in_x(): dmem_read of ext prec failed			#
#									#
#	facc_out_b(): dmem_write_byte failed				#
#	facc_out_w(): dmem_write_word failed				#
#	facc_out_l(): dmem_write_long failed				#
#	facc_out_d(): dmem_write of dbl prec failed			#
#	facc_out_x(): dmem_write of ext prec failed			#
#									#
# XREF **************************************************************** #
#	_real_access() - exit through access error handler		#
#									#
# INPUT *************************************************************** #
#	None								#
#									#
# OUTPUT ************************************************************** #
#	None								#
#									#
# ALGORITHM *********************************************************** #
#	Flow jumps here when an FP data fetch call gets an error	#
# result.  This means the operating system wants an access error frame	#
# made out of the current exception stack frame.			#
#	So, we first call restore(), which makes sure that any updated	#
# -(an)+ register gets returned to its pre-exception value, and then	#
# we change the stack to an access error stack frame.			#
#									#
#########################################################################

facc_in_b:
	movq.l		&0x1,%d0		# one byte
	bsr.w		restore			# fix An

	mov.w		&0x0121,EXC_VOFF(%a6)	# set FSLW
	bra.w		facc_finish

facc_in_w:
	movq.l		&0x2,%d0		# two bytes
	bsr.w		restore			# fix An

	mov.w		&0x0141,EXC_VOFF(%a6)	# set FSLW
	bra.b		facc_finish

facc_in_l:
	movq.l		&0x4,%d0		# four bytes
	bsr.w		restore			# fix An

	mov.w		&0x0101,EXC_VOFF(%a6)	# set FSLW
	bra.b		facc_finish

facc_in_d:
	movq.l		&0x8,%d0		# eight bytes
	bsr.w		restore			# fix An

	mov.w		&0x0161,EXC_VOFF(%a6)	# set FSLW
	bra.b		facc_finish

facc_in_x:
	movq.l		&0xc,%d0		# twelve bytes
	bsr.w		restore			# fix An

	mov.w		&0x0161,EXC_VOFF(%a6)	# set FSLW
	bra.b		facc_finish

################################################################

facc_out_b:
	movq.l		&0x1,%d0		# one byte
	bsr.w		restore			# restore An

	mov.w		&0x00a1,EXC_VOFF(%a6)	# set FSLW
	bra.b		facc_finish

facc_out_w:
	movq.l		&0x2,%d0		# two bytes
	bsr.w		restore			# restore An

	mov.w		&0x00c1,EXC_VOFF(%a6)	# set FSLW
	bra.b		facc_finish

facc_out_l:
	movq.l		&0x4,%d0		# four bytes
	bsr.w		restore			# restore An

	mov.w		&0x0081,EXC_VOFF(%a6)	# set FSLW
	bra.b		facc_finish

facc_out_d:
	movq.l		&0x8,%d0		# eight bytes
	bsr.w		restore			# restore An

	mov.w		&0x00e1,EXC_VOFF(%a6)	# set FSLW
	bra.b		facc_finish

facc_out_x:
	mov.l		&0xc,%d0		# twelve bytes
	bsr.w		restore			# restore An

	mov.w		&0x00e1,EXC_VOFF(%a6)	# set FSLW

# here's where we actually create the access error frame from the
# current exception stack frame.
facc_finish:
	mov.l		USER_FPIAR(%a6),EXC_PC(%a6) # store current PC

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6

	mov.l		(%sp),-(%sp)		# store SR, hi(PC)
	mov.l		0x8(%sp),0x4(%sp)	# store lo(PC)
	mov.l		0xc(%sp),0x8(%sp)	# store EA
	mov.l		&0x00000001,0xc(%sp)	# store FSLW
	mov.w		0x6(%sp),0xc(%sp)	# fix FSLW (size)
	mov.w		&0x4008,0x6(%sp)	# store voff

	btst		&0x5,(%sp)		# supervisor or user mode?
	beq.b		facc_out2		# user
	bset		&0x2,0xd(%sp)		# set supervisor TM bit

facc_out2:
	bra.l		_real_access

##################################################################

# if the effective addressing mode was predecrement or postincrement,
# the emulation has already changed An to its correct post-instruction
# value. but since we're exiting to the access error handler, An must
# be returned to its pre-instruction value. we do that here.
restore:
	mov.b		EXC_OPWORD+0x1(%a6),%d1
	andi.b		&0x38,%d1		# extract opmode
	cmpi.b		%d1,&0x18		# postinc?
	beq.w		rest_inc
	cmpi.b		%d1,&0x20		# predec?
	beq.w		rest_dec
	rts

rest_inc:
	mov.b		EXC_OPWORD+0x1(%a6),%d1
	andi.w		&0x0007,%d1		# fetch An

	mov.w		(tbl_rest_inc.b,%pc,%d1.w*2),%d1
	jmp		(tbl_rest_inc.b,%pc,%d1.w*1)

tbl_rest_inc:
	short		ri_a0 - tbl_rest_inc
	short		ri_a1 - tbl_rest_inc
	short		ri_a2 - tbl_rest_inc
	short		ri_a3 - tbl_rest_inc
	short		ri_a4 - tbl_rest_inc
	short		ri_a5 - tbl_rest_inc
	short		ri_a6 - tbl_rest_inc
	short		ri_a7 - tbl_rest_inc

ri_a0:
	sub.l		%d0,EXC_DREGS+0x8(%a6)	# fix stacked a0
	rts
ri_a1:
	sub.l		%d0,EXC_DREGS+0xc(%a6)	# fix stacked a1
	rts
ri_a2:
	sub.l		%d0,%a2			# fix a2
	rts
ri_a3:
	sub.l		%d0,%a3			# fix a3
	rts
ri_a4:
	sub.l		%d0,%a4			# fix a4
	rts
ri_a5:
	sub.l		%d0,%a5			# fix a5
	rts
ri_a6:
	sub.l		%d0,(%a6)		# fix stacked a6
	rts
# if it's an fmove out instruction, we don't have to fix a7 because we
# hadn't changed it yet. if it's an opclass two instruction (data moved
# in) and the exception occurred in supervisor mode, then a7 also wasn't
# updated. if it was user mode, then restore the correct a7, which is
# currently in the USP.
ri_a7:
	cmpi.b		EXC_VOFF(%a6),&0x30	# move in or out?
	bne.b		ri_a7_done		# out

	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
	bne.b		ri_a7_done		# supervisor
	movc		%usp,%a0		# restore USP
	sub.l		%d0,%a0
	movc		%a0,%usp
ri_a7_done:
	rts

# need to invert the adjustment value if the <ea> was predec
rest_dec:
	neg.l		%d0
	bra.b		rest_inc
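
# For example, if an access error occurs on an "fmove.x (%a2)+,%fp0",
# the emulation has already advanced a2 by 12, so facc_in_x passes 12
# in d0 and restore() dispatches to ri_a2, which subtracts 12 to
# recover the original a2.  For "-(%a2)" the adjustment runs the other
# way: rest_dec negates d0 first, so the same sub.l adds the 12 bytes
# back.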