# pfpsp.S
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
# M68000 Hi-Performance Microprocessor Division
# M68060 Software Package
# Production Release P1.00 -- October 10, 1994
#
# M68060 Software Package Copyright © 1993, 1994 Motorola Inc. All rights reserved.
#
# THE SOFTWARE is provided on an "AS IS" basis and without warranty.
# To the maximum extent permitted by applicable law,
# MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
# INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE
# and any warranty against infringement with regard to the SOFTWARE
# (INCLUDING ANY MODIFIED VERSIONS THEREOF) and any accompanying written materials.
#
# To the maximum extent permitted by applicable law,
# IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
# (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS,
# BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS)
# ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE.
# Motorola assumes no responsibility for the maintenance and support of the SOFTWARE.
#
# You are hereby granted a copyright license to use, modify, and distribute the SOFTWARE
# so long as this entire notice is retained without alteration in any modified and/or
# redistributed versions, and that such modified versions are clearly identified as such.
# No licenses are granted by implication, estoppel or otherwise under any patents
# or trademarks of Motorola, Inc.
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

# freal.s:
#	This file is appended to the top of the 060FPSP package
# and contains the entry points into the package. The user, in
# effect, branches to one of the branch table entries located
# after _060FPSP_TABLE.
#	Also, subroutine stubs exist in this file (_fpsp_done for
# example) that are referenced by the FPSP package itself in order
# to call a given routine. The stub routine actually performs the
# callout. The FPSP code does a "bsr" to the stub routine. This
# extra layer of hierarchy adds a slight performance penalty but
# it makes the FPSP code easier to read and more maintainable.
#

# Byte offsets of the OS-supplied "callout" pointers. The host OS
# places a 0x80-byte vector of function pointers directly BEFORE
# _060FPSP_TABLE, hence the (-0x80 + offset) addressing used by the
# stub routines below.
set	_off_bsun,	0x00		# branch/set on unordered callout
set	_off_snan,	0x04		# signalling NAN callout
set	_off_operr,	0x08		# operand error callout
set	_off_ovfl,	0x0c		# overflow callout
set	_off_unfl,	0x10		# underflow callout
set	_off_dz,	0x14		# divide-by-zero callout
set	_off_inex,	0x18		# inexact callout
set	_off_fline,	0x1c		# F-line exception callout
set	_off_fpu_dis,	0x20		# FPU-disabled callout
set	_off_trap,	0x24		# trap callout
set	_off_trace,	0x28		# trace callout
set	_off_access,	0x2c		# access error callout
set	_off_done,	0x30		# exception-handled ("done") callout

set	_off_imr,	0x40		# instruction memory read (longword)
set	_off_dmr,	0x44		# data memory read (block)
set	_off_dmw,	0x48		# data memory write (block)
set	_off_irw,	0x4c		# instruction read (word)
set	_off_irl,	0x50		# instruction read (longword)
set	_off_drb,	0x54		# data read (byte)
set	_off_drw,	0x58		# data read (word)
set	_off_drl,	0x5c		# data read (longword)
set	_off_dwb,	0x60		# data write (byte)
set	_off_dww,	0x64		# data write (word)
set	_off_dwl,	0x68		# data write (longword)

_060FPSP_TABLE:

###############################################################

# Here's the table of ENTRY POINTS for those linking the package.
# Each slot is 8 bytes: a 6-byte "bra.l" plus a 2-byte pad.
	bra.l		_fpsp_snan
	short		0x0000
	bra.l		_fpsp_operr
	short		0x0000
	bra.l		_fpsp_ovfl
	short		0x0000
	bra.l		_fpsp_unfl
	short		0x0000
	bra.l		_fpsp_dz
	short		0x0000
	bra.l		_fpsp_inex
	short		0x0000
	bra.l		_fpsp_fline
	short		0x0000
	bra.l		_fpsp_unsupp
	short		0x0000
	bra.l		_fpsp_effadd
	short		0x0000

	space		56		# pad the entry-point table to 0x80 bytes

###############################################################

# Callout stubs. Each stub:
#   1) saves d0 on the stack,
#   2) loads the OS-installed callout offset from the vector area,
#   3) pushes the absolute callout address (PC-relative + offset),
#   4) restores d0 from beneath the pushed address,
#   5) "rtd &0x4" pops the callout address into the PC while
#      discarding the 4-byte d0 save slot.
	global		_fpsp_done
_fpsp_done:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_done,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_ovfl
_real_ovfl:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_ovfl,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_unfl
_real_unfl:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_unfl,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_inex
_real_inex:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_inex,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_bsun
_real_bsun:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_bsun,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_operr
_real_operr:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_operr,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_snan
_real_snan:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_snan,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_dz
_real_dz:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_dz,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_fline
_real_fline:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_fline,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_fpu_disabled
_real_fpu_disabled:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_fpu_dis,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_trap
_real_trap:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_trap,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_trace
_real_trace:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_trace,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_real_access
_real_access:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_access,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

#######################################

# Memory-access callout stubs (same mechanics as above).
	global		_imem_read
_imem_read:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_imr,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_dmem_read
_dmem_read:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_dmr,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_dmem_write
_dmem_write:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_dmw,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_imem_read_word
_imem_read_word:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_irw,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_imem_read_long
_imem_read_long:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_irl,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_dmem_read_byte
_dmem_read_byte:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_drb,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_dmem_read_word
_dmem_read_word:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_drw,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_dmem_read_long
_dmem_read_long:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_drl,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_dmem_write_byte
_dmem_write_byte:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_dwb,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_dmem_write_word
_dmem_write_word:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_dww,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

	global		_dmem_write_long
_dmem_write_long:
	mov.l		%d0,-(%sp)
	mov.l		(_060FPSP_TABLE-0x80+_off_dwl,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
	mov.l		0x4(%sp),%d0
	rtd		&0x4

#
# This file contains a set of define statements for constants
# in order to promote readability within the corecode itself.
#

set LOCAL_SIZE,		192			# stack frame size(bytes)
set LV,			-LOCAL_SIZE		# stack offset

set EXC_SR,		0x4			# stack status register
set EXC_PC,		0x6			# stack pc
set EXC_VOFF,		0xa			# stacked vector offset
set EXC_EA,		0xc			# stacked <ea>

set EXC_FP,		0x0			# frame pointer

set EXC_AREGS,		-68			# offset of all address regs
set EXC_DREGS,		-100			# offset of all data regs
set EXC_FPREGS,		-36			# offset of all fp regs

set EXC_A7,		EXC_AREGS+(7*4)		# offset of saved a7
set OLD_A7,		EXC_AREGS+(6*4)		# extra copy of saved a7
set EXC_A6,		EXC_AREGS+(6*4)		# offset of saved a6
set EXC_A5,		EXC_AREGS+(5*4)
set EXC_A4,		EXC_AREGS+(4*4)
set EXC_A3,		EXC_AREGS+(3*4)
set EXC_A2,		EXC_AREGS+(2*4)
set EXC_A1,		EXC_AREGS+(1*4)
set EXC_A0,		EXC_AREGS+(0*4)
set EXC_D7,		EXC_DREGS+(7*4)
set EXC_D6,		EXC_DREGS+(6*4)
set EXC_D5,		EXC_DREGS+(5*4)
set EXC_D4,		EXC_DREGS+(4*4)
set EXC_D3,		EXC_DREGS+(3*4)
set EXC_D2,		EXC_DREGS+(2*4)
set EXC_D1,		EXC_DREGS+(1*4)
set EXC_D0,		EXC_DREGS+(0*4)

set EXC_FP0,		EXC_FPREGS+(0*12)	# offset of saved fp0
set EXC_FP1,		EXC_FPREGS+(1*12)	# offset of saved fp1
set EXC_FP2,		EXC_FPREGS+(2*12)	# offset of saved fp2 (not used)

set FP_SCR1,		LV+80			# fp scratch 1
set FP_SCR1_EX,		FP_SCR1+0
set FP_SCR1_SGN,	FP_SCR1+2
set FP_SCR1_HI,		FP_SCR1+4
set FP_SCR1_LO,		FP_SCR1+8

set FP_SCR0,		LV+68			# fp scratch 0
set FP_SCR0_EX,		FP_SCR0+0
set FP_SCR0_SGN,	FP_SCR0+2
set FP_SCR0_HI,		FP_SCR0+4
set FP_SCR0_LO,		FP_SCR0+8

set FP_DST,		LV+56			# fp destination operand
set FP_DST_EX,		FP_DST+0
set FP_DST_SGN,		FP_DST+2
set FP_DST_HI,		FP_DST+4
set FP_DST_LO,		FP_DST+8

set FP_SRC,		LV+44			# fp source operand
set FP_SRC_EX,		FP_SRC+0
set FP_SRC_SGN,		FP_SRC+2
set FP_SRC_HI,		FP_SRC+4
set FP_SRC_LO,		FP_SRC+8

set USER_FPIAR,		LV+40			# FP instr address register

set USER_FPSR,		LV+36			# FP status register
set FPSR_CC,		USER_FPSR+0		# FPSR condition codes
set FPSR_QBYTE,		USER_FPSR+1		# FPSR quotient byte
set FPSR_EXCEPT,	USER_FPSR+2		# FPSR exception status byte
set FPSR_AEXCEPT,	USER_FPSR+3		# FPSR accrued exception byte

set USER_FPCR,		LV+32			# FP control register
set FPCR_ENABLE,	USER_FPCR+2		# FPCR exception enable
set FPCR_MODE,		USER_FPCR+3		# FPCR rounding mode control

set L_SCR3,		LV+28			# integer scratch 3
set L_SCR2,		LV+24			# integer scratch 2
set L_SCR1,		LV+20			# integer scratch 1

set STORE_FLG,		LV+19			# flag: operand store (ie. not fcmp/ftst)

set EXC_TEMP2,		LV+24			# temporary space
set EXC_TEMP,		LV+16			# temporary space

set DTAG,		LV+15			# destination operand type
set STAG,		LV+14			# source operand type

set SPCOND_FLG,		LV+10			# flag: special case (see below)

set EXC_CC,		LV+8			# saved condition codes
set EXC_EXTWPTR,	LV+4			# saved current PC (active)
set EXC_EXTWORD,	LV+2			# saved extension word
set EXC_CMDREG,		LV+2			# saved extension word
set EXC_OPWORD,		LV+0			# saved operation word

################################

# Helpful macros

set FTEMP,		0			# offsets within an
set FTEMP_EX,		0			# extended precision
set FTEMP_SGN,		2			# value saved in memory.
set FTEMP_HI,		4
set FTEMP_LO,		8
set FTEMP_GRS,		12

set LOCAL,		0			# offsets within an
set LOCAL_EX,		0			# extended precision
set LOCAL_SGN,		2			# value saved in memory.
set LOCAL_HI,		4
set LOCAL_LO,		8
set LOCAL_GRS,		12

set DST,		0			# offsets within an
set DST_EX,		0			# extended precision
set DST_HI,		4			# value saved in memory.
set DST_LO,		8

set SRC,		0			# offsets within an
set SRC_EX,		0			# extended precision
set SRC_HI,		4			# value saved in memory.
set SRC_LO,		8

set SGL_LO,		0x3f81			# min sgl prec exponent
set SGL_HI,		0x407e			# max sgl prec exponent
set DBL_LO,		0x3c01			# min dbl prec exponent
set DBL_HI,		0x43fe			# max dbl prec exponent
set EXT_LO,		0x0			# min ext prec exponent
set EXT_HI,		0x7ffe			# max ext prec exponent

set EXT_BIAS,		0x3fff			# extended precision bias
set SGL_BIAS,		0x007f			# single precision bias
set DBL_BIAS,		0x03ff			# double precision bias

set NORM,		0x00			# operand type for STAG/DTAG
set ZERO,		0x01			# operand type for STAG/DTAG
set INF,		0x02			# operand type for STAG/DTAG
set QNAN,		0x03			# operand type for STAG/DTAG
set DENORM,		0x04			# operand type for STAG/DTAG
set SNAN,		0x05			# operand type for STAG/DTAG
set UNNORM,		0x06			# operand type for STAG/DTAG

##################
# FPSR/FPCR bits #
##################
set neg_bit,		0x3			# negative result
set z_bit,		0x2			# zero result
set inf_bit,		0x1			# infinite result
set nan_bit,		0x0			# NAN result

set q_sn_bit,		0x7			# sign bit of quotient byte

set bsun_bit,		7			# branch on unordered
set snan_bit,		6			# signalling NAN
set operr_bit,		5			# operand error
set ovfl_bit,		4			# overflow
set unfl_bit,		3			# underflow
set dz_bit,		2			# divide by zero
set inex2_bit,		1			# inexact result 2
set inex1_bit,		0			# inexact result 1

set aiop_bit,		7			# accrued inexact operation bit
set aovfl_bit,		6			# accrued overflow bit
set aunfl_bit,		5			# accrued underflow bit
set adz_bit,		4			# accrued dz bit
set ainex_bit,		3			# accrued inexact bit

#############################
# FPSR individual bit masks #
#############################
set neg_mask,		0x08000000		# negative bit mask (lw)
set inf_mask,		0x02000000		# infinity bit mask (lw)
set z_mask,		0x04000000		# zero bit mask (lw)
set nan_mask,		0x01000000		# nan bit mask (lw)

set neg_bmask,		0x08			# negative bit mask (byte)
set inf_bmask,		0x02			# infinity bit mask (byte)
set z_bmask,		0x04			# zero bit mask (byte)
set nan_bmask,		0x01			# nan bit mask (byte)

set bsun_mask,		0x00008000		# bsun exception mask
set snan_mask,		0x00004000		# snan exception mask
set operr_mask,		0x00002000		# operr exception mask
set ovfl_mask,		0x00001000		# overflow exception mask
set unfl_mask,		0x00000800		# underflow exception mask
set dz_mask,		0x00000400		# dz exception mask
set inex2_mask,		0x00000200		# inex2 exception mask
set inex1_mask,		0x00000100		# inex1 exception mask

set aiop_mask,		0x00000080		# accrued illegal operation
set aovfl_mask,		0x00000040		# accrued overflow
set aunfl_mask,		0x00000020		# accrued underflow
set adz_mask,		0x00000010		# accrued divide by zero
set ainex_mask,		0x00000008		# accrued inexact

######################################
# FPSR combinations used in the FPSP #
######################################
set dzinf_mask,		inf_mask+dz_mask+adz_mask
set opnan_mask,		nan_mask+operr_mask+aiop_mask
set nzi_mask,		0x01ffffff		# clears N, Z, and I
set unfinx_mask,	unfl_mask+inex2_mask+aunfl_mask+ainex_mask
set unf2inx_mask,	unfl_mask+inex2_mask+ainex_mask
set ovfinx_mask,	ovfl_mask+inex2_mask+aovfl_mask+ainex_mask
set inx1a_mask,		inex1_mask+ainex_mask
set inx2a_mask,		inex2_mask+ainex_mask
set snaniop_mask,	nan_mask+snan_mask+aiop_mask
set snaniop2_mask,	snan_mask+aiop_mask
set naniop_mask,	nan_mask+aiop_mask
set neginf_mask,	neg_mask+inf_mask
set infaiop_mask,	inf_mask+aiop_mask
set negz_mask,		neg_mask+z_mask
set opaop_mask,		operr_mask+aiop_mask
set unfl_inx_mask,	unfl_mask+aunfl_mask+ainex_mask
set ovfl_inx_mask,	ovfl_mask+aovfl_mask+ainex_mask

#########
# misc. #
#########
set rnd_stky_bit,	29			# stky bit pos in longword

set sign_bit,		0x7			# sign bit
set signan_bit,		0x6			# signalling nan bit

set sgl_thresh,		0x3f81			# minimum sgl exponent
set dbl_thresh,		0x3c01			# minimum dbl exponent

set x_mode,		0x0			# extended precision
set s_mode,		0x4			# single precision
set d_mode,		0x8			# double precision

set rn_mode,		0x0			# round-to-nearest
set rz_mode,		0x1			# round-to-zero
set rm_mode,		0x2			# round-to-minus-infinity
set rp_mode,		0x3			# round-to-plus-infinity

set mantissalen,	64			# length of mantissa in bits

set BYTE,		1			# len(byte) == 1 byte
set WORD,		2			# len(word) == 2 bytes
set LONG,		4			# len(longword) == 4 bytes

set BSUN_VEC,		0xc0			# bsun vector offset
set INEX_VEC,		0xc4			# inexact vector offset
set DZ_VEC,		0xc8			# dz vector offset
set UNFL_VEC,		0xcc			# unfl vector offset
set OPERR_VEC,		0xd0			# operr vector offset
set OVFL_VEC,		0xd4			# ovfl vector offset
set SNAN_VEC,		0xd8			# snan vector offset

###########################
# SPecial CONDition FLaGs #
###########################
set ftrapcc_flg,	0x01			# flag bit: ftrapcc exception
set fbsun_flg,		0x02			# flag bit: bsun exception
set mia7_flg,		0x04			# flag bit: (a7)+ <ea>
set mda7_flg,		0x08			# flag bit: -(a7) <ea>
set fmovm_flg,		0x40			# flag bit: fmovm instruction
set immed_flg,		0x80			# flag bit: &<data> <ea>

set ftrapcc_bit,	0x0
set fbsun_bit,		0x1
set mia7_bit,		0x2
set mda7_bit,		0x3
set immed_bit,		0x7

##################################
# TRANSCENDENTAL "LAST-OP" FLAGS #
##################################
set FMUL_OP,		0x0			# fmul instr performed last
set FDIV_OP,		0x1			# fdiv performed last
set FADD_OP,		0x2			# fadd performed last
set FMOV_OP,		0x3			# fmov performed last

#############
# CONSTANTS #
#############
T1:	long		0x40C62D38,0xD3D64634	# 16381 LOG2 LEAD
T2:	long		0x3D6F90AE,0xB1E75CC7	# 16381 LOG2 TRAIL

PI:	long		0x40000000,0xC90FDAA2,0x2168C235,0x00000000
PIBY2:	long		0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000

TWOBYPI:
	long		0x3FE45F30,0x6DC9C883

#########################################################################
# XDEF **************************************************************** #
#	_fpsp_ovfl(): 060FPSP entry point for FP Overflow exception.	#
#									#
#	This handler should be the first code executed upon taking the	#
#	FP Overflow exception in an operating system.			#
#									#
# XREF **************************************************************** #
#	_imem_read_long() - read instruction longword			#
#	fix_skewed_ops() - adjust src operand in fsave frame		#
#	set_tag_x() - determine optype of src/dst operands		#
#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
#	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
#	load_fpn2() - load dst operand from FP regfile			#
#	fout() - emulate an opclass 3 instruction			#
#	tbl_unsupp - add of table of emulation routines for opclass 0,2 #
#	_fpsp_done() - "callout" for 060FPSP exit (all work done!)	#
#	_real_ovfl() - "callout" for Overflow exception enabled code	#
#	_real_inex() - "callout" for Inexact exception enabled code	#
#	_real_trace() - "callout" for Trace exception code		#
#									#
# INPUT *************************************************************** #
#	- The system stack contains the FP Ovfl exception stack frame	#
#	- The fsave frame contains the source operand			#
#									#
# OUTPUT ************************************************************** #
#	Overflow Exception enabled:					#
#	- The system stack is unchanged					#
#	- The fsave frame contains the adjusted src op for opclass 0,2	#
#	Overflow Exception disabled:					#
#	- The system stack is unchanged					#
#	- The "exception present" flag in the fsave frame is cleared	#
#									#
# ALGORITHM *********************************************************** #
#	On the 060, if an FP overflow is present as the result of any	#
# instruction, the 060 will take an overflow exception whether the	#
# exception is enabled or disabled in the FPCR. For the disabled case,	#
# This handler emulates the instruction to determine what the correct	#
# default result should be for the operation. This default result is	#
# then stored in either the FP regfile, data regfile, or memory.	#
# Finally, the handler exits through the "callout" _fpsp_done()	#
# denoting that no exceptional conditions exist within the machine.	#
#	If the exception is enabled, then this handler must create the	#
# exceptional operand and place it in the fsave state frame, and store	#
# the default result (only if the instruction is opclass 3). For	#
# exceptions enabled, this handler must exit through the "callout"	#
# _real_ovfl() so that the operating system enabled overflow handler	#
# can handle this case.							#
#	Two other conditions exist. First, if overflow was disabled	#
# but the inexact exception was enabled, this handler must exit	#
# through the "callout" _real_inex() regardless of whether the result	#
# was inexact.								#
#	Also, in the case of an opclass three instruction where	#
# overflow was disabled and the trace exception was enabled, this	#
# handler must exit through the "callout" _real_trace().		#
#									#
#########################################################################

	global		_fpsp_ovfl
_fpsp_ovfl:

#$#	sub.l		&24,%sp			# make room for src/dst

	link.w		%a6,&-LOCAL_SIZE	# init stack frame

	fsave		FP_SRC(%a6)		# grab the "busy" frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack

# the FPIAR holds the "current PC" of the faulting instruction
	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch the instruction words
	mov.l		%d0,EXC_OPWORD(%a6)

##############################################################################

	btst		&0x5,EXC_CMDREG(%a6)	# is instr an fmove out?
	bne.w		fovfl_out

	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		fix_skewed_ops		# fix src op

# since, I believe, only NORMs and DENORMs can come through here,
# maybe we can avoid the subroutine call.
	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		set_tag_x		# tag the operand type
	mov.b		%d0,STAG(%a6)		# maybe NORM,DENORM

# bit five of the fp extension word separates the monadic and dyadic operations
# that can pass through fpsp_ovfl(). remember that fcmp, ftst, and fsincos
# will never take this exception.
	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
	beq.b		fovfl_extract		# monadic

	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
	bsr.l		load_fpn2		# load dst into FP_DST

	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
	bsr.l		set_tag_x		# tag the operand type
	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
	bne.b		fovfl_op2_done		# no
	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
fovfl_op2_done:
	mov.b		%d0,DTAG(%a6)		# save dst optype tag

fovfl_extract:

#$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
#$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
#$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
#$#	mov.l		FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
#$#	mov.l		FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
#$#	mov.l		FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode

	mov.b		1+EXC_CMDREG(%a6),%d1
	andi.w		&0x007f,%d1		# extract extension

	andi.l		&0x00ff01ff,USER_FPSR(%a6) # zero all but accrued field

	fmov.l		&0x0,%fpcr		# zero current control regs
	fmov.l		&0x0,%fpsr

	lea		FP_SRC(%a6),%a0
	lea		FP_DST(%a6),%a1

# maybe we can make these entry points ONLY the OVFL entry points of each routine.
	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
	jsr		(tbl_unsupp.l,%pc,%d1.l*1)

# the operation has been emulated. the result is in fp0.
# the EXOP, if an exception occurred, is in fp1.
# we must save the default result regardless of whether
# traps are enabled or disabled.
	bfextu		EXC_CMDREG(%a6){&6:&3},%d0
	bsr.l		store_fpreg

# the exceptional possibilities we have left ourselves with are ONLY overflow
# and inexact. and, the inexact is such that overflow occurred and was disabled
# but inexact was enabled.
	btst		&ovfl_bit,FPCR_ENABLE(%a6)
	bne.b		fovfl_ovfl_on

	btst		&inex2_bit,FPCR_ENABLE(%a6)
	bne.b		fovfl_inex_on

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6
#$#	add.l		&24,%sp
	bra.l		_fpsp_done

# overflow is enabled AND overflow, of course, occurred. so, we have the EXOP
# in fp1. now, simply jump to _real_ovfl()!
fovfl_ovfl_on:
	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP (fp1) to stack

	mov.w		&0xe005,2+FP_SRC(%a6)	# save exc status

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!

	unlk		%a6

	bra.l		_real_ovfl

# overflow occurred but is disabled. meanwhile, inexact is enabled. Therefore,
# we must jump to real_inex().
fovfl_inex_on:

	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP (fp1) to stack

	mov.b		&0xc4,1+EXC_VOFF(%a6)	# vector offset = 0xc4
	mov.w		&0xe001,2+FP_SRC(%a6)	# save exc status

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!

	unlk		%a6

	bra.l		_real_inex

########################################################################
fovfl_out:

#$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
#$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
#$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)

# the src operand is definitely a NORM(!), so tag it as such
	mov.b		&NORM,STAG(%a6)		# set src optype tag

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode

	and.l		&0xffff00ff,USER_FPSR(%a6) # zero all but accrued field

	fmov.l		&0x0,%fpcr		# zero current control regs
	fmov.l		&0x0,%fpsr

	lea		FP_SRC(%a6),%a0		# pass ptr to src operand

	bsr.l		fout

	btst		&ovfl_bit,FPCR_ENABLE(%a6)
	bne.w		fovfl_ovfl_on

	btst		&inex2_bit,FPCR_ENABLE(%a6)
	bne.w		fovfl_inex_on

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6
#$#	add.l		&24,%sp

	btst		&0x7,(%sp)		# is trace on?
	beq.l		_fpsp_done		# no

	fmov.l		%fpiar,0x8(%sp)		# "Current PC" is in FPIAR
	mov.w		&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x024
	bra.l		_real_trace

#########################################################################
# XDEF **************************************************************** #
#	_fpsp_unfl(): 060FPSP entry point for FP Underflow exception.	#
#									#
#	This handler should be the first code executed upon taking the	#
#	FP Underflow exception in an operating system.			#
# 823# # 824# XREF **************************************************************** # 825# _imem_read_long() - read instruction longword # 826# fix_skewed_ops() - adjust src operand in fsave frame # 827# set_tag_x() - determine optype of src/dst operands # 828# store_fpreg() - store opclass 0 or 2 result to FP regfile # 829# unnorm_fix() - change UNNORM operands to NORM or ZERO # 830# load_fpn2() - load dst operand from FP regfile # 831# fout() - emulate an opclass 3 instruction # 832# tbl_unsupp - add of table of emulation routines for opclass 0,2 # 833# _fpsp_done() - "callout" for 060FPSP exit (all work done!) # 834# _real_ovfl() - "callout" for Overflow exception enabled code # 835# _real_inex() - "callout" for Inexact exception enabled code # 836# _real_trace() - "callout" for Trace exception code # 837# # 838# INPUT *************************************************************** # 839# - The system stack contains the FP Unfl exception stack frame # 840# - The fsave frame contains the source operand # 841# # 842# OUTPUT ************************************************************** # 843# Underflow Exception enabled: # 844# - The system stack is unchanged # 845# - The fsave frame contains the adjusted src op for opclass 0,2 # 846# Underflow Exception disabled: # 847# - The system stack is unchanged # 848# - The "exception present" flag in the fsave frame is cleared # 849# # 850# ALGORITHM *********************************************************** # 851# On the 060, if an FP underflow is present as the result of any # 852# instruction, the 060 will take an underflow exception whether the # 853# exception is enabled or disabled in the FPCR. For the disabled case, # 854# This handler emulates the instruction to determine what the correct # 855# default result should be for the operation. This default result is # 856# then stored in either the FP regfile, data regfile, or memory. 
#
# Finally, the handler exits through the "callout" _fpsp_done()         #
# denoting that no exceptional conditions exist within the machine.     #
# If the exception is enabled, then this handler must create the        #
# exceptional operand and place it in the fsave state frame, and store  #
# the default result (only if the instruction is opclass 3). For        #
# exceptions enabled, this handler must exit through the "callout"      #
# _real_unfl() so that the operating system enabled underflow handler   #
# can handle this case.                                                 #
#       Two other conditions exist. First, if underflow was disabled    #
# but the inexact exception was enabled and the result was inexact,     #
# this handler must exit through the "callout" _real_inex().            #
#       Also, in the case of an opclass three instruction where         #
# underflow was disabled and the trace exception was enabled, this      #
# handler must exit through the "callout" _real_trace().                #
#                                                                       #
#########################################################################

	global		_fpsp_unfl
_fpsp_unfl:

#$#	sub.l		&24,%sp			# make room for src/dst

# create the local exception-frame workspace below the system stack frame.
	link.w		%a6,&-LOCAL_SIZE	# init stack frame

# the fsave frame holds the exceptional src operand and status for this
# underflow; it is re-inserted (or replaced) via frestore on the exit paths.
	fsave		FP_SRC(%a6)		# grab the "busy" frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack

# the FPIAR holds the "current PC" of the faulting instruction
	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch the instruction words
	mov.l		%d0,EXC_OPWORD(%a6)

##############################################################################

# opclass three (fmove out) underflow is handled separately below since it
# uses a different stack frame and default-result protocol.
	btst		&0x5,EXC_CMDREG(%a6)	# is instr an fmove out?
	bne.w		funfl_out


	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		fix_skewed_ops		# fix src op

	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		set_tag_x		# tag the operand type
	mov.b		%d0,STAG(%a6)		# maybe NORM,DENORM

# bit five of the fp ext word separates the monadic and dyadic operations
# that can pass through fpsp_unfl(). remember that fcmp, and ftst
# will never take this exception.
	btst		&0x5,1+EXC_CMDREG(%a6)	# is op monadic or dyadic?
	beq.b		funfl_extract		# monadic

# now, what's left that's not dyadic is fsincos. we can distinguish it
# from all dyadics by the '0110xxx pattern
	btst		&0x4,1+EXC_CMDREG(%a6)	# is op an fsincos?
	bne.b		funfl_extract		# yes

	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
	bsr.l		load_fpn2		# load dst into FP_DST

	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
	bsr.l		set_tag_x		# tag the operand type
	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
	bne.b		funfl_op2_done		# no
	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
funfl_op2_done:
	mov.b		%d0,DTAG(%a6)		# save dst optype tag

funfl_extract:

#$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
#$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
#$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
#$#	mov.l		FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
#$#	mov.l		FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
#$#	mov.l		FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode

	mov.b		1+EXC_CMDREG(%a6),%d1
	andi.w		&0x007f,%d1		# extract extension

# keep only the quotient byte and accrued exception byte of the user FPSR;
# the emulation routine below re-creates the ccode and exception bits.
	andi.l		&0x00ff01ff,USER_FPSR(%a6)

	fmov.l		&0x0,%fpcr		# zero current control regs
	fmov.l		&0x0,%fpsr

	lea		FP_SRC(%a6),%a0
	lea		FP_DST(%a6),%a1

# maybe we can make these entry points ONLY the UNFL entry points of each routine.
# emulate the instruction through the tbl_unsupp dispatch table, indexed by
# the extension-word opcode extracted above.
	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
	jsr		(tbl_unsupp.l,%pc,%d1.l*1)

	bfextu		EXC_CMDREG(%a6){&6:&3},%d0
	bsr.l		store_fpreg

# The `060 FPU multiplier hardware is such that if the result of a
# multiply operation is the smallest possible normalized number
# (0x00000000_80000000_00000000), then the machine will take an
# underflow exception. Since this is incorrect, we need to check
# if our emulation, after re-doing the operation, decided that
# no underflow was called for. We do these checks only in
# funfl_{unfl,inex}_on() because w/ both exceptions disabled, this
# special case will simply exit gracefully with the correct result.

# the exceptional possibilities we have left ourselves with are ONLY underflow
# and inexact. and, the inexact is such that underflow occurred and was disabled
# but inexact was enabled.
	btst		&unfl_bit,FPCR_ENABLE(%a6)
	bne.b		funfl_unfl_on

funfl_chkinex:
	btst		&inex2_bit,FPCR_ENABLE(%a6)
	bne.b		funfl_inex_on

# no enabled exception resulted from emulation: restore the user context
# and exit through _fpsp_done().
funfl_exit:
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6
#$#	add.l		&24,%sp
	bra.l		_fpsp_done

# underflow is enabled AND underflow, of course, occurred. so, we have the EXOP
# in fp1 (don't forget to save fp0). what to do now?
# well, we simply have to go to _real_unfl()!
funfl_unfl_on:

# The `060 FPU multiplier hardware is such that if the result of a
# multiply operation is the smallest possible normalized number
# (0x00000000_80000000_00000000), then the machine will take an
# underflow exception. Since this is incorrect, we check here to see
# if our emulation, after re-doing the operation, decided that
# no underflow was called for.
	btst		&unfl_bit,FPSR_EXCEPT(%a6)
	beq.w		funfl_chkinex

funfl_unfl_on2:
# place the EXOP (exceptional operand, left in fp1 by the emulation routine)
# into the frame that will be frestored into the FPU for the OS handler.
	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP (fp1) to stack

	mov.w		&0xe003,2+FP_SRC(%a6)	# save exc status

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!

	unlk		%a6

	bra.l		_real_unfl

# underflow occurred but is disabled. meanwhile, inexact is enabled. Therefore,
# we must jump to real_inex().
funfl_inex_on:

# The `060 FPU multiplier hardware is such that if the result of a
# multiply operation is the smallest possible normalized number
# (0x00000000_80000000_00000000), then the machine will take an
# underflow exception.
# But, whether bogus or not, if inexact is enabled AND it occurred,
# then we have to branch to real_inex.

	btst		&inex2_bit,FPSR_EXCEPT(%a6)
	beq.w		funfl_exit

funfl_inex_on2:

	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP to stack

# convert the stacked frame to an inexact exception frame before exiting.
	mov.b		&0xc4,1+EXC_VOFF(%a6)	# vector offset = 0xc4
	mov.w		&0xe001,2+FP_SRC(%a6)	# save exc status

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!

	unlk		%a6

	bra.l		_real_inex

#######################################################################
# opclass three (fmove out) underflow: emulate the move out via fout(),
# then dispatch on which exceptions (if any) the user has enabled.
funfl_out:


#$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
#$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
#$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)

# the src operand is definitely a NORM(!), so tag it as such
	mov.b		&NORM,STAG(%a6)		# set src optype tag

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode

	and.l		&0xffff00ff,USER_FPSR(%a6) # zero all but accrued field

	fmov.l		&0x0,%fpcr		# zero current control regs
	fmov.l		&0x0,%fpsr

	lea		FP_SRC(%a6),%a0		# pass ptr to src operand

	bsr.l		fout

	btst		&unfl_bit,FPCR_ENABLE(%a6)
	bne.w		funfl_unfl_on2

	btst		&inex2_bit,FPCR_ENABLE(%a6)
	bne.w		funfl_inex_on2

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6
#$#	add.l		&24,%sp

# if the Trace bit of the stacked SR is set, convert the exception frame
# to a Trace (format 0x2, vector 0x024) frame and exit via _real_trace().
	btst		&0x7,(%sp)		# is trace on?
	beq.l		_fpsp_done		# no

	fmov.l		%fpiar,0x8(%sp)		# "Current PC" is in FPIAR
	mov.w		&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x024
	bra.l		_real_trace

#########################################################################
# XDEF **************************************************************** #
#	_fpsp_unsupp(): 060FPSP entry point for FP "Unimplemented	#
#			Data Type" exception.				#
#									#
#	This handler should be the first code executed upon taking the	#
#	FP Unimplemented Data Type exception in an operating system.
# 1098# # 1099# XREF **************************************************************** # 1100# _imem_read_{word,long}() - read instruction word/longword # 1101# fix_skewed_ops() - adjust src operand in fsave frame # 1102# set_tag_x() - determine optype of src/dst operands # 1103# store_fpreg() - store opclass 0 or 2 result to FP regfile # 1104# unnorm_fix() - change UNNORM operands to NORM or ZERO # 1105# load_fpn2() - load dst operand from FP regfile # 1106# load_fpn1() - load src operand from FP regfile # 1107# fout() - emulate an opclass 3 instruction # 1108# tbl_unsupp - add of table of emulation routines for opclass 0,2 # 1109# _real_inex() - "callout" to operating system inexact handler # 1110# _fpsp_done() - "callout" for exit; work all done # 1111# _real_trace() - "callout" for Trace enabled exception # 1112# funimp_skew() - adjust fsave src ops to "incorrect" value # 1113# _real_snan() - "callout" for SNAN exception # 1114# _real_operr() - "callout" for OPERR exception # 1115# _real_ovfl() - "callout" for OVFL exception # 1116# _real_unfl() - "callout" for UNFL exception # 1117# get_packed() - fetch packed operand from memory # 1118# # 1119# INPUT *************************************************************** # 1120# - The system stack contains the "Unimp Data Type" stk frame # 1121# - The fsave frame contains the ssrc op (for UNNORM/DENORM) # 1122# # 1123# OUTPUT ************************************************************** # 1124# If Inexact exception (opclass 3): # 1125# - The system stack is changed to an Inexact exception stk frame # 1126# If SNAN exception (opclass 3): # 1127# - The system stack is changed to an SNAN exception stk frame # 1128# If OPERR exception (opclass 3): # 1129# - The system stack is changed to an OPERR exception stk frame # 1130# If OVFL exception (opclass 3): # 1131# - The system stack is changed to an OVFL exception stk frame # 1132# If UNFL exception (opclass 3): # 1133# - The system stack is changed to an UNFL exception 
stack frame # 1134# If Trace exception enabled: # 1135# - The system stack is changed to a Trace exception stack frame # 1136# Else: (normal case) # 1137# - Correct result has been stored as appropriate # 1138# # 1139# ALGORITHM *********************************************************** # 1140# Two main instruction types can enter here: (1) DENORM or UNNORM # 1141# unimplemented data types. These can be either opclass 0,2 or 3 # 1142# instructions, and (2) PACKED unimplemented data format instructions # 1143# also of opclasses 0,2, or 3. # 1144# For UNNORM/DENORM opclass 0 and 2, the handler fetches the src # 1145# operand from the fsave state frame and the dst operand (if dyadic) # 1146# from the FP register file. The instruction is then emulated by # 1147# choosing an emulation routine from a table of routines indexed by # 1148# instruction type. Once the instruction has been emulated and result # 1149# saved, then we check to see if any enabled exceptions resulted from # 1150# instruction emulation. If none, then we exit through the "callout" # 1151# _fpsp_done(). If there is an enabled FP exception, then we insert # 1152# this exception into the FPU in the fsave state frame and then exit # 1153# through _fpsp_done(). # 1154# PACKED opclass 0 and 2 is similar in how the instruction is # 1155# emulated and exceptions handled. The differences occur in how the # 1156# handler loads the packed op (by calling get_packed() routine) and # 1157# by the fact that a Trace exception could be pending for PACKED ops. # 1158# If a Trace exception is pending, then the current exception stack # 1159# frame is changed to a Trace exception stack frame and an exit is # 1160# made through _real_trace(). # 1161# For UNNORM/DENORM opclass 3, the actual move out to memory is # 1162# performed by calling the routine fout(). 
If no exception should occur # 1163# as the result of emulation, then an exit either occurs through # 1164# _fpsp_done() or through _real_trace() if a Trace exception is pending # 1165# (a Trace stack frame must be created here, too). If an FP exception # 1166# should occur, then we must create an exception stack frame of that # 1167# type and jump to either _real_snan(), _real_operr(), _real_inex(), # 1168# _real_unfl(), or _real_ovfl() as appropriate. PACKED opclass 3 # 1169# emulation is performed in a similar manner. # 1170# # 1171######################################################################### 1172 1173# 1174# (1) DENORM and UNNORM (unimplemented) data types: 1175# 1176# post-instruction 1177# ***************** 1178# * EA * 1179# pre-instruction * * 1180# ***************** ***************** 1181# * 0x0 * 0x0dc * * 0x3 * 0x0dc * 1182# ***************** ***************** 1183# * Next * * Next * 1184# * PC * * PC * 1185# ***************** ***************** 1186# * SR * * SR * 1187# ***************** ***************** 1188# 1189# (2) PACKED format (unsupported) opclasses two and three: 1190# ***************** 1191# * EA * 1192# * * 1193# ***************** 1194# * 0x2 * 0x0dc * 1195# ***************** 1196# * Next * 1197# * PC * 1198# ***************** 1199# * SR * 1200# ***************** 1201# 1202 global _fpsp_unsupp 1203_fpsp_unsupp: 1204 1205 link.w %a6,&-LOCAL_SIZE # init stack frame 1206 1207 fsave FP_SRC(%a6) # save fp state 1208 1209 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 1210 fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs 1211 fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack 1212 1213 btst &0x5,EXC_SR(%a6) # user or supervisor mode? 
1214 bne.b fu_s 1215fu_u: 1216 mov.l %usp,%a0 # fetch user stack pointer 1217 mov.l %a0,EXC_A7(%a6) # save on stack 1218 bra.b fu_cont 1219# if the exception is an opclass zero or two unimplemented data type 1220# exception, then the a7' calculated here is wrong since it doesn't 1221# stack an ea. however, we don't need an a7' for this case anyways. 1222fu_s: 1223 lea 0x4+EXC_EA(%a6),%a0 # load old a7' 1224 mov.l %a0,EXC_A7(%a6) # save on stack 1225 1226fu_cont: 1227 1228# the FPIAR holds the "current PC" of the faulting instruction 1229# the FPIAR should be set correctly for ALL exceptions passing through 1230# this point. 1231 mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6) 1232 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr 1233 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr 1234 bsr.l _imem_read_long # fetch the instruction words 1235 mov.l %d0,EXC_OPWORD(%a6) # store OPWORD and EXTWORD 1236 1237############################ 1238 1239 clr.b SPCOND_FLG(%a6) # clear special condition flag 1240 1241# Separate opclass three (fpn-to-mem) ops since they have a different 1242# stack frame and protocol. 1243 btst &0x5,EXC_CMDREG(%a6) # is it an fmove out? 1244 bne.w fu_out # yes 1245 1246# Separate packed opclass two instructions. 1247 bfextu EXC_CMDREG(%a6){&0:&6},%d0 1248 cmpi.b %d0,&0x13 1249 beq.w fu_in_pack 1250 1251 1252# I'm not sure at this point what FPSR bits are valid for this instruction. 
1253# so, since the emulation routines re-create them anyways, zero exception field 1254 andi.l &0x00ff00ff,USER_FPSR(%a6) # zero exception field 1255 1256 fmov.l &0x0,%fpcr # zero current control regs 1257 fmov.l &0x0,%fpsr 1258 1259# Opclass two w/ memory-to-fpn operation will have an incorrect extended 1260# precision format if the src format was single or double and the 1261# source data type was an INF, NAN, DENORM, or UNNORM 1262 lea FP_SRC(%a6),%a0 # pass ptr to input 1263 bsr.l fix_skewed_ops 1264 1265# we don't know whether the src operand or the dst operand (or both) is the 1266# UNNORM or DENORM. call the function that tags the operand type. if the 1267# input is an UNNORM, then convert it to a NORM, DENORM, or ZERO. 1268 lea FP_SRC(%a6),%a0 # pass: ptr to src op 1269 bsr.l set_tag_x # tag the operand type 1270 cmpi.b %d0,&UNNORM # is operand an UNNORM? 1271 bne.b fu_op2 # no 1272 bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO 1273 1274fu_op2: 1275 mov.b %d0,STAG(%a6) # save src optype tag 1276 1277 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg 1278 1279# bit five of the fp extension word separates the monadic and dyadic operations 1280# at this point 1281 btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic? 1282 beq.b fu_extract # monadic 1283 cmpi.b 1+EXC_CMDREG(%a6),&0x3a # is operation an ftst? 1284 beq.b fu_extract # yes, so it's monadic, too 1285 1286 bsr.l load_fpn2 # load dst into FP_DST 1287 1288 lea FP_DST(%a6),%a0 # pass: ptr to dst op 1289 bsr.l set_tag_x # tag the operand type 1290 cmpi.b %d0,&UNNORM # is operand an UNNORM? 
1291 bne.b fu_op2_done # no 1292 bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO 1293fu_op2_done: 1294 mov.b %d0,DTAG(%a6) # save dst optype tag 1295 1296fu_extract: 1297 clr.l %d0 1298 mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec 1299 1300 bfextu 1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension 1301 1302 lea FP_SRC(%a6),%a0 1303 lea FP_DST(%a6),%a1 1304 1305 mov.l (tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr 1306 jsr (tbl_unsupp.l,%pc,%d1.l*1) 1307 1308# 1309# Exceptions in order of precedence: 1310# BSUN : none 1311# SNAN : all dyadic ops 1312# OPERR : fsqrt(-NORM) 1313# OVFL : all except ftst,fcmp 1314# UNFL : all except ftst,fcmp 1315# DZ : fdiv 1316# INEX2 : all except ftst,fcmp 1317# INEX1 : none (packed doesn't go through here) 1318# 1319 1320# we determine the highest priority exception(if any) set by the 1321# emulation routine that has also been enabled by the user. 1322 mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions set 1323 bne.b fu_in_ena # some are enabled 1324 1325fu_in_cont: 1326# fcmp and ftst do not store any result. 1327 mov.b 1+EXC_CMDREG(%a6),%d0 # fetch extension 1328 andi.b &0x38,%d0 # extract bits 3-5 1329 cmpi.b %d0,&0x38 # is instr fcmp or ftst? 1330 beq.b fu_in_exit # yes 1331 1332 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg 1333 bsr.l store_fpreg # store the result 1334 1335fu_in_exit: 1336 1337 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 1338 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 1339 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 1340 1341 unlk %a6 1342 1343 bra.l _fpsp_done 1344 1345fu_in_ena: 1346 and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled 1347 bfffo %d0{&24:&8},%d0 # find highest priority exception 1348 bne.b fu_in_exc # there is at least one set 1349 1350# 1351# No exceptions occurred that were also enabled. 
Now: 1352# 1353# if (OVFL && ovfl_disabled && inexact_enabled) { 1354# branch to _real_inex() (even if the result was exact!); 1355# } else { 1356# save the result in the proper fp reg (unless the op is fcmp or ftst); 1357# return; 1358# } 1359# 1360 btst &ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set? 1361 beq.b fu_in_cont # no 1362 1363fu_in_ovflchk: 1364 btst &inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled? 1365 beq.b fu_in_cont # no 1366 bra.w fu_in_exc_ovfl # go insert overflow frame 1367 1368# 1369# An exception occurred and that exception was enabled: 1370# 1371# shift enabled exception field into lo byte of d0; 1372# if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) || 1373# ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) { 1374# /* 1375# * this is the case where we must call _real_inex() now or else 1376# * there will be no other way to pass it the exceptional operand 1377# */ 1378# call _real_inex(); 1379# } else { 1380# restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU; 1381# } 1382# 1383fu_in_exc: 1384 subi.l &24,%d0 # fix offset to be 0-8 1385 cmpi.b %d0,&0x6 # is exception INEX? (6) 1386 bne.b fu_in_exc_exit # no 1387 1388# the enabled exception was inexact 1389 btst &unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur? 1390 bne.w fu_in_exc_unfl # yes 1391 btst &ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur? 1392 bne.w fu_in_exc_ovfl # yes 1393 1394# here, we insert the correct fsave status value into the fsave frame for the 1395# corresponding exception. the operand in the fsave frame should be the original 1396# src operand. 
# insert the fsave status word for the highest-priority enabled exception
# (index in d0, 0-7) into the fsave frame, then hand the original src
# operand back to the FPU and exit through _fpsp_done().
fu_in_exc_exit:
	mov.l		%d0,-(%sp)		# save d0
	bsr.l		funimp_skew		# skew sgl or dbl inputs
	mov.l		(%sp)+,%d0		# restore d0

	mov.w		(tbl_except.b,%pc,%d0.w*2),2+FP_SRC(%a6) # create exc status

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)		# restore src op

	unlk		%a6

	bra.l		_fpsp_done

# fsave status words indexed by exception priority 0-7:
# BSUN, SNAN, OPERR, OVFL, UNFL, DZ, INEX2, INEX1
tbl_except:
	short		0xe000,0xe006,0xe004,0xe005
	short		0xe003,0xe002,0xe001,0xe001

# inexact was enabled but a disabled UNFL/OVFL also occurred: force the
# table index to the UNFL (4) or OVFL (3) entry before exiting above.
fu_in_exc_unfl:
	mov.w		&0x4,%d0
	bra.b		fu_in_exc_exit
fu_in_exc_ovfl:
	mov.w		&0x03,%d0
	bra.b		fu_in_exc_exit

# If the input operand to this operation was opclass two and a single
# or double precision denorm, inf, or nan, the operand needs to be
# "corrected" in order to have the proper equivalent extended precision
# number.
#
# In:  a0 = ptr to operand (LOCAL_EX/LOCAL_HI/LOCAL_LO layout)
# Out: operand at (a0) rewritten in place when it was a skewed
#      sgl/dbl DENORM, ZERO, INF, or NAN; untouched otherwise
# Clobbers: d0 (and whatever norm clobbers; a0 is preserved)
	global		fix_skewed_ops
fix_skewed_ops:
	bfextu		EXC_CMDREG(%a6){&0:&6},%d0 # extract opclass,src fmt
	cmpi.b		%d0,&0x11		# is class = 2 & fmt = sgl?
	beq.b		fso_sgl			# yes
	cmpi.b		%d0,&0x15		# is class = 2 & fmt = dbl?
	beq.b		fso_dbl			# yes
	rts					# no

fso_sgl:
	mov.w		LOCAL_EX(%a0),%d0	# fetch src exponent
	andi.w		&0x7fff,%d0		# strip sign
	cmpi.w		%d0,&0x3f80		# is |exp| == $3f80?
	beq.b		fso_sgl_dnrm_zero	# yes
	cmpi.w		%d0,&0x407f		# no; is |exp| == $407f?
	beq.b		fso_infnan		# yes
	rts					# no

fso_sgl_dnrm_zero:
	andi.l		&0x7fffffff,LOCAL_HI(%a0) # clear j-bit
	beq.b		fso_zero		# it's a skewed zero
fso_sgl_dnrm:
# here, we count on norm not to alter a0...
	bsr.l		norm			# normalize mantissa
	neg.w		%d0			# -shft amt
	addi.w		&0x3f81,%d0		# adjust new exponent
	andi.w		&0x8000,LOCAL_EX(%a0)	# clear old exponent
	or.w		%d0,LOCAL_EX(%a0)	# insert new exponent
	rts

fso_zero:
	andi.w		&0x8000,LOCAL_EX(%a0)	# clear bogus exponent
	rts

fso_infnan:
	andi.b		&0x7f,LOCAL_HI(%a0)	# clear j-bit
	ori.w		&0x7fff,LOCAL_EX(%a0)	# make exponent = $7fff
	rts

fso_dbl:
	mov.w		LOCAL_EX(%a0),%d0	# fetch src exponent
	andi.w		&0x7fff,%d0		# strip sign
	cmpi.w		%d0,&0x3c00		# is |exp| == $3c00?
	beq.b		fso_dbl_dnrm_zero	# yes
	cmpi.w		%d0,&0x43ff		# no; is |exp| == $43ff?
	beq.b		fso_infnan		# yes
	rts					# no

fso_dbl_dnrm_zero:
	andi.l		&0x7fffffff,LOCAL_HI(%a0) # clear j-bit
	bne.b		fso_dbl_dnrm		# it's a skewed denorm
	tst.l		LOCAL_LO(%a0)		# is it a zero?
	beq.b		fso_zero		# yes
fso_dbl_dnrm:
# here, we count on norm not to alter a0...
	bsr.l		norm			# normalize mantissa
	neg.w		%d0			# -shft amt
	addi.w		&0x3c01,%d0		# adjust new exponent
	andi.w		&0x8000,LOCAL_EX(%a0)	# clear old exponent
	or.w		%d0,LOCAL_EX(%a0)	# insert new exponent
	rts

#################################################################

# fmove out took an unimplemented data type exception.
# the src operand is in FP_SRC. Call _fout() to write out the result and
# to determine which exceptions, if any, to take.
fu_out:

# Separate packed move outs from the UNNORM and DENORM move outs.
	bfextu		EXC_CMDREG(%a6){&3:&3},%d0
	cmpi.b		%d0,&0x3
	beq.w		fu_out_pack
	cmpi.b		%d0,&0x7
	beq.w		fu_out_pack


# I'm not sure at this point what FPSR bits are valid for this instruction.
# so, since the emulation routines re-create them anyways, zero exception field.
# fmove out doesn't affect ccodes.
1509 and.l &0xffff00ff,USER_FPSR(%a6) # zero exception field 1510 1511 fmov.l &0x0,%fpcr # zero current control regs 1512 fmov.l &0x0,%fpsr 1513 1514# the src can ONLY be a DENORM or an UNNORM! so, don't make any big subroutine 1515# call here. just figure out what it is... 1516 mov.w FP_SRC_EX(%a6),%d0 # get exponent 1517 andi.w &0x7fff,%d0 # strip sign 1518 beq.b fu_out_denorm # it's a DENORM 1519 1520 lea FP_SRC(%a6),%a0 1521 bsr.l unnorm_fix # yes; fix it 1522 1523 mov.b %d0,STAG(%a6) 1524 1525 bra.b fu_out_cont 1526fu_out_denorm: 1527 mov.b &DENORM,STAG(%a6) 1528fu_out_cont: 1529 1530 clr.l %d0 1531 mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec 1532 1533 lea FP_SRC(%a6),%a0 # pass ptr to src operand 1534 1535 mov.l (%a6),EXC_A6(%a6) # in case a6 changes 1536 bsr.l fout # call fmove out routine 1537 1538# Exceptions in order of precedence: 1539# BSUN : none 1540# SNAN : none 1541# OPERR : fmove.{b,w,l} out of large UNNORM 1542# OVFL : fmove.{s,d} 1543# UNFL : fmove.{s,d,x} 1544# DZ : none 1545# INEX2 : all 1546# INEX1 : none (packed doesn't travel through here) 1547 1548# determine the highest priority exception(if any) set by the 1549# emulation routine that has also been enabled by the user. 1550 mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled 1551 bne.w fu_out_ena # some are enabled 1552 1553fu_out_done: 1554 1555 mov.l EXC_A6(%a6),(%a6) # in case a6 changed 1556 1557# on extended precision opclass three instructions using pre-decrement or 1558# post-increment addressing mode, the address register is not updated. is the 1559# address register was the stack pointer used from user mode, then let's update 1560# it here. if it was used from supervisor mode, then we have to handle this 1561# as a special case. 
1562 btst &0x5,EXC_SR(%a6) 1563 bne.b fu_out_done_s 1564 1565 mov.l EXC_A7(%a6),%a0 # restore a7 1566 mov.l %a0,%usp 1567 1568fu_out_done_cont: 1569 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 1570 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 1571 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 1572 1573 unlk %a6 1574 1575 btst &0x7,(%sp) # is trace on? 1576 bne.b fu_out_trace # yes 1577 1578 bra.l _fpsp_done 1579 1580# is the ea mode pre-decrement of the stack pointer from supervisor mode? 1581# ("fmov.x fpm,-(a7)") if so, 1582fu_out_done_s: 1583 cmpi.b SPCOND_FLG(%a6),&mda7_flg 1584 bne.b fu_out_done_cont 1585 1586# the extended precision result is still in fp0. but, we need to save it 1587# somewhere on the stack until we can copy it to its final resting place. 1588# here, we're counting on the top of the stack to be the old place-holders 1589# for fp0/fp1 which have already been restored. that way, we can write 1590# over those destinations with the shifted stack frame. 
1591 fmovm.x &0x80,FP_SRC(%a6) # put answer on stack 1592 1593 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 1594 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 1595 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 1596 1597 mov.l (%a6),%a6 # restore frame pointer 1598 1599 mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp) 1600 mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp) 1601 1602# now, copy the result to the proper place on the stack 1603 mov.l LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp) 1604 mov.l LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp) 1605 mov.l LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp) 1606 1607 add.l &LOCAL_SIZE-0x8,%sp 1608 1609 btst &0x7,(%sp) 1610 bne.b fu_out_trace 1611 1612 bra.l _fpsp_done 1613 1614fu_out_ena: 1615 and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled 1616 bfffo %d0{&24:&8},%d0 # find highest priority exception 1617 bne.b fu_out_exc # there is at least one set 1618 1619# no exceptions were set. 1620# if a disabled overflow occurred and inexact was enabled but the result 1621# was exact, then a branch to _real_inex() is made. 1622 btst &ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set? 1623 beq.w fu_out_done # no 1624 1625fu_out_ovflchk: 1626 btst &inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled? 1627 beq.w fu_out_done # no 1628 bra.w fu_inex # yes 1629 1630# 1631# The fp move out that took the "Unimplemented Data Type" exception was 1632# being traced. Since the stack frames are similar, get the "current" PC 1633# from FPIAR and put it in the trace stack frame then jump to _real_trace(). 
1634# 1635# UNSUPP FRAME TRACE FRAME 1636# ***************** ***************** 1637# * EA * * Current * 1638# * * * PC * 1639# ***************** ***************** 1640# * 0x3 * 0x0dc * * 0x2 * 0x024 * 1641# ***************** ***************** 1642# * Next * * Next * 1643# * PC * * PC * 1644# ***************** ***************** 1645# * SR * * SR * 1646# ***************** ***************** 1647# 1648fu_out_trace: 1649 mov.w &0x2024,0x6(%sp) 1650 fmov.l %fpiar,0x8(%sp) 1651 bra.l _real_trace 1652 1653# an exception occurred and that exception was enabled. 1654fu_out_exc: 1655 subi.l &24,%d0 # fix offset to be 0-8 1656 1657# we don't mess with the existing fsave frame. just re-insert it and 1658# jump to the "_real_{}()" handler... 1659 mov.w (tbl_fu_out.b,%pc,%d0.w*2),%d0 1660 jmp (tbl_fu_out.b,%pc,%d0.w*1) 1661 1662 swbeg &0x8 1663tbl_fu_out: 1664 short tbl_fu_out - tbl_fu_out # BSUN can't happen 1665 short tbl_fu_out - tbl_fu_out # SNAN can't happen 1666 short fu_operr - tbl_fu_out # OPERR 1667 short fu_ovfl - tbl_fu_out # OVFL 1668 short fu_unfl - tbl_fu_out # UNFL 1669 short tbl_fu_out - tbl_fu_out # DZ can't happen 1670 short fu_inex - tbl_fu_out # INEX2 1671 short tbl_fu_out - tbl_fu_out # INEX1 won't make it here 1672 1673# for snan,operr,ovfl,unfl, src op is still in FP_SRC so just 1674# frestore it. 
1675fu_snan: 1676 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 1677 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 1678 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 1679 1680 mov.w &0x30d8,EXC_VOFF(%a6) # vector offset = 0xd8 1681 mov.w &0xe006,2+FP_SRC(%a6) 1682 1683 frestore FP_SRC(%a6) 1684 1685 unlk %a6 1686 1687 1688 bra.l _real_snan 1689 1690fu_operr: 1691 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 1692 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 1693 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 1694 1695 mov.w &0x30d0,EXC_VOFF(%a6) # vector offset = 0xd0 1696 mov.w &0xe004,2+FP_SRC(%a6) 1697 1698 frestore FP_SRC(%a6) 1699 1700 unlk %a6 1701 1702 1703 bra.l _real_operr 1704 1705fu_ovfl: 1706 fmovm.x &0x40,FP_SRC(%a6) # save EXOP to the stack 1707 1708 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 1709 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 1710 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 1711 1712 mov.w &0x30d4,EXC_VOFF(%a6) # vector offset = 0xd4 1713 mov.w &0xe005,2+FP_SRC(%a6) 1714 1715 frestore FP_SRC(%a6) # restore EXOP 1716 1717 unlk %a6 1718 1719 bra.l _real_ovfl 1720 1721# underflow can happen for extended precision. extended precision opclass 1722# three instruction exceptions don't update the stack pointer. so, if the 1723# exception occurred from user mode, then simply update a7 and exit normally. 1724# if the exception occurred from supervisor mode, check if 1725fu_unfl: 1726 mov.l EXC_A6(%a6),(%a6) # restore a6 1727 1728 btst &0x5,EXC_SR(%a6) 1729 bne.w fu_unfl_s 1730 1731 mov.l EXC_A7(%a6),%a0 # restore a7 whether we need 1732 mov.l %a0,%usp # to or not... 
1733 1734fu_unfl_cont: 1735 fmovm.x &0x40,FP_SRC(%a6) # save EXOP to the stack 1736 1737 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 1738 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 1739 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 1740 1741 mov.w &0x30cc,EXC_VOFF(%a6) # vector offset = 0xcc 1742 mov.w &0xe003,2+FP_SRC(%a6) 1743 1744 frestore FP_SRC(%a6) # restore EXOP 1745 1746 unlk %a6 1747 1748 bra.l _real_unfl 1749 1750fu_unfl_s: 1751 cmpi.b SPCOND_FLG(%a6),&mda7_flg # was the <ea> mode -(sp)? 1752 bne.b fu_unfl_cont 1753 1754# the extended precision result is still in fp0. but, we need to save it 1755# somewhere on the stack until we can copy it to its final resting place 1756# (where the exc frame is currently). make sure it's not at the top of the 1757# frame or it will get overwritten when the exc stack frame is shifted "down". 1758 fmovm.x &0x80,FP_SRC(%a6) # put answer on stack 1759 fmovm.x &0x40,FP_DST(%a6) # put EXOP on stack 1760 1761 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 1762 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 1763 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 1764 1765 mov.w &0x30cc,EXC_VOFF(%a6) # vector offset = 0xcc 1766 mov.w &0xe003,2+FP_DST(%a6) 1767 1768 frestore FP_DST(%a6) # restore EXOP 1769 1770 mov.l (%a6),%a6 # restore frame pointer 1771 1772 mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp) 1773 mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp) 1774 mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp) 1775 1776# now, copy the result to the proper place on the stack 1777 mov.l LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp) 1778 mov.l LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp) 1779 mov.l LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp) 1780 1781 add.l &LOCAL_SIZE-0x8,%sp 1782 1783 bra.l _real_unfl 1784 1785# fmove in and out enter here. 
1786fu_inex: 1787 fmovm.x &0x40,FP_SRC(%a6) # save EXOP to the stack 1788 1789 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 1790 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 1791 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 1792 1793 mov.w &0x30c4,EXC_VOFF(%a6) # vector offset = 0xc4 1794 mov.w &0xe001,2+FP_SRC(%a6) 1795 1796 frestore FP_SRC(%a6) # restore EXOP 1797 1798 unlk %a6 1799 1800 1801 bra.l _real_inex 1802 1803######################################################################### 1804######################################################################### 1805fu_in_pack: 1806 1807 1808# I'm not sure at this point what FPSR bits are valid for this instruction. 1809# so, since the emulation routines re-create them anyways, zero exception field 1810 andi.l &0x0ff00ff,USER_FPSR(%a6) # zero exception field 1811 1812 fmov.l &0x0,%fpcr # zero current control regs 1813 fmov.l &0x0,%fpsr 1814 1815 bsr.l get_packed # fetch packed src operand 1816 1817 lea FP_SRC(%a6),%a0 # pass ptr to src 1818 bsr.l set_tag_x # set src optype tag 1819 1820 mov.b %d0,STAG(%a6) # save src optype tag 1821 1822 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg 1823 1824# bit five of the fp extension word separates the monadic and dyadic operations 1825# at this point 1826 btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic? 1827 beq.b fu_extract_p # monadic 1828 cmpi.b 1+EXC_CMDREG(%a6),&0x3a # is operation an ftst? 1829 beq.b fu_extract_p # yes, so it's monadic, too 1830 1831 bsr.l load_fpn2 # load dst into FP_DST 1832 1833 lea FP_DST(%a6),%a0 # pass: ptr to dst op 1834 bsr.l set_tag_x # tag the operand type 1835 cmpi.b %d0,&UNNORM # is operand an UNNORM? 
1836 bne.b fu_op2_done_p # no 1837 bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO 1838fu_op2_done_p: 1839 mov.b %d0,DTAG(%a6) # save dst optype tag 1840 1841fu_extract_p: 1842 clr.l %d0 1843 mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec 1844 1845 bfextu 1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension 1846 1847 lea FP_SRC(%a6),%a0 1848 lea FP_DST(%a6),%a1 1849 1850 mov.l (tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr 1851 jsr (tbl_unsupp.l,%pc,%d1.l*1) 1852 1853# 1854# Exceptions in order of precedence: 1855# BSUN : none 1856# SNAN : all dyadic ops 1857# OPERR : fsqrt(-NORM) 1858# OVFL : all except ftst,fcmp 1859# UNFL : all except ftst,fcmp 1860# DZ : fdiv 1861# INEX2 : all except ftst,fcmp 1862# INEX1 : all 1863# 1864 1865# we determine the highest priority exception(if any) set by the 1866# emulation routine that has also been enabled by the user. 1867 mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled 1868 bne.w fu_in_ena_p # some are enabled 1869 1870fu_in_cont_p: 1871# fcmp and ftst do not store any result. 1872 mov.b 1+EXC_CMDREG(%a6),%d0 # fetch extension 1873 andi.b &0x38,%d0 # extract bits 3-5 1874 cmpi.b %d0,&0x38 # is instr fcmp or ftst? 1875 beq.b fu_in_exit_p # yes 1876 1877 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg 1878 bsr.l store_fpreg # store the result 1879 1880fu_in_exit_p: 1881 1882 btst &0x5,EXC_SR(%a6) # user or supervisor? 1883 bne.w fu_in_exit_s_p # supervisor 1884 1885 mov.l EXC_A7(%a6),%a0 # update user a7 1886 mov.l %a0,%usp 1887 1888fu_in_exit_cont_p: 1889 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 1890 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 1891 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 1892 1893 unlk %a6 # unravel stack frame 1894 1895 btst &0x7,(%sp) # is trace on? 1896 bne.w fu_trace_p # yes 1897 1898 bra.l _fpsp_done # exit to os 1899 1900# the exception occurred in supervisor mode. check to see if the 1901# addressing mode was (a7)+. 
if so, we'll need to shift the 1902# stack frame "up". 1903fu_in_exit_s_p: 1904 btst &mia7_bit,SPCOND_FLG(%a6) # was ea mode (a7)+ 1905 beq.b fu_in_exit_cont_p # no 1906 1907 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 1908 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 1909 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 1910 1911 unlk %a6 # unravel stack frame 1912 1913# shift the stack frame "up". we don't really care about the <ea> field. 1914 mov.l 0x4(%sp),0x10(%sp) 1915 mov.l 0x0(%sp),0xc(%sp) 1916 add.l &0xc,%sp 1917 1918 btst &0x7,(%sp) # is trace on? 1919 bne.w fu_trace_p # yes 1920 1921 bra.l _fpsp_done # exit to os 1922 1923fu_in_ena_p: 1924 and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled & set 1925 bfffo %d0{&24:&8},%d0 # find highest priority exception 1926 bne.b fu_in_exc_p # at least one was set 1927 1928# 1929# No exceptions occurred that were also enabled. Now: 1930# 1931# if (OVFL && ovfl_disabled && inexact_enabled) { 1932# branch to _real_inex() (even if the result was exact!); 1933# } else { 1934# save the result in the proper fp reg (unless the op is fcmp or ftst); 1935# return; 1936# } 1937# 1938 btst &ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set? 1939 beq.w fu_in_cont_p # no 1940 1941fu_in_ovflchk_p: 1942 btst &inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled? 
# (continuation of fu_in_ovflchk_p: disabled OVFL occurred; branch taken
# only if INEX2 is also enabled, in which case _real_inex() must be
# reported now)
	beq.w		fu_in_cont_p		# no
	bra.w		fu_in_exc_ovfl_p	# do _real_inex() now

#
# An exception occurred and that exception was enabled:
#
#	shift enabled exception field into lo byte of d0;
#	if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
#	    ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
#		/*
#		 * this is the case where we must call _real_inex() now or else
#		 * there will be no other way to pass it the exceptional operand
#		 */
#		call _real_inex();
#	} else {
#		restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
#	}
#
fu_in_exc_p:
	subi.l		&24,%d0			# bfffo returned 24-31; fix offset to be 0-7
	cmpi.b		%d0,&0x6		# is exception INEX? (6 or 7)
	blt.b		fu_in_exc_exit_p	# no

# the enabled exception was inexact; a simultaneously-set but DISABLED
# underflow/overflow takes over the fsave status selection below.
	btst		&unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur?
	bne.w		fu_in_exc_unfl_p	# yes
	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur?
	bne.w		fu_in_exc_ovfl_p	# yes

# here, we insert the correct fsave status value into the fsave frame for the
# corresponding exception. the operand in the fsave frame should be the original
# src operand.
# as a reminder for future predicted pain and agony, we are passing in fsave the
# "non-skewed" operand for cases of sgl and dbl src INFs,NANs, and DENORMs.
# this is INCORRECT for enabled SNAN which would give to the user the skewed SNAN!!!
fu_in_exc_exit_p:
	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
# (continuation of fu_in_exc_exit_p: d0.w = priority index 0-7 into
# tbl_except_p below)
	bne.w		fu_in_exc_exit_s_p	# supervisor

	mov.l		EXC_A7(%a6),%a0		# update user a7
	mov.l		%a0,%usp

fu_in_exc_exit_cont_p:
	mov.w		(tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6) # stuff fsave status

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)		# restore src op

	unlk		%a6

	btst		&0x7,(%sp)		# is trace enabled?
	bne.w		fu_trace_p		# yes

	bra.l		_fpsp_done

# fsave status words indexed by exception priority (matches the precedence
# list above: BSUN,SNAN,OPERR,OVFL,UNFL,DZ,INEX2,INEX1). entry 0 (BSUN)
# is presumably a placeholder since BSUN can't occur here -- TODO confirm.
tbl_except_p:
	short		0xe000,0xe006,0xe004,0xe005
	short		0xe003,0xe002,0xe001,0xe001

# force the OVFL status word (index 3 -> 0xe005)
fu_in_exc_ovfl_p:
	mov.w		&0x3,%d0
	bra.w		fu_in_exc_exit_p

# force the UNFL status word (index 4 -> 0xe003)
fu_in_exc_unfl_p:
	mov.w		&0x4,%d0
	bra.w		fu_in_exc_exit_p

# supervisor-mode exit: if the ea mode was (a7)+, the 4-byte operand came
# off the system stack, so the exception frame must be shifted "up" 0xc
# bytes before exiting.
fu_in_exc_exit_s_p:
	btst		&mia7_bit,SPCOND_FLG(%a6)
	beq.b		fu_in_exc_exit_cont_p

	mov.w		(tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6) # stuff fsave status

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)		# restore src op

	unlk		%a6			# unravel stack frame

# shift the stack frame "up". we don't really care about the <ea> field.
	mov.l		0x4(%sp),0x10(%sp)
	mov.l		0x0(%sp),0xc(%sp)
	add.l		&0xc,%sp

	btst		&0x7,(%sp)		# is trace on?
	bne.b		fu_trace_p		# yes

	bra.l		_fpsp_done		# exit to os

#
# The opclass two PACKED instruction that took an "Unimplemented Data Type"
# exception was being traced. Make the "current" PC the FPIAR and put it in the
# trace stack frame then jump to _real_trace().
2041# 2042# UNSUPP FRAME TRACE FRAME 2043# ***************** ***************** 2044# * EA * * Current * 2045# * * * PC * 2046# ***************** ***************** 2047# * 0x2 * 0x0dc * * 0x2 * 0x024 * 2048# ***************** ***************** 2049# * Next * * Next * 2050# * PC * * PC * 2051# ***************** ***************** 2052# * SR * * SR * 2053# ***************** ***************** 2054fu_trace_p: 2055 mov.w &0x2024,0x6(%sp) 2056 fmov.l %fpiar,0x8(%sp) 2057 2058 bra.l _real_trace 2059 2060######################################################### 2061######################################################### 2062fu_out_pack: 2063 2064 2065# I'm not sure at this point what FPSR bits are valid for this instruction. 2066# so, since the emulation routines re-create them anyways, zero exception field. 2067# fmove out doesn't affect ccodes. 2068 and.l &0xffff00ff,USER_FPSR(%a6) # zero exception field 2069 2070 fmov.l &0x0,%fpcr # zero current control regs 2071 fmov.l &0x0,%fpsr 2072 2073 bfextu EXC_CMDREG(%a6){&6:&3},%d0 2074 bsr.l load_fpn1 2075 2076# unlike other opclass 3, unimplemented data type exceptions, packed must be 2077# able to detect all operand types. 2078 lea FP_SRC(%a6),%a0 2079 bsr.l set_tag_x # tag the operand type 2080 cmpi.b %d0,&UNNORM # is operand an UNNORM? 2081 bne.b fu_op2_p # no 2082 bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO 2083 2084fu_op2_p: 2085 mov.b %d0,STAG(%a6) # save src optype tag 2086 2087 clr.l %d0 2088 mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec 2089 2090 lea FP_SRC(%a6),%a0 # pass ptr to src operand 2091 2092 mov.l (%a6),EXC_A6(%a6) # in case a6 changes 2093 bsr.l fout # call fmove out routine 2094 2095# Exceptions in order of precedence: 2096# BSUN : no 2097# SNAN : yes 2098# OPERR : if ((k_factor > +17) || (dec. 
exp exceeds 3 digits)) 2099# OVFL : no 2100# UNFL : no 2101# DZ : no 2102# INEX2 : yes 2103# INEX1 : no 2104 2105# determine the highest priority exception(if any) set by the 2106# emulation routine that has also been enabled by the user. 2107 mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled 2108 bne.w fu_out_ena_p # some are enabled 2109 2110fu_out_exit_p: 2111 mov.l EXC_A6(%a6),(%a6) # restore a6 2112 2113 btst &0x5,EXC_SR(%a6) # user or supervisor? 2114 bne.b fu_out_exit_s_p # supervisor 2115 2116 mov.l EXC_A7(%a6),%a0 # update user a7 2117 mov.l %a0,%usp 2118 2119fu_out_exit_cont_p: 2120 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 2121 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 2122 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 2123 2124 unlk %a6 # unravel stack frame 2125 2126 btst &0x7,(%sp) # is trace on? 2127 bne.w fu_trace_p # yes 2128 2129 bra.l _fpsp_done # exit to os 2130 2131# the exception occurred in supervisor mode. check to see if the 2132# addressing mode was -(a7). if so, we'll need to shift the 2133# stack frame "down". 
2134fu_out_exit_s_p: 2135 btst &mda7_bit,SPCOND_FLG(%a6) # was ea mode -(a7) 2136 beq.b fu_out_exit_cont_p # no 2137 2138 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 2139 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 2140 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 2141 2142 mov.l (%a6),%a6 # restore frame pointer 2143 2144 mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp) 2145 mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp) 2146 2147# now, copy the result to the proper place on the stack 2148 mov.l LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp) 2149 mov.l LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp) 2150 mov.l LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp) 2151 2152 add.l &LOCAL_SIZE-0x8,%sp 2153 2154 btst &0x7,(%sp) 2155 bne.w fu_trace_p 2156 2157 bra.l _fpsp_done 2158 2159fu_out_ena_p: 2160 and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled 2161 bfffo %d0{&24:&8},%d0 # find highest priority exception 2162 beq.w fu_out_exit_p 2163 2164 mov.l EXC_A6(%a6),(%a6) # restore a6 2165 2166# an exception occurred and that exception was enabled. 2167# the only exception possible on packed move out are INEX, OPERR, and SNAN. 2168fu_out_exc_p: 2169 cmpi.b %d0,&0x1a 2170 bgt.w fu_inex_p2 2171 beq.w fu_operr_p 2172 2173fu_snan_p: 2174 btst &0x5,EXC_SR(%a6) 2175 bne.b fu_snan_s_p 2176 2177 mov.l EXC_A7(%a6),%a0 2178 mov.l %a0,%usp 2179 bra.w fu_snan 2180 2181fu_snan_s_p: 2182 cmpi.b SPCOND_FLG(%a6),&mda7_flg 2183 bne.w fu_snan 2184 2185# the instruction was "fmove.p fpn,-(a7)" from supervisor mode. 2186# the strategy is to move the exception frame "down" 12 bytes. then, we 2187# can store the default result where the exception frame was. 
# (continuation of fu_snan_s_p: "fmove.p fpn,-(a7)" in supervisor mode;
# shift the exception frame down 0xc bytes and store the default result
# where the frame used to sit, then report SNAN)
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.w		&0x30d8,EXC_VOFF(%a6)	# vector offset = 0xd8 (SNAN)
	mov.w		&0xe006,2+FP_SRC(%a6)	# set fsave status = SNAN

	frestore	FP_SRC(%a6)		# restore src operand

	mov.l		(%a6),%a6		# restore frame pointer

	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)

# now, we copy the default result to its proper location
	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)

	add.l		&LOCAL_SIZE-0x8,%sp


	bra.l		_real_snan

# OPERR on packed fmove out: user mode just fixes up a7 and reuses the
# common fu_operr path; supervisor -(a7) needs the frame-shift below.
fu_operr_p:
	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
	bne.w		fu_operr_p_s		# supervisor

	mov.l		EXC_A7(%a6),%a0		# update user a7
	mov.l		%a0,%usp
	bra.w		fu_operr

fu_operr_p_s:
	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was ea mode -(a7)?
	bne.w		fu_operr		# no; common path

# the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
# the strategy is to move the exception frame "down" 12 bytes. then, we
# can store the default result where the exception frame was.
2228 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 2229 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 2230 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 2231 2232 mov.w &0x30d0,EXC_VOFF(%a6) # vector offset = 0xd0 2233 mov.w &0xe004,2+FP_SRC(%a6) # set fsave status 2234 2235 frestore FP_SRC(%a6) # restore src operand 2236 2237 mov.l (%a6),%a6 # restore frame pointer 2238 2239 mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp) 2240 mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp) 2241 mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp) 2242 2243# now, we copy the default result to its proper location 2244 mov.l LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp) 2245 mov.l LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp) 2246 mov.l LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp) 2247 2248 add.l &LOCAL_SIZE-0x8,%sp 2249 2250 2251 bra.l _real_operr 2252 2253fu_inex_p2: 2254 btst &0x5,EXC_SR(%a6) 2255 bne.w fu_inex_s_p2 2256 2257 mov.l EXC_A7(%a6),%a0 2258 mov.l %a0,%usp 2259 bra.w fu_inex 2260 2261fu_inex_s_p2: 2262 cmpi.b SPCOND_FLG(%a6),&mda7_flg 2263 bne.w fu_inex 2264 2265# the instruction was "fmove.p fpn,-(a7)" from supervisor mode. 2266# the strategy is to move the exception frame "down" 12 bytes. then, we 2267# can store the default result where the exception frame was. 
# (continuation of fu_inex_s_p2: "fmove.p fpn,-(a7)" in supervisor mode;
# shift the exception frame down 0xc bytes, store the default result in
# its place, and report INEX with fsave status 0xe001 / voff 0xc4)
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.w		&0x30c4,EXC_VOFF(%a6)	# vector offset = 0xc4
	mov.w		&0xe001,2+FP_SRC(%a6)	# set fsave status

	frestore	FP_SRC(%a6)		# restore src operand

	mov.l		(%a6),%a6		# restore frame pointer

	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)

# now, we copy the default result to its proper location
	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)

	add.l		&LOCAL_SIZE-0x8,%sp


	bra.l		_real_inex

#########################################################################

#
# funimp_skew:
# if we're stuffing a source operand back into an fsave frame then we
# have to make sure that for single or double source operands that the
# format stuffed is as weird as the hardware usually makes it.
# Input:  FP_SRC(%a6) = src operand; EXC_EXTWORD bits {3:3} = src format.
# Only sgl (0x1) and dbl (0x5) sources are touched, and only when the
# biased exponent falls in the format's denorm range (<= 0x3f80 for sgl,
# <= 0x3c00 for dbl). Clobbers d0/d1 (dbl path also a0 via dnrm_lp).
#
	global		funimp_skew
funimp_skew:
	bfextu		EXC_EXTWORD(%a6){&3:&3},%d0 # extract src specifier
	cmpi.b		%d0,&0x1		# was src sgl?
	beq.b		funimp_skew_sgl		# yes
	cmpi.b		%d0,&0x5		# was src dbl?
	beq.b		funimp_skew_dbl		# yes
	rts

funimp_skew_sgl:
	mov.w		FP_SRC_EX(%a6),%d0	# fetch DENORM exponent
	andi.w		&0x7fff,%d0		# strip sign
	beq.b		funimp_skew_sgl_not	# exp 0: leave as is
	cmpi.w		%d0,&0x3f80		# inside sgl denorm range?
	bgt.b		funimp_skew_sgl_not	# no: leave as is
	neg.w		%d0			# make exponent negative
	addi.w		&0x3f81,%d0		# find amt to shift
	mov.l		FP_SRC_HI(%a6),%d1	# fetch DENORM hi(man)
	lsr.l		%d0,%d1			# shift it
	bset		&31,%d1			# set j-bit
	mov.l		%d1,FP_SRC_HI(%a6)	# insert new hi(man)
	andi.w		&0x8000,FP_SRC_EX(%a6)	# clear old exponent
	ori.w		&0x3f80,FP_SRC_EX(%a6)	# insert new "skewed" exponent
funimp_skew_sgl_not:
	rts

funimp_skew_dbl:
	mov.w		FP_SRC_EX(%a6),%d0	# fetch DENORM exponent
	andi.w		&0x7fff,%d0		# strip sign
	beq.b		funimp_skew_dbl_not	# exp 0: leave as is
	cmpi.w		%d0,&0x3c00		# inside dbl denorm range?
	bgt.b		funimp_skew_dbl_not	# no: leave as is

# dbl path: convert to "internal format" (sign byte at FP_SRC+2), let
# dnrm_lp() shift the mantissa down to the 0x3c01 threshold, then rebuild
# the exponent word below.
	tst.b		FP_SRC_EX(%a6)		# make "internal format"
	smi.b		0x2+FP_SRC(%a6)		# save sign as a byte flag
	mov.w		%d0,FP_SRC_EX(%a6)	# insert exponent with cleared sign
	clr.l		%d0			# clear g,r,s
	lea		FP_SRC(%a6),%a0		# pass ptr to src op
	mov.w		&0x3c01,%d1		# pass denorm threshold
	bsr.l		dnrm_lp			# denorm it
	mov.w		&0x3c00,%d0		# new exponent
	tst.b		0x2+FP_SRC(%a6)		# is sign set?
# (continuation of funimp_skew_dbl: merge the saved sign flag back into
# the new "skewed" exponent word and set the j-bit)
	beq.b		fss_dbl_denorm_done	# no
	bset		&15,%d0			# set sign
fss_dbl_denorm_done:
	bset		&0x7,FP_SRC_HI(%a6)	# set j-bit
	mov.w		%d0,FP_SRC_EX(%a6)	# insert new exponent
funimp_skew_dbl_not:
	rts

#########################################################################
#
# _mem_write2:
# 12-byte write helper. In user mode (SR supervisor bit clear), tail-branch
# to the _dmem_write() callout. In supervisor mode, stash the 12 bytes at
# (%a0) into the FP_DST scratch area and return success (d1 = 0) --
# presumably the caller completes the supervisor-stack store itself;
# verify against callers.
#
	global		_mem_write2
_mem_write2:
	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
	beq.l		_dmem_write		# user: use the callout
	mov.l		0x0(%a0),FP_DST_EX(%a6)	# supervisor: park result
	mov.l		0x4(%a0),FP_DST_HI(%a6)
	mov.l		0x8(%a0),FP_DST_LO(%a6)
	clr.l		%d1			# return success
	rts

#########################################################################
# XDEF ****************************************************************
#	_fpsp_effadd(): 060FPSP entry point for FP "Unimplemented
#			effective address" exception.
#
#	This handler should be the first code executed upon taking the
#	FP Unimplemented Effective Address exception in an operating
#	system.
#
# XREF ****************************************************************
#	_imem_read_long() - read instruction longword
#	fix_skewed_ops() - adjust src operand in fsave frame
#	set_tag_x() - determine optype of src/dst operands
#	store_fpreg() - store opclass 0 or 2 result to FP regfile
#	unnorm_fix() - change UNNORM operands to NORM or ZERO
#	load_fpn2() - load dst operand from FP regfile
#	tbl_unsupp - addr of table of emulation routines for opclass 0,2
#	decbin() - convert packed data to FP binary data
#	_real_fpu_disabled() - "callout" for "FPU disabled" exception
#	_real_access() - "callout" for access error exception
#	_mem_read() - read extended immediate operand from memory
#	_fpsp_done() - "callout" for exit; work all done
#	_real_trace() - "callout" for Trace enabled exception
#	fmovm_dynamic() - emulate dynamic fmovm instruction
#	fmovm_ctrl() - emulate fmovm control instruction
#
# INPUT ***************************************************************
#	- The system stack contains the "Unimplemented <ea>" stk frame
#
# OUTPUT **************************************************************
#	If access error:
#	- The system stack is changed to an access error stack frame
#	If FPU disabled:
#	- The system stack is changed to an FPU disabled stack frame
#	If Trace exception enabled:
#	- The system stack is changed to a Trace exception stack frame
#	Else: (normal case)
#	- None (correct result has been stored as appropriate)
#
# ALGORITHM ***********************************************************
#	This exception handles 3 types of operations:
# (1) FP Instructions using extended precision or packed immediate
#     addressing mode.
# (2) The "fmovm.x" instruction w/ dynamic register specification.
# (3) The "fmovm.l" instruction w/ 2 or 3 control registers.
#
#	For immediate data operations, the data is read in w/ a
# _mem_read() "callout", converted to FP binary (if packed), and used
# as the source operand to the instruction specified by the instruction
# word. If no FP exception should be reported as a result of the
# emulation, then the result is stored to the destination register and
# the handler exits through _fpsp_done(). If an enabled exc has been
# signalled as a result of emulation, then an fsave state frame
# corresponding to the FP exception type must be entered into the 060
# FPU before exiting. In either the enabled or disabled cases, we
# must also check if a Trace exception is pending, in which case, we
# must create a Trace exception stack frame from the current exception
# stack frame. If no Trace is pending, we simply exit through
# _fpsp_done().
#	For "fmovm.x", call the routine fmovm_dynamic() which will
# decode and emulate the instruction. No FP exceptions can be pending
# as a result of this operation emulation. A Trace exception can be
# pending, though, which means the current stack frame must be changed
# to a Trace stack frame and an exit made through _real_trace().
# For the case of "fmovm.x Dn,-(a7)", where the offending instruction
# was executed from supervisor mode, this handler must store the FP
# register file values to the system stack by itself since
# fmovm_dynamic() can't handle this. A normal exit is made through
# _fpsp_done().
#	For "fmovm.l", fmovm_ctrl() is used to emulate the instruction.
# Again, a Trace exception may be pending and an exit made through
# _real_trace(). Else, a normal exit is made through _fpsp_done().
#	Before any of the above is attempted, it must be checked to
# see if the FPU is disabled. Since the "Unimp <ea>" exception is taken
# before the "FPU disabled" exception, but the "FPU disabled" exception
# has higher priority, we check the disabled bit in the PCR. If set,
# then we must create an 8 word "FPU disabled" exception stack frame
# from the current 4 word exception stack frame. This includes
# reproducing the effective address of the instruction to put on the
# new stack frame.
#	In the process of all emulation work, if a _mem_read()
# "callout" returns a failing result indicating an access error, then
# we must create an access error stack frame from the current stack
# frame. This information includes a faulting address and a fault-
# status-longword. These are created within this handler.
#
#########################################################################

	global		_fpsp_effadd
_fpsp_effadd:

# This exception type takes priority over the "Line F Emulator"
# exception. Therefore, the FPU could be disabled when entering here.
# So, we must check to see if it's disabled and handle that case separately.
	mov.l		%d0,-(%sp)		# save d0
	movc		%pcr,%d0		# load proc cr
	btst		&0x1,%d0		# is FPU disabled?
	bne.w		iea_disabled		# yes
	mov.l		(%sp)+,%d0		# restore d0

	link		%a6,&-LOCAL_SIZE	# init stack frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack

# PC of instruction that took the exception is the PC in the frame
	mov.l		EXC_PC(%a6),EXC_EXTWPTR(%a6)

	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch the instruction words
	mov.l		%d0,EXC_OPWORD(%a6)	# store OPWORD and EXTWORD

#########################################################################

	tst.w		%d0			# is operation fmovem?
	bmi.w		iea_fmovm		# yes

#
# here, we will have:
#	fabs	fdabs	fsabs		facos		fmod
#	fadd	fdadd	fsadd		fasin		frem
#	fcmp				fatan		fscale
#	fdiv	fddiv	fsdiv		fatanh		fsin
#	fint				fcos		fsincos
#	fintrz				fcosh		fsinh
#	fmove	fdmove	fsmove		fetox		ftan
#	fmul	fdmul	fsmul		fetoxm1		ftanh
#	fneg	fdneg	fsneg		fgetexp		ftentox
#	fsgldiv				fgetman		ftwotox
#	fsglmul				flog10
#	fsqrt				flog2
#	fsub	fdsub	fssub		flogn
#	ftst				flognp1
# which can all use f<op>.{x,p}
# so, now it's immediate data extended precision AND PACKED FORMAT!
#
iea_op:
	andi.l		&0x00ff00ff,USER_FPSR(%a6) # zero ccode/exc status bytes

	btst		&0xa,%d0		# is src fmt x or p?
2505 bne.b iea_op_pack # packed 2506 2507 2508 mov.l EXC_EXTWPTR(%a6),%a0 # pass: ptr to #<data> 2509 lea FP_SRC(%a6),%a1 # pass: ptr to super addr 2510 mov.l &0xc,%d0 # pass: 12 bytes 2511 bsr.l _imem_read # read extended immediate 2512 2513 tst.l %d1 # did ifetch fail? 2514 bne.w iea_iacc # yes 2515 2516 bra.b iea_op_setsrc 2517 2518iea_op_pack: 2519 2520 mov.l EXC_EXTWPTR(%a6),%a0 # pass: ptr to #<data> 2521 lea FP_SRC(%a6),%a1 # pass: ptr to super dst 2522 mov.l &0xc,%d0 # pass: 12 bytes 2523 bsr.l _imem_read # read packed operand 2524 2525 tst.l %d1 # did ifetch fail? 2526 bne.w iea_iacc # yes 2527 2528# The packed operand is an INF or a NAN if the exponent field is all ones. 2529 bfextu FP_SRC(%a6){&1:&15},%d0 # get exp 2530 cmpi.w %d0,&0x7fff # INF or NAN? 2531 beq.b iea_op_setsrc # operand is an INF or NAN 2532 2533# The packed operand is a zero if the mantissa is all zero, else it's 2534# a normal packed op. 2535 mov.b 3+FP_SRC(%a6),%d0 # get byte 4 2536 andi.b &0x0f,%d0 # clear all but last nybble 2537 bne.b iea_op_gp_not_spec # not a zero 2538 tst.l FP_SRC_HI(%a6) # is lw 2 zero? 2539 bne.b iea_op_gp_not_spec # not a zero 2540 tst.l FP_SRC_LO(%a6) # is lw 3 zero? 2541 beq.b iea_op_setsrc # operand is a ZERO 2542iea_op_gp_not_spec: 2543 lea FP_SRC(%a6),%a0 # pass: ptr to packed op 2544 bsr.l decbin # convert to extended 2545 fmovm.x &0x80,FP_SRC(%a6) # make this the srcop 2546 2547iea_op_setsrc: 2548 addi.l &0xc,EXC_EXTWPTR(%a6) # update extension word pointer 2549 2550# FP_SRC now holds the src operand. 2551 lea FP_SRC(%a6),%a0 # pass: ptr to src op 2552 bsr.l set_tag_x # tag the operand type 2553 mov.b %d0,STAG(%a6) # could be ANYTHING!!! 2554 cmpi.b %d0,&UNNORM # is operand an UNNORM? 
2555 bne.b iea_op_getdst # no 2556 bsr.l unnorm_fix # yes; convert to NORM/DENORM/ZERO 2557 mov.b %d0,STAG(%a6) # set new optype tag 2558iea_op_getdst: 2559 clr.b STORE_FLG(%a6) # clear "store result" boolean 2560 2561 btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic? 2562 beq.b iea_op_extract # monadic 2563 btst &0x4,1+EXC_CMDREG(%a6) # is operation fsincos,ftst,fcmp? 2564 bne.b iea_op_spec # yes 2565 2566iea_op_loaddst: 2567 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno 2568 bsr.l load_fpn2 # load dst operand 2569 2570 lea FP_DST(%a6),%a0 # pass: ptr to dst op 2571 bsr.l set_tag_x # tag the operand type 2572 mov.b %d0,DTAG(%a6) # could be ANYTHING!!! 2573 cmpi.b %d0,&UNNORM # is operand an UNNORM? 2574 bne.b iea_op_extract # no 2575 bsr.l unnorm_fix # yes; convert to NORM/DENORM/ZERO 2576 mov.b %d0,DTAG(%a6) # set new optype tag 2577 bra.b iea_op_extract 2578 2579# the operation is fsincos, ftst, or fcmp. only fcmp is dyadic 2580iea_op_spec: 2581 btst &0x3,1+EXC_CMDREG(%a6) # is operation fsincos? 2582 beq.b iea_op_extract # yes 2583# now, we're left with ftst and fcmp. so, first let's tag them so that they don't 2584# store a result. then, only fcmp will branch back and pick up a dst operand. 2585 st STORE_FLG(%a6) # don't store a final result 2586 btst &0x1,1+EXC_CMDREG(%a6) # is operation fcmp? 
2587 beq.b iea_op_loaddst # yes 2588 2589iea_op_extract: 2590 clr.l %d0 2591 mov.b FPCR_MODE(%a6),%d0 # pass: rnd mode,prec 2592 2593 mov.b 1+EXC_CMDREG(%a6),%d1 2594 andi.w &0x007f,%d1 # extract extension 2595 2596 fmov.l &0x0,%fpcr 2597 fmov.l &0x0,%fpsr 2598 2599 lea FP_SRC(%a6),%a0 2600 lea FP_DST(%a6),%a1 2601 2602 mov.l (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr 2603 jsr (tbl_unsupp.l,%pc,%d1.l*1) 2604 2605# 2606# Exceptions in order of precedence: 2607# BSUN : none 2608# SNAN : all operations 2609# OPERR : all reg-reg or mem-reg operations that can normally operr 2610# OVFL : same as OPERR 2611# UNFL : same as OPERR 2612# DZ : same as OPERR 2613# INEX2 : same as OPERR 2614# INEX1 : all packed immediate operations 2615# 2616 2617# we determine the highest priority exception(if any) set by the 2618# emulation routine that has also been enabled by the user. 2619 mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled 2620 bne.b iea_op_ena # some are enabled 2621 2622# now, we save the result, unless, of course, the operation was ftst or fcmp. 2623# these don't save results. 2624iea_op_save: 2625 tst.b STORE_FLG(%a6) # does this op store a result? 2626 bne.b iea_op_exit1 # exit with no frestore 2627 2628iea_op_store: 2629 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno 2630 bsr.l store_fpreg # store the result 2631 2632iea_op_exit1: 2633 mov.l EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC" 2634 mov.l EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame 2635 2636 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 2637 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 2638 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 2639 2640 unlk %a6 # unravel the frame 2641 2642 btst &0x7,(%sp) # is trace on? 
2643 bne.w iea_op_trace # yes 2644 2645 bra.l _fpsp_done # exit to os 2646 2647iea_op_ena: 2648 and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enable and set 2649 bfffo %d0{&24:&8},%d0 # find highest priority exception 2650 bne.b iea_op_exc # at least one was set 2651 2652# no exception occurred. now, did a disabled, exact overflow occur with inexact 2653# enabled? if so, then we have to stuff an overflow frame into the FPU. 2654 btst &ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur? 2655 beq.b iea_op_save 2656 2657iea_op_ovfl: 2658 btst &inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled? 2659 beq.b iea_op_store # no 2660 bra.b iea_op_exc_ovfl # yes 2661 2662# an enabled exception occurred. we have to insert the exception type back into 2663# the machine. 2664iea_op_exc: 2665 subi.l &24,%d0 # fix offset to be 0-8 2666 cmpi.b %d0,&0x6 # is exception INEX? 2667 bne.b iea_op_exc_force # no 2668 2669# the enabled exception was inexact. so, if it occurs with an overflow 2670# or underflow that was disabled, then we have to force an overflow or 2671# underflow frame. 2672 btst &ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur? 2673 bne.b iea_op_exc_ovfl # yes 2674 btst &unfl_bit,FPSR_EXCEPT(%a6) # did underflow occur? 
2675 bne.b iea_op_exc_unfl # yes 2676 2677iea_op_exc_force: 2678 mov.w (tbl_iea_except.b,%pc,%d0.w*2),2+FP_SRC(%a6) 2679 bra.b iea_op_exit2 # exit with frestore 2680 2681tbl_iea_except: 2682 short 0xe002, 0xe006, 0xe004, 0xe005 2683 short 0xe003, 0xe002, 0xe001, 0xe001 2684 2685iea_op_exc_ovfl: 2686 mov.w &0xe005,2+FP_SRC(%a6) 2687 bra.b iea_op_exit2 2688 2689iea_op_exc_unfl: 2690 mov.w &0xe003,2+FP_SRC(%a6) 2691 2692iea_op_exit2: 2693 mov.l EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC" 2694 mov.l EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame 2695 2696 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 2697 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs 2698 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 2699 2700 frestore FP_SRC(%a6) # restore exceptional state 2701 2702 unlk %a6 # unravel the frame 2703 2704 btst &0x7,(%sp) # is trace on? 2705 bne.b iea_op_trace # yes 2706 2707 bra.l _fpsp_done # exit to os 2708 2709# 2710# The opclass two instruction that took an "Unimplemented Effective Address" 2711# exception was being traced. Make the "current" PC the FPIAR and put it in 2712# the trace stack frame then jump to _real_trace(). 
2713# 2714# UNIMP EA FRAME TRACE FRAME 2715# ***************** ***************** 2716# * 0x0 * 0x0f0 * * Current * 2717# ***************** * PC * 2718# * Current * ***************** 2719# * PC * * 0x2 * 0x024 * 2720# ***************** ***************** 2721# * SR * * Next * 2722# ***************** * PC * 2723# ***************** 2724# * SR * 2725# ***************** 2726iea_op_trace: 2727 mov.l (%sp),-(%sp) # shift stack frame "down" 2728 mov.w 0x8(%sp),0x4(%sp) 2729 mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x024 2730 fmov.l %fpiar,0x8(%sp) # "Current PC" is in FPIAR 2731 2732 bra.l _real_trace 2733 2734######################################################################### 2735iea_fmovm: 2736 btst &14,%d0 # ctrl or data reg 2737 beq.w iea_fmovm_ctrl 2738 2739iea_fmovm_data: 2740 2741 btst &0x5,EXC_SR(%a6) # user or supervisor mode 2742 bne.b iea_fmovm_data_s 2743 2744iea_fmovm_data_u: 2745 mov.l %usp,%a0 2746 mov.l %a0,EXC_A7(%a6) # store current a7 2747 bsr.l fmovm_dynamic # do dynamic fmovm 2748 mov.l EXC_A7(%a6),%a0 # load possibly new a7 2749 mov.l %a0,%usp # update usp 2750 bra.w iea_fmovm_exit 2751 2752iea_fmovm_data_s: 2753 clr.b SPCOND_FLG(%a6) 2754 lea 0x2+EXC_VOFF(%a6),%a0 2755 mov.l %a0,EXC_A7(%a6) 2756 bsr.l fmovm_dynamic # do dynamic fmovm 2757 2758 cmpi.b SPCOND_FLG(%a6),&mda7_flg 2759 beq.w iea_fmovm_data_predec 2760 cmpi.b SPCOND_FLG(%a6),&mia7_flg 2761 bne.w iea_fmovm_exit 2762 2763# right now, d0 = the size. 2764# the data has been fetched from the supervisor stack, but we have not 2765# incremented the stack pointer by the appropriate number of bytes. 2766# do it here. 
# (a7)+ from supervisor mode: rebuild the exception frame "up" the stack
# by d0 (the transfer size) so the stack pointer ends up incremented.
# If trace is pending, build a format-0x2 trace frame instead.
iea_fmovm_data_postinc:
	btst		&0x7,EXC_SR(%a6)	# is trace enabled?
	bne.b		iea_fmovm_data_pi_trace

	mov.w		EXC_SR(%a6),(EXC_SR,%a6,%d0)	# copy SR up by "size"
	mov.l		EXC_EXTWPTR(%a6),(EXC_PC,%a6,%d0) # Next PC
	mov.w		&0x00f0,(EXC_VOFF,%a6,%d0)	# fmt 0x0, voff 0x0f0

	lea		(EXC_SR,%a6,%d0),%a0	# addr of relocated frame
	mov.l		%a0,EXC_SR(%a6)		# save as final sp value

	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6
	mov.l		(%sp)+,%sp		# sp = relocated frame
	bra.l		_fpsp_done

# same as above, but the relocated frame is a 6-word trace frame
# (fmt 0x2, voff 0x024): Current PC goes in the extra longword.
iea_fmovm_data_pi_trace:
	mov.w		EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0)
	mov.l		EXC_EXTWPTR(%a6),(EXC_PC-0x4,%a6,%d0)	# Next PC
	mov.w		&0x2024,(EXC_VOFF-0x4,%a6,%d0)	# fmt 0x2, voff 0x024
	mov.l		EXC_PC(%a6),(EXC_VOFF+0x2-0x4,%a6,%d0)	# Current PC

	lea		(EXC_SR-0x4,%a6,%d0),%a0
	mov.l		%a0,EXC_SR(%a6)

	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6
	mov.l		(%sp)+,%sp
	bra.l		_real_trace

# -(a7) from supervisor mode: the register images must be written to
# where the stack pointer will end up, *below* the current frame.
# right now, d1 = size and d0 = the strg.
iea_fmovm_data_predec:
	mov.b		%d1,EXC_VOFF(%a6)	# store strg
	mov.b		%d0,0x1+EXC_VOFF(%a6)	# store size

	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.l		(%a6),-(%sp)		# make a copy of a6
	mov.l		%d0,-(%sp)		# save d0
	mov.l		%d1,-(%sp)		# save d1
	mov.l		EXC_EXTWPTR(%a6),-(%sp)	# make a copy of Next PC

	clr.l		%d0
	mov.b		0x1+EXC_VOFF(%a6),%d0	# fetch size
	neg.l		%d0			# get negative of size

	btst		&0x7,EXC_SR(%a6)	# is trace enabled?
	beq.b		iea_fmovm_data_p2

# trace pending: build a format-0x2 trace frame "size" bytes lower.
	mov.w		EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0)
	mov.l		EXC_PC(%a6),(EXC_VOFF-0x2,%a6,%d0)	# Current PC
	mov.l		(%sp)+,(EXC_PC-0x4,%a6,%d0)	# Next PC (saved copy)
	mov.w		&0x2024,(EXC_VOFF-0x4,%a6,%d0)	# fmt 0x2, voff 0x024

	pea		(%a6,%d0)		# create final sp
	bra.b		iea_fmovm_data_p3

# no trace: build a normal 4-word frame "size" bytes lower.
iea_fmovm_data_p2:
	mov.w		EXC_SR(%a6),(EXC_SR,%a6,%d0)
	mov.l		(%sp)+,(EXC_PC,%a6,%d0)	# Next PC (saved copy)
	mov.w		&0x00f0,(EXC_VOFF,%a6,%d0)	# fmt 0x0, voff 0x0f0

	pea		(0x4,%a6,%d0)		# create final sp

# Now write the selected FP registers to the -(a7) destination area just
# below the new frame.  d1 holds the register-select string; each set
# msb selects the next register (fp7..fp0 order via lsl), each transfer
# is 12 bytes (extended precision), hence the addi.l &0xc steps.
iea_fmovm_data_p3:
	clr.l		%d1
	mov.b		EXC_VOFF(%a6),%d1	# fetch strg

	tst.b		%d1			# fp7 selected?
	bpl.b		fm_1
	fmovm.x		&0x80,(0x4+0x8,%a6,%d0)
	addi.l		&0xc,%d0
fm_1:
	lsl.b		&0x1,%d1		# fp6?
	bpl.b		fm_2
	fmovm.x		&0x40,(0x4+0x8,%a6,%d0)
	addi.l		&0xc,%d0
fm_2:
	lsl.b		&0x1,%d1		# fp5?
	bpl.b		fm_3
	fmovm.x		&0x20,(0x4+0x8,%a6,%d0)
	addi.l		&0xc,%d0
fm_3:
	lsl.b		&0x1,%d1		# fp4?
	bpl.b		fm_4
	fmovm.x		&0x10,(0x4+0x8,%a6,%d0)
	addi.l		&0xc,%d0
fm_4:
	lsl.b		&0x1,%d1		# fp3?
	bpl.b		fm_5
	fmovm.x		&0x08,(0x4+0x8,%a6,%d0)
	addi.l		&0xc,%d0
fm_5:
	lsl.b		&0x1,%d1		# fp2?
	bpl.b		fm_6
	fmovm.x		&0x04,(0x4+0x8,%a6,%d0)
	addi.l		&0xc,%d0
fm_6:
	lsl.b		&0x1,%d1		# fp1?
	bpl.b		fm_7
	fmovm.x		&0x02,(0x4+0x8,%a6,%d0)
	addi.l		&0xc,%d0
fm_7:
	lsl.b		&0x1,%d1		# fp0?
	bpl.b		fm_end
	fmovm.x		&0x01,(0x4+0x8,%a6,%d0)
fm_end:
	mov.l		0x4(%sp),%d1		# restore d1
	mov.l		0x8(%sp),%d0		# restore d0
	mov.l		0xc(%sp),%a6		# restore a6
	mov.l		(%sp)+,%sp		# sp = final frame

	btst		&0x7,(%sp)		# is trace enabled?
	beq.l		_fpsp_done
	bra.l		_real_trace

#########################################################################
# fmovm of FP control registers: emulate the load, then take the common
# exit path (also used by the dynamic-fmovm cases above).
iea_fmovm_ctrl:

	bsr.l		fmovm_ctrl		# load ctrl regs

iea_fmovm_exit:
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	btst		&0x7,EXC_SR(%a6)	# is trace on?
	bne.b		iea_fmovm_trace		# yes

	mov.l		EXC_EXTWPTR(%a6),EXC_PC(%a6) # set Next PC

	unlk		%a6			# unravel the frame

	bra.l		_fpsp_done		# exit to os

#
# The control reg instruction that took an "Unimplemented Effective Address"
# exception was being traced. The "Current PC" for the trace frame is the
# PC stacked for Unimp EA. The "Next PC" is in EXC_EXTWPTR.
# After fixing the stack frame, jump to _real_trace().
#
# UNIMP EA FRAME             TRACE FRAME
# *****************          *****************
# * 0x0 *  0x0f0 *           *    Current    *
# *****************          *      PC       *
# *    Current   *           *****************
# *      PC      *           * 0x2 *  0x024  *
# *****************          *****************
# *      SR      *           *     Next      *
# *****************          *      PC       *
#                            *****************
#                            *      SR       *
#                            *****************
# this ain't a pretty solution, but it works:
# -restore a6 (not with unlk)
# -shift stack frame down over where old a6 used to be
# -add LOCAL_SIZE to stack pointer
iea_fmovm_trace:
	mov.l		(%a6),%a6		# restore frame pointer
	mov.w		EXC_SR+LOCAL_SIZE(%sp),0x0+LOCAL_SIZE(%sp)
	mov.l		EXC_PC+LOCAL_SIZE(%sp),0x8+LOCAL_SIZE(%sp)
	mov.l		EXC_EXTWPTR+LOCAL_SIZE(%sp),0x2+LOCAL_SIZE(%sp)
	mov.w		&0x2024,0x6+LOCAL_SIZE(%sp) # stk fmt = 0x2; voff = 0x024
	add.l		&LOCAL_SIZE,%sp		# clear stack frame

	bra.l		_real_trace

#########################################################################
# The FPU is disabled and so we should really have taken the "Line
# F Emulator" exception. So, here we create an 8-word stack frame
# from our 4-word stack frame. This means we must calculate the length
# the faulting instruction to get the "next PC". This is trivial for
# immediate operands but requires some extra work for fmovm dynamic
# which can use most addressing modes.
# FPU disabled: determine the faulting instruction's length so a proper
# 8-word (format 0x4) "FPU disabled" frame with a Next PC can be built.
iea_disabled:
	mov.l		(%sp)+,%d0		# restore d0

	link		%a6,&-LOCAL_SIZE	# init stack frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1

# PC of instruction that took the exception is the PC in the frame
	mov.l		EXC_PC(%a6),EXC_EXTWPTR(%a6)
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch the instruction words
	mov.l		%d0,EXC_OPWORD(%a6)	# store OPWORD and EXTWORD

	tst.w		%d0			# is instr fmovm?
	bmi.b		iea_dis_fmovm		# yes
# instruction is using an extended precision immediate operand. Therefore,
# the total instruction length is 16 bytes.
iea_dis_immed:
	mov.l		&0x10,%d0		# 16 bytes of instruction
	bra.b		iea_dis_cont
iea_dis_fmovm:
	btst		&0xe,%d0		# is instr fmovm ctrl
	bne.b		iea_dis_fmovm_data	# no
# the instruction is a fmovm.l with 2 or 3 registers.
	bfextu		%d0{&19:&3},%d1		# extract reg-select field
	mov.l		&0xc,%d0		# assume 2 regs: 12 bytes
	cmpi.b		%d1,&0x7		# move all regs?
	bne.b		iea_dis_cont
	addq.l		&0x4,%d0		# 3 regs: 16 bytes
	bra.b		iea_dis_cont
# the instruction is an fmovm.x dynamic which can use many addressing
# modes and thus can have several different total instruction lengths.
# call fmovm_calc_ea which will go through the ea calc process and,
# as a by-product, will tell us how long the instruction is.
iea_dis_fmovm_data:
	clr.l		%d0
	bsr.l		fmovm_calc_ea
	mov.l		EXC_EXTWPTR(%a6),%d0	# length = final fetch ptr
	sub.l		EXC_PC(%a6),%d0		#          - starting PC
iea_dis_cont:
	mov.w		%d0,EXC_VOFF(%a6)	# store stack shift value

	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6

# here, we actually create the 8-word frame from the 4-word frame,
# with the "next PC" as additional info.
# the <ea> field is let as undefined.
	subq.l		&0x8,%sp		# make room for new stack
	mov.l		%d0,-(%sp)		# save d0
	mov.w		0xc(%sp),0x4(%sp)	# move SR
	mov.l		0xe(%sp),0x6(%sp)	# move Current PC
	clr.l		%d0
	mov.w		0x12(%sp),%d0		# fetch instruction length
	mov.l		0x6(%sp),0x10(%sp)	# move Current PC
	add.l		%d0,0x6(%sp)		# make Next PC
	mov.w		&0x402c,0xa(%sp)	# insert offset,frame format
	mov.l		(%sp)+,%d0		# restore d0

	bra.l		_real_fpu_disabled

##########
# Instruction-fetch access error while reading the opword: build a
# format-0x4 access error frame and exit through _real_access().
# FP state is restored only if the FPU is enabled (PCR bit 1 clear).
iea_iacc:
	movc		%pcr,%d0
	btst		&0x1,%d0		# FPU disabled?
	bne.b		iea_iacc_cont		# yes; skip FP restore
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1 on stack
iea_iacc_cont:
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6

	subq.w		&0x8,%sp		# make stack frame bigger
	mov.l		0x8(%sp),(%sp)		# store SR,hi(PC)
	mov.w		0xc(%sp),0x4(%sp)	# store lo(PC)
	mov.w		&0x4008,0x6(%sp)	# store voff
	mov.l		0x2(%sp),0x8(%sp)	# store ea
	mov.l		&0x09428001,0xc(%sp)	# store fslw

iea_acc_done:
	btst		&0x5,(%sp)		# user or supervisor mode?
	beq.b		iea_acc_done2		# user
	bset		&0x2,0xd(%sp)		# set supervisor TM bit

iea_acc_done2:
	bra.l		_real_access

# Data access error during <ea> calculation: a0 holds the fault address
# and d0 the FSLW fragment.  Rebuild an access error frame in place.
iea_dacc:
	lea		-LOCAL_SIZE(%a6),%sp

	movc		%pcr,%d1
	btst		&0x1,%d1		# FPU disabled?
	bne.b		iea_dacc_cont		# yes; skip FP restore
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1 on stack
	fmovm.l		LOCAL_SIZE+USER_FPCR(%sp),%fpcr,%fpsr,%fpiar # restore ctrl regs
iea_dacc_cont:
	mov.l		(%a6),%a6		# restore frame pointer

	mov.l		0x4+LOCAL_SIZE(%sp),-0x8+0x4+LOCAL_SIZE(%sp)
	mov.w		0x8+LOCAL_SIZE(%sp),-0x8+0x8+LOCAL_SIZE(%sp)
	mov.w		&0x4008,-0x8+0xa+LOCAL_SIZE(%sp)	# voff
	mov.l		%a0,-0x8+0xc+LOCAL_SIZE(%sp)		# ea
	mov.w		%d0,-0x8+0x10+LOCAL_SIZE(%sp)		# fslw hi
	mov.w		&0x0001,-0x8+0x12+LOCAL_SIZE(%sp)	# fslw lo

	movm.l		LOCAL_SIZE+EXC_DREGS(%sp),&0x0303 # restore d0-d1/a0-a1
	add.w		&LOCAL_SIZE-0x4,%sp

	bra.b		iea_acc_done

#########################################################################
# XDEF ****************************************************************	#
#	_fpsp_operr(): 060FPSP entry point for FP Operr exception.	#
#									#
#	This handler should be the first code executed upon taking the	#
#	FP Operand Error exception in an operating system.
#									#
# XREF ****************************************************************	#
#	_imem_read_long() - read instruction longword			#
#	fix_skewed_ops() - adjust src operand in fsave frame		#
#	_real_operr() - "callout" to operating system operr handler	#
#	_dmem_write_{byte,word,long}() - store data to mem (opclass 3)	#
#	store_dreg_{b,w,l}() - store data to data regfile (opclass 3)	#
#	facc_out_{b,w,l}() - store to memory took access error (opcl 3) #
#									#
# INPUT ***************************************************************	#
#	- The system stack contains the FP Operr exception frame	#
#	- The fsave frame contains the source operand			#
#									#
# OUTPUT **************************************************************	#
#	No access error:						#
#	- The system stack is unchanged					#
#	- The fsave frame contains the adjusted src op for opclass 0,2	#
#									#
# ALGORITHM ***********************************************************	#
#	In a system where the FP Operr exception is enabled, the goal	#
# is to get to the handler specified at _real_operr(). But, on the 060,	#
# for opclass zero and two instruction taking this exception, the	#
# input operand in the fsave frame may be incorrect for some cases	#
# and needs to be corrected. This handler calls fix_skewed_ops() to	#
# do just this and then exits through _real_operr().			#
#	For opclass 3 instructions, the 060 doesn't store the default	#
# operr result out to memory or data register file as it should.	#
# This code must emulate the move out before finally exiting through	#
# _real_inex(). The move out, if to memory, is performed using		#
# _mem_write() "callout" routines that may return a failing result.	#
# In this special case, the handler must exit through facc_out()	#
# which creates an access error stack frame from the current operr	#
# stack frame.								#
#									#
#########################################################################

	global		_fpsp_operr
_fpsp_operr:

	link.w		%a6,&-LOCAL_SIZE	# init stack frame

	fsave		FP_SRC(%a6)		# grab the "busy" frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack

# the FPIAR holds the "current PC" of the faulting instruction
	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)

	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch the instruction words
	mov.l		%d0,EXC_OPWORD(%a6)	# store OPWORD and EXTWORD

##############################################################################

	btst		&13,%d0			# is instr an fmove out?
	bne.b		foperr_out		# fmove out


# here, we simply see if the operand in the fsave frame needs to be "unskewed".
# this would be the case for opclass two operations with a source infinity or
# denorm operand in the sgl or dbl format. NANs also become skewed, but can't
# cause an operr so we don't need to check for them here.
	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		fix_skewed_ops		# fix src op

# common exit: restore state, hand the fixed fsave frame back to the FPU,
# and call out to the OS operand error handler.
foperr_exit:
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)

	unlk		%a6
	bra.l		_real_operr

########################################################################

#
# the hardware does not save the default result to memory on enabled
# operand error exceptions. we do this here before passing control to
# the user operand error handler.
#
# byte, word, and long destination format operations can pass
# through here. we simply need to test the sign of the src
# operand and save the appropriate minimum or maximum integer value
# to the effective address as pointed to by the stacked effective address.
#
# although packed opclass three operations can take operand error
# exceptions, they won't pass through here since they are caught
# first by the unsupported data format exception handler. that handler
# sends them directly to _real_operr() if necessary.
#
foperr_out:

# First compute the default result in L_SCR1.  An infinity or QNAN
# source stores the NAN's upper longword; otherwise the result is the
# maximum (0x7fffffff) or, for a negative source, minimum (0x80000000)
# integer, truncated below to the destination size.
	mov.w		FP_SRC_EX(%a6),%d1	# fetch exponent
	andi.w		&0x7fff,%d1
	cmpi.w		%d1,&0x7fff
	bne.b		foperr_out_not_qnan
# the operand is either an infinity or a QNAN.
	tst.l		FP_SRC_LO(%a6)
	bne.b		foperr_out_qnan
	mov.l		FP_SRC_HI(%a6),%d1
	andi.l		&0x7fffffff,%d1		# mantissa all zero => infinity
	beq.b		foperr_out_not_qnan
foperr_out_qnan:
	mov.l		FP_SRC_HI(%a6),L_SCR1(%a6)
	bra.b		foperr_out_jmp

foperr_out_not_qnan:
	mov.l		&0x7fffffff,%d1
	tst.b		FP_SRC_EX(%a6)		# source negative?
	bpl.b		foperr_out_not_qnan2
	addq.l		&0x1,%d1		# yes: 0x80000000
foperr_out_not_qnan2:
	mov.l		%d1,L_SCR1(%a6)

# dispatch on the destination format field of the opword.
foperr_out_jmp:
	bfextu		%d0{&19:&3},%d0		# extract dst format field
	mov.b		1+EXC_OPWORD(%a6),%d1	# extract <ea> mode,reg
	mov.w		(tbl_operr.b,%pc,%d0.w*2),%a0
	jmp		(tbl_operr.b,%pc,%a0)

tbl_operr:
	short		foperr_out_l - tbl_operr # long word integer
	short		tbl_operr - tbl_operr	# sgl prec shouldn't happen
	short		tbl_operr - tbl_operr	# ext prec shouldn't happen
	short		foperr_exit - tbl_operr	# packed won't enter here
	short		foperr_out_w - tbl_operr # word integer
	short		tbl_operr - tbl_operr	# dbl prec shouldn't happen
	short		foperr_out_b - tbl_operr # byte integer
	short		tbl_operr - tbl_operr	# packed won't enter here

# byte destination: store to a data register or out to memory.
foperr_out_b:
	mov.b		L_SCR1(%a6),%d0		# load positive default result
	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
	ble.b		foperr_out_b_save_dn	# yes
	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
	bsr.l		_dmem_write_byte	# write the default result

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_b		# yes

	bra.w		foperr_exit
foperr_out_b_save_dn:
	andi.w		&0x0007,%d1
	bsr.l		store_dreg_b		# store result to regfile
	bra.w		foperr_exit

# word destination: same pattern as byte.
foperr_out_w:
	mov.w		L_SCR1(%a6),%d0		# load positive default result
	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
	ble.b		foperr_out_w_save_dn	# yes
	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
	bsr.l		_dmem_write_word	# write the default result

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_w		# yes

	bra.w		foperr_exit
foperr_out_w_save_dn:
	andi.w		&0x0007,%d1
	bsr.l		store_dreg_w		# store result to regfile
	bra.w		foperr_exit

# long destination: same pattern as byte.
foperr_out_l:
	mov.l		L_SCR1(%a6),%d0		# load positive default result
	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
	ble.b		foperr_out_l_save_dn	# yes
	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
	bsr.l		_dmem_write_long	# write the default result

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_l		# yes

	bra.w		foperr_exit
foperr_out_l_save_dn:
	andi.w		&0x0007,%d1
	bsr.l		store_dreg_l		# store result to regfile
	bra.w		foperr_exit

#########################################################################
# XDEF ****************************************************************	#
#	_fpsp_snan(): 060FPSP entry point for FP SNAN exception.	#
#									#
#	This handler should be the first code executed upon taking the	#
#	FP Signalling NAN exception in an operating system.
#									#
# XREF ****************************************************************	#
#	_imem_read_long() - read instruction longword			#
#	fix_skewed_ops() - adjust src operand in fsave frame		#
#	_real_snan() - "callout" to operating system SNAN handler	#
#	_dmem_write_{byte,word,long}() - store data to mem (opclass 3)	#
#	store_dreg_{b,w,l}() - store data to data regfile (opclass 3)	#
#	facc_out_{b,w,l,d,x}() - store to mem took acc error (opcl 3)	#
#	_calc_ea_fout() - fix An if <ea> is -() or ()+; also get <ea>	#
#									#
# INPUT ***************************************************************	#
#	- The system stack contains the FP SNAN exception frame		#
#	- The fsave frame contains the source operand			#
#									#
# OUTPUT **************************************************************	#
#	No access error:						#
#	- The system stack is unchanged					#
#	- The fsave frame contains the adjusted src op for opclass 0,2	#
#									#
# ALGORITHM ***********************************************************	#
#	In a system where the FP SNAN exception is enabled, the goal	#
# is to get to the handler specified at _real_snan(). But, on the 060,	#
# for opclass zero and two instructions taking this exception, the	#
# input operand in the fsave frame may be incorrect for some cases	#
# and needs to be corrected. This handler calls fix_skewed_ops() to	#
# do just this and then exits through _real_snan().			#
#	For opclass 3 instructions, the 060 doesn't store the default	#
# SNAN result out to memory or data register file as it should.		#
# This code must emulate the move out before finally exiting through	#
# _real_snan(). The move out, if to memory, is performed using		#
# _mem_write() "callout" routines that may return a failing result.	#
# In this special case, the handler must exit through facc_out()	#
# which creates an access error stack frame from the current SNAN	#
# stack frame.								#
#	For the case of an extended precision opclass 3 instruction,	#
# if the effective addressing mode was -() or ()+, then the address	#
# register must get updated by calling _calc_ea_fout(). If the <ea>	#
# was -(a7) from supervisor mode, then the exception frame currently	#
# on the system stack must be carefully moved "down" to make room	#
# for the operand being moved.						#
#									#
#########################################################################

	global		_fpsp_snan
_fpsp_snan:

	link.w		%a6,&-LOCAL_SIZE	# init stack frame

	fsave		FP_SRC(%a6)		# grab the "busy" frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack

# the FPIAR holds the "current PC" of the faulting instruction
	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)

	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch the instruction words
	mov.l		%d0,EXC_OPWORD(%a6)	# store OPWORD and EXTWORD

##############################################################################

	btst		&13,%d0			# is instr an fmove out?
	bne.w		fsnan_out		# fmove out


# here, we simply see if the operand in the fsave frame needs to be "unskewed".
# this would be the case for opclass two operations with a source infinity or
# denorm operand in the sgl or dbl format. NANs also become skewed and must be
# fixed here.
	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		fix_skewed_ops		# fix src op

# common exit: restore state, hand the fixed fsave frame back to the FPU,
# and call out to the OS SNAN handler.
fsnan_exit:
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)

	unlk		%a6
	bra.l		_real_snan

########################################################################

#
# the hardware does not save the default result to memory on enabled
# snan exceptions. we do this here before passing control to
# the user snan handler.
#
# byte, word, long, and packed destination format operations can pass
# through here. since packed format operations already were handled by
# fpsp_unsupp(), then we need to do nothing else for them here.
# for byte, word, and long, we simply need to test the sign of the src
# operand and save the appropriate minimum or maximum integer value
# to the effective address as pointed to by the stacked effective address.
#
fsnan_out:

# dispatch on the destination format field of the opword.
	bfextu		%d0{&19:&3},%d0		# extract dst format field
	mov.b		1+EXC_OPWORD(%a6),%d1	# extract <ea> mode,reg
	mov.w		(tbl_snan.b,%pc,%d0.w*2),%a0
	jmp		(tbl_snan.b,%pc,%a0)

tbl_snan:
	short		fsnan_out_l - tbl_snan	# long word integer
	short		fsnan_out_s - tbl_snan	# sgl prec shouldn't happen
	short		fsnan_out_x - tbl_snan	# ext prec shouldn't happen
	short		tbl_snan - tbl_snan	# packed needs no help
	short		fsnan_out_w - tbl_snan	# word integer
	short		fsnan_out_d - tbl_snan	# dbl prec shouldn't happen
	short		fsnan_out_b - tbl_snan	# byte integer
	short		tbl_snan - tbl_snan	# packed needs no help

# byte destination: default result is the SNAN's upper byte with the
# quiet bit set; store to data register or memory.
fsnan_out_b:
	mov.b		FP_SRC_HI(%a6),%d0	# load upper byte of SNAN
	bset		&6,%d0			# set SNAN bit
	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
	ble.b		fsnan_out_b_dn		# yes
	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
	bsr.l		_dmem_write_byte	# write the default result

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_b		# yes

	bra.w		fsnan_exit
fsnan_out_b_dn:
	andi.w		&0x0007,%d1
	bsr.l		store_dreg_b		# store result to regfile
	bra.w		fsnan_exit

# word destination: same pattern, quiet bit is bit 14 of the upper word.
fsnan_out_w:
	mov.w		FP_SRC_HI(%a6),%d0	# load upper word of SNAN
	bset		&14,%d0			# set SNAN bit
	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
	ble.b		fsnan_out_w_dn		# yes
	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
	bsr.l		_dmem_write_word	# write the default result

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_w		# yes

	bra.w		fsnan_exit
fsnan_out_w_dn:
	andi.w		&0x0007,%d1
	bsr.l		store_dreg_w		# store result to regfile
	bra.w		fsnan_exit

# long destination: same pattern, quiet bit is bit 30 of the longword.
fsnan_out_l:
	mov.l		FP_SRC_HI(%a6),%d0	# load upper longword of SNAN
	bset		&30,%d0			# set SNAN bit
	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
	ble.b		fsnan_out_l_dn		# yes
	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
	bsr.l		_dmem_write_long	# write the default result

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_l		# yes

	bra.w		fsnan_exit
fsnan_out_l_dn:
	andi.w		&0x0007,%d1
	bsr.l		store_dreg_l		# store result to regfile
	bra.w		fsnan_exit

# single-precision destination: build a sgl-format quiet NAN from the
# extended SNAN (sign, max exponent, quiet bit, shifted mantissa).
fsnan_out_s:
	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
	ble.b		fsnan_out_d_dn		# yes
	mov.l		FP_SRC_EX(%a6),%d0	# fetch SNAN sign
	andi.l		&0x80000000,%d0		# keep sign
	ori.l		&0x7fc00000,%d0		# insert new exponent,SNAN bit
	mov.l		FP_SRC_HI(%a6),%d1	# load mantissa
	lsr.l		&0x8,%d1		# shift mantissa for sgl
	or.l		%d1,%d0			# create sgl SNAN
	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
	bsr.l		_dmem_write_long	# write the default result

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_l		# yes

	bra.w		fsnan_exit
fsnan_out_d_dn:
	mov.l		FP_SRC_EX(%a6),%d0	# fetch SNAN sign
	andi.l		&0x80000000,%d0		# keep sign
	ori.l		&0x7fc00000,%d0		# insert new exponent,SNAN bit
	mov.l		%d1,-(%sp)		# save <ea> mode,reg over scratch use
	mov.l		FP_SRC_HI(%a6),%d1	# load mantissa
	lsr.l		&0x8,%d1		# shift mantissa for sgl
	or.l		%d1,%d0			# create sgl SNAN
	mov.l		(%sp)+,%d1
	andi.w		&0x0007,%d1
	bsr.l		store_dreg_l		# store result to regfile
	bra.w		fsnan_exit

# double-precision destination: assemble the 8-byte quiet NAN in
# FP_SCR0 (hi/lo longwords built by an 11-bit shift of the mantissa),
# then write it out in one _dmem_write.
fsnan_out_d:
	mov.l		FP_SRC_EX(%a6),%d0	# fetch SNAN sign
	andi.l		&0x80000000,%d0		# keep sign
	ori.l		&0x7ff80000,%d0		# insert new exponent,SNAN bit
	mov.l		FP_SRC_HI(%a6),%d1	# load hi mantissa
	mov.l		%d0,FP_SCR0_EX(%a6)	# store to temp space
	mov.l		&11,%d0			# load shift amt
	lsr.l		%d0,%d1
	or.l		%d1,FP_SCR0_EX(%a6)	# create dbl hi
	mov.l		FP_SRC_HI(%a6),%d1	# load hi mantissa
	andi.l		&0x000007ff,%d1
	ror.l		%d0,%d1
	mov.l		%d1,FP_SCR0_HI(%a6)	# store to temp space
	mov.l		FP_SRC_LO(%a6),%d1	# load lo mantissa
	lsr.l		%d0,%d1
	or.l		%d1,FP_SCR0_HI(%a6)	# create dbl lo
	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
	mov.l		EXC_EA(%a6),%a1		# pass: dst addr
	movq.l		&0x8,%d0		# pass: size of 8 bytes
	bsr.l		_dmem_write		# write the default result

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_d		# yes

	bra.w		fsnan_exit

# for extended precision, if the addressing mode is pre-decrement or
# post-increment, then the address register did not get updated.
# in addition, for pre-decrement, the stacked <ea> is incorrect.
fsnan_out_x:
	clr.b		SPCOND_FLG(%a6)		# clear special case flag

# build the extended quiet NAN in FP_SCR0.
	mov.w		FP_SRC_EX(%a6),FP_SCR0_EX(%a6)
	clr.w		2+FP_SCR0(%a6)
	mov.l		FP_SRC_HI(%a6),%d0
	bset		&30,%d0			# set SNAN (quiet) bit
	mov.l		%d0,FP_SCR0_HI(%a6)
	mov.l		FP_SRC_LO(%a6),FP_SCR0_LO(%a6)

	btst		&0x5,EXC_SR(%a6)	# supervisor mode exception?
	bne.b		fsnan_out_x_s		# yes

# user mode: expose usp (and a6) to _calc_ea_fout through the save area
# so -(aN)/(aN)+ updates are applied to the right registers.
	mov.l		%usp,%a0		# fetch user stack pointer
	mov.l		%a0,EXC_A7(%a6)		# save on stack for calc_ea()
	mov.l		(%a6),EXC_A6(%a6)

	bsr.l		_calc_ea_fout		# find the correct ea,update An
	mov.l		%a0,%a1
	mov.l		%a0,EXC_EA(%a6)		# stack correct <ea>

	mov.l		EXC_A7(%a6),%a0
	mov.l		%a0,%usp		# restore user stack pointer
	mov.l		EXC_A6(%a6),(%a6)

fsnan_out_x_save:
	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
	movq.l		&0xc,%d0		# pass: size of extended
	bsr.l		_dmem_write		# write the default result

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_x		# yes

	bra.w		fsnan_exit

fsnan_out_x_s:
	mov.l		(%a6),EXC_A6(%a6)

	bsr.l		_calc_ea_fout		# find the correct ea,update An
	mov.l		%a0,%a1
	mov.l		%a0,EXC_EA(%a6)		# stack correct <ea>

	mov.l		EXC_A6(%a6),(%a6)

	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
	bne.b		fsnan_out_x_save	# no

# the operation was "fmove.x SNAN,-(a7)" from supervisor mode.
# the 12-byte operand must land where the stack pointer will point, so
# shift the exception frame down 12 bytes and deposit the NAN where the
# frame used to be, then exit with the frame already restored.
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)

	mov.l		EXC_A6(%a6),%a6		# restore frame pointer

	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
	mov.l		LOCAL_SIZE+EXC_PC+0x2(%sp),LOCAL_SIZE+EXC_PC+0x2-0xc(%sp)
	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)

	mov.l		LOCAL_SIZE+FP_SCR0_EX(%sp),LOCAL_SIZE+EXC_SR(%sp)
	mov.l		LOCAL_SIZE+FP_SCR0_HI(%sp),LOCAL_SIZE+EXC_PC+0x2(%sp)
	mov.l		LOCAL_SIZE+FP_SCR0_LO(%sp),LOCAL_SIZE+EXC_EA(%sp)

	add.l		&LOCAL_SIZE-0x8,%sp

	bra.l		_real_snan

#########################################################################
# XDEF ****************************************************************	#
#	_fpsp_inex(): 060FPSP entry point for FP Inexact exception.	#
#									#
#	This handler should be the first code executed upon taking the	#
#	FP Inexact exception in an operating system.
#									#
# XREF ****************************************************************	#
#	_imem_read_long() - read instruction longword			#
#	fix_skewed_ops() - adjust src operand in fsave frame		#
#	set_tag_x() - determine optype of src/dst operands		#
#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
#	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
#	load_fpn2() - load dst operand from FP regfile			#
#	smovcr() - emulate an "fmovcr" instruction			#
#	fout() - emulate an opclass 3 instruction			#
#	tbl_unsupp - add of table of emulation routines for opclass 0,2	#
#	_real_inex() - "callout" to operating system inexact handler	#
#									#
# INPUT ***************************************************************	#
#	- The system stack contains the FP Inexact exception frame	#
#	- The fsave frame contains the source operand			#
#									#
# OUTPUT **************************************************************	#
#	- The system stack is unchanged					#
#	- The fsave frame contains the adjusted src op for opclass 0,2	#
#									#
# ALGORITHM ***********************************************************	#
#	In a system where the FP Inexact exception is enabled, the goal	#
# is to get to the handler specified at _real_inex(). But, on the 060,	#
# for opclass zero and two instruction taking this exception, the	#
# hardware doesn't store the correct result to the destination FP	#
# register as did the '040 and '881/2. This handler must emulate the	#
# instruction in order to get this value and then store it to the	#
# correct register before calling _real_inex().				#
#	For opclass 3 instructions, the 060 doesn't store the default	#
# inexact result out to memory or data register file as it should.	#
# This code must emulate the move out by calling fout() before finally	#
# exiting through _real_inex().						#
#									#
#########################################################################

	global		_fpsp_inex
_fpsp_inex:

	link.w		%a6,&-LOCAL_SIZE	# init stack frame

	fsave		FP_SRC(%a6)		# grab the "busy" frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack

# the FPIAR holds the "current PC" of the faulting instruction
	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)

	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch the instruction words
	mov.l		%d0,EXC_OPWORD(%a6)	# store OPWORD and EXTWORD

##############################################################################

	btst		&13,%d0			# is instr an fmove out?
	bne.w		finex_out		# fmove out


# the hardware, for "fabs" and "fneg" w/ a long source format, puts the
# longword integer directly into the upper longword of the mantissa along
# w/ an exponent value of 0x401e. we convert this to extended precision here.
	bfextu		%d0{&19:&3},%d0		# fetch instr size
	bne.b		finex_cont		# instr size is not long
	cmpi.w		FP_SRC_EX(%a6),&0x401e	# is exponent 0x401e?
	bne.b		finex_cont		# no
	fmov.l		&0x0,%fpcr
	fmov.l		FP_SRC_HI(%a6),%fp0	# load integer src
	fmov.x		%fp0,FP_SRC(%a6)	# store integer as extended precision
	mov.w		&0xe001,0x2+FP_SRC(%a6)

finex_cont:
	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		fix_skewed_ops		# fix src op

# Here, we zero the ccode and exception byte field since we're going to
# emulate the whole instruction. Notice, though, that we don't kill the
# INEX1 bit. This is because a packed op has long since been converted
# to extended before arriving here. Therefore, we need to retain the
# INEX1 bit from when the operand was first converted.
	andi.l		&0x00ff01ff,USER_FPSR(%a6) # zero all but accured field

	fmov.l		&0x0,%fpcr		# zero current control regs
	fmov.l		&0x0,%fpsr

	bfextu		EXC_EXTWORD(%a6){&0:&6},%d1 # extract upper 6 of cmdreg
	cmpi.b		%d1,&0x17		# is op an fmovecr?
	beq.w		finex_fmovcr		# yes

	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		set_tag_x		# tag the operand type
	mov.b		%d0,STAG(%a6)		# maybe NORM,DENORM

# bits four and five of the fp extension word separate the monadic and dyadic
# operations that can pass through fpsp_inex(). remember that fcmp and ftst
# will never take this exception, but fsincos will.
	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
	beq.b		finex_extract		# monadic

	btst		&0x4,1+EXC_CMDREG(%a6)	# is operation an fsincos?
	bne.b		finex_extract		# yes

	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
	bsr.l		load_fpn2		# load dst into FP_DST

	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
	bsr.l		set_tag_x		# tag the operand type
	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
	bne.b		finex_op2_done		# no
	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
finex_op2_done:
	mov.b		%d0,DTAG(%a6)		# save dst optype tag

# emulate the instruction via the opclass 0/2 routine table.
finex_extract:
	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode

	mov.b		1+EXC_CMDREG(%a6),%d1
	andi.w		&0x007f,%d1		# extract extension

	lea		FP_SRC(%a6),%a0
	lea		FP_DST(%a6),%a1

	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
	jsr		(tbl_unsupp.l,%pc,%d1.l*1)

# the operation has been emulated. the result is in fp0.
finex_save:
	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # extract dst FP reg
	bsr.l		store_fpreg		# store result to regfile

finex_exit:
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)

	unlk		%a6
	bra.l		_real_inex

finex_fmovcr:
	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec,mode
	mov.b		1+EXC_CMDREG(%a6),%d1
	andi.l		&0x0000007f,%d1		# pass rom offset
	bsr.l		smovcr			# emulate the fmovecr
	bra.b		finex_save

########################################################################

#
# the hardware does not save the default result to memory on enabled
# inexact exceptions. we do this here before passing control to
# the user inexact handler.
#
# byte, word, and long destination format operations can pass
# through here. so can double and single precision.
# although packed opclass three operations can take inexact
# exceptions, they won't pass through here since they are caught
# first by the unsupported data format exception handler. that handler
# sends them directly to _real_inex() if necessary.
#
finex_out:

	mov.b		&NORM,STAG(%a6)		# src is a NORM

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec,mode

	andi.l		&0xffff00ff,USER_FPSR(%a6) # zero exception field

	lea		FP_SRC(%a6),%a0		# pass ptr to src operand

	bsr.l		fout			# store the default result

	bra.b		finex_exit

#########################################################################
# XDEF ****************************************************************	#
#	_fpsp_dz(): 060FPSP entry point for FP DZ exception.		#
#									#
#	This handler should be the first code executed upon taking	#
#	the FP DZ exception in an operating system.
#									#
# XREF **************************************************************** #
#	_imem_read_long() - read instruction longword from memory	#
#	fix_skewed_ops() - adjust fsave operand				#
#	_real_dz() - "callout" exit point from FP DZ handler		#
#									#
# INPUT *************************************************************** #
#	- The system stack contains the FP DZ exception stack.		#
#	- The fsave frame contains the source operand.			#
#									#
# OUTPUT ************************************************************** #
#	- The system stack contains the FP DZ exception stack.		#
#	- The fsave frame contains the adjusted source operand.	#
#									#
# ALGORITHM *********************************************************** #
#	In a system where the DZ exception is enabled, the goal is to	#
# get to the handler specified at _real_dz(). But, on the 060, when the #
# exception is taken, the input operand in the fsave state frame may	#
# be incorrect for some cases and need to be adjusted. So, this package #
# adjusts the operand using fix_skewed_ops() and then branches to	#
# _real_dz().								#
#									#
#########################################################################

	global		_fpsp_dz
_fpsp_dz:

	link.w		%a6,&-LOCAL_SIZE	# init stack frame

	fsave		FP_SRC(%a6)		# grab the "busy" frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack

# the FPIAR holds the "current PC" of the faulting instruction
	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)

	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch the instruction words
	mov.l		%d0,EXC_OPWORD(%a6)

##############################################################################

# here, we simply see if the operand in the fsave frame needs to be "unskewed".
# this would be the case for opclass two operations with a source zero
# in the sgl or dbl format.
	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		fix_skewed_ops		# fix src op

fdz_exit:
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)

	unlk		%a6
	bra.l		_real_dz

#########################################################################
# XDEF **************************************************************** #
#	_fpsp_fline(): 060FPSP entry point for "Line F emulator"	#
#		       exception when the "reduced" version of the	#
#		       FPSP is implemented that does not emulate	#
#		       FP unimplemented instructions.			#
#									#
#	This handler should be the first code executed upon taking a	#
#	"Line F Emulator" exception in an operating system integrating	#
#	the reduced version of 060FPSP.
#									#
# XREF **************************************************************** #
#	_real_fpu_disabled() - Handle "FPU disabled" exceptions	#
#	_real_fline() - Handle all other cases (treated equally)	#
#									#
# INPUT *************************************************************** #
#	- The system stack contains a "Line F Emulator" exception	#
#	  stack frame.							#
#									#
# OUTPUT ************************************************************** #
#	- The system stack is unchanged.				#
#									#
# ALGORITHM *********************************************************** #
#	When a "Line F Emulator" exception occurs in a system where	#
# "FPU Unimplemented" instructions will not be emulated, the exception	#
# can occur because the FPU is disabled or the instruction is to be	#
# classified as "Line F". This module determines which case exists and	#
# calls the appropriate "callout".					#
#									#
#########################################################################

	global		_fpsp_fline
_fpsp_fline:

# check to see if the FPU is disabled. if so, jump to the OS entry
# point for that condition.
# NOTE(review): 0x6(%sp) appears to be the format/vector word of the
# exception stack frame; 0x402c would be the format-4 frame with the
# Line-F vector offset (0x2c) -- confirm against the MC68060 UM.
	cmpi.w		0x6(%sp),&0x402c
	beq.l		_real_fpu_disabled

	bra.l		_real_fline

#########################################################################
# XDEF **************************************************************** #
#	_dcalc_ea(): calc correct <ea> from <ea> stacked on exception	#
#									#
# XREF **************************************************************** #
#	inc_areg() - increment an address register			#
#	dec_areg() - decrement an address register			#
#									#
# INPUT *************************************************************** #
#	d0 = number of bytes to adjust <ea> by				#
#									#
# OUTPUT ************************************************************** #
#	None								#
#									#
# ALGORITHM *********************************************************** #
# "Dummy" CALCulate Effective Address:					#
#	The stacked <ea> for FP unimplemented instructions and opclass	#
#	two packed instructions is correct with the exception of...	#
#									#
#	1) -(An)   : The register is not updated regardless of size.	#
#		     Also, for extended precision and packed, the	#
#		     stacked <ea> value is 8 bytes too big		#
#	2) (An)+   : The register is not updated.			#
#	3) #<data> : The upper longword of the immediate operand is	#
#		     stacked b,w,l and s sizes are completely stacked.	#
#		     d,x, and p are not.				#
#									#
#########################################################################

	global		_dcalc_ea
_dcalc_ea:
	mov.l		%d0,%a0			# move # bytes to %a0

	mov.b		1+EXC_OPWORD(%a6),%d0	# fetch opcode word
	mov.l		%d0,%d1			# make a copy

	andi.w		&0x38,%d0		# extract mode field
	andi.l		&0x7,%d1		# extract reg field

	cmpi.b		%d0,&0x18		# is mode (An)+ ?
	beq.b		dcea_pi			# yes

	cmpi.b		%d0,&0x20		# is mode -(An) ?
	beq.b		dcea_pd			# yes

	or.w		%d1,%d0			# concat mode,reg
	cmpi.b		%d0,&0x3c		# is mode #<data>?

	beq.b		dcea_imm		# yes

	mov.l		EXC_EA(%a6),%a0		# return <ea>
	rts

# need to set immediate data flag here since we'll need to do
# an imem_read to fetch this later.
dcea_imm:
	mov.b		&immed_flg,SPCOND_FLG(%a6)
# immediate data lives just past the opword/extword pair, i.e. at
# (faulting PC held in FPIAR) + 4.
	lea		([USER_FPIAR,%a6],0x4),%a0 # no; return <ea>
	rts

# here, the <ea> is stacked correctly. however, we must update the
# address register...
dcea_pi:
	mov.l		%a0,%d0			# pass amt to inc by
	bsr.l		inc_areg		# inc addr register

	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
	rts

# the <ea> is stacked correctly for all but extended and packed which
# the <ea>s are 8 bytes too large.
# it would make no sense to have a pre-decrement to a7 in supervisor
# mode so we don't even worry about this tricky case here : )
dcea_pd:
	mov.l		%a0,%d0			# pass amt to dec by
	bsr.l		dec_areg		# dec addr register

	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct

	cmpi.b		%d0,&0xc		# is opsize ext or packed?
	beq.b		dcea_pd2		# yes
	rts
dcea_pd2:
	sub.l		&0x8,%a0		# correct <ea>
	mov.l		%a0,EXC_EA(%a6)		# put correct <ea> on stack
	rts

#########################################################################
# XDEF **************************************************************** #
#	_calc_ea_fout(): calculate correct stacked <ea> for extended	#
#			 and packed data opclass 3 operations.
#									#
# XREF **************************************************************** #
#	None								#
#									#
# INPUT *************************************************************** #
#	None								#
#									#
# OUTPUT ************************************************************** #
#	a0 = return correct effective address				#
#									#
# ALGORITHM *********************************************************** #
#	For opclass 3 extended and packed data operations, the <ea>	#
# stacked for the exception is incorrect for -(an) and (an)+ addressing #
# modes. Also, while we're at it, the index register itself must get	#
# updated.								#
#	So, for -(an), we must subtract 8 off of the stacked <ea> value	#
# and return that value as the correct <ea> and store that value in An. #
# For (an)+, the stacked <ea> is correct but we must adjust An by +12.	#
#									#
#########################################################################

# This calc_ea is currently used to retrieve the correct <ea>
# for fmove outs of type extended and packed.
	global		_calc_ea_fout
_calc_ea_fout:
	mov.b		1+EXC_OPWORD(%a6),%d0	# fetch opcode word
	mov.l		%d0,%d1			# make a copy

	andi.w		&0x38,%d0		# extract mode field
	andi.l		&0x7,%d1		# extract reg field

	cmpi.b		%d0,&0x18		# is mode (An)+ ?
	beq.b		ceaf_pi			# yes

	cmpi.b		%d0,&0x20		# is mode -(An) ?
	beq.w		ceaf_pd			# yes

	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
	rts

# (An)+ : extended and packed fmove out
#	: stacked <ea> is correct
#	: "An" not updated
# dispatch on the register number through a short-offset jump table.
ceaf_pi:
	mov.w		(tbl_ceaf_pi.b,%pc,%d1.w*2),%d1
	mov.l		EXC_EA(%a6),%a0
	jmp		(tbl_ceaf_pi.b,%pc,%d1.w*1)

	swbeg		&0x8
tbl_ceaf_pi:
	short		ceaf_pi0 - tbl_ceaf_pi
	short		ceaf_pi1 - tbl_ceaf_pi
	short		ceaf_pi2 - tbl_ceaf_pi
	short		ceaf_pi3 - tbl_ceaf_pi
	short		ceaf_pi4 - tbl_ceaf_pi
	short		ceaf_pi5 - tbl_ceaf_pi
	short		ceaf_pi6 - tbl_ceaf_pi
	short		ceaf_pi7 - tbl_ceaf_pi

# a0/a1/a6/a7 live in the exception save area; a2-a5 are still live in
# the actual registers, so they are adjusted directly.
ceaf_pi0:
	addi.l		&0xc,EXC_DREGS+0x8(%a6)
	rts
ceaf_pi1:
	addi.l		&0xc,EXC_DREGS+0xc(%a6)
	rts
ceaf_pi2:
	add.l		&0xc,%a2
	rts
ceaf_pi3:
	add.l		&0xc,%a3
	rts
ceaf_pi4:
	add.l		&0xc,%a4
	rts
ceaf_pi5:
	add.l		&0xc,%a5
	rts
ceaf_pi6:
	addi.l		&0xc,EXC_A6(%a6)
	rts
ceaf_pi7:
	mov.b		&mia7_flg,SPCOND_FLG(%a6)
	addi.l		&0xc,EXC_A7(%a6)
	rts

# -(An) : extended and packed fmove out
#	: stacked <ea> = actual <ea> + 8
#	: "An" not updated
ceaf_pd:
	mov.w		(tbl_ceaf_pd.b,%pc,%d1.w*2),%d1
	mov.l		EXC_EA(%a6),%a0
	sub.l		&0x8,%a0
	sub.l		&0x8,EXC_EA(%a6)
	jmp		(tbl_ceaf_pd.b,%pc,%d1.w*1)

	swbeg		&0x8
tbl_ceaf_pd:
	short		ceaf_pd0 - tbl_ceaf_pd
	short		ceaf_pd1 - tbl_ceaf_pd
	short		ceaf_pd2 - tbl_ceaf_pd
	short		ceaf_pd3 - tbl_ceaf_pd
	short		ceaf_pd4 - tbl_ceaf_pd
	short		ceaf_pd5 - tbl_ceaf_pd
	short		ceaf_pd6 - tbl_ceaf_pd
	short		ceaf_pd7 - tbl_ceaf_pd

ceaf_pd0:
	mov.l		%a0,EXC_DREGS+0x8(%a6)
	rts
ceaf_pd1:
	mov.l		%a0,EXC_DREGS+0xc(%a6)
	rts
ceaf_pd2:
	mov.l		%a0,%a2
	rts
ceaf_pd3:
	mov.l		%a0,%a3
	rts
ceaf_pd4:
	mov.l		%a0,%a4
	rts
ceaf_pd5:
	mov.l		%a0,%a5
	rts
ceaf_pd6:
	mov.l		%a0,EXC_A6(%a6)
	rts
ceaf_pd7:
	mov.l		%a0,EXC_A7(%a6)
	mov.b		&mda7_flg,SPCOND_FLG(%a6)
	rts

#
# This table holds the offsets of the emulation routines for each individual
# math operation relative to the address of this table. Included are
# routines like fadd/fmul/fabs. The transcendentals ARE NOT. This is because
# this table is for the version of the 060FPSP without transcendentals.
# The location within the table is determined by the extension bits of the
# operation longword.
# (Entries that read "tbl_unsupp - tbl_unsupp" are unimplemented here;
# a zero offset dispatches back to the table itself.)
#

	swbeg		&109
tbl_unsupp:
	long		fin - tbl_unsupp	# 00: fmove
	long		fint - tbl_unsupp	# 01: fint
	long		tbl_unsupp - tbl_unsupp	# 02: fsinh
	long		fintrz - tbl_unsupp	# 03: fintrz
	long		fsqrt - tbl_unsupp	# 04: fsqrt
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp	# 06: flognp1
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp	# 08: fetoxm1
	long		tbl_unsupp - tbl_unsupp	# 09: ftanh
	long		tbl_unsupp - tbl_unsupp	# 0a: fatan
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp	# 0c: fasin
	long		tbl_unsupp - tbl_unsupp	# 0d: fatanh
	long		tbl_unsupp - tbl_unsupp	# 0e: fsin
	long		tbl_unsupp - tbl_unsupp	# 0f: ftan
	long		tbl_unsupp - tbl_unsupp	# 10: fetox
	long		tbl_unsupp - tbl_unsupp	# 11: ftwotox
	long		tbl_unsupp - tbl_unsupp	# 12: ftentox
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp	# 14: flogn
	long		tbl_unsupp - tbl_unsupp	# 15: flog10
	long		tbl_unsupp - tbl_unsupp	# 16: flog2
	long		tbl_unsupp - tbl_unsupp
	long		fabs - tbl_unsupp	# 18: fabs
	long		tbl_unsupp - tbl_unsupp	# 19: fcosh
	long		fneg - tbl_unsupp	# 1a: fneg
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp	# 1c: facos
	long		tbl_unsupp - tbl_unsupp	# 1d: fcos
	long		tbl_unsupp - tbl_unsupp	# 1e: fgetexp
	long		tbl_unsupp - tbl_unsupp	# 1f: fgetman
	long		fdiv - tbl_unsupp	# 20: fdiv
	long		tbl_unsupp - tbl_unsupp	# 21: fmod
	long		fadd - tbl_unsupp	# 22: fadd
	long		fmul - tbl_unsupp	# 23: fmul
	long		fsgldiv - tbl_unsupp	# 24: fsgldiv
	long		tbl_unsupp - tbl_unsupp	# 25: frem
	long		tbl_unsupp - tbl_unsupp	# 26: fscale
	long		fsglmul - tbl_unsupp	# 27: fsglmul
	long		fsub - tbl_unsupp	# 28: fsub
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp	# 30: fsincos
	long		tbl_unsupp - tbl_unsupp	# 31: fsincos
	long		tbl_unsupp - tbl_unsupp	# 32: fsincos
	long		tbl_unsupp - tbl_unsupp	# 33: fsincos
	long		tbl_unsupp - tbl_unsupp	# 34: fsincos
	long		tbl_unsupp - tbl_unsupp	# 35: fsincos
	long		tbl_unsupp - tbl_unsupp	# 36: fsincos
	long		tbl_unsupp - tbl_unsupp	# 37: fsincos
	long		fcmp - tbl_unsupp	# 38: fcmp
	long		tbl_unsupp - tbl_unsupp
	long		ftst - tbl_unsupp	# 3a: ftst
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp
	long		fsin - tbl_unsupp	# 40: fsmove
	long		fssqrt - tbl_unsupp	# 41: fssqrt
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp
	long		fdin - tbl_unsupp	# 44: fdmove
	long		fdsqrt - tbl_unsupp	# 45: fdsqrt
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp
	long		fsabs - tbl_unsupp	# 58: fsabs
	long		tbl_unsupp - tbl_unsupp
	long		fsneg - tbl_unsupp	# 5a: fsneg
	long		tbl_unsupp - tbl_unsupp
	long		fdabs - tbl_unsupp	# 5c: fdabs
	long		tbl_unsupp - tbl_unsupp
	long		fdneg - tbl_unsupp	# 5e: fdneg
	long		tbl_unsupp - tbl_unsupp
	long		fsdiv - tbl_unsupp	# 60: fsdiv
	long		tbl_unsupp - tbl_unsupp
	long		fsadd - tbl_unsupp	# 62: fsadd
	long		fsmul - tbl_unsupp	# 63: fsmul
	long		fddiv - tbl_unsupp	# 64: fddiv
	long		tbl_unsupp - tbl_unsupp
	long		fdadd - tbl_unsupp	# 66: fdadd
	long		fdmul - tbl_unsupp	# 67: fdmul
	long		fssub - tbl_unsupp	# 68: fssub
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp
	long		tbl_unsupp - tbl_unsupp
	long		fdsub - tbl_unsupp	# 6c: fdsub

#################################################
# Add this here so non-fp modules can compile.
# (smovcr is called from fpsp_inex.)
	global		smovcr
# smovcr: stub only. The reduced (non-transcendental) 060FPSP build does
# not emulate fmovecr; this self-branch exists solely so that non-fp
# modules link. It must never actually be reached at run time.
smovcr:
	bra.b		smovcr

#########################################################################
# XDEF **************************************************************** #
#	fmovm_dynamic(): emulate "fmovm" dynamic instruction		#
#									#
# XREF **************************************************************** #
#	fetch_dreg() - fetch data register				#
#	{i,d,}mem_read() - fetch data from memory			#
#	_mem_write() - write data to memory				#
#	iea_iacc() - instruction memory access error occurred		#
#	iea_dacc() - data memory access error occurred			#
#	restore() - restore An index regs if access error occurred	#
#									#
# INPUT *************************************************************** #
#	None								#
#									#
# OUTPUT ************************************************************** #
#	If instr is "fmovm Dn,-(A7)" from supervisor mode,		#
#		d0 = size of dump					#
#		d1 = Dn							#
#	Else if instruction access error,				#
#		d0 = FSLW						#
#	Else if data access error,					#
#		d0 = FSLW						#
#		a0 = address of fault					#
#	Else								#
#		none.							#
#									#
# ALGORITHM *********************************************************** #
#	The effective address must be calculated since this is entered	#
# from an "Unimplemented Effective Address" exception handler. So, we	#
# have our own fcalc_ea() routine here. If an access error is flagged	#
# by a _{i,d,}mem_read() call, we must exit through the special	#
# handler.								#
#	The data register is determined and its value loaded to get the #
# string of FP registers affected. This value is used as an index into	#
# a lookup table such that we can determine the number of bytes	#
# involved.								#
#	If the instruction is "fmovm.x <ea>,Dn", a _mem_read() is used	#
# to read in all FP values. Again, _mem_read() may fail and require a	#
# special exit.								#
#	If the instruction is "fmovm.x DN,<ea>", a _mem_write() is used	#
# to write all FP values. _mem_write() may also fail.			#
#	If the instruction is "fmovm.x DN,-(a7)" from supervisor mode,	#
# then we return the size of the dump and the string to the caller	#
# so that the move can occur outside of this routine. This special	#
# case is required so that moves to the system stack are handled	#
# correctly.								#
#									#
# DYNAMIC:								#
#	fmovm.x	dn, <ea>						#
#	fmovm.x	<ea>, dn						#
#									#
#	<WORD 1>		<WORD2>					#
#	1111 0010 00 |<ea>|	11@& 1000 0$$$ 0000			#
#									#
#	& = (0): predecrement addressing mode				#
#	    (1): postincrement or control addressing mode		#
#	@ = (0): move listed regs from memory to the FPU		#
#	    (1): move listed regs from the FPU to memory		#
#	$$$    : index of data register holding reg select mask	#
#									#
# NOTES:								#
#	If the data register holds a zero, then the			#
#	instruction is a nop.						#
#									#
#########################################################################

	global		fmovm_dynamic
fmovm_dynamic:

# extract the data register in which the bit string resides...
	mov.b		1+EXC_EXTWORD(%a6),%d1	# fetch extword
	andi.w		&0x70,%d1		# extract reg bits
	lsr.b		&0x4,%d1		# shift into lo bits

# fetch the bit string into d0...
	bsr.l		fetch_dreg		# fetch reg string

	andi.l		&0x000000ff,%d0		# keep only lo byte

	mov.l		%d0,-(%sp)		# save strg
	mov.b		(tbl_fmovm_size.w,%pc,%d0),%d0 # look up dump size in bytes
	mov.l		%d0,-(%sp)		# save size
	bsr.l		fmovm_calc_ea		# calculate <ea>
	mov.l		(%sp)+,%d0		# restore size
	mov.l		(%sp)+,%d1		# restore strg

# if the bit string is a zero, then the operation is a no-op
# but, make sure that we've calculated ea and advanced the opword pointer
	beq.w		fmovm_data_done

# separate move ins from move outs...
# (bit 5 of the extword hi byte is the direction bit "@": clear = mem->FPU)
	btst		&0x5,EXC_EXTWORD(%a6)	# is it a move in or out?
	beq.w		fmovm_data_in		# it's a move in

#############
# MOVE OUT: #
#############
fmovm_data_out:
	btst		&0x4,EXC_EXTWORD(%a6)	# control or predecrement?
	bne.w		fmovm_out_ctrl		# control

############################
fmovm_out_predec:
# for predecrement mode, the bit string is the opposite of both control
# operations and postincrement mode. (bit7 = FP7 ... bit0 = FP0)
# here, we convert it to be just like the others...
	mov.b		(tbl_fmovm_convert.w,%pc,%d1.w*1),%d1

	btst		&0x5,EXC_SR(%a6)	# user or supervisor mode?
	beq.b		fmovm_out_ctrl		# user

fmovm_out_predec_s:
	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
	bne.b		fmovm_out_ctrl

# the operation was unfortunately an: fmovm.x dn,-(sp)
# called from supervisor mode.
# we're also passing "size" and "strg" back to the calling routine
	rts

############################
fmovm_out_ctrl:
	mov.l		%a0,%a1			# move <ea> to a1

# build the register dump on the supervisor stack, then copy it to the
# user destination with a single _dmem_write().
	sub.l		%d0,%sp			# subtract size of dump
	lea		(%sp),%a0

# FP0/FP1 were saved to the exception frame on entry, so their images
# are copied from there; FP2-FP7 are still live and dumped directly.
	tst.b		%d1			# should FP0 be moved?
	bpl.b		fmovm_out_ctrl_fp1	# no

	mov.l		0x0+EXC_FP0(%a6),(%a0)+	# yes
	mov.l		0x4+EXC_FP0(%a6),(%a0)+
	mov.l		0x8+EXC_FP0(%a6),(%a0)+

fmovm_out_ctrl_fp1:
	lsl.b		&0x1,%d1		# should FP1 be moved?
	bpl.b		fmovm_out_ctrl_fp2	# no

	mov.l		0x0+EXC_FP1(%a6),(%a0)+	# yes
	mov.l		0x4+EXC_FP1(%a6),(%a0)+
	mov.l		0x8+EXC_FP1(%a6),(%a0)+

fmovm_out_ctrl_fp2:
	lsl.b		&0x1,%d1		# should FP2 be moved?
	bpl.b		fmovm_out_ctrl_fp3	# no

	fmovm.x		&0x20,(%a0)		# yes
	add.l		&0xc,%a0

fmovm_out_ctrl_fp3:
	lsl.b		&0x1,%d1		# should FP3 be moved?
	bpl.b		fmovm_out_ctrl_fp4	# no

	fmovm.x		&0x10,(%a0)		# yes
	add.l		&0xc,%a0

fmovm_out_ctrl_fp4:
	lsl.b		&0x1,%d1		# should FP4 be moved?
	bpl.b		fmovm_out_ctrl_fp5	# no

	fmovm.x		&0x08,(%a0)		# yes
	add.l		&0xc,%a0

fmovm_out_ctrl_fp5:
	lsl.b		&0x1,%d1		# should FP5 be moved?
	bpl.b		fmovm_out_ctrl_fp6	# no

	fmovm.x		&0x04,(%a0)		# yes
	add.l		&0xc,%a0

fmovm_out_ctrl_fp6:
	lsl.b		&0x1,%d1		# should FP6 be moved?
	bpl.b		fmovm_out_ctrl_fp7	# no

	fmovm.x		&0x02,(%a0)		# yes
	add.l		&0xc,%a0

fmovm_out_ctrl_fp7:
	lsl.b		&0x1,%d1		# should FP7 be moved?
	bpl.b		fmovm_out_ctrl_done	# no

	fmovm.x		&0x01,(%a0)		# yes
	add.l		&0xc,%a0

fmovm_out_ctrl_done:
	mov.l		%a1,L_SCR1(%a6)

	lea		(%sp),%a0		# pass: supervisor src
	mov.l		%d0,-(%sp)		# save size
	bsr.l		_dmem_write		# copy data to user mem

	mov.l		(%sp)+,%d0
	add.l		%d0,%sp			# clear fpreg data from stack

	tst.l		%d1			# did dstore err?
	bne.w		fmovm_out_err		# yes

	rts

############
# MOVE IN: #
############
fmovm_data_in:
	mov.l		%a0,L_SCR1(%a6)

# read the whole register image onto the supervisor stack first, then
# distribute it to the FP registers / saved images.
	sub.l		%d0,%sp			# make room for fpregs
	lea		(%sp),%a1

	mov.l		%d1,-(%sp)		# save bit string for later
	mov.l		%d0,-(%sp)		# save # of bytes

	bsr.l		_dmem_read		# copy data from user mem

	mov.l		(%sp)+,%d0		# retrieve # of bytes

	tst.l		%d1			# did dfetch fail?
	bne.w		fmovm_in_err		# yes

	mov.l		(%sp)+,%d1		# load bit string

	lea		(%sp),%a0		# addr of stack

	tst.b		%d1			# should FP0 be moved?
	bpl.b		fmovm_data_in_fp1	# no

	mov.l		(%a0)+,0x0+EXC_FP0(%a6)	# yes
	mov.l		(%a0)+,0x4+EXC_FP0(%a6)
	mov.l		(%a0)+,0x8+EXC_FP0(%a6)

fmovm_data_in_fp1:
	lsl.b		&0x1,%d1		# should FP1 be moved?
	bpl.b		fmovm_data_in_fp2	# no

	mov.l		(%a0)+,0x0+EXC_FP1(%a6)	# yes
	mov.l		(%a0)+,0x4+EXC_FP1(%a6)
	mov.l		(%a0)+,0x8+EXC_FP1(%a6)

fmovm_data_in_fp2:
	lsl.b		&0x1,%d1		# should FP2 be moved?
	bpl.b		fmovm_data_in_fp3	# no

	fmovm.x		(%a0)+,&0x20		# yes

fmovm_data_in_fp3:
	lsl.b		&0x1,%d1		# should FP3 be moved?
	bpl.b		fmovm_data_in_fp4	# no

	fmovm.x		(%a0)+,&0x10		# yes

fmovm_data_in_fp4:
	lsl.b		&0x1,%d1		# should FP4 be moved?
	bpl.b		fmovm_data_in_fp5	# no

	fmovm.x		(%a0)+,&0x08		# yes

fmovm_data_in_fp5:
	lsl.b		&0x1,%d1		# should FP5 be moved?
	bpl.b		fmovm_data_in_fp6	# no

	fmovm.x		(%a0)+,&0x04		# yes

fmovm_data_in_fp6:
	lsl.b		&0x1,%d1		# should FP6 be moved?
	bpl.b		fmovm_data_in_fp7	# no

	fmovm.x		(%a0)+,&0x02		# yes

fmovm_data_in_fp7:
	lsl.b		&0x1,%d1		# should FP7 be moved?
	bpl.b		fmovm_data_in_done	# no

	fmovm.x		(%a0)+,&0x01		# yes

fmovm_data_in_done:
	add.l		%d0,%sp			# remove fpregs from stack
	rts

#####################################

fmovm_data_done:
	rts

##############################################################################

#
# table indexed by the operation's bit string that gives the number
# of bytes that will be moved.
4485# 4486# number of bytes = (# of 1's in bit string) * 12(bytes/fpreg) 4487# 4488tbl_fmovm_size: 4489 byte 0x00,0x0c,0x0c,0x18,0x0c,0x18,0x18,0x24 4490 byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30 4491 byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30 4492 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c 4493 byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30 4494 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c 4495 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c 4496 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48 4497 byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30 4498 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c 4499 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c 4500 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48 4501 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c 4502 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48 4503 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48 4504 byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54 4505 byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30 4506 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c 4507 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c 4508 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48 4509 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c 4510 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48 4511 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48 4512 byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54 4513 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c 4514 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48 4515 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48 4516 byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54 4517 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48 4518 byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54 4519 byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54 4520 byte 0x3c,0x48,0x48,0x54,0x48,0x54,0x54,0x60 4521 4522# 4523# table to convert a pre-decrement bit string into a post-increment 4524# or control bit string. 4525# ex: 0x00 ==> 0x00 4526# 0x01 ==> 0x80 4527# 0x02 ==> 0x40 4528# . 4529# . 
4530# 0xfd ==> 0xbf 4531# 0xfe ==> 0x7f 4532# 0xff ==> 0xff 4533# 4534tbl_fmovm_convert: 4535 byte 0x00,0x80,0x40,0xc0,0x20,0xa0,0x60,0xe0 4536 byte 0x10,0x90,0x50,0xd0,0x30,0xb0,0x70,0xf0 4537 byte 0x08,0x88,0x48,0xc8,0x28,0xa8,0x68,0xe8 4538 byte 0x18,0x98,0x58,0xd8,0x38,0xb8,0x78,0xf8 4539 byte 0x04,0x84,0x44,0xc4,0x24,0xa4,0x64,0xe4 4540 byte 0x14,0x94,0x54,0xd4,0x34,0xb4,0x74,0xf4 4541 byte 0x0c,0x8c,0x4c,0xcc,0x2c,0xac,0x6c,0xec 4542 byte 0x1c,0x9c,0x5c,0xdc,0x3c,0xbc,0x7c,0xfc 4543 byte 0x02,0x82,0x42,0xc2,0x22,0xa2,0x62,0xe2 4544 byte 0x12,0x92,0x52,0xd2,0x32,0xb2,0x72,0xf2 4545 byte 0x0a,0x8a,0x4a,0xca,0x2a,0xaa,0x6a,0xea 4546 byte 0x1a,0x9a,0x5a,0xda,0x3a,0xba,0x7a,0xfa 4547 byte 0x06,0x86,0x46,0xc6,0x26,0xa6,0x66,0xe6 4548 byte 0x16,0x96,0x56,0xd6,0x36,0xb6,0x76,0xf6 4549 byte 0x0e,0x8e,0x4e,0xce,0x2e,0xae,0x6e,0xee 4550 byte 0x1e,0x9e,0x5e,0xde,0x3e,0xbe,0x7e,0xfe 4551 byte 0x01,0x81,0x41,0xc1,0x21,0xa1,0x61,0xe1 4552 byte 0x11,0x91,0x51,0xd1,0x31,0xb1,0x71,0xf1 4553 byte 0x09,0x89,0x49,0xc9,0x29,0xa9,0x69,0xe9 4554 byte 0x19,0x99,0x59,0xd9,0x39,0xb9,0x79,0xf9 4555 byte 0x05,0x85,0x45,0xc5,0x25,0xa5,0x65,0xe5 4556 byte 0x15,0x95,0x55,0xd5,0x35,0xb5,0x75,0xf5 4557 byte 0x0d,0x8d,0x4d,0xcd,0x2d,0xad,0x6d,0xed 4558 byte 0x1d,0x9d,0x5d,0xdd,0x3d,0xbd,0x7d,0xfd 4559 byte 0x03,0x83,0x43,0xc3,0x23,0xa3,0x63,0xe3 4560 byte 0x13,0x93,0x53,0xd3,0x33,0xb3,0x73,0xf3 4561 byte 0x0b,0x8b,0x4b,0xcb,0x2b,0xab,0x6b,0xeb 4562 byte 0x1b,0x9b,0x5b,0xdb,0x3b,0xbb,0x7b,0xfb 4563 byte 0x07,0x87,0x47,0xc7,0x27,0xa7,0x67,0xe7 4564 byte 0x17,0x97,0x57,0xd7,0x37,0xb7,0x77,0xf7 4565 byte 0x0f,0x8f,0x4f,0xcf,0x2f,0xaf,0x6f,0xef 4566 byte 0x1f,0x9f,0x5f,0xdf,0x3f,0xbf,0x7f,0xff 4567 4568 global fmovm_calc_ea 4569############################################### 4570# _fmovm_calc_ea: calculate effective address # 4571############################################### 4572fmovm_calc_ea: 4573 mov.l %d0,%a0 # move # bytes to a0 4574 4575# currently, MODE and REG are taken from the 
EXC_OPWORD. this could be 4576# easily changed if they were inputs passed in registers. 4577 mov.w EXC_OPWORD(%a6),%d0 # fetch opcode word 4578 mov.w %d0,%d1 # make a copy 4579 4580 andi.w &0x3f,%d0 # extract mode field 4581 andi.l &0x7,%d1 # extract reg field 4582 4583# jump to the corresponding function for each {MODE,REG} pair. 4584 mov.w (tbl_fea_mode.b,%pc,%d0.w*2),%d0 # fetch jmp distance 4585 jmp (tbl_fea_mode.b,%pc,%d0.w*1) # jmp to correct ea mode 4586 4587 swbeg &64 4588tbl_fea_mode: 4589 short tbl_fea_mode - tbl_fea_mode 4590 short tbl_fea_mode - tbl_fea_mode 4591 short tbl_fea_mode - tbl_fea_mode 4592 short tbl_fea_mode - tbl_fea_mode 4593 short tbl_fea_mode - tbl_fea_mode 4594 short tbl_fea_mode - tbl_fea_mode 4595 short tbl_fea_mode - tbl_fea_mode 4596 short tbl_fea_mode - tbl_fea_mode 4597 4598 short tbl_fea_mode - tbl_fea_mode 4599 short tbl_fea_mode - tbl_fea_mode 4600 short tbl_fea_mode - tbl_fea_mode 4601 short tbl_fea_mode - tbl_fea_mode 4602 short tbl_fea_mode - tbl_fea_mode 4603 short tbl_fea_mode - tbl_fea_mode 4604 short tbl_fea_mode - tbl_fea_mode 4605 short tbl_fea_mode - tbl_fea_mode 4606 4607 short faddr_ind_a0 - tbl_fea_mode 4608 short faddr_ind_a1 - tbl_fea_mode 4609 short faddr_ind_a2 - tbl_fea_mode 4610 short faddr_ind_a3 - tbl_fea_mode 4611 short faddr_ind_a4 - tbl_fea_mode 4612 short faddr_ind_a5 - tbl_fea_mode 4613 short faddr_ind_a6 - tbl_fea_mode 4614 short faddr_ind_a7 - tbl_fea_mode 4615 4616 short faddr_ind_p_a0 - tbl_fea_mode 4617 short faddr_ind_p_a1 - tbl_fea_mode 4618 short faddr_ind_p_a2 - tbl_fea_mode 4619 short faddr_ind_p_a3 - tbl_fea_mode 4620 short faddr_ind_p_a4 - tbl_fea_mode 4621 short faddr_ind_p_a5 - tbl_fea_mode 4622 short faddr_ind_p_a6 - tbl_fea_mode 4623 short faddr_ind_p_a7 - tbl_fea_mode 4624 4625 short faddr_ind_m_a0 - tbl_fea_mode 4626 short faddr_ind_m_a1 - tbl_fea_mode 4627 short faddr_ind_m_a2 - tbl_fea_mode 4628 short faddr_ind_m_a3 - tbl_fea_mode 4629 short faddr_ind_m_a4 - tbl_fea_mode 4630 short 
faddr_ind_m_a5 - tbl_fea_mode 4631 short faddr_ind_m_a6 - tbl_fea_mode 4632 short faddr_ind_m_a7 - tbl_fea_mode 4633 4634 short faddr_ind_disp_a0 - tbl_fea_mode 4635 short faddr_ind_disp_a1 - tbl_fea_mode 4636 short faddr_ind_disp_a2 - tbl_fea_mode 4637 short faddr_ind_disp_a3 - tbl_fea_mode 4638 short faddr_ind_disp_a4 - tbl_fea_mode 4639 short faddr_ind_disp_a5 - tbl_fea_mode 4640 short faddr_ind_disp_a6 - tbl_fea_mode 4641 short faddr_ind_disp_a7 - tbl_fea_mode 4642 4643 short faddr_ind_ext - tbl_fea_mode 4644 short faddr_ind_ext - tbl_fea_mode 4645 short faddr_ind_ext - tbl_fea_mode 4646 short faddr_ind_ext - tbl_fea_mode 4647 short faddr_ind_ext - tbl_fea_mode 4648 short faddr_ind_ext - tbl_fea_mode 4649 short faddr_ind_ext - tbl_fea_mode 4650 short faddr_ind_ext - tbl_fea_mode 4651 4652 short fabs_short - tbl_fea_mode 4653 short fabs_long - tbl_fea_mode 4654 short fpc_ind - tbl_fea_mode 4655 short fpc_ind_ext - tbl_fea_mode 4656 short tbl_fea_mode - tbl_fea_mode 4657 short tbl_fea_mode - tbl_fea_mode 4658 short tbl_fea_mode - tbl_fea_mode 4659 short tbl_fea_mode - tbl_fea_mode 4660 4661################################### 4662# Address register indirect: (An) # 4663################################### 4664faddr_ind_a0: 4665 mov.l EXC_DREGS+0x8(%a6),%a0 # Get current a0 4666 rts 4667 4668faddr_ind_a1: 4669 mov.l EXC_DREGS+0xc(%a6),%a0 # Get current a1 4670 rts 4671 4672faddr_ind_a2: 4673 mov.l %a2,%a0 # Get current a2 4674 rts 4675 4676faddr_ind_a3: 4677 mov.l %a3,%a0 # Get current a3 4678 rts 4679 4680faddr_ind_a4: 4681 mov.l %a4,%a0 # Get current a4 4682 rts 4683 4684faddr_ind_a5: 4685 mov.l %a5,%a0 # Get current a5 4686 rts 4687 4688faddr_ind_a6: 4689 mov.l (%a6),%a0 # Get current a6 4690 rts 4691 4692faddr_ind_a7: 4693 mov.l EXC_A7(%a6),%a0 # Get current a7 4694 rts 4695 4696##################################################### 4697# Address register indirect w/ postincrement: (An)+ # 4698##################################################### 
# NOTE(review): in the (An)+ / -(An) routines below, %a0 on entry holds the
# amount to post-increment/pre-decrement by (the operand length) - it is
# added/subtracted directly. The resulting <ea> is returned in %a0 and the
# updated register value is written back to its home (frame slot or live reg).
faddr_ind_p_a0:
	mov.l		EXC_DREGS+0x8(%a6),%d0	# Get current a0
	mov.l		%d0,%d1
	add.l		%a0,%d1			# Increment by operand length
	mov.l		%d1,EXC_DREGS+0x8(%a6)	# Save incr value
	mov.l		%d0,%a0			# <ea> = pre-increment value
	rts

faddr_ind_p_a1:
	mov.l		EXC_DREGS+0xc(%a6),%d0	# Get current a1
	mov.l		%d0,%d1
	add.l		%a0,%d1			# Increment
	mov.l		%d1,EXC_DREGS+0xc(%a6)	# Save incr value
	mov.l		%d0,%a0
	rts

faddr_ind_p_a2:
	mov.l		%a2,%d0			# Get current a2
	mov.l		%d0,%d1
	add.l		%a0,%d1			# Increment
	mov.l		%d1,%a2			# Save incr value
	mov.l		%d0,%a0
	rts

faddr_ind_p_a3:
	mov.l		%a3,%d0			# Get current a3
	mov.l		%d0,%d1
	add.l		%a0,%d1			# Increment
	mov.l		%d1,%a3			# Save incr value
	mov.l		%d0,%a0
	rts

faddr_ind_p_a4:
	mov.l		%a4,%d0			# Get current a4
	mov.l		%d0,%d1
	add.l		%a0,%d1			# Increment
	mov.l		%d1,%a4			# Save incr value
	mov.l		%d0,%a0
	rts

faddr_ind_p_a5:
	mov.l		%a5,%d0			# Get current a5
	mov.l		%d0,%d1
	add.l		%a0,%d1			# Increment
	mov.l		%d1,%a5			# Save incr value
	mov.l		%d0,%a0
	rts

faddr_ind_p_a6:
	mov.l		(%a6),%d0		# Get current a6 (saved frame ptr)
	mov.l		%d0,%d1
	add.l		%a0,%d1			# Increment
	mov.l		%d1,(%a6)		# Save incr value
	mov.l		%d0,%a0
	rts

faddr_ind_p_a7:
# (a7)+ needs special post-processing elsewhere, so flag it
	mov.b		&mia7_flg,SPCOND_FLG(%a6) # set "special case" flag

	mov.l		EXC_A7(%a6),%d0		# Get current a7
	mov.l		%d0,%d1
	add.l		%a0,%d1			# Increment
	mov.l		%d1,EXC_A7(%a6)		# Save incr value
	mov.l		%d0,%a0
	rts

####################################################
# Address register indirect w/ predecrement: -(An) #
# <ea> = An - len; the decremented value is both   #
# written back and returned in a0.                 #
####################################################
faddr_ind_m_a0:
	mov.l		EXC_DREGS+0x8(%a6),%d0	# Get current a0
	sub.l		%a0,%d0			# Decrement by operand length
	mov.l		%d0,EXC_DREGS+0x8(%a6)	# Save decr value
	mov.l		%d0,%a0
	rts

faddr_ind_m_a1:
	mov.l		EXC_DREGS+0xc(%a6),%d0	# Get current a1
	sub.l		%a0,%d0			# Decrement
	mov.l		%d0,EXC_DREGS+0xc(%a6)	# Save decr value
	mov.l		%d0,%a0
	rts

faddr_ind_m_a2:
	mov.l		%a2,%d0			# Get current a2
	sub.l		%a0,%d0			# Decrement
	mov.l		%d0,%a2			# Save decr value
	mov.l		%d0,%a0
	rts

faddr_ind_m_a3:
	mov.l		%a3,%d0			# Get current a3
	sub.l		%a0,%d0			# Decrement
	mov.l		%d0,%a3			# Save decr value
	mov.l		%d0,%a0
	rts

faddr_ind_m_a4:
	mov.l		%a4,%d0			# Get current a4
	sub.l		%a0,%d0			# Decrement
	mov.l		%d0,%a4			# Save decr value
	mov.l		%d0,%a0
	rts

faddr_ind_m_a5:
	mov.l		%a5,%d0			# Get current a5
	sub.l		%a0,%d0			# Decrement
	mov.l		%d0,%a5			# Save decr value
	mov.l		%d0,%a0
	rts

faddr_ind_m_a6:
	mov.l		(%a6),%d0		# Get current a6 (saved frame ptr)
	sub.l		%a0,%d0			# Decrement
	mov.l		%d0,(%a6)		# Save decr value
	mov.l		%d0,%a0
	rts

faddr_ind_m_a7:
# -(a7) needs special post-processing elsewhere, so flag it
	mov.b		&mda7_flg,SPCOND_FLG(%a6) # set "special case" flag

	mov.l		EXC_A7(%a6),%d0		# Get current a7
	sub.l		%a0,%d0			# Decrement
	mov.l		%d0,EXC_A7(%a6)		# Save decr value
	mov.l		%d0,%a0
	rts

########################################################
# Address register indirect w/ displacement: (d16, An) #
# The 16-bit displacement is the next extension word;  #
# fetch it via _imem_read_word (d1 != 0 => ifetch      #
# failed -> bail to iea_iacc).                         #
########################################################
faddr_ind_disp_a0:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement (movea.w)

	add.l		EXC_DREGS+0x8(%a6),%a0	# a0 + d16
	rts

faddr_ind_disp_a1:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		EXC_DREGS+0xc(%a6),%a0	# a1 + d16
	rts

faddr_ind_disp_a2:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		%a2,%a0			# a2 + d16
	rts

faddr_ind_disp_a3:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		%a3,%a0			# a3 + d16
	rts

faddr_ind_disp_a4:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		%a4,%a0			# a4 + d16
	rts

faddr_ind_disp_a5:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		%a5,%a0			# a5 + d16
	rts

faddr_ind_disp_a6:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		(%a6),%a0		# a6 + d16
	rts

faddr_ind_disp_a7:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		EXC_A7(%a6),%a0		# a7 + d16
	rts

########################################################################
# Address register indirect w/ index(8-bit displacement): (d8, An, Xn) #
#    "       "         "    w/   "  (base displacement): (bd, An, Xn)  #
# Memory indirect postindexed: ([bd, An], Xn, od)                      #
# Memory indirect preindexed: ([bd, An, Xn], od)                       #
########################################################################
faddr_ind_ext:
	addq.l		&0x8,%d1		# regno 8+n selects address reg An
	bsr.l		fetch_dreg		# fetch base areg
	mov.l		%d0,-(%sp)		# hold base across the ifetch

	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word		# fetch extword in d0

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		(%sp)+,%a0		# restore base into a0

	btst		&0x8,%d0		# full extension word format?
	bne.w		fcalc_mem_ind		# yes; memory indirect path

# brief extension word format: (d8, An, Xn)
	mov.l		%d0,L_SCR1(%a6)		# hold opword

	mov.l		%d0,%d1
	rol.w		&0x4,%d1
	andi.w		&0xf,%d1		# extract index regno

# count on fetch_dreg() not to alter a0...
	bsr.l		fetch_dreg		# fetch index

	mov.l		%d2,-(%sp)		# save d2
	mov.l		L_SCR1(%a6),%d2		# fetch opword

	btst		&0xb,%d2		# is it word or long?
	bne.b		faii8_long
	ext.l		%d0			# sign extend word index
faii8_long:
	mov.l		%d2,%d1
	rol.w		&0x7,%d1
	andi.l		&0x3,%d1		# extract scale value

	lsl.l		%d1,%d0			# shift index by scale

	extb.l		%d2			# sign extend displacement (d8)
	add.l		%d2,%d0			# index + disp
	add.l		%d0,%a0			# An + (index + disp)

	mov.l		(%sp)+,%d2		# restore old d2
	rts

###########################
# Absolute short: (XXX).W #
###########################
fabs_short:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word		# fetch short address

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# return <ea> in a0 (sign extended)
	rts

##########################
# Absolute long: (XXX).L #
##########################
fabs_long:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch long address

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,%a0			# return <ea> in a0
	rts

#######################################################
# Program counter indirect w/ displacement: (d16, PC) #
#######################################################
fpc_ind:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word		# fetch word displacement

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		EXC_EXTWPTR(%a6),%a0	# pc + d16

# _imem_read_word() increased the extwptr by 2. need to adjust here.
	subq.l		&0x2,%a0		# adjust <ea>
	rts

##########################################################
# PC indirect w/ index(8-bit displacement): (d8, PC, An) #
# "     "     w/   "  (base displacement): (bd, PC, An)  #
# PC memory indirect postindexed: ([bd, PC], Xn, od)     #
# PC memory indirect preindexed: ([bd, PC, Xn], od)      #
##########################################################
fpc_ind_ext:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word		# fetch ext word

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		EXC_EXTWPTR(%a6),%a0	# put base in a0
	subq.l		&0x2,%a0		# adjust base (undo read_word incr)

	btst		&0x8,%d0		# full extension word format?
	bne.w		fcalc_mem_ind		# calc memory indirect

# brief extension word format: (d8, PC, Xn)
	mov.l		%d0,L_SCR1(%a6)		# store opword

	mov.l		%d0,%d1			# make extword copy
	rol.w		&0x4,%d1		# rotate reg num into place
	andi.w		&0xf,%d1		# extract register number

# count on fetch_dreg() not to alter a0...
	bsr.l		fetch_dreg		# fetch index

	mov.l		%d2,-(%sp)		# save d2
	mov.l		L_SCR1(%a6),%d2		# fetch opword

	btst		&0xb,%d2		# is index word or long?
	bne.b		fpii8_long		# long
	ext.l		%d0			# sign extend word index
fpii8_long:
	mov.l		%d2,%d1
	rol.w		&0x7,%d1		# rotate scale value into place
	andi.l		&0x3,%d1		# extract scale value

	lsl.l		%d1,%d0			# shift index by scale

	extb.l		%d2			# sign extend displacement (d8)
	add.l		%d2,%d0			# disp + index
	add.l		%d0,%a0			# PC + (index + disp)

	mov.l		(%sp)+,%d2		# restore temp register
	rts

# full-format extension word processing (memory indirect etc.).
# register roles while d2-d5 are saved:
# d2 = index
# d3 = base
# d4 = od (outer displacement)
# d5 = extword
fcalc_mem_ind:
	btst		&0x6,%d0		# is the index suppressed?
	beq.b		fcalc_index

	movm.l		&0x3c00,-(%sp)		# save d2-d5

	mov.l		%d0,%d5			# put extword in d5
	mov.l		%a0,%d3			# put base in d3

	clr.l		%d2			# yes, so index = 0
	bra.b		fbase_supp_ck

# index:
fcalc_index:
	mov.l		%d0,L_SCR1(%a6)		# save d0 (opword)
	bfextu		%d0{&16:&4},%d1		# fetch dreg index
	bsr.l		fetch_dreg

	movm.l		&0x3c00,-(%sp)		# save d2-d5
	mov.l		%d0,%d2			# put index in d2
	mov.l		L_SCR1(%a6),%d5
	mov.l		%a0,%d3

	btst		&0xb,%d5		# is index word or long?
	bne.b		fno_ext
	ext.l		%d2			# sign extend word index

fno_ext:
	bfextu		%d5{&21:&2},%d0		# extract scale field
	lsl.l		%d0,%d2			# scale the index

# base address (passed as parameter in d3):
# we clear the value here if it should actually be suppressed.
fbase_supp_ck:
	btst		&0x7,%d5		# is the bd suppressed?
	beq.b		fno_base_sup
	clr.l		%d3

# base displacement:
fno_base_sup:
	bfextu		%d5{&26:&2},%d0		# get bd size
#	beq.l		fmovm_error		# if (size == 0) it's reserved

	cmpi.b		%d0,&0x2		# null(1)/word(2)/long(3) bd?
	blt.b		fno_bd
	beq.b		fget_word_bd

# long base displacement
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long

	tst.l		%d1			# did ifetch fail?
	bne.l		fcea_iacc		# yes

	bra.b		fchk_ind

fget_word_bd:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		fcea_iacc		# yes

	ext.l		%d0			# sign extend bd

fchk_ind:
	add.l		%d0,%d3			# base += bd

# outer displacement:
fno_bd:
	bfextu		%d5{&30:&2},%d0		# is od suppressed?
	beq.w		faii_bd			# no memory indirection at all

	cmpi.b		%d0,&0x2		# null(1)/word(2)/long(3) od?
	blt.b		fnull_od
	beq.b		fword_od

# long outer displacement
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long

	tst.l		%d1			# did ifetch fail?
	bne.l		fcea_iacc		# yes

	bra.b		fadd_them

fword_od:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		fcea_iacc		# yes

	ext.l		%d0			# sign extend od
	bra.b		fadd_them

fnull_od:
	clr.l		%d0			# od = 0

fadd_them:
	mov.l		%d0,%d4			# d4 = od

	btst		&0x2,%d5		# pre or post indexing?
	beq.b		fpre_indexed

# postindexed: <ea> = mem[base + bd] + index + od
	mov.l		%d3,%a0
	bsr.l		_dmem_read_long

	tst.l		%d1			# did dfetch fail?
	bne.w		fcea_err		# yes

	add.l		%d2,%d0			# <ea> += index
	add.l		%d4,%d0			# <ea> += od
	bra.b		fdone_ea

# preindexed: <ea> = mem[base + bd + index] + od
fpre_indexed:
	add.l		%d2,%d3			# preindexing
	mov.l		%d3,%a0
	bsr.l		_dmem_read_long

	tst.l		%d1			# did dfetch fail?
	bne.w		fcea_err		# yes

	add.l		%d4,%d0			# ea += od
	bra.b		fdone_ea

# no memory indirection: <ea> = base + bd + index
faii_bd:
	add.l		%d2,%d3			# ea = (base + bd) + index
	mov.l		%d3,%d0
fdone_ea:
	mov.l		%d0,%a0			# return <ea> in a0

	movm.l		(%sp)+,&0x003c		# restore d2-d5
	rts

#########################################################
# error exits: restore scratch regs, then hand off to   #
# the data/instruction access error handlers.           #
fcea_err:
	mov.l		%d3,%a0			# faulting address

	movm.l		(%sp)+,&0x003c		# restore d2-d5
	mov.w		&0x0101,%d0		# dfetch fault status
	bra.l		iea_dacc

fcea_iacc:
	movm.l		(%sp)+,&0x003c		# restore d2-d5
	bra.l		iea_iacc

fmovm_out_err:
	bsr.l		restore
	mov.w		&0x00e1,%d0		# "out" fault status
	bra.b		fmovm_err

fmovm_in_err:
	bsr.l		restore
	mov.w		&0x0161,%d0		# "in" fault status

fmovm_err:
	mov.l		L_SCR1(%a6),%a0		# faulting address
	bra.l		iea_dacc

#########################################################################
# XDEF ****************************************************************	#
#	fmovm_ctrl(): emulate fmovm.l of control registers instr	#
#									#
# XREF ****************************************************************	#
#	_imem_read_long() - read longword from memory			#
#	iea_iacc() - _imem_read_long() failed; error recovery		#
#									#
# INPUT ***************************************************************	#
#	None								#
#									#
# OUTPUT **************************************************************	#
#	If _imem_read_long() doesn't fail:				#
#		USER_FPCR(a6)  = new FPCR value				#
#		USER_FPSR(a6)  = new FPSR value				#
#		USER_FPIAR(a6) = new FPIAR value			#
#									#
# ALGORITHM ***********************************************************	#
#	Decode the instruction type by looking at the extension word	#
# in order to see how many control registers to fetch from memory.	#
# Fetch them using _imem_read_long(). If this fetch fails, exit through	#
# the special access error exit handler iea_iacc().
#									#
# Instruction word decoding:						#
#									#
#	fmovem.l #<data>, {FPIAR&|FPCR&|FPSR}				#
#									#
#		WORD1			WORD2				#
#	1111 0010 00 111100	100$ $$00 0000 0000			#
#									#
#	$$$ (100): FPCR							#
#	    (010): FPSR							#
#	    (001): FPIAR						#
#	    (000): FPIAR						#
#									#
#########################################################################

	global		fmovm_ctrl
fmovm_ctrl:
	mov.b		EXC_EXTWORD(%a6),%d0	# fetch reg select bits
	cmpi.b		%d0,&0x9c		# fpcr & fpsr & fpiar ?
	beq.w		fctrl_in_7		# yes
	cmpi.b		%d0,&0x98		# fpcr & fpsr ?
	beq.w		fctrl_in_6		# yes
	cmpi.b		%d0,&0x94		# fpcr & fpiar ?
	beq.b		fctrl_in_5		# yes

# fmovem.l #<data>, fpsr/fpiar
# registers are transferred in FPCR,FPSR,FPIAR order; each fetch that
# fails (d1 != 0) exits through iea_iacc.
fctrl_in_3:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPSR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPSR(%a6)	# store new FPSR in save area
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPIAR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPIAR(%a6)	# store new FPIAR in save area
	rts

# fmovem.l #<data>, fpcr/fpiar
fctrl_in_5:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPCR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPCR(%a6)	# store new FPCR in save area
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPIAR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPIAR(%a6)	# store new FPIAR in save area
	rts

# fmovem.l #<data>, fpcr/fpsr
fctrl_in_6:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPCR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPCR(%a6)	# store new FPCR in save area
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPSR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPSR(%a6)	# store new FPSR in save area
	rts

# fmovem.l #<data>, fpcr/fpsr/fpiar
fctrl_in_7:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPCR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPCR(%a6)	# store new FPCR in save area
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPSR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPSR(%a6)	# store new FPSR in save area
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPIAR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPIAR(%a6)	# store new FPIAR in save area
	rts

##########################################################################

#########################################################################
# XDEF ****************************************************************	#
#	addsub_scaler2(): scale inputs to fadd/fsub such that no	#
#			  OVFL/UNFL exceptions will result		#
#									#
# XREF ****************************************************************	#
#	norm() - normalize mantissa after adjusting exponent		#
#									#
# INPUT ***************************************************************	#
#	FP_SRC(a6) = fp op1(src)					#
#	FP_DST(a6) = fp op2(dst)					#
#									#
# OUTPUT **************************************************************	#
#	FP_SRC(a6) = fp op1 scaled(src)					#
#	FP_DST(a6) = fp op2 scaled(dst)					#
#	d0	   = scale amount					#
#									#
# ALGORITHM ***********************************************************	#
#	If the DST exponent is > the SRC exponent, set the DST exponent	#
# equal to 0x3fff and scale the SRC exponent by the value that the	#
# DST exponent was scaled by. If the SRC exponent is greater or equal,	#
# do the opposite. Return this scale factor in d0.			#
#	If the two exponents differ by > the number of mantissa bits	#
# plus two, then set the smallest exponent to a very small value as a	#
# quick shortcut.
#									#
#########################################################################

	global		addsub_scaler2
addsub_scaler2:
# copy both operands into the scratch slots FP_SCR0 (src) / FP_SCR1 (dst)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
	mov.w		SRC_EX(%a0),%d0
	mov.w		DST_EX(%a1),%d1
	mov.w		%d0,FP_SCR0_EX(%a6)
	mov.w		%d1,FP_SCR1_EX(%a6)

	andi.w		&0x7fff,%d0		# strip sign bits
	andi.w		&0x7fff,%d1
	mov.w		%d0,L_SCR1(%a6)		# store src exponent
	mov.w		%d1,2+L_SCR1(%a6)	# store dst exponent

	cmp.w		%d0, %d1		# is src exp >= dst exp?
	bge.l		src_exp_ge2

# dst exp is >  src exp; scale dst to exp = 0x3fff
dst_exp_gt2:
	bsr.l		scale_to_zero_dst
	mov.l		%d0,-(%sp)		# save scale factor

# NOTE(review): STAG is the *src* tag and the norm below operates on
# FP_SCR0 (the src copy); original comment said "dst" here.
	cmpi.b		STAG(%a6),&DENORM	# is src denormalized?
	bne.b		cmpexp12

	lea		FP_SCR0(%a6),%a0
	bsr.l		norm			# normalize the denorm; result is new exp
	neg.w		%d0			# new exp = -(shft val)
	mov.w		%d0,L_SCR1(%a6)		# insert new exp

cmpexp12:
	mov.w		2+L_SCR1(%a6),%d0
	subi.w		&mantissalen+2,%d0	# subtract mantissalen+2 from larger exp

	cmp.w		%d0,L_SCR1(%a6)		# is difference >= len(mantissa)+2?
	bge.b		quick_scale12

	mov.w		L_SCR1(%a6),%d0
	add.w		0x2(%sp),%d0		# scale src exponent by scale factor
	mov.w		FP_SCR0_EX(%a6),%d1
	and.w		&0x8000,%d1		# keep only the sign bit
	or.w		%d1,%d0			# concat {sgn,new exp}
	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new src exponent

	mov.l		(%sp)+,%d0		# return SCALE factor
	rts

quick_scale12:
# exponents too far apart: force the smaller operand's exp to 1
	andi.w		&0x8000,FP_SCR0_EX(%a6)	# zero src exponent
	bset		&0x0,1+FP_SCR0_EX(%a6)	# set exp = 1

	mov.l		(%sp)+,%d0		# return SCALE factor
	rts

# src exp is >= dst exp; scale src to exp = 0x3fff
src_exp_ge2:
	bsr.l		scale_to_zero_src
	mov.l		%d0,-(%sp)		# save scale factor

	cmpi.b		DTAG(%a6),&DENORM	# is dst denormalized?
	bne.b		cmpexp22
	lea		FP_SCR1(%a6),%a0
	bsr.l		norm			# normalize the denorm; result is new exp
	neg.w		%d0			# new exp = -(shft val)
	mov.w		%d0,2+L_SCR1(%a6)	# insert new exp

cmpexp22:
	mov.w		L_SCR1(%a6),%d0
	subi.w		&mantissalen+2,%d0	# subtract mantissalen+2 from larger exp

	cmp.w		%d0,2+L_SCR1(%a6)	# is difference >= len(mantissa)+2?
	bge.b		quick_scale22

	mov.w		2+L_SCR1(%a6),%d0
	add.w		0x2(%sp),%d0		# scale dst exponent by scale factor
	mov.w		FP_SCR1_EX(%a6),%d1
	andi.w		&0x8000,%d1		# keep only the sign bit
	or.w		%d1,%d0			# concat {sgn,new exp}
	mov.w		%d0,FP_SCR1_EX(%a6)	# insert new dst exponent

	mov.l		(%sp)+,%d0		# return SCALE factor
	rts

quick_scale22:
	andi.w		&0x8000,FP_SCR1_EX(%a6)	# zero dst exponent
	bset		&0x0,1+FP_SCR1_EX(%a6)	# set exp = 1

	mov.l		(%sp)+,%d0		# return SCALE factor
	rts

##########################################################################

#########################################################################
# XDEF ****************************************************************	#
#	scale_to_zero_src(): scale the exponent of extended precision	#
#			     value at FP_SCR0(a6).			#
#									#
# XREF ****************************************************************	#
#	norm() - normalize the mantissa if the operand was a DENORM	#
#									#
# INPUT ***************************************************************	#
#	FP_SCR0(a6) = extended precision operand to be scaled		#
#									#
# OUTPUT **************************************************************	#
#	FP_SCR0(a6) = scaled extended precision operand			#
#	d0	    = scale value					#
#									#
# ALGORITHM ***********************************************************	#
#	Set the exponent of the input operand to 0x3fff. Save the value	#
# of the difference between the original and new exponent. Then,	#
# normalize the operand if it was a DENORM.
# Add this normalization						#
# value to the previous value. Return the result.			#
#									#
#########################################################################

	global		scale_to_zero_src
scale_to_zero_src:
	mov.w		FP_SCR0_EX(%a6),%d1	# extract operand's {sgn,exp}
	mov.w		%d1,%d0			# make a copy

	andi.l		&0x7fff,%d1		# extract operand's exponent

	andi.w		&0x8000,%d0		# extract operand's sgn
	or.w		&0x3fff,%d0		# insert new operand's exponent(=0)

	mov.w		%d0,FP_SCR0_EX(%a6)	# insert biased exponent

	cmpi.b		STAG(%a6),&DENORM	# is operand a DENORM?
	beq.b		stzs_denorm		# yes; normalize it first

stzs_norm:
	mov.l		&0x3fff,%d0
	sub.l		%d1,%d0			# scale = BIAS + (-exp)

	rts

stzs_denorm:
	lea		FP_SCR0(%a6),%a0	# pass ptr to src op
	bsr.l		norm			# normalize denorm
	neg.l		%d0			# new exponent = -(shft val)
	mov.l		%d0,%d1			# feed the new exp into stzs_norm
	bra.b		stzs_norm		# finish scaling

###

#########################################################################
# XDEF ****************************************************************	#
#	scale_sqrt(): scale the input operand exponent so a subsequent	#
#		      fsqrt operation won't take an exception.		#
#									#
# XREF ****************************************************************	#
#	norm() - normalize the mantissa if the operand was a DENORM	#
#									#
# INPUT ***************************************************************	#
#	FP_SCR0(a6) = extended precision operand to be scaled		#
#									#
# OUTPUT **************************************************************	#
#	FP_SCR0(a6) = scaled extended precision operand			#
#	d0	    = scale value					#
#									#
# ALGORITHM ***********************************************************	#
#	If the input operand is a DENORM, normalize it.
#									#
#	If the exponent of the input operand is even, set the exponent	#
# to 0x3ffe and return a scale factor of "(exp-0x3ffe)/2". If the	#
# exponent of the input operand is odd, set the exponent to 0x3fff and	#
# return a scale factor of "(exp-0x3fff)/2".				#
#									#
#########################################################################

	global		scale_sqrt
scale_sqrt:
	cmpi.b		STAG(%a6),&DENORM	# is operand a DENORM?
	beq.b		ss_denorm		# yes; normalize it first

	mov.w		FP_SCR0_EX(%a6),%d1	# extract operand's {sgn,exp}
	andi.l		&0x7fff,%d1		# extract operand's exponent

	andi.w		&0x8000,FP_SCR0_EX(%a6)	# keep only operand's sgn

	btst		&0x0,%d1		# is exp even or odd?
	beq.b		ss_norm_even

# odd exponent: new exp = 0x3fff so (old-new) is even and halves exactly
	ori.w		&0x3fff,FP_SCR0_EX(%a6)	# insert new operand's exponent

	mov.l		&0x3fff,%d0
	sub.l		%d1,%d0			# scale = BIAS + (-exp)
	asr.l		&0x1,%d0		# divide scale factor by 2
	rts

ss_norm_even:
	ori.w		&0x3ffe,FP_SCR0_EX(%a6)	# insert new operand's exponent

	mov.l		&0x3ffe,%d0
	sub.l		%d1,%d0			# scale = BIAS + (-exp)
	asr.l		&0x1,%d0		# divide scale factor by 2
	rts

ss_denorm:
	lea		FP_SCR0(%a6),%a0	# pass ptr to src op
	bsr.l		norm			# normalize denorm; d0 = shift count

	btst		&0x0,%d0		# is exp even or odd?
	beq.b		ss_denorm_even

	ori.w		&0x3fff,FP_SCR0_EX(%a6)	# insert new operand's exponent

	add.l		&0x3fff,%d0
	asr.l		&0x1,%d0		# divide scale factor by 2
	rts

ss_denorm_even:
	ori.w		&0x3ffe,FP_SCR0_EX(%a6)	# insert new operand's exponent

	add.l		&0x3ffe,%d0
	asr.l		&0x1,%d0		# divide scale factor by 2
	rts

###

#########################################################################
# XDEF ****************************************************************	#
#	scale_to_zero_dst(): scale the exponent of extended precision	#
#			     value at FP_SCR1(a6).			#
#									#
# XREF ****************************************************************	#
#	norm() - normalize the mantissa if the operand was a DENORM	#
#									#
# INPUT ***************************************************************	#
#	FP_SCR1(a6) = extended precision operand to be scaled		#
#									#
# OUTPUT **************************************************************	#
#	FP_SCR1(a6) = scaled extended precision operand			#
#	d0	    = scale value					#
#									#
# ALGORITHM ***********************************************************	#
#	Set the exponent of the input operand to 0x3fff. Save the value	#
# of the difference between the original and new exponent. Then,	#
# normalize the operand if it was a DENORM. Add this normalization	#
# value to the previous value. Return the result.			#
#									#
#########################################################################

	global		scale_to_zero_dst
scale_to_zero_dst:
	mov.w		FP_SCR1_EX(%a6),%d1	# extract operand's {sgn,exp}
	mov.w		%d1,%d0			# make a copy

	andi.l		&0x7fff,%d1		# extract operand's exponent

	andi.w		&0x8000,%d0		# extract operand's sgn
	or.w		&0x3fff,%d0		# insert new operand's exponent(=0)

	mov.w		%d0,FP_SCR1_EX(%a6)	# insert biased exponent

	cmpi.b		DTAG(%a6),&DENORM	# is operand a DENORM?
	beq.b		stzd_denorm		# yes; normalize it first

stzd_norm:
	mov.l		&0x3fff,%d0
	sub.l		%d1,%d0			# scale = BIAS + (-exp)
	rts

stzd_denorm:
	lea		FP_SCR1(%a6),%a0	# pass ptr to dst op
	bsr.l		norm			# normalize denorm
	neg.l		%d0			# new exponent = -(shft val)
	mov.l		%d0,%d1			# feed the new exp into stzd_norm
	bra.b		stzd_norm		# finish scaling

##########################################################################

#########################################################################
# XDEF ****************************************************************	#
#	res_qnan(): return default result w/ QNAN operand for dyadic	#
#	res_snan(): return default result w/ SNAN operand for dyadic	#
#	res_qnan_1op(): return dflt result w/ QNAN operand for monadic	#
#	res_snan_1op(): return dflt result w/ SNAN operand for monadic	#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	FP_SRC(a6) = pointer to extended precision src operand		#
#	FP_DST(a6) = pointer to extended precision dst operand		#
#									#
# OUTPUT **************************************************************	#
#	fp0 = default result						#
#									#
# ALGORITHM ***********************************************************	#
#	If either operand (but not both operands) of an operation is a	#
# nonsignalling NAN, then that NAN is returned as the result. If both	#
# operands are nonsignalling NANs, then the destination operand		#
# nonsignalling NAN is returned as the result.				#
#	If either operand to an operation is a signalling NAN (SNAN),	#
# then, the SNAN bit is set in the FPSR EXC byte. If the SNAN trap	#
# enable bit is set in the FPCR, then the trap is taken and the		#
# destination is not modified.
If the SNAN trap enable bit is not set, # 5711# then the SNAN is converted to a nonsignalling NAN (by setting the # 5712# SNAN bit in the operand to one), and the operation continues as # 5713# described in the preceding paragraph, for nonsignalling NANs. # 5714# Make sure the appropriate FPSR bits are set before exiting. # 5715# # 5716######################################################################### 5717 5718 global res_qnan 5719 global res_snan 5720res_qnan: 5721res_snan: 5722 cmp.b DTAG(%a6), &SNAN # is the dst an SNAN? 5723 beq.b dst_snan2 5724 cmp.b DTAG(%a6), &QNAN # is the dst a QNAN? 5725 beq.b dst_qnan2 5726src_nan: 5727 cmp.b STAG(%a6), &QNAN 5728 beq.b src_qnan2 5729 global res_snan_1op 5730res_snan_1op: 5731src_snan2: 5732 bset &0x6, FP_SRC_HI(%a6) # set SNAN bit 5733 or.l &nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6) 5734 lea FP_SRC(%a6), %a0 5735 bra.b nan_comp 5736 global res_qnan_1op 5737res_qnan_1op: 5738src_qnan2: 5739 or.l &nan_mask, USER_FPSR(%a6) 5740 lea FP_SRC(%a6), %a0 5741 bra.b nan_comp 5742dst_snan2: 5743 or.l &nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6) 5744 bset &0x6, FP_DST_HI(%a6) # set SNAN bit 5745 lea FP_DST(%a6), %a0 5746 bra.b nan_comp 5747dst_qnan2: 5748 lea FP_DST(%a6), %a0 5749 cmp.b STAG(%a6), &SNAN 5750 bne nan_done 5751 or.l &aiop_mask+snan_mask, USER_FPSR(%a6) 5752nan_done: 5753 or.l &nan_mask, USER_FPSR(%a6) 5754nan_comp: 5755 btst &0x7, FTEMP_EX(%a0) # is NAN neg? 
5756 beq.b nan_not_neg 5757 or.l &neg_mask, USER_FPSR(%a6) 5758nan_not_neg: 5759 fmovm.x (%a0), &0x80 5760 rts 5761 5762######################################################################### 5763# XDEF **************************************************************** # 5764# res_operr(): return default result during operand error # 5765# # 5766# XREF **************************************************************** # 5767# None # 5768# # 5769# INPUT *************************************************************** # 5770# None # 5771# # 5772# OUTPUT ************************************************************** # 5773# fp0 = default operand error result # 5774# # 5775# ALGORITHM *********************************************************** # 5776# An nonsignalling NAN is returned as the default result when # 5777# an operand error occurs for the following cases: # 5778# # 5779# Multiply: (Infinity x Zero) # 5780# Divide : (Zero / Zero) || (Infinity / Infinity) # 5781# # 5782######################################################################### 5783 5784 global res_operr 5785res_operr: 5786 or.l &nan_mask+operr_mask+aiop_mask, USER_FPSR(%a6) 5787 fmovm.x nan_return(%pc), &0x80 5788 rts 5789 5790nan_return: 5791 long 0x7fff0000, 0xffffffff, 0xffffffff 5792 5793######################################################################### 5794# XDEF **************************************************************** # 5795# _denorm(): denormalize an intermediate result # 5796# # 5797# XREF **************************************************************** # 5798# None # 5799# # 5800# INPUT *************************************************************** # 5801# a0 = points to the operand to be denormalized # 5802# (in the internal extended format) # 5803# # 5804# d0 = rounding precision # 5805# # 5806# OUTPUT ************************************************************** # 5807# a0 = pointer to the denormalized result # 5808# (in the internal extended format) # 5809# # 5810# d0 
= guard,round,sticky # 5811# # 5812# ALGORITHM *********************************************************** # 5813# According to the exponent underflow threshold for the given # 5814# precision, shift the mantissa bits to the right in order raise the # 5815# exponent of the operand to the threshold value. While shifting the # 5816# mantissa bits right, maintain the value of the guard, round, and # 5817# sticky bits. # 5818# other notes: # 5819# (1) _denorm() is called by the underflow routines # 5820# (2) _denorm() does NOT affect the status register # 5821# # 5822######################################################################### 5823 5824# 5825# table of exponent threshold values for each precision 5826# 5827tbl_thresh: 5828 short 0x0 5829 short sgl_thresh 5830 short dbl_thresh 5831 5832 global _denorm 5833_denorm: 5834# 5835# Load the exponent threshold for the precision selected and check 5836# to see if (threshold - exponent) is > 65 in which case we can 5837# simply calculate the sticky bit and zero the mantissa. otherwise 5838# we have to call the denormalization routine. 5839# 5840 lsr.b &0x2, %d0 # shift prec to lo bits 5841 mov.w (tbl_thresh.b,%pc,%d0.w*2), %d1 # load prec threshold 5842 mov.w %d1, %d0 # copy d1 into d0 5843 sub.w FTEMP_EX(%a0), %d0 # diff = threshold - exp 5844 cmpi.w %d0, &66 # is diff > 65? (mant + g,r bits) 5845 bpl.b denorm_set_stky # yes; just calc sticky 5846 5847 clr.l %d0 # clear g,r,s 5848 btst &inex2_bit, FPSR_EXCEPT(%a6) # yes; was INEX2 set? 5849 beq.b denorm_call # no; don't change anything 5850 bset &29, %d0 # yes; set sticky bit 5851 5852denorm_call: 5853 bsr.l dnrm_lp # denormalize the number 5854 rts 5855 5856# 5857# all bit would have been shifted off during the denorm so simply 5858# calculate if the sticky should be set and clear the entire mantissa. 
5859# 5860denorm_set_stky: 5861 mov.l &0x20000000, %d0 # set sticky bit in return value 5862 mov.w %d1, FTEMP_EX(%a0) # load exp with threshold 5863 clr.l FTEMP_HI(%a0) # set d1 = 0 (ms mantissa) 5864 clr.l FTEMP_LO(%a0) # set d2 = 0 (ms mantissa) 5865 rts 5866 5867# # 5868# dnrm_lp(): normalize exponent/mantissa to specified threshold # 5869# # 5870# INPUT: # 5871# %a0 : points to the operand to be denormalized # 5872# %d0{31:29} : initial guard,round,sticky # 5873# %d1{15:0} : denormalization threshold # 5874# OUTPUT: # 5875# %a0 : points to the denormalized operand # 5876# %d0{31:29} : final guard,round,sticky # 5877# # 5878 5879# *** Local Equates *** # 5880set GRS, L_SCR2 # g,r,s temp storage 5881set FTEMP_LO2, L_SCR1 # FTEMP_LO copy 5882 5883 global dnrm_lp 5884dnrm_lp: 5885 5886# 5887# make a copy of FTEMP_LO and place the g,r,s bits directly after it 5888# in memory so as to make the bitfield extraction for denormalization easier. 5889# 5890 mov.l FTEMP_LO(%a0), FTEMP_LO2(%a6) # make FTEMP_LO copy 5891 mov.l %d0, GRS(%a6) # place g,r,s after it 5892 5893# 5894# check to see how much less than the underflow threshold the operand 5895# exponent is. 5896# 5897 mov.l %d1, %d0 # copy the denorm threshold 5898 sub.w FTEMP_EX(%a0), %d1 # d1 = threshold - uns exponent 5899 ble.b dnrm_no_lp # d1 <= 0 5900 cmpi.w %d1, &0x20 # is ( 0 <= d1 < 32) ? 5901 blt.b case_1 # yes 5902 cmpi.w %d1, &0x40 # is (32 <= d1 < 64) ? 
5903 blt.b case_2 # yes 5904 bra.w case_3 # (d1 >= 64) 5905 5906# 5907# No normalization necessary 5908# 5909dnrm_no_lp: 5910 mov.l GRS(%a6), %d0 # restore original g,r,s 5911 rts 5912 5913# 5914# case (0<d1<32) 5915# 5916# %d0 = denorm threshold 5917# %d1 = "n" = amt to shift 5918# 5919# --------------------------------------------------------- 5920# | FTEMP_HI | FTEMP_LO |grs000.........000| 5921# --------------------------------------------------------- 5922# <-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)-> 5923# \ \ \ \ 5924# \ \ \ \ 5925# \ \ \ \ 5926# \ \ \ \ 5927# \ \ \ \ 5928# \ \ \ \ 5929# \ \ \ \ 5930# \ \ \ \ 5931# <-(n)-><-(32 - n)-><------(32)-------><------(32)-------> 5932# --------------------------------------------------------- 5933# |0.....0| NEW_HI | NEW_FTEMP_LO |grs | 5934# --------------------------------------------------------- 5935# 5936case_1: 5937 mov.l %d2, -(%sp) # create temp storage 5938 5939 mov.w %d0, FTEMP_EX(%a0) # exponent = denorm threshold 5940 mov.l &32, %d0 5941 sub.w %d1, %d0 # %d0 = 32 - %d1 5942 5943 cmpi.w %d1, &29 # is shft amt >= 29 5944 blt.b case1_extract # no; no fix needed 5945 mov.b GRS(%a6), %d2 5946 or.b %d2, 3+FTEMP_LO2(%a6) 5947 5948case1_extract: 5949 bfextu FTEMP_HI(%a0){&0:%d0}, %d2 # %d2 = new FTEMP_HI 5950 bfextu FTEMP_HI(%a0){%d0:&32}, %d1 # %d1 = new FTEMP_LO 5951 bfextu FTEMP_LO2(%a6){%d0:&32}, %d0 # %d0 = new G,R,S 5952 5953 mov.l %d2, FTEMP_HI(%a0) # store new FTEMP_HI 5954 mov.l %d1, FTEMP_LO(%a0) # store new FTEMP_LO 5955 5956 bftst %d0{&2:&30} # were bits shifted off? 
5957 beq.b case1_sticky_clear # no; go finish 5958 bset &rnd_stky_bit, %d0 # yes; set sticky bit 5959 5960case1_sticky_clear: 5961 and.l &0xe0000000, %d0 # clear all but G,R,S 5962 mov.l (%sp)+, %d2 # restore temp register 5963 rts 5964 5965# 5966# case (32<=d1<64) 5967# 5968# %d0 = denorm threshold 5969# %d1 = "n" = amt to shift 5970# 5971# --------------------------------------------------------- 5972# | FTEMP_HI | FTEMP_LO |grs000.........000| 5973# --------------------------------------------------------- 5974# <-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)-> 5975# \ \ \ 5976# \ \ \ 5977# \ \ ------------------- 5978# \ -------------------- \ 5979# ------------------- \ \ 5980# \ \ \ 5981# \ \ \ 5982# \ \ \ 5983# <-------(32)------><-(n)-><-(32 - n)-><------(32)-------> 5984# --------------------------------------------------------- 5985# |0...............0|0....0| NEW_LO |grs | 5986# --------------------------------------------------------- 5987# 5988case_2: 5989 mov.l %d2, -(%sp) # create temp storage 5990 5991 mov.w %d0, FTEMP_EX(%a0) # exponent = denorm threshold 5992 subi.w &0x20, %d1 # %d1 now between 0 and 32 5993 mov.l &0x20, %d0 5994 sub.w %d1, %d0 # %d0 = 32 - %d1 5995 5996# subtle step here; or in the g,r,s at the bottom of FTEMP_LO to minimize 5997# the number of bits to check for the sticky detect. 5998# it only plays a role in shift amounts of 61-63. 5999 mov.b GRS(%a6), %d2 6000 or.b %d2, 3+FTEMP_LO2(%a6) 6001 6002 bfextu FTEMP_HI(%a0){&0:%d0}, %d2 # %d2 = new FTEMP_LO 6003 bfextu FTEMP_HI(%a0){%d0:&32}, %d1 # %d1 = new G,R,S 6004 6005 bftst %d1{&2:&30} # were any bits shifted off? 6006 bne.b case2_set_sticky # yes; set sticky bit 6007 bftst FTEMP_LO2(%a6){%d0:&31} # were any bits shifted off? 
6008 bne.b case2_set_sticky # yes; set sticky bit 6009 6010 mov.l %d1, %d0 # move new G,R,S to %d0 6011 bra.b case2_end 6012 6013case2_set_sticky: 6014 mov.l %d1, %d0 # move new G,R,S to %d0 6015 bset &rnd_stky_bit, %d0 # set sticky bit 6016 6017case2_end: 6018 clr.l FTEMP_HI(%a0) # store FTEMP_HI = 0 6019 mov.l %d2, FTEMP_LO(%a0) # store FTEMP_LO 6020 and.l &0xe0000000, %d0 # clear all but G,R,S 6021 6022 mov.l (%sp)+,%d2 # restore temp register 6023 rts 6024 6025# 6026# case (d1>=64) 6027# 6028# %d0 = denorm threshold 6029# %d1 = amt to shift 6030# 6031case_3: 6032 mov.w %d0, FTEMP_EX(%a0) # insert denorm threshold 6033 6034 cmpi.w %d1, &65 # is shift amt > 65? 6035 blt.b case3_64 # no; it's == 64 6036 beq.b case3_65 # no; it's == 65 6037 6038# 6039# case (d1>65) 6040# 6041# Shift value is > 65 and out of range. All bits are shifted off. 6042# Return a zero mantissa with the sticky bit set 6043# 6044 clr.l FTEMP_HI(%a0) # clear hi(mantissa) 6045 clr.l FTEMP_LO(%a0) # clear lo(mantissa) 6046 mov.l &0x20000000, %d0 # set sticky bit 6047 rts 6048 6049# 6050# case (d1 == 64) 6051# 6052# --------------------------------------------------------- 6053# | FTEMP_HI | FTEMP_LO |grs000.........000| 6054# --------------------------------------------------------- 6055# <-------(32)------> 6056# \ \ 6057# \ \ 6058# \ \ 6059# \ ------------------------------ 6060# ------------------------------- \ 6061# \ \ 6062# \ \ 6063# \ \ 6064# <-------(32)------> 6065# --------------------------------------------------------- 6066# |0...............0|0................0|grs | 6067# --------------------------------------------------------- 6068# 6069case3_64: 6070 mov.l FTEMP_HI(%a0), %d0 # fetch hi(mantissa) 6071 mov.l %d0, %d1 # make a copy 6072 and.l &0xc0000000, %d0 # extract G,R 6073 and.l &0x3fffffff, %d1 # extract other bits 6074 6075 bra.b case3_complete 6076 6077# 6078# case (d1 == 65) 6079# 6080# --------------------------------------------------------- 6081# | FTEMP_HI | FTEMP_LO 
|grs000.........000| 6082# --------------------------------------------------------- 6083# <-------(32)------> 6084# \ \ 6085# \ \ 6086# \ \ 6087# \ ------------------------------ 6088# -------------------------------- \ 6089# \ \ 6090# \ \ 6091# \ \ 6092# <-------(31)-----> 6093# --------------------------------------------------------- 6094# |0...............0|0................0|0rs | 6095# --------------------------------------------------------- 6096# 6097case3_65: 6098 mov.l FTEMP_HI(%a0), %d0 # fetch hi(mantissa) 6099 and.l &0x80000000, %d0 # extract R bit 6100 lsr.l &0x1, %d0 # shift high bit into R bit 6101 and.l &0x7fffffff, %d1 # extract other bits 6102 6103case3_complete: 6104# last operation done was an "and" of the bits shifted off so the condition 6105# codes are already set so branch accordingly. 6106 bne.b case3_set_sticky # yes; go set new sticky 6107 tst.l FTEMP_LO(%a0) # were any bits shifted off? 6108 bne.b case3_set_sticky # yes; go set new sticky 6109 tst.b GRS(%a6) # were any bits shifted off? 6110 bne.b case3_set_sticky # yes; go set new sticky 6111 6112# 6113# no bits were shifted off so don't set the sticky bit. 6114# the guard and 6115# the entire mantissa is zero. 6116# 6117 clr.l FTEMP_HI(%a0) # clear hi(mantissa) 6118 clr.l FTEMP_LO(%a0) # clear lo(mantissa) 6119 rts 6120 6121# 6122# some bits were shifted off so set the sticky bit. 6123# the entire mantissa is zero. 
6124# 6125case3_set_sticky: 6126 bset &rnd_stky_bit,%d0 # set new sticky bit 6127 clr.l FTEMP_HI(%a0) # clear hi(mantissa) 6128 clr.l FTEMP_LO(%a0) # clear lo(mantissa) 6129 rts 6130 6131######################################################################### 6132# XDEF **************************************************************** # 6133# _round(): round result according to precision/mode # 6134# # 6135# XREF **************************************************************** # 6136# None # 6137# # 6138# INPUT *************************************************************** # 6139# a0 = ptr to input operand in internal extended format # 6140# d1(hi) = contains rounding precision: # 6141# ext = $0000xxxx # 6142# sgl = $0004xxxx # 6143# dbl = $0008xxxx # 6144# d1(lo) = contains rounding mode: # 6145# RN = $xxxx0000 # 6146# RZ = $xxxx0001 # 6147# RM = $xxxx0002 # 6148# RP = $xxxx0003 # 6149# d0{31:29} = contains the g,r,s bits (extended) # 6150# # 6151# OUTPUT ************************************************************** # 6152# a0 = pointer to rounded result # 6153# # 6154# ALGORITHM *********************************************************** # 6155# On return the value pointed to by a0 is correctly rounded, # 6156# a0 is preserved and the g-r-s bits in d0 are cleared. # 6157# The result is not typed - the tag field is invalid. The # 6158# result is still in the internal extended format. # 6159# # 6160# The INEX bit of USER_FPSR will be set if the rounded result was # 6161# inexact (i.e. if any of the g-r-s bits were set). # 6162# # 6163######################################################################### 6164 6165 global _round 6166_round: 6167# 6168# ext_grs() looks at the rounding precision and sets the appropriate 6169# G,R,S bits. 6170# If (G,R,S == 0) then result is exact and round is done, else set 6171# the inex flag in status reg and continue. 6172# 6173 bsr.l ext_grs # extract G,R,S 6174 6175 tst.l %d0 # are G,R,S zero? 
6176 beq.w truncate # yes; round is complete 6177 6178 or.w &inx2a_mask, 2+USER_FPSR(%a6) # set inex2/ainex 6179 6180# 6181# Use rounding mode as an index into a jump table for these modes. 6182# All of the following assumes grs != 0. 6183# 6184 mov.w (tbl_mode.b,%pc,%d1.w*2), %a1 # load jump offset 6185 jmp (tbl_mode.b,%pc,%a1) # jmp to rnd mode handler 6186 6187tbl_mode: 6188 short rnd_near - tbl_mode 6189 short truncate - tbl_mode # RZ always truncates 6190 short rnd_mnus - tbl_mode 6191 short rnd_plus - tbl_mode 6192 6193################################################################# 6194# ROUND PLUS INFINITY # 6195# # 6196# If sign of fp number = 0 (positive), then add 1 to l. # 6197################################################################# 6198rnd_plus: 6199 tst.b FTEMP_SGN(%a0) # check for sign 6200 bmi.w truncate # if positive then truncate 6201 6202 mov.l &0xffffffff, %d0 # force g,r,s to be all f's 6203 swap %d1 # set up d1 for round prec. 6204 6205 cmpi.b %d1, &s_mode # is prec = sgl? 6206 beq.w add_sgl # yes 6207 bgt.w add_dbl # no; it's dbl 6208 bra.w add_ext # no; it's ext 6209 6210################################################################# 6211# ROUND MINUS INFINITY # 6212# # 6213# If sign of fp number = 1 (negative), then add 1 to l. # 6214################################################################# 6215rnd_mnus: 6216 tst.b FTEMP_SGN(%a0) # check for sign 6217 bpl.w truncate # if negative then truncate 6218 6219 mov.l &0xffffffff, %d0 # force g,r,s to be all f's 6220 swap %d1 # set up d1 for round prec. 6221 6222 cmpi.b %d1, &s_mode # is prec = sgl? 6223 beq.w add_sgl # yes 6224 bgt.w add_dbl # no; it's dbl 6225 bra.w add_ext # no; it's ext 6226 6227################################################################# 6228# ROUND NEAREST # 6229# # 6230# If (g=1), then add 1 to l and if (r=s=0), then clear l # 6231# Note that this will round to even in case of a tie. 
# 6232################################################################# 6233rnd_near: 6234 asl.l &0x1, %d0 # shift g-bit to c-bit 6235 bcc.w truncate # if (g=1) then 6236 6237 swap %d1 # set up d1 for round prec. 6238 6239 cmpi.b %d1, &s_mode # is prec = sgl? 6240 beq.w add_sgl # yes 6241 bgt.w add_dbl # no; it's dbl 6242 bra.w add_ext # no; it's ext 6243 6244# *** LOCAL EQUATES *** 6245set ad_1_sgl, 0x00000100 # constant to add 1 to l-bit in sgl prec 6246set ad_1_dbl, 0x00000800 # constant to add 1 to l-bit in dbl prec 6247 6248######################### 6249# ADD SINGLE # 6250######################### 6251add_sgl: 6252 add.l &ad_1_sgl, FTEMP_HI(%a0) 6253 bcc.b scc_clr # no mantissa overflow 6254 roxr.w FTEMP_HI(%a0) # shift v-bit back in 6255 roxr.w FTEMP_HI+2(%a0) # shift v-bit back in 6256 add.w &0x1, FTEMP_EX(%a0) # and incr exponent 6257scc_clr: 6258 tst.l %d0 # test for rs = 0 6259 bne.b sgl_done 6260 and.w &0xfe00, FTEMP_HI+2(%a0) # clear the l-bit 6261sgl_done: 6262 and.l &0xffffff00, FTEMP_HI(%a0) # truncate bits beyond sgl limit 6263 clr.l FTEMP_LO(%a0) # clear d2 6264 rts 6265 6266######################### 6267# ADD EXTENDED # 6268######################### 6269add_ext: 6270 addq.l &1,FTEMP_LO(%a0) # add 1 to l-bit 6271 bcc.b xcc_clr # test for carry out 6272 addq.l &1,FTEMP_HI(%a0) # propagate carry 6273 bcc.b xcc_clr 6274 roxr.w FTEMP_HI(%a0) # mant is 0 so restore v-bit 6275 roxr.w FTEMP_HI+2(%a0) # mant is 0 so restore v-bit 6276 roxr.w FTEMP_LO(%a0) 6277 roxr.w FTEMP_LO+2(%a0) 6278 add.w &0x1,FTEMP_EX(%a0) # and inc exp 6279xcc_clr: 6280 tst.l %d0 # test rs = 0 6281 bne.b add_ext_done 6282 and.b &0xfe,FTEMP_LO+3(%a0) # clear the l bit 6283add_ext_done: 6284 rts 6285 6286######################### 6287# ADD DOUBLE # 6288######################### 6289add_dbl: 6290 add.l &ad_1_dbl, FTEMP_LO(%a0) # add 1 to lsb 6291 bcc.b dcc_clr # no carry 6292 addq.l &0x1, FTEMP_HI(%a0) # propagate carry 6293 bcc.b dcc_clr # no carry 6294 6295 roxr.w FTEMP_HI(%a0) # 
mant is 0 so restore v-bit 6296 roxr.w FTEMP_HI+2(%a0) # mant is 0 so restore v-bit 6297 roxr.w FTEMP_LO(%a0) 6298 roxr.w FTEMP_LO+2(%a0) 6299 addq.w &0x1, FTEMP_EX(%a0) # incr exponent 6300dcc_clr: 6301 tst.l %d0 # test for rs = 0 6302 bne.b dbl_done 6303 and.w &0xf000, FTEMP_LO+2(%a0) # clear the l-bit 6304 6305dbl_done: 6306 and.l &0xfffff800,FTEMP_LO(%a0) # truncate bits beyond dbl limit 6307 rts 6308 6309########################### 6310# Truncate all other bits # 6311########################### 6312truncate: 6313 swap %d1 # select rnd prec 6314 6315 cmpi.b %d1, &s_mode # is prec sgl? 6316 beq.w sgl_done # yes 6317 bgt.b dbl_done # no; it's dbl 6318 rts # no; it's ext 6319 6320 6321# 6322# ext_grs(): extract guard, round and sticky bits according to 6323# rounding precision. 6324# 6325# INPUT 6326# d0 = extended precision g,r,s (in d0{31:29}) 6327# d1 = {PREC,ROUND} 6328# OUTPUT 6329# d0{31:29} = guard, round, sticky 6330# 6331# The ext_grs extract the guard/round/sticky bits according to the 6332# selected rounding precision. It is called by the round subroutine 6333# only. All registers except d0 are kept intact. d0 becomes an 6334# updated guard,round,sticky in d0{31:29} 6335# 6336# Notes: the ext_grs uses the round PREC, and therefore has to swap d1 6337# prior to usage, and needs to restore d1 to original. this 6338# routine is tightly tied to the round routine and not meant to 6339# uphold standard subroutine calling practices. 6340# 6341 6342ext_grs: 6343 swap %d1 # have d1.w point to round precision 6344 tst.b %d1 # is rnd prec = extended? 6345 bne.b ext_grs_not_ext # no; go handle sgl or dbl 6346 6347# 6348# %d0 actually already hold g,r,s since _round() had it before calling 6349# this function. so, as long as we don't disturb it, we are "returning" it. 
6350# 6351ext_grs_ext: 6352 swap %d1 # yes; return to correct positions 6353 rts 6354 6355ext_grs_not_ext: 6356 movm.l &0x3000, -(%sp) # make some temp registers {d2/d3} 6357 6358 cmpi.b %d1, &s_mode # is rnd prec = sgl? 6359 bne.b ext_grs_dbl # no; go handle dbl 6360 6361# 6362# sgl: 6363# 96 64 40 32 0 6364# ----------------------------------------------------- 6365# | EXP |XXXXXXX| |xx | |grs| 6366# ----------------------------------------------------- 6367# <--(24)--->nn\ / 6368# ee --------------------- 6369# ww | 6370# v 6371# gr new sticky 6372# 6373ext_grs_sgl: 6374 bfextu FTEMP_HI(%a0){&24:&2}, %d3 # sgl prec. g-r are 2 bits right 6375 mov.l &30, %d2 # of the sgl prec. limits 6376 lsl.l %d2, %d3 # shift g-r bits to MSB of d3 6377 mov.l FTEMP_HI(%a0), %d2 # get word 2 for s-bit test 6378 and.l &0x0000003f, %d2 # s bit is the or of all other 6379 bne.b ext_grs_st_stky # bits to the right of g-r 6380 tst.l FTEMP_LO(%a0) # test lower mantissa 6381 bne.b ext_grs_st_stky # if any are set, set sticky 6382 tst.l %d0 # test original g,r,s 6383 bne.b ext_grs_st_stky # if any are set, set sticky 6384 bra.b ext_grs_end_sd # if words 3 and 4 are clr, exit 6385 6386# 6387# dbl: 6388# 96 64 32 11 0 6389# ----------------------------------------------------- 6390# | EXP |XXXXXXX| | |xx |grs| 6391# ----------------------------------------------------- 6392# nn\ / 6393# ee ------- 6394# ww | 6395# v 6396# gr new sticky 6397# 6398ext_grs_dbl: 6399 bfextu FTEMP_LO(%a0){&21:&2}, %d3 # dbl-prec. g-r are 2 bits right 6400 mov.l &30, %d2 # of the dbl prec. 
limits 6401 lsl.l %d2, %d3 # shift g-r bits to the MSB of d3 6402 mov.l FTEMP_LO(%a0), %d2 # get lower mantissa for s-bit test 6403 and.l &0x000001ff, %d2 # s bit is the or-ing of all 6404 bne.b ext_grs_st_stky # other bits to the right of g-r 6405 tst.l %d0 # test word original g,r,s 6406 bne.b ext_grs_st_stky # if any are set, set sticky 6407 bra.b ext_grs_end_sd # if clear, exit 6408 6409ext_grs_st_stky: 6410 bset &rnd_stky_bit, %d3 # set sticky bit 6411ext_grs_end_sd: 6412 mov.l %d3, %d0 # return grs to d0 6413 6414 movm.l (%sp)+, &0xc # restore scratch registers {d2/d3} 6415 6416 swap %d1 # restore d1 to original 6417 rts 6418 6419######################################################################### 6420# norm(): normalize the mantissa of an extended precision input. the # 6421# input operand should not be normalized already. # 6422# # 6423# XDEF **************************************************************** # 6424# norm() # 6425# # 6426# XREF **************************************************************** # 6427# none # 6428# # 6429# INPUT *************************************************************** # 6430# a0 = pointer fp extended precision operand to normalize # 6431# # 6432# OUTPUT ************************************************************** # 6433# d0 = number of bit positions the mantissa was shifted # 6434# a0 = the input operand's mantissa is normalized; the exponent # 6435# is unchanged. # 6436# # 6437######################################################################### 6438 global norm 6439norm: 6440 mov.l %d2, -(%sp) # create some temp regs 6441 mov.l %d3, -(%sp) 6442 6443 mov.l FTEMP_HI(%a0), %d0 # load hi(mantissa) 6444 mov.l FTEMP_LO(%a0), %d1 # load lo(mantissa) 6445 6446 bfffo %d0{&0:&32}, %d2 # how many places to shift? 6447 beq.b norm_lo # hi(man) is all zeroes! 

norm_hi:
	lsl.l		%d2, %d0		# left shift hi(man)
	bfextu		%d1{&0:%d2}, %d3	# extract lo bits

	or.l		%d3, %d0		# create hi(man)
	lsl.l		%d2, %d1		# create lo(man)

	mov.l		%d0, FTEMP_HI(%a0)	# store new hi(man)
	mov.l		%d1, FTEMP_LO(%a0)	# store new lo(man)

	mov.l		%d2, %d0		# return shift amount

	mov.l		(%sp)+, %d3		# restore temp regs
	mov.l		(%sp)+, %d2

	rts

norm_lo:
	bfffo		%d1{&0:&32}, %d2	# how many places to shift?
	lsl.l		%d2, %d1		# shift lo(man)
	add.l		&32, %d2		# add 32 to shft amount

	mov.l		%d1, FTEMP_HI(%a0)	# store hi(man)
	clr.l		FTEMP_LO(%a0)		# lo(man) is now zero

	mov.l		%d2, %d0		# return shift amount

	mov.l		(%sp)+, %d3		# restore temp regs
	mov.l		(%sp)+, %d2

	rts

#########################################################################
# unnorm_fix(): - changes an UNNORM to one of NORM, DENORM, or ZERO	#
#		- returns corresponding optype tag			#
#									#
# XDEF **************************************************************** #
#	unnorm_fix()							#
#									#
# XREF **************************************************************** #
#	norm() - normalize the mantissa					#
#									#
# INPUT *************************************************************** #
#	a0 = pointer to unnormalized extended precision number		#
#									#
# OUTPUT ************************************************************** #
#	d0 = optype tag - is corrected to one of NORM, DENORM, or ZERO	#
#	a0 = input operand has been converted to a norm, denorm, or	#
#	     zero; both the exponent and mantissa are changed.		#
#									#
#########################################################################

	global		unnorm_fix
unnorm_fix:
	bfffo		FTEMP_HI(%a0){&0:&32}, %d0 # how many shifts are needed?
	bne.b		unnorm_shift		# hi(man) is not all zeroes

#
# hi(man) is all zeroes so see if any bits in lo(man) are set
#
unnorm_chk_lo:
	bfffo		FTEMP_LO(%a0){&0:&32}, %d0 # is operand really a zero?
	beq.w		unnorm_zero		# yes

	add.w		&32, %d0		# no; fix shift distance

#
# d0 = # shifts needed for complete normalization
#
unnorm_shift:
	clr.l		%d1			# clear top word
	mov.w		FTEMP_EX(%a0), %d1	# extract exponent
	and.w		&0x7fff, %d1		# strip off sgn

	cmp.w		%d0, %d1		# will denorm push exp < 0?
	bgt.b		unnorm_nrm_zero		# yes; denorm only until exp = 0

#
# exponent would not go < 0. Therefore, number stays normalized
#
	sub.w		%d0, %d1		# shift exponent value
	mov.w		FTEMP_EX(%a0), %d0	# load old exponent
	and.w		&0x8000, %d0		# save old sign
	or.w		%d0, %d1		# {sgn,new exp}
	mov.w		%d1, FTEMP_EX(%a0)	# insert new exponent

	bsr.l		norm			# normalize UNNORM

	mov.b		&NORM, %d0		# return new optype tag
	rts

#
# exponent would go < 0, so only denormalize until exp = 0
#
unnorm_nrm_zero:
	cmp.b		%d1, &32		# is exp <= 32?
	bgt.b		unnorm_nrm_zero_lrg	# no; go handle large exponent

	bfextu		FTEMP_HI(%a0){%d1:&32}, %d0 # extract new hi(man)
	mov.l		%d0, FTEMP_HI(%a0)	# save new hi(man)

	mov.l		FTEMP_LO(%a0), %d0	# fetch old lo(man)
	lsl.l		%d1, %d0		# extract new lo(man)
	mov.l		%d0, FTEMP_LO(%a0)	# save new lo(man)

	and.w		&0x8000, FTEMP_EX(%a0)	# set exp = 0

	mov.b		&DENORM, %d0		# return new optype tag
	rts

#
# only mantissa bits set are in lo(man)
#
unnorm_nrm_zero_lrg:
	sub.w		&32, %d1		# adjust shft amt by 32

	mov.l		FTEMP_LO(%a0), %d0	# fetch old lo(man)
	lsl.l		%d1, %d0		# left shift lo(man)

	mov.l		%d0, FTEMP_HI(%a0)	# store new hi(man)
	clr.l		FTEMP_LO(%a0)		# lo(man) = 0

	and.w		&0x8000, FTEMP_EX(%a0)	# set exp = 0

	mov.b		&DENORM, %d0		# return new optype tag
	rts

#
# whole mantissa is zero so this UNNORM is actually a zero
#
unnorm_zero:
	and.w		&0x8000, FTEMP_EX(%a0)	# force exponent to zero

	mov.b		&ZERO, %d0		# fix optype tag
	rts

#########################################################################
# XDEF **************************************************************** #
#	set_tag_x(): return the optype of the input ext fp number	#
#									#
# XREF **************************************************************** #
#	None								#
#									#
# INPUT *************************************************************** #
#	a0 = pointer to extended precision operand			#
#									#
# OUTPUT ************************************************************** #
#	d0 = value of type tag						#
#		one of: NORM, INF, QNAN, SNAN, DENORM, UNNORM, ZERO	#
#									#
# ALGORITHM *********************************************************** #
#	Simply test the exponent, j-bit, and mantissa values to		#
# determine the type of operand.
#
#	If it's an unnormalized zero, alter the operand and force it	#
# to be a normal zero.							#
#									#
#########################################################################

	global		set_tag_x
set_tag_x:
	mov.w		FTEMP_EX(%a0), %d0	# extract exponent
	andi.w		&0x7fff, %d0		# strip off sign
	cmpi.w		%d0, &0x7fff		# is (EXP == MAX)?
	beq.b		inf_or_nan_x
not_inf_or_nan_x:
	btst		&0x7,FTEMP_HI(%a0)	# is j-bit set?
	beq.b		not_norm_x
is_norm_x:
	mov.b		&NORM, %d0
	rts
not_norm_x:
	tst.w		%d0			# is exponent = 0?
	bne.b		is_unnorm_x
not_unnorm_x:
	tst.l		FTEMP_HI(%a0)
	bne.b		is_denorm_x
	tst.l		FTEMP_LO(%a0)
	bne.b		is_denorm_x
is_zero_x:
	mov.b		&ZERO, %d0
	rts
is_denorm_x:
	mov.b		&DENORM, %d0
	rts
# must distinguish now "Unnormalized zeroes" which we
# must convert to zero.
is_unnorm_x:
	tst.l		FTEMP_HI(%a0)
	bne.b		is_unnorm_reg_x
	tst.l		FTEMP_LO(%a0)
	bne.b		is_unnorm_reg_x
# it's an "unnormalized zero". let's convert it to an actual zero...
	andi.w		&0x8000,FTEMP_EX(%a0)	# clear exponent
	mov.b		&ZERO, %d0
	rts
is_unnorm_reg_x:
	mov.b		&UNNORM, %d0
	rts
inf_or_nan_x:
	tst.l		FTEMP_LO(%a0)
	bne.b		is_nan_x
	mov.l		FTEMP_HI(%a0), %d0
	and.l		&0x7fffffff, %d0	# msb is a don't care!
	bne.b		is_nan_x
is_inf_x:
	mov.b		&INF, %d0
	rts
is_nan_x:
	btst		&0x6, FTEMP_HI(%a0)	# SNAN bit set means QNAN
	beq.b		is_snan_x
	mov.b		&QNAN, %d0
	rts
is_snan_x:
	mov.b		&SNAN, %d0
	rts

#########################################################################
# XDEF **************************************************************** #
#	set_tag_d(): return the optype of the input dbl fp number	#
#									#
# XREF **************************************************************** #
#	None								#
#									#
# INPUT *************************************************************** #
#	a0 = points to double precision operand				#
#									#
# OUTPUT ************************************************************** #
#	d0 = value of type tag						#
#		one of: NORM, INF, QNAN, SNAN, DENORM, ZERO		#
#									#
# ALGORITHM *********************************************************** #
#	Simply test the exponent, j-bit, and mantissa values to		#
# determine the type of operand.					#
#									#
#########################################################################

	global		set_tag_d
set_tag_d:
	mov.l		FTEMP(%a0), %d0		# load hi word {sgn,exp,man}
	mov.l		%d0, %d1		# keep a copy for mantissa tests

	andi.l		&0x7ff00000, %d0	# isolate biased exponent
	beq.b		zero_or_denorm_d

	cmpi.l		%d0, &0x7ff00000	# is (EXP == MAX)?
	beq.b		inf_or_nan_d

is_norm_d:
	mov.b		&NORM, %d0
	rts
zero_or_denorm_d:
	and.l		&0x000fffff, %d1	# any hi mantissa bits set?
	bne		is_denorm_d
	tst.l		4+FTEMP(%a0)		# any lo mantissa bits set?
	bne		is_denorm_d
is_zero_d:
	mov.b		&ZERO, %d0
	rts
is_denorm_d:
	mov.b		&DENORM, %d0
	rts
inf_or_nan_d:
	and.l		&0x000fffff, %d1	# any hi mantissa bits set?
	bne		is_nan_d
	tst.l		4+FTEMP(%a0)		# any lo mantissa bits set?
	bne		is_nan_d
is_inf_d:
	mov.b		&INF, %d0
	rts
is_nan_d:
	btst		&19, %d1		# SNAN bit set means QNAN
	bne		is_qnan_d
is_snan_d:
	mov.b		&SNAN, %d0
	rts
is_qnan_d:
	mov.b		&QNAN, %d0
	rts

#########################################################################
# XDEF **************************************************************** #
#	set_tag_s(): return the optype of the input sgl fp number	#
#									#
# XREF **************************************************************** #
#	None								#
#									#
# INPUT *************************************************************** #
#	a0 = pointer to single precision operand			#
#									#
# OUTPUT ************************************************************** #
#	d0 = value of type tag						#
#		one of: NORM, INF, QNAN, SNAN, DENORM, ZERO		#
#									#
# ALGORITHM *********************************************************** #
#	Simply test the exponent, j-bit, and mantissa values to		#
# determine the type of operand.					#
#									#
#########################################################################

	global		set_tag_s
set_tag_s:
	mov.l		FTEMP(%a0), %d0		# load sgl operand {sgn,exp,man}
	mov.l		%d0, %d1		# keep a copy for mantissa tests

	andi.l		&0x7f800000, %d0	# isolate biased exponent
	beq.b		zero_or_denorm_s

	cmpi.l		%d0, &0x7f800000	# is (EXP == MAX)?
	beq.b		inf_or_nan_s

is_norm_s:
	mov.b		&NORM, %d0
	rts
zero_or_denorm_s:
	and.l		&0x007fffff, %d1	# any mantissa bits set?
	bne		is_denorm_s
is_zero_s:
	mov.b		&ZERO, %d0
	rts
is_denorm_s:
	mov.b		&DENORM, %d0
	rts
inf_or_nan_s:
	and.l		&0x007fffff, %d1	# any mantissa bits set?
	bne		is_nan_s
is_inf_s:
	mov.b		&INF, %d0
	rts
is_nan_s:
	btst		&22, %d1		# SNAN bit set means QNAN
	bne		is_qnan_s
is_snan_s:
	mov.b		&SNAN, %d0
	rts
is_qnan_s:
	mov.b		&QNAN, %d0
	rts

#########################################################################
# XDEF **************************************************************** #
#	unf_res(): routine to produce default underflow result of a	#
#		   scaled extended precision number; this is used by	#
#		   fadd/fdiv/fmul/etc. emulation routines.		#
#	unf_res4(): same as above but for fsglmul/fsgldiv which use	#
#		    single round prec and extended prec mode.
# 6794# # 6795# XREF **************************************************************** # 6796# _denorm() - denormalize according to scale factor # 6797# _round() - round denormalized number according to rnd prec # 6798# # 6799# INPUT *************************************************************** # 6800# a0 = pointer to extended precison operand # 6801# d0 = scale factor # 6802# d1 = rounding precision/mode # 6803# # 6804# OUTPUT ************************************************************** # 6805# a0 = pointer to default underflow result in extended precision # 6806# d0.b = result FPSR_cc which caller may or may not want to save # 6807# # 6808# ALGORITHM *********************************************************** # 6809# Convert the input operand to "internal format" which means the # 6810# exponent is extended to 16 bits and the sign is stored in the unused # 6811# portion of the extended precison operand. Denormalize the number # 6812# according to the scale factor passed in d0. Then, round the # 6813# denormalized result. # 6814# Set the FPSR_exc bits as appropriate but return the cc bits in # 6815# d0 in case the caller doesn't want to save them (as is the case for # 6816# fmove out). # 6817# unf_res4() for fsglmul/fsgldiv forces the denorm to extended # 6818# precision and the rounding mode to single. # 6819# # 6820######################################################################### 6821 global unf_res 6822unf_res: 6823 mov.l %d1, -(%sp) # save rnd prec,mode on stack 6824 6825 btst &0x7, FTEMP_EX(%a0) # make "internal" format 6826 sne FTEMP_SGN(%a0) 6827 6828 mov.w FTEMP_EX(%a0), %d1 # extract exponent 6829 and.w &0x7fff, %d1 6830 sub.w %d0, %d1 6831 mov.w %d1, FTEMP_EX(%a0) # insert 16 bit exponent 6832 6833 mov.l %a0, -(%sp) # save operand ptr during calls 6834 6835 mov.l 0x4(%sp),%d0 # pass rnd prec. 
6836 andi.w &0x00c0,%d0 6837 lsr.w &0x4,%d0 6838 bsr.l _denorm # denorm result 6839 6840 mov.l (%sp),%a0 6841 mov.w 0x6(%sp),%d1 # load prec:mode into %d1 6842 andi.w &0xc0,%d1 # extract rnd prec 6843 lsr.w &0x4,%d1 6844 swap %d1 6845 mov.w 0x6(%sp),%d1 6846 andi.w &0x30,%d1 6847 lsr.w &0x4,%d1 6848 bsr.l _round # round the denorm 6849 6850 mov.l (%sp)+, %a0 6851 6852# result is now rounded properly. convert back to normal format 6853 bclr &0x7, FTEMP_EX(%a0) # clear sgn first; may have residue 6854 tst.b FTEMP_SGN(%a0) # is "internal result" sign set? 6855 beq.b unf_res_chkifzero # no; result is positive 6856 bset &0x7, FTEMP_EX(%a0) # set result sgn 6857 clr.b FTEMP_SGN(%a0) # clear temp sign 6858 6859# the number may have become zero after rounding. set ccodes accordingly. 6860unf_res_chkifzero: 6861 clr.l %d0 6862 tst.l FTEMP_HI(%a0) # is value now a zero? 6863 bne.b unf_res_cont # no 6864 tst.l FTEMP_LO(%a0) 6865 bne.b unf_res_cont # no 6866# bset &z_bit, FPSR_CC(%a6) # yes; set zero ccode bit 6867 bset &z_bit, %d0 # yes; set zero ccode bit 6868 6869unf_res_cont: 6870 6871# 6872# can inex1 also be set along with unfl and inex2??? 6873# 6874# we know that underflow has occurred. aunfl should be set if INEX2 is also set. 6875# 6876 btst &inex2_bit, FPSR_EXCEPT(%a6) # is INEX2 set? 6877 beq.b unf_res_end # no 6878 bset &aunfl_bit, FPSR_AEXCEPT(%a6) # yes; set aunfl 6879 6880unf_res_end: 6881 add.l &0x4, %sp # clear stack 6882 rts 6883 6884# unf_res() for fsglmul() and fsgldiv(). 
# unf_res4():
#  variant of unf_res() for fsglmul/fsgldiv: the denormalization is
#  forced to extended precision and the rounding precision is forced
#  to single; only the rounding MODE is taken from the caller's d1.
#  inputs/outputs otherwise as documented for unf_res() above.
	global		unf_res4
unf_res4:
	mov.l		%d1,-(%sp)		# save rnd prec,mode on stack

# move the sign bit into FTEMP_SGN ("internal" format) so all 16 bits
# of FTEMP_EX can hold the scaled exponent.
	btst		&0x7,FTEMP_EX(%a0)	# make "internal" format
	sne		FTEMP_SGN(%a0)		# FTEMP_SGN = 0xff if negative

	mov.w		FTEMP_EX(%a0),%d1	# extract exponent
	and.w		&0x7fff,%d1		# strip sign bit
	sub.w		%d0,%d1			# apply scale factor
	mov.w		%d1,FTEMP_EX(%a0)	# insert 16 bit exponent

	mov.l		%a0,-(%sp)		# save operand ptr during calls

	clr.l		%d0			# force rnd prec = ext
	bsr.l		_denorm			# denorm result

# _round() expects rnd prec in the hi word of d1 and rnd mode in the lo word
	mov.l		(%sp),%a0		# restore operand ptr
	mov.w		&s_mode,%d1		# force rnd prec = sgl
	swap		%d1			# prec -> hi word of d1
	mov.w		0x6(%sp),%d1		# load rnd mode
	andi.w		&0x30,%d1		# extract rnd mode
	lsr.w		&0x4,%d1
	bsr.l		_round			# round the denorm

	mov.l		(%sp)+,%a0		# restore operand ptr

# result is now rounded properly. convert back to normal format
	bclr		&0x7,FTEMP_EX(%a0)	# clear sgn first; may have residue
	tst.b		FTEMP_SGN(%a0)		# is "internal result" sign set?
	beq.b		unf_res4_chkifzero	# no; result is positive
	bset		&0x7,FTEMP_EX(%a0)	# set result sgn
	clr.b		FTEMP_SGN(%a0)		# clear temp sign

# the number may have become zero after rounding. set ccodes accordingly.
unf_res4_chkifzero:
	clr.l		%d0
	tst.l		FTEMP_HI(%a0)		# is value now a zero?
	bne.b		unf_res4_cont		# no
	tst.l		FTEMP_LO(%a0)
	bne.b		unf_res4_cont		# no
#	bset		&z_bit,FPSR_CC(%a6)	# yes; set zero ccode bit
	bset		&z_bit,%d0		# yes; set zero ccode bit
						# (returned in d0, not written
						#  to FPSR_CC -- caller decides)

unf_res4_cont:

#
# can inex1 also be set along with unfl and inex2???
#
# we know that underflow has occurred. aunfl should be set if INEX2 is also set.
#
	btst		&inex2_bit,FPSR_EXCEPT(%a6)	# is INEX2 set?
	beq.b		unf_res4_end		# no
	bset		&aunfl_bit,FPSR_AEXCEPT(%a6)	# yes; set aunfl

unf_res4_end:
	add.l		&0x4,%sp		# pop saved rnd prec,mode
	rts

#########################################################################
# XDEF ****************************************************************	#
#	ovf_res(): routine to produce the default overflow result of	#
#		   an overflowing number.				#
#	ovf_res2(): same as above but the rnd mode/prec are passed	#
#		    differently.					#
#									#
# XREF ****************************************************************	#
#	none								#
#									#
# INPUT ***************************************************************	#
#	d1.b	= '-1' => (-); '0' => (+)				#
#	ovf_res():							#
#		d0 = rnd mode/prec					#
#	ovf_res2():							#
#		hi(d0) = rnd prec					#
#		lo(d0) = rnd mode					#
#									#
# OUTPUT **************************************************************	#
#	a0 = points to extended precision result			#
#	d0.b = condition code bits					#
#									#
# ALGORITHM ***********************************************************	#
#	The default overflow result can be determined by the sign of	#
# the result and the rounding mode/prec in effect. These bits are	#
# concatenated together to create an index into the default result	#
# table. A pointer to the correct result is returned in a0. The	#
# resulting condition codes are returned in d0 in case the caller	#
# doesn't want FPSR_cc altered (as is the case for fmove out).
# 6973# # 6974######################################################################### 6975 6976 global ovf_res 6977ovf_res: 6978 andi.w &0x10,%d1 # keep result sign 6979 lsr.b &0x4,%d0 # shift prec/mode 6980 or.b %d0,%d1 # concat the two 6981 mov.w %d1,%d0 # make a copy 6982 lsl.b &0x1,%d1 # multiply d1 by 2 6983 bra.b ovf_res_load 6984 6985 global ovf_res2 6986ovf_res2: 6987 and.w &0x10, %d1 # keep result sign 6988 or.b %d0, %d1 # insert rnd mode 6989 swap %d0 6990 or.b %d0, %d1 # insert rnd prec 6991 mov.w %d1, %d0 # make a copy 6992 lsl.b &0x1, %d1 # shift left by 1 6993 6994# 6995# use the rounding mode, precision, and result sign as in index into the 6996# two tables below to fetch the default result and the result ccodes. 6997# 6998ovf_res_load: 6999 mov.b (tbl_ovfl_cc.b,%pc,%d0.w*1), %d0 # fetch result ccodes 7000 lea (tbl_ovfl_result.b,%pc,%d1.w*8), %a0 # return result ptr 7001 7002 rts 7003 7004tbl_ovfl_cc: 7005 byte 0x2, 0x0, 0x0, 0x2 7006 byte 0x2, 0x0, 0x0, 0x2 7007 byte 0x2, 0x0, 0x0, 0x2 7008 byte 0x0, 0x0, 0x0, 0x0 7009 byte 0x2+0x8, 0x8, 0x2+0x8, 0x8 7010 byte 0x2+0x8, 0x8, 0x2+0x8, 0x8 7011 byte 0x2+0x8, 0x8, 0x2+0x8, 0x8 7012 7013tbl_ovfl_result: 7014 long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN 7015 long 0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RZ 7016 long 0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RM 7017 long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP 7018 7019 long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN 7020 long 0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RZ 7021 long 0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RM 7022 long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP 7023 7024 long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN 7025 long 0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RZ 7026 long 0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RM 7027 long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP 7028 7029 long 
0x00000000,0x00000000,0x00000000,0x00000000 7030 long 0x00000000,0x00000000,0x00000000,0x00000000 7031 long 0x00000000,0x00000000,0x00000000,0x00000000 7032 long 0x00000000,0x00000000,0x00000000,0x00000000 7033 7034 long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN 7035 long 0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RZ 7036 long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM 7037 long 0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RP 7038 7039 long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN 7040 long 0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RZ 7041 long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM 7042 long 0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RP 7043 7044 long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN 7045 long 0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RZ 7046 long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM 7047 long 0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RP 7048 7049######################################################################### 7050# XDEF **************************************************************** # 7051# fout(): move from fp register to memory or data register # 7052# # 7053# XREF **************************************************************** # 7054# _round() - needed to create EXOP for sgl/dbl precision # 7055# norm() - needed to create EXOP for extended precision # 7056# ovf_res() - create default overflow result for sgl/dbl precision# 7057# unf_res() - create default underflow result for sgl/dbl prec. # 7058# dst_dbl() - create rounded dbl precision result. # 7059# dst_sgl() - create rounded sgl precision result. # 7060# fetch_dreg() - fetch dynamic k-factor reg for packed. # 7061# bindec() - convert FP binary number to packed number. # 7062# _mem_write() - write data to memory. # 7063# _mem_write2() - write data to memory unless supv mode -(a7) exc.# 7064# _dmem_write_{byte,word,long}() - write data to memory. 
# 7065# store_dreg_{b,w,l}() - store data to data register file. # 7066# facc_out_{b,w,l,d,x}() - data access error occurred. # 7067# # 7068# INPUT *************************************************************** # 7069# a0 = pointer to extended precision source operand # 7070# d0 = round prec,mode # 7071# # 7072# OUTPUT ************************************************************** # 7073# fp0 : intermediate underflow or overflow result if # 7074# OVFL/UNFL occurred for a sgl or dbl operand # 7075# # 7076# ALGORITHM *********************************************************** # 7077# This routine is accessed by many handlers that need to do an # 7078# opclass three move of an operand out to memory. # 7079# Decode an fmove out (opclass 3) instruction to determine if # 7080# it's b,w,l,s,d,x, or p in size. b,w,l can be stored to either a data # 7081# register or memory. The algorithm uses a standard "fmove" to create # 7082# the rounded result. Also, since exceptions are disabled, this also # 7083# create the correct OPERR default result if appropriate. # 7084# For sgl or dbl precision, overflow or underflow can occur. If # 7085# either occurs and is enabled, the EXOP. # 7086# For extended precision, the stacked <ea> must be fixed along # 7087# w/ the address index register as appropriate w/ _calc_ea_fout(). If # 7088# the source is a denorm and if underflow is enabled, an EXOP must be # 7089# created. # 7090# For packed, the k-factor must be fetched from the instruction # 7091# word or a data register. The <ea> must be fixed as w/ extended # 7092# precision. Then, bindec() is called to create the appropriate # 7093# packed result. # 7094# If at any time an access error is flagged by one of the move- # 7095# to-memory routines, then a special exit must be made so that the # 7096# access error can be handled properly. 
# 7097# # 7098######################################################################### 7099 7100 global fout 7101fout: 7102 bfextu EXC_CMDREG(%a6){&3:&3},%d1 # extract dst fmt 7103 mov.w (tbl_fout.b,%pc,%d1.w*2),%a1 # use as index 7104 jmp (tbl_fout.b,%pc,%a1) # jump to routine 7105 7106 swbeg &0x8 7107tbl_fout: 7108 short fout_long - tbl_fout 7109 short fout_sgl - tbl_fout 7110 short fout_ext - tbl_fout 7111 short fout_pack - tbl_fout 7112 short fout_word - tbl_fout 7113 short fout_dbl - tbl_fout 7114 short fout_byte - tbl_fout 7115 short fout_pack - tbl_fout 7116 7117################################################################# 7118# fmove.b out ################################################### 7119################################################################# 7120 7121# Only "Unimplemented Data Type" exceptions enter here. The operand 7122# is either a DENORM or a NORM. 7123fout_byte: 7124 tst.b STAG(%a6) # is operand normalized? 7125 bne.b fout_byte_denorm # no 7126 7127 fmovm.x SRC(%a0),&0x80 # load value 7128 7129fout_byte_norm: 7130 fmov.l %d0,%fpcr # insert rnd prec,mode 7131 7132 fmov.b %fp0,%d0 # exec move out w/ correct rnd mode 7133 7134 fmov.l &0x0,%fpcr # clear FPCR 7135 fmov.l %fpsr,%d1 # fetch FPSR 7136 or.w %d1,2+USER_FPSR(%a6) # save new exc,accrued bits 7137 7138 mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode 7139 andi.b &0x38,%d1 # is mode == 0? (Dreg dst) 7140 beq.b fout_byte_dn # must save to integer regfile 7141 7142 mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct 7143 bsr.l _dmem_write_byte # write byte 7144 7145 tst.l %d1 # did dstore fail? 
7146 bne.l facc_out_b # yes 7147 7148 rts 7149 7150fout_byte_dn: 7151 mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn 7152 andi.w &0x7,%d1 7153 bsr.l store_dreg_b 7154 rts 7155 7156fout_byte_denorm: 7157 mov.l SRC_EX(%a0),%d1 7158 andi.l &0x80000000,%d1 # keep DENORM sign 7159 ori.l &0x00800000,%d1 # make smallest sgl 7160 fmov.s %d1,%fp0 7161 bra.b fout_byte_norm 7162 7163################################################################# 7164# fmove.w out ################################################### 7165################################################################# 7166 7167# Only "Unimplemented Data Type" exceptions enter here. The operand 7168# is either a DENORM or a NORM. 7169fout_word: 7170 tst.b STAG(%a6) # is operand normalized? 7171 bne.b fout_word_denorm # no 7172 7173 fmovm.x SRC(%a0),&0x80 # load value 7174 7175fout_word_norm: 7176 fmov.l %d0,%fpcr # insert rnd prec:mode 7177 7178 fmov.w %fp0,%d0 # exec move out w/ correct rnd mode 7179 7180 fmov.l &0x0,%fpcr # clear FPCR 7181 fmov.l %fpsr,%d1 # fetch FPSR 7182 or.w %d1,2+USER_FPSR(%a6) # save new exc,accrued bits 7183 7184 mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode 7185 andi.b &0x38,%d1 # is mode == 0? (Dreg dst) 7186 beq.b fout_word_dn # must save to integer regfile 7187 7188 mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct 7189 bsr.l _dmem_write_word # write word 7190 7191 tst.l %d1 # did dstore fail? 
7192 bne.l facc_out_w # yes 7193 7194 rts 7195 7196fout_word_dn: 7197 mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn 7198 andi.w &0x7,%d1 7199 bsr.l store_dreg_w 7200 rts 7201 7202fout_word_denorm: 7203 mov.l SRC_EX(%a0),%d1 7204 andi.l &0x80000000,%d1 # keep DENORM sign 7205 ori.l &0x00800000,%d1 # make smallest sgl 7206 fmov.s %d1,%fp0 7207 bra.b fout_word_norm 7208 7209################################################################# 7210# fmove.l out ################################################### 7211################################################################# 7212 7213# Only "Unimplemented Data Type" exceptions enter here. The operand 7214# is either a DENORM or a NORM. 7215fout_long: 7216 tst.b STAG(%a6) # is operand normalized? 7217 bne.b fout_long_denorm # no 7218 7219 fmovm.x SRC(%a0),&0x80 # load value 7220 7221fout_long_norm: 7222 fmov.l %d0,%fpcr # insert rnd prec:mode 7223 7224 fmov.l %fp0,%d0 # exec move out w/ correct rnd mode 7225 7226 fmov.l &0x0,%fpcr # clear FPCR 7227 fmov.l %fpsr,%d1 # fetch FPSR 7228 or.w %d1,2+USER_FPSR(%a6) # save new exc,accrued bits 7229 7230fout_long_write: 7231 mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode 7232 andi.b &0x38,%d1 # is mode == 0? (Dreg dst) 7233 beq.b fout_long_dn # must save to integer regfile 7234 7235 mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct 7236 bsr.l _dmem_write_long # write long 7237 7238 tst.l %d1 # did dstore fail? 
7239 bne.l facc_out_l # yes 7240 7241 rts 7242 7243fout_long_dn: 7244 mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn 7245 andi.w &0x7,%d1 7246 bsr.l store_dreg_l 7247 rts 7248 7249fout_long_denorm: 7250 mov.l SRC_EX(%a0),%d1 7251 andi.l &0x80000000,%d1 # keep DENORM sign 7252 ori.l &0x00800000,%d1 # make smallest sgl 7253 fmov.s %d1,%fp0 7254 bra.b fout_long_norm 7255 7256################################################################# 7257# fmove.x out ################################################### 7258################################################################# 7259 7260# Only "Unimplemented Data Type" exceptions enter here. The operand 7261# is either a DENORM or a NORM. 7262# The DENORM causes an Underflow exception. 7263fout_ext: 7264 7265# we copy the extended precision result to FP_SCR0 so that the reserved 7266# 16-bit field gets zeroed. we do this since we promise not to disturb 7267# what's at SRC(a0). 7268 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) 7269 clr.w 2+FP_SCR0_EX(%a6) # clear reserved field 7270 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) 7271 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) 7272 7273 fmovm.x SRC(%a0),&0x80 # return result 7274 7275 bsr.l _calc_ea_fout # fix stacked <ea> 7276 7277 mov.l %a0,%a1 # pass: dst addr 7278 lea FP_SCR0(%a6),%a0 # pass: src addr 7279 mov.l &0xc,%d0 # pass: opsize is 12 bytes 7280 7281# we must not yet write the extended precision data to the stack 7282# in the pre-decrement case from supervisor mode or else we'll corrupt 7283# the stack frame. so, leave it in FP_SRC for now and deal with it later... 7284 cmpi.b SPCOND_FLG(%a6),&mda7_flg 7285 beq.b fout_ext_a7 7286 7287 bsr.l _dmem_write # write ext prec number to memory 7288 7289 tst.l %d1 # did dstore fail? 7290 bne.w fout_ext_err # yes 7291 7292 tst.b STAG(%a6) # is operand normalized? 7293 bne.b fout_ext_denorm # no 7294 rts 7295 7296# the number is a DENORM. 
must set the underflow exception bit 7297fout_ext_denorm: 7298 bset &unfl_bit,FPSR_EXCEPT(%a6) # set underflow exc bit 7299 7300 mov.b FPCR_ENABLE(%a6),%d0 7301 andi.b &0x0a,%d0 # is UNFL or INEX enabled? 7302 bne.b fout_ext_exc # yes 7303 rts 7304 7305# we don't want to do the write if the exception occurred in supervisor mode 7306# so _mem_write2() handles this for us. 7307fout_ext_a7: 7308 bsr.l _mem_write2 # write ext prec number to memory 7309 7310 tst.l %d1 # did dstore fail? 7311 bne.w fout_ext_err # yes 7312 7313 tst.b STAG(%a6) # is operand normalized? 7314 bne.b fout_ext_denorm # no 7315 rts 7316 7317fout_ext_exc: 7318 lea FP_SCR0(%a6),%a0 7319 bsr.l norm # normalize the mantissa 7320 neg.w %d0 # new exp = -(shft amt) 7321 andi.w &0x7fff,%d0 7322 andi.w &0x8000,FP_SCR0_EX(%a6) # keep only old sign 7323 or.w %d0,FP_SCR0_EX(%a6) # insert new exponent 7324 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1 7325 rts 7326 7327fout_ext_err: 7328 mov.l EXC_A6(%a6),(%a6) # fix stacked a6 7329 bra.l facc_out_x 7330 7331######################################################################### 7332# fmove.s out ########################################################### 7333######################################################################### 7334fout_sgl: 7335 andi.b &0x30,%d0 # clear rnd prec 7336 ori.b &s_mode*0x10,%d0 # insert sgl prec 7337 mov.l %d0,L_SCR3(%a6) # save rnd prec,mode on stack 7338 7339# 7340# operand is a normalized number. first, we check to see if the move out 7341# would cause either an underflow or overflow. these cases are handled 7342# separately. otherwise, set the FPCR to the proper rounding mode and 7343# execute the move. 7344# 7345 mov.w SRC_EX(%a0),%d0 # extract exponent 7346 andi.w &0x7fff,%d0 # strip sign 7347 7348 cmpi.w %d0,&SGL_HI # will operand overflow? 7349 bgt.w fout_sgl_ovfl # yes; go handle OVFL 7350 beq.w fout_sgl_may_ovfl # maybe; go handle possible OVFL 7351 cmpi.w %d0,&SGL_LO # will operand underflow? 
7352 blt.w fout_sgl_unfl # yes; go handle underflow 7353 7354# 7355# NORMs(in range) can be stored out by a simple "fmov.s" 7356# Unnormalized inputs can come through this point. 7357# 7358fout_sgl_exg: 7359 fmovm.x SRC(%a0),&0x80 # fetch fop from stack 7360 7361 fmov.l L_SCR3(%a6),%fpcr # set FPCR 7362 fmov.l &0x0,%fpsr # clear FPSR 7363 7364 fmov.s %fp0,%d0 # store does convert and round 7365 7366 fmov.l &0x0,%fpcr # clear FPCR 7367 fmov.l %fpsr,%d1 # save FPSR 7368 7369 or.w %d1,2+USER_FPSR(%a6) # set possible inex2/ainex 7370 7371fout_sgl_exg_write: 7372 mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode 7373 andi.b &0x38,%d1 # is mode == 0? (Dreg dst) 7374 beq.b fout_sgl_exg_write_dn # must save to integer regfile 7375 7376 mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct 7377 bsr.l _dmem_write_long # write long 7378 7379 tst.l %d1 # did dstore fail? 7380 bne.l facc_out_l # yes 7381 7382 rts 7383 7384fout_sgl_exg_write_dn: 7385 mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn 7386 andi.w &0x7,%d1 7387 bsr.l store_dreg_l 7388 rts 7389 7390# 7391# here, we know that the operand would UNFL if moved out to single prec, 7392# so, denorm and round and then use generic store single routine to 7393# write the value to memory. 7394# 7395fout_sgl_unfl: 7396 bset &unfl_bit,FPSR_EXCEPT(%a6) # set UNFL 7397 7398 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) 7399 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) 7400 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) 7401 mov.l %a0,-(%sp) 7402 7403 clr.l %d0 # pass: S.F. 
= 0 7404 7405 cmpi.b STAG(%a6),&DENORM # fetch src optype tag 7406 bne.b fout_sgl_unfl_cont # let DENORMs fall through 7407 7408 lea FP_SCR0(%a6),%a0 7409 bsr.l norm # normalize the DENORM 7410 7411fout_sgl_unfl_cont: 7412 lea FP_SCR0(%a6),%a0 # pass: ptr to operand 7413 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode 7414 bsr.l unf_res # calc default underflow result 7415 7416 lea FP_SCR0(%a6),%a0 # pass: ptr to fop 7417 bsr.l dst_sgl # convert to single prec 7418 7419 mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode 7420 andi.b &0x38,%d1 # is mode == 0? (Dreg dst) 7421 beq.b fout_sgl_unfl_dn # must save to integer regfile 7422 7423 mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct 7424 bsr.l _dmem_write_long # write long 7425 7426 tst.l %d1 # did dstore fail? 7427 bne.l facc_out_l # yes 7428 7429 bra.b fout_sgl_unfl_chkexc 7430 7431fout_sgl_unfl_dn: 7432 mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn 7433 andi.w &0x7,%d1 7434 bsr.l store_dreg_l 7435 7436fout_sgl_unfl_chkexc: 7437 mov.b FPCR_ENABLE(%a6),%d1 7438 andi.b &0x0a,%d1 # is UNFL or INEX enabled? 7439 bne.w fout_sd_exc_unfl # yes 7440 addq.l &0x4,%sp 7441 rts 7442 7443# 7444# it's definitely an overflow so call ovf_res to get the correct answer 7445# 7446fout_sgl_ovfl: 7447 tst.b 3+SRC_HI(%a0) # is result inexact? 7448 bne.b fout_sgl_ovfl_inex2 7449 tst.l SRC_LO(%a0) # is result inexact? 7450 bne.b fout_sgl_ovfl_inex2 7451 ori.w &ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex 7452 bra.b fout_sgl_ovfl_cont 7453fout_sgl_ovfl_inex2: 7454 ori.w &ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2 7455 7456fout_sgl_ovfl_cont: 7457 mov.l %a0,-(%sp) 7458 7459# call ovf_res() w/ sgl prec and the correct rnd mode to create the default 7460# overflow result. DON'T save the returned ccodes from ovf_res() since 7461# fmove out doesn't alter them. 7462 tst.b SRC_EX(%a0) # is operand negative? 
7463 smi %d1 # set if so 7464 mov.l L_SCR3(%a6),%d0 # pass: sgl prec,rnd mode 7465 bsr.l ovf_res # calc OVFL result 7466 fmovm.x (%a0),&0x80 # load default overflow result 7467 fmov.s %fp0,%d0 # store to single 7468 7469 mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode 7470 andi.b &0x38,%d1 # is mode == 0? (Dreg dst) 7471 beq.b fout_sgl_ovfl_dn # must save to integer regfile 7472 7473 mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct 7474 bsr.l _dmem_write_long # write long 7475 7476 tst.l %d1 # did dstore fail? 7477 bne.l facc_out_l # yes 7478 7479 bra.b fout_sgl_ovfl_chkexc 7480 7481fout_sgl_ovfl_dn: 7482 mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn 7483 andi.w &0x7,%d1 7484 bsr.l store_dreg_l 7485 7486fout_sgl_ovfl_chkexc: 7487 mov.b FPCR_ENABLE(%a6),%d1 7488 andi.b &0x0a,%d1 # is UNFL or INEX enabled? 7489 bne.w fout_sd_exc_ovfl # yes 7490 addq.l &0x4,%sp 7491 rts 7492 7493# 7494# move out MAY overflow: 7495# (1) force the exp to 0x3fff 7496# (2) do a move w/ appropriate rnd mode 7497# (3) if exp still equals zero, then insert original exponent 7498# for the correct result. 7499# if exp now equals one, then it overflowed so call ovf_res. 7500# 7501fout_sgl_may_ovfl: 7502 mov.w SRC_EX(%a0),%d1 # fetch current sign 7503 andi.w &0x8000,%d1 # keep it,clear exp 7504 ori.w &0x3fff,%d1 # insert exp = 0 7505 mov.w %d1,FP_SCR0_EX(%a6) # insert scaled exp 7506 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man) 7507 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man) 7508 7509 fmov.l L_SCR3(%a6),%fpcr # set FPCR 7510 7511 fmov.x FP_SCR0(%a6),%fp0 # force fop to be rounded 7512 fmov.l &0x0,%fpcr # clear FPCR 7513 7514 fabs.x %fp0 # need absolute value 7515 fcmp.b %fp0,&0x2 # did exponent increase? 
7516 fblt.w fout_sgl_exg # no; go finish NORM 7517 bra.w fout_sgl_ovfl # yes; go handle overflow 7518 7519################ 7520 7521fout_sd_exc_unfl: 7522 mov.l (%sp)+,%a0 7523 7524 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) 7525 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) 7526 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) 7527 7528 cmpi.b STAG(%a6),&DENORM # was src a DENORM? 7529 bne.b fout_sd_exc_cont # no 7530 7531 lea FP_SCR0(%a6),%a0 7532 bsr.l norm 7533 neg.l %d0 7534 andi.w &0x7fff,%d0 7535 bfins %d0,FP_SCR0_EX(%a6){&1:&15} 7536 bra.b fout_sd_exc_cont 7537 7538fout_sd_exc: 7539fout_sd_exc_ovfl: 7540 mov.l (%sp)+,%a0 # restore a0 7541 7542 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) 7543 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) 7544 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) 7545 7546fout_sd_exc_cont: 7547 bclr &0x7,FP_SCR0_EX(%a6) # clear sign bit 7548 sne.b 2+FP_SCR0_EX(%a6) # set internal sign bit 7549 lea FP_SCR0(%a6),%a0 # pass: ptr to DENORM 7550 7551 mov.b 3+L_SCR3(%a6),%d1 7552 lsr.b &0x4,%d1 7553 andi.w &0x0c,%d1 7554 swap %d1 7555 mov.b 3+L_SCR3(%a6),%d1 7556 lsr.b &0x4,%d1 7557 andi.w &0x03,%d1 7558 clr.l %d0 # pass: zero g,r,s 7559 bsr.l _round # round the DENORM 7560 7561 tst.b 2+FP_SCR0_EX(%a6) # is EXOP negative? 7562 beq.b fout_sd_exc_done # no 7563 bset &0x7,FP_SCR0_EX(%a6) # yes 7564 7565fout_sd_exc_done: 7566 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1 7567 rts 7568 7569################################################################# 7570# fmove.d out ################################################### 7571################################################################# 7572fout_dbl: 7573 andi.b &0x30,%d0 # clear rnd prec 7574 ori.b &d_mode*0x10,%d0 # insert dbl prec 7575 mov.l %d0,L_SCR3(%a6) # save rnd prec,mode on stack 7576 7577# 7578# operand is a normalized number. first, we check to see if the move out 7579# would cause either an underflow or overflow. these cases are handled 7580# separately. otherwise, set the FPCR to the proper rounding mode and 7581# execute the move. 
7582# 7583 mov.w SRC_EX(%a0),%d0 # extract exponent 7584 andi.w &0x7fff,%d0 # strip sign 7585 7586 cmpi.w %d0,&DBL_HI # will operand overflow? 7587 bgt.w fout_dbl_ovfl # yes; go handle OVFL 7588 beq.w fout_dbl_may_ovfl # maybe; go handle possible OVFL 7589 cmpi.w %d0,&DBL_LO # will operand underflow? 7590 blt.w fout_dbl_unfl # yes; go handle underflow 7591 7592# 7593# NORMs(in range) can be stored out by a simple "fmov.d" 7594# Unnormalized inputs can come through this point. 7595# 7596fout_dbl_exg: 7597 fmovm.x SRC(%a0),&0x80 # fetch fop from stack 7598 7599 fmov.l L_SCR3(%a6),%fpcr # set FPCR 7600 fmov.l &0x0,%fpsr # clear FPSR 7601 7602 fmov.d %fp0,L_SCR1(%a6) # store does convert and round 7603 7604 fmov.l &0x0,%fpcr # clear FPCR 7605 fmov.l %fpsr,%d0 # save FPSR 7606 7607 or.w %d0,2+USER_FPSR(%a6) # set possible inex2/ainex 7608 7609 mov.l EXC_EA(%a6),%a1 # pass: dst addr 7610 lea L_SCR1(%a6),%a0 # pass: src addr 7611 movq.l &0x8,%d0 # pass: opsize is 8 bytes 7612 bsr.l _dmem_write # store dbl fop to memory 7613 7614 tst.l %d1 # did dstore fail? 7615 bne.l facc_out_d # yes 7616 7617 rts # no; so we're finished 7618 7619# 7620# here, we know that the operand would UNFL if moved out to double prec, 7621# so, denorm and round and then use generic store double routine to 7622# write the value to memory. 7623# 7624fout_dbl_unfl: 7625 bset &unfl_bit,FPSR_EXCEPT(%a6) # set UNFL 7626 7627 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) 7628 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) 7629 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) 7630 mov.l %a0,-(%sp) 7631 7632 clr.l %d0 # pass: S.F. 
= 0 7633 7634 cmpi.b STAG(%a6),&DENORM # fetch src optype tag 7635 bne.b fout_dbl_unfl_cont # let DENORMs fall through 7636 7637 lea FP_SCR0(%a6),%a0 7638 bsr.l norm # normalize the DENORM 7639 7640fout_dbl_unfl_cont: 7641 lea FP_SCR0(%a6),%a0 # pass: ptr to operand 7642 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode 7643 bsr.l unf_res # calc default underflow result 7644 7645 lea FP_SCR0(%a6),%a0 # pass: ptr to fop 7646 bsr.l dst_dbl # convert to single prec 7647 mov.l %d0,L_SCR1(%a6) 7648 mov.l %d1,L_SCR2(%a6) 7649 7650 mov.l EXC_EA(%a6),%a1 # pass: dst addr 7651 lea L_SCR1(%a6),%a0 # pass: src addr 7652 movq.l &0x8,%d0 # pass: opsize is 8 bytes 7653 bsr.l _dmem_write # store dbl fop to memory 7654 7655 tst.l %d1 # did dstore fail? 7656 bne.l facc_out_d # yes 7657 7658 mov.b FPCR_ENABLE(%a6),%d1 7659 andi.b &0x0a,%d1 # is UNFL or INEX enabled? 7660 bne.w fout_sd_exc_unfl # yes 7661 addq.l &0x4,%sp 7662 rts 7663 7664# 7665# it's definitely an overflow so call ovf_res to get the correct answer 7666# 7667fout_dbl_ovfl: 7668 mov.w 2+SRC_LO(%a0),%d0 7669 andi.w &0x7ff,%d0 7670 bne.b fout_dbl_ovfl_inex2 7671 7672 ori.w &ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex 7673 bra.b fout_dbl_ovfl_cont 7674fout_dbl_ovfl_inex2: 7675 ori.w &ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2 7676 7677fout_dbl_ovfl_cont: 7678 mov.l %a0,-(%sp) 7679 7680# call ovf_res() w/ dbl prec and the correct rnd mode to create the default 7681# overflow result. DON'T save the returned ccodes from ovf_res() since 7682# fmove out doesn't alter them. 7683 tst.b SRC_EX(%a0) # is operand negative? 
7684 smi %d1 # set if so 7685 mov.l L_SCR3(%a6),%d0 # pass: dbl prec,rnd mode 7686 bsr.l ovf_res # calc OVFL result 7687 fmovm.x (%a0),&0x80 # load default overflow result 7688 fmov.d %fp0,L_SCR1(%a6) # store to double 7689 7690 mov.l EXC_EA(%a6),%a1 # pass: dst addr 7691 lea L_SCR1(%a6),%a0 # pass: src addr 7692 movq.l &0x8,%d0 # pass: opsize is 8 bytes 7693 bsr.l _dmem_write # store dbl fop to memory 7694 7695 tst.l %d1 # did dstore fail? 7696 bne.l facc_out_d # yes 7697 7698 mov.b FPCR_ENABLE(%a6),%d1 7699 andi.b &0x0a,%d1 # is UNFL or INEX enabled? 7700 bne.w fout_sd_exc_ovfl # yes 7701 addq.l &0x4,%sp 7702 rts 7703 7704# 7705# move out MAY overflow: 7706# (1) force the exp to 0x3fff 7707# (2) do a move w/ appropriate rnd mode 7708# (3) if exp still equals zero, then insert original exponent 7709# for the correct result. 7710# if exp now equals one, then it overflowed so call ovf_res. 7711# 7712fout_dbl_may_ovfl: 7713 mov.w SRC_EX(%a0),%d1 # fetch current sign 7714 andi.w &0x8000,%d1 # keep it,clear exp 7715 ori.w &0x3fff,%d1 # insert exp = 0 7716 mov.w %d1,FP_SCR0_EX(%a6) # insert scaled exp 7717 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man) 7718 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man) 7719 7720 fmov.l L_SCR3(%a6),%fpcr # set FPCR 7721 7722 fmov.x FP_SCR0(%a6),%fp0 # force fop to be rounded 7723 fmov.l &0x0,%fpcr # clear FPCR 7724 7725 fabs.x %fp0 # need absolute value 7726 fcmp.b %fp0,&0x2 # did exponent increase? 7727 fblt.w fout_dbl_exg # no; go finish NORM 7728 bra.w fout_dbl_ovfl # yes; go handle overflow 7729 7730######################################################################### 7731# XDEF **************************************************************** # 7732# dst_dbl(): create double precision value from extended prec. 
# 7733# # 7734# XREF **************************************************************** # 7735# None # 7736# # 7737# INPUT *************************************************************** # 7738# a0 = pointer to source operand in extended precision # 7739# # 7740# OUTPUT ************************************************************** # 7741# d0 = hi(double precision result) # 7742# d1 = lo(double precision result) # 7743# # 7744# ALGORITHM *********************************************************** # 7745# # 7746# Changes extended precision to double precision. # 7747# Note: no attempt is made to round the extended value to double. # 7748# dbl_sign = ext_sign # 7749# dbl_exp = ext_exp - $3fff(ext bias) + $7ff(dbl bias) # 7750# get rid of ext integer bit # 7751# dbl_mant = ext_mant{62:12} # 7752# # 7753# --------------- --------------- --------------- # 7754# extended -> |s| exp | |1| ms mant | | ls mant | # 7755# --------------- --------------- --------------- # 7756# 95 64 63 62 32 31 11 0 # 7757# | | # 7758# | | # 7759# | | # 7760# v v # 7761# --------------- --------------- # 7762# double -> |s|exp| mant | | mant | # 7763# --------------- --------------- # 7764# 63 51 32 31 0 # 7765# # 7766######################################################################### 7767 7768dst_dbl: 7769 clr.l %d0 # clear d0 7770 mov.w FTEMP_EX(%a0),%d0 # get exponent 7771 subi.w &EXT_BIAS,%d0 # subtract extended precision bias 7772 addi.w &DBL_BIAS,%d0 # add double precision bias 7773 tst.b FTEMP_HI(%a0) # is number a denorm? 
7774 bmi.b dst_get_dupper # no 7775 subq.w &0x1,%d0 # yes; denorm bias = DBL_BIAS - 1 7776dst_get_dupper: 7777 swap %d0 # d0 now in upper word 7778 lsl.l &0x4,%d0 # d0 in proper place for dbl prec exp 7779 tst.b FTEMP_EX(%a0) # test sign 7780 bpl.b dst_get_dman # if positive, go process mantissa 7781 bset &0x1f,%d0 # if negative, set sign 7782dst_get_dman: 7783 mov.l FTEMP_HI(%a0),%d1 # get ms mantissa 7784 bfextu %d1{&1:&20},%d1 # get upper 20 bits of ms 7785 or.l %d1,%d0 # put these bits in ms word of double 7786 mov.l %d0,L_SCR1(%a6) # put the new exp back on the stack 7787 mov.l FTEMP_HI(%a0),%d1 # get ms mantissa 7788 mov.l &21,%d0 # load shift count 7789 lsl.l %d0,%d1 # put lower 11 bits in upper bits 7790 mov.l %d1,L_SCR2(%a6) # build lower lword in memory 7791 mov.l FTEMP_LO(%a0),%d1 # get ls mantissa 7792 bfextu %d1{&0:&21},%d0 # get ls 21 bits of double 7793 mov.l L_SCR2(%a6),%d1 7794 or.l %d0,%d1 # put them in double result 7795 mov.l L_SCR1(%a6),%d0 7796 rts 7797 7798######################################################################### 7799# XDEF **************************************************************** # 7800# dst_sgl(): create single precision value from extended prec # 7801# # 7802# XREF **************************************************************** # 7803# # 7804# INPUT *************************************************************** # 7805# a0 = pointer to source operand in extended precision # 7806# # 7807# OUTPUT ************************************************************** # 7808# d0 = single precision result # 7809# # 7810# ALGORITHM *********************************************************** # 7811# # 7812# Changes extended precision to single precision. 
# 7813# sgl_sign = ext_sign # 7814# sgl_exp = ext_exp - $3fff(ext bias) + $7f(sgl bias) # 7815# get rid of ext integer bit # 7816# sgl_mant = ext_mant{62:12} # 7817# # 7818# --------------- --------------- --------------- # 7819# extended -> |s| exp | |1| ms mant | | ls mant | # 7820# --------------- --------------- --------------- # 7821# 95 64 63 62 40 32 31 12 0 # 7822# | | # 7823# | | # 7824# | | # 7825# v v # 7826# --------------- # 7827# single -> |s|exp| mant | # 7828# --------------- # 7829# 31 22 0 # 7830# # 7831######################################################################### 7832 7833dst_sgl: 7834 clr.l %d0 7835 mov.w FTEMP_EX(%a0),%d0 # get exponent 7836 subi.w &EXT_BIAS,%d0 # subtract extended precision bias 7837 addi.w &SGL_BIAS,%d0 # add single precision bias 7838 tst.b FTEMP_HI(%a0) # is number a denorm? 7839 bmi.b dst_get_supper # no 7840 subq.w &0x1,%d0 # yes; denorm bias = SGL_BIAS - 1 7841dst_get_supper: 7842 swap %d0 # put exp in upper word of d0 7843 lsl.l &0x7,%d0 # shift it into single exp bits 7844 tst.b FTEMP_EX(%a0) # test sign 7845 bpl.b dst_get_sman # if positive, continue 7846 bset &0x1f,%d0 # if negative, put in sign first 7847dst_get_sman: 7848 mov.l FTEMP_HI(%a0),%d1 # get ms mantissa 7849 andi.l &0x7fffff00,%d1 # get upper 23 bits of ms 7850 lsr.l &0x8,%d1 # and put them flush right 7851 or.l %d1,%d0 # put these bits in ms word of single 7852 rts 7853 7854############################################################################## 7855fout_pack: 7856 bsr.l _calc_ea_fout # fetch the <ea> 7857 mov.l %a0,-(%sp) 7858 7859 mov.b STAG(%a6),%d0 # fetch input type 7860 bne.w fout_pack_not_norm # input is not NORM 7861 7862fout_pack_norm: 7863 btst &0x4,EXC_CMDREG(%a6) # static or dynamic? 
	beq.b		fout_pack_s		# static

# dynamic k-factor: fetch it from the data register named in the cmd word
fout_pack_d:
	mov.b		1+EXC_CMDREG(%a6),%d1	# fetch dynamic reg
	lsr.b		&0x4,%d1
	andi.w		&0x7,%d1

	bsr.l		fetch_dreg		# fetch Dn w/ k-factor

	bra.b		fout_pack_type
fout_pack_s:
	mov.b		1+EXC_CMDREG(%a6),%d0	# fetch static field

fout_pack_type:
	bfexts		%d0{&25:&7},%d0		# extract k-factor
	mov.l		%d0,-(%sp)		# save k-factor

	lea		FP_SRC(%a6),%a0		# pass: ptr to input

# bindec is currently scrambling FP_SRC for denorm inputs.
# we'll have to change this, but for now, tough luck!!!
	bsr.l		bindec			# convert xprec to packed

#	andi.l		&0xcfff000f,FP_SCR0(%a6) # clear unused fields
	andi.l		&0xcffff00f,FP_SCR0(%a6) # clear unused fields

	mov.l		(%sp)+,%d0		# restore k-factor

	tst.b		3+FP_SCR0_EX(%a6)	# any mantissa digits set?
	bne.b		fout_pack_set
	tst.l		FP_SCR0_HI(%a6)
	bne.b		fout_pack_set
	tst.l		FP_SCR0_LO(%a6)
	bne.b		fout_pack_set

# add the extra condition that only if the k-factor was zero, too, should
# we zero the exponent
	tst.l		%d0
	bne.b		fout_pack_set
# "mantissa" is all zero which means that the answer is zero. but, the '040
# algorithm allows the exponent to be non-zero. the 881/2 do not. Therefore,
# if the mantissa is zero, I will zero the exponent, too.
# the question now is whether the exponents sign bit is allowed to be non-zero
# for a zero, also...
	andi.w		&0xf000,FP_SCR0(%a6)	# keep only sign/exp-sign bits

fout_pack_set:

	lea		FP_SCR0(%a6),%a0	# pass: src addr

fout_pack_write:
	mov.l		(%sp)+,%a1		# pass: dst addr
	mov.l		&0xc,%d0		# pass: opsize is 12 bytes

	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
	beq.b		fout_pack_a7

	bsr.l		_dmem_write		# write ext prec number to memory

	tst.l		%d1			# did dstore fail?
	bne.w		fout_ext_err		# yes

	rts

# we don't want to do the write if the exception occurred in supervisor mode
# so _mem_write2() handles this for us.
fout_pack_a7:
	bsr.l		_mem_write2		# write ext prec number to memory

	tst.l		%d1			# did dstore fail?
	bne.w		fout_ext_err		# yes

	rts

# non-NORM input: DENORMs go through the normal path; SNAN/QNAN/ZERO/INF
# are written out directly (SNAN gets its snan bit set first).
fout_pack_not_norm:
	cmpi.b		%d0,&DENORM		# is it a DENORM?
	beq.w		fout_pack_norm		# yes
	lea		FP_SRC(%a6),%a0
	clr.w		2+FP_SRC_EX(%a6)
	cmpi.b		%d0,&SNAN		# is it an SNAN?
	beq.b		fout_pack_snan		# yes
	bra.b		fout_pack_write		# no

fout_pack_snan:
	ori.w		&snaniop2_mask,FPSR_EXCEPT(%a6) # set SNAN/AIOP
	bset		&0x6,FP_SRC_HI(%a6)	# set snan bit
	bra.b		fout_pack_write

#########################################################################
# XDEF **************************************************************** #
#	fmul(): emulates the fmul instruction				#
#	fsmul(): emulates the fsmul instruction				#
#	fdmul(): emulates the fdmul instruction				#
#									#
# XREF **************************************************************** #
#	scale_to_zero_src() - scale src exponent to zero		#
#	scale_to_zero_dst() - scale dst exponent to zero		#
#	unf_res() - return default underflow result			#
#	ovf_res() - return default overflow result			#
#	res_qnan() - return QNAN result					#
#	res_snan() - return SNAN result					#
#									#
# INPUT *************************************************************** #
#	a0 = pointer to extended precision source operand		#
#	a1 = pointer to extended precision destination operand		#
#	d0  rnd prec,mode						#
#									#
# OUTPUT ************************************************************** #
#	fp0 = result							#
#	fp1 = EXOP (if exception occurred)				#
#									#
# ALGORITHM *********************************************************** #
#	Handle NANs, infinities, and zeroes as special cases. Divide	#
# norms/denorms into ext/sgl/dbl precision.				#
#	For norms/denorms, scale the exponents such that a multiply	#
# instruction won't cause an exception.
# Use the regular fmul to compute a result. Check if the regular	#
# operands would have taken an exception. If so, return the default	#
# overflow/underflow result and return the EXOP if exceptions are	#
# enabled. Else, scale the result operand to the proper exponent.	#
#									#
#########################################################################

	align		0x10
tbl_fmul_ovfl:
	long		0x3fff - 0x7ffe		# ext_max
	long		0x3fff - 0x407e		# sgl_max
	long		0x3fff - 0x43fe		# dbl_max
tbl_fmul_unfl:
	long		0x3fff + 0x0001		# ext_unfl
	long		0x3fff - 0x3f80		# sgl_unfl
	long		0x3fff - 0x3c00		# dbl_unfl

	global		fsmul
fsmul:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&s_mode*0x10,%d0	# insert sgl prec
	bra.b		fmul

	global		fdmul
fdmul:
	andi.b		&0x30,%d0
	ori.b		&d_mode*0x10,%d0	# insert dbl prec

	global		fmul
fmul:
	mov.l		%d0,L_SCR3(%a6)		# store rnd info

	clr.w		%d1
	mov.b		DTAG(%a6),%d1
	lsl.b		&0x3,%d1
	or.b		STAG(%a6),%d1		# combine src tags
	bne.w		fmul_not_norm		# optimize on non-norm input

# both operands are NORMs (or DENORMs): copy to the stack frame and
# scale both exponents to zero so the hardware fmul cannot trap.
fmul_norm:
	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)

	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

	bsr.l		scale_to_zero_src	# scale src exponent
	mov.l		%d0,-(%sp)		# save scale factor 1

	bsr.l		scale_to_zero_dst	# scale dst exponent

	add.l		%d0,(%sp)		# SCALE_FACTOR = scale1 + scale2

	mov.w		2+L_SCR3(%a6),%d1	# fetch precision
	lsr.b		&0x6,%d1		# shift to lo bits
	mov.l		(%sp)+,%d0		# load S.F.
	cmp.l		%d0,(tbl_fmul_ovfl.w,%pc,%d1.w*4) # would result ovfl?
	beq.w		fmul_may_ovfl		# result may rnd to overflow
	blt.w		fmul_ovfl		# result will overflow

	cmp.l		%d0,(tbl_fmul_unfl.w,%pc,%d1.w*4) # would result unfl?
	beq.w		fmul_may_unfl		# result may rnd to no unfl
	bgt.w		fmul_unfl		# result will underflow

#
# NORMAL:
# - the result of the multiply operation will neither overflow nor underflow.
# - do the multiply to the proper precision and rounding mode.
# - scale the result exponent using the scale factor. if both operands were
# normalized then we really don't need to go through this scaling. but for now,
# this will do.
#
fmul_normal:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

fmul_normal_exit:
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# add scale factor
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
	rts

#
# OVERFLOW:
# - the result of the multiply operation is an overflow.
# - do the multiply to the proper precision and rounding mode in order to
# set the inexact bits.
# - calculate the default result and return it in fp0.
# - if overflow or inexact is enabled, we need a multiply result rounded to
# extended precision. if the original operation was extended, then we have this
# result. if the original operation was single or double, we have to do another
# multiply using extended precision and the correct rounding mode. the result
# of this operation then has its exponent scaled by -0x6000 to create the
# exceptional operand.
#
fmul_ovfl:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

# save setting this until now because this is where fmul_may_ovfl may jump in
fmul_ovfl_tst:
	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
	bne.b		fmul_ovfl_ena		# yes

# calculate the default result
fmul_ovfl_dis:
	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
	sne		%d1			# set sign param accordingly
	mov.l		L_SCR3(%a6),%d0		# pass rnd prec,mode
	bsr.l		ovf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
	fmovm.x		(%a0),&0x80		# return default result in fp0
	rts

#
# OVFL is enabled; Create EXOP:
# - if precision is extended, then we have the EXOP. simply bias the exponent
# with an extra -0x6000. if the precision is single or double, we need to
# calculate a result rounded to extended precision.
#
fmul_ovfl_ena:
	mov.l		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# test the rnd prec
	bne.b		fmul_ovfl_ena_sd	# it's sgl or dbl

fmul_ovfl_ena_cont:
	fmovm.x		&0x80,FP_SCR0(%a6)	# move result to stack

	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.w		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	sub.l		%d0,%d1			# add scale factor
	subi.l		&0x6000,%d1		# subtract bias
	andi.w		&0x7fff,%d1		# clear sign bit
	andi.w		&0x8000,%d2		# keep old sign
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.b		fmul_ovfl_dis

# sgl/dbl precision: redo the multiply rounded to extended so the EXOP
# carries the full-precision result.
fmul_ovfl_ena_sd:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0x30,%d1		# keep rnd mode only
	fmov.l		%d1,%fpcr		# set FPCR

	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply

	fmov.l		&0x0,%fpcr		# clear FPCR
	bra.b		fmul_ovfl_ena_cont

#
# may OVERFLOW:
# - the result of the multiply operation MAY overflow.
# - do the multiply to the proper precision and rounding mode in order to
# set the inexact bits.
# - calculate the default result and return it in fp0.
#
fmul_may_ovfl:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	fabs.x		%fp0,%fp1		# make a copy of result
	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
	fbge.w		fmul_ovfl_tst		# yes; overflow has occurred

# no, it didn't overflow; we have correct result
	bra.w		fmul_normal_exit

#
# UNDERFLOW:
# - the result of the multiply operation is an underflow.
# - do the multiply to the proper precision and rounding mode in order to
# set the inexact bits.
# - calculate the default result and return it in fp0.
# - if overflow or inexact is enabled, we need a multiply result rounded to
# extended precision. if the original operation was extended, then we have this
# result. if the original operation was single or double, we have to do another
# multiply using extended precision and the correct rounding mode. the result
# of this operation then has its exponent scaled by -0x6000 to create the
# exceptional operand.
#
fmul_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

# for fun, let's use only extended precision, round to zero. then, let
# the unf_res() routine figure out all the rest.
# will we get the correct answer.
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand

	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b		fmul_unfl_ena		# yes

fmul_unfl_dis:
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result

	lea		FP_SCR0(%a6),%a0	# pass: result addr
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# unf_res2 may have set 'Z'
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
	rts

#
# UNFL is enabled.
#
fmul_unfl_ena:
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# is precision extended?
	bne.b		fmul_unfl_ena_sd	# no, sgl or dbl

# if the rnd mode is anything but RZ, then we have to re-do the above
# multiplication because we used RZ for all.
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

fmul_unfl_ena_cont:
	fmov.l		&0x0,%fpsr		# clear FPSR

	fmul.x		FP_SCR0(%a6),%fp1	# execute multiply

	fmov.l		&0x0,%fpcr		# clear FPCR

	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# add scale factor
	addi.l		&0x6000,%d1		# add bias
	andi.w		&0x7fff,%d1
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.w		fmul_unfl_dis

fmul_unfl_ena_sd:
	mov.l		L_SCR3(%a6),%d1
	andi.b		&0x30,%d1		# use only rnd mode
	fmov.l		%d1,%fpcr		# set FPCR

	bra.b		fmul_unfl_ena_cont

# MAY UNDERFLOW:
# -use the correct rounding mode and precision. this code favors operations
# that do not underflow.
fmul_may_unfl:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	fabs.x		%fp0,%fp1		# make a copy of result
	fcmp.b		%fp1,&0x2		# is |result| > 2.b?
	fbgt.w		fmul_normal_exit	# no; no underflow occurred
	fblt.w		fmul_unfl		# yes; underflow occurred

#
# we still don't know if underflow occurred. result is ~ equal to 2. but,
# we don't know if the result was an underflow that rounded up to a 2 or
# a normalized number that rounded down to a 2. so, redo the entire operation
# using RZ as the rounding mode to see what the pre-rounded result is.
# this case should be relatively rare.
#
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst operand

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# keep rnd prec
	ori.b		&rz_mode*0x10,%d1	# insert RZ

	fmov.l		%d1,%fpcr		# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fmul.x		FP_SCR0(%a6),%fp1	# execute multiply

	fmov.l		&0x0,%fpcr		# clear FPCR
	fabs.x		%fp1			# make absolute value
	fcmp.b		%fp1,&0x2		# is |result| < 2.b?
	fbge.w		fmul_normal_exit	# no; no underflow occurred
	bra.w		fmul_unfl		# yes, underflow occurred

################################################################################

#
# Multiply: inputs are not both normalized; what are they?
#
fmul_not_norm:
	mov.w		(tbl_fmul_op.b,%pc,%d1.w*2),%d1
	jmp		(tbl_fmul_op.b,%pc,%d1.w)

	swbeg		&48
tbl_fmul_op:
	short		fmul_norm	- tbl_fmul_op # NORM x NORM
	short		fmul_zero	- tbl_fmul_op # NORM x ZERO
	short		fmul_inf_src	- tbl_fmul_op # NORM x INF
	short		fmul_res_qnan	- tbl_fmul_op # NORM x QNAN
	short		fmul_norm	- tbl_fmul_op # NORM x DENORM
	short		fmul_res_snan	- tbl_fmul_op # NORM x SNAN
	short		tbl_fmul_op	- tbl_fmul_op #
	short		tbl_fmul_op	- tbl_fmul_op #

	short		fmul_zero	- tbl_fmul_op # ZERO x NORM
	short		fmul_zero	- tbl_fmul_op # ZERO x ZERO
	short		fmul_res_operr	- tbl_fmul_op # ZERO x INF
	short		fmul_res_qnan	- tbl_fmul_op # ZERO x QNAN
	short		fmul_zero	- tbl_fmul_op # ZERO x DENORM
	short		fmul_res_snan	- tbl_fmul_op # ZERO x SNAN
	short		tbl_fmul_op	- tbl_fmul_op #
	short		tbl_fmul_op	- tbl_fmul_op #

	short		fmul_inf_dst	- tbl_fmul_op # INF x NORM
	short		fmul_res_operr	- tbl_fmul_op # INF x ZERO
	short		fmul_inf_dst	- tbl_fmul_op # INF x INF
	short		fmul_res_qnan	- tbl_fmul_op # INF x QNAN
	short		fmul_inf_dst	- tbl_fmul_op # INF x DENORM
	short		fmul_res_snan	- tbl_fmul_op # INF x SNAN
	short		tbl_fmul_op	- tbl_fmul_op #
	short		tbl_fmul_op	- tbl_fmul_op #

	short		fmul_res_qnan	- tbl_fmul_op # QNAN x NORM
	short		fmul_res_qnan	- tbl_fmul_op # QNAN x ZERO
	short		fmul_res_qnan	- tbl_fmul_op # QNAN x INF
	short		fmul_res_qnan	- tbl_fmul_op # QNAN x QNAN
	short		fmul_res_qnan	- tbl_fmul_op # QNAN x DENORM
	short		fmul_res_snan	- tbl_fmul_op # QNAN x SNAN
	short		tbl_fmul_op	- tbl_fmul_op #
	short		tbl_fmul_op	- tbl_fmul_op #

	short		fmul_norm	- tbl_fmul_op # DENORM x NORM
	short		fmul_zero	- tbl_fmul_op # DENORM x ZERO
	short		fmul_inf_src	- tbl_fmul_op # DENORM x INF
	short		fmul_res_qnan	- tbl_fmul_op # DENORM x QNAN
	short		fmul_norm	- tbl_fmul_op # DENORM x DENORM
	short		fmul_res_snan	- tbl_fmul_op # DENORM x SNAN
	short		tbl_fmul_op	- tbl_fmul_op #
	short		tbl_fmul_op	- tbl_fmul_op #

	short		fmul_res_snan	- tbl_fmul_op # SNAN x NORM
	short		fmul_res_snan	- tbl_fmul_op # SNAN x ZERO
	short		fmul_res_snan	- tbl_fmul_op # SNAN x INF
	short		fmul_res_snan	- tbl_fmul_op # SNAN x QNAN
	short		fmul_res_snan	- tbl_fmul_op # SNAN x DENORM
	short		fmul_res_snan	- tbl_fmul_op # SNAN x SNAN
	short		tbl_fmul_op	- tbl_fmul_op #
	short		tbl_fmul_op	- tbl_fmul_op #

fmul_res_operr:
	bra.l		res_operr
fmul_res_snan:
	bra.l		res_snan
fmul_res_qnan:
	bra.l		res_qnan

#
# Multiply: (Zero x Zero) || (Zero x norm) || (Zero x denorm)
#
	global		fmul_zero		# global for fsglmul
fmul_zero:
	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
	mov.b		DST_EX(%a1),%d1
	eor.b		%d0,%d1
	bpl.b		fmul_zero_p		# result ZERO is pos.
fmul_zero_n:
	fmov.s		&0x80000000,%fp0	# load -ZERO
	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/N
	rts
fmul_zero_p:
	fmov.s		&0x00000000,%fp0	# load +ZERO
	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
	rts

#
# Multiply: (inf x inf) || (inf x norm) || (inf x denorm)
#
# Note: The j-bit for an infinity is a don't-care.
However, to be 8418# strictly compatible w/ the 68881/882, we make sure to return an 8419# INF w/ the j-bit set if the input INF j-bit was set. Destination 8420# INFs take priority. 8421# 8422 global fmul_inf_dst # global for fsglmul 8423fmul_inf_dst: 8424 fmovm.x DST(%a1),&0x80 # return INF result in fp0 8425 mov.b SRC_EX(%a0),%d0 # exclusive or the signs 8426 mov.b DST_EX(%a1),%d1 8427 eor.b %d0,%d1 8428 bpl.b fmul_inf_dst_p # result INF is pos. 8429fmul_inf_dst_n: 8430 fabs.x %fp0 # clear result sign 8431 fneg.x %fp0 # set result sign 8432 mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N 8433 rts 8434fmul_inf_dst_p: 8435 fabs.x %fp0 # clear result sign 8436 mov.b &inf_bmask,FPSR_CC(%a6) # set INF 8437 rts 8438 8439 global fmul_inf_src # global for fsglmul 8440fmul_inf_src: 8441 fmovm.x SRC(%a0),&0x80 # return INF result in fp0 8442 mov.b SRC_EX(%a0),%d0 # exclusive or the signs 8443 mov.b DST_EX(%a1),%d1 8444 eor.b %d0,%d1 8445 bpl.b fmul_inf_dst_p # result INF is pos. 8446 bra.b fmul_inf_dst_n 8447 8448######################################################################### 8449# XDEF **************************************************************** # 8450# fin(): emulates the fmove instruction # 8451# fsin(): emulates the fsmove instruction # 8452# fdin(): emulates the fdmove instruction # 8453# # 8454# XREF **************************************************************** # 8455# norm() - normalize mantissa for EXOP on denorm # 8456# scale_to_zero_src() - scale src exponent to zero # 8457# ovf_res() - return default overflow result # 8458# unf_res() - return default underflow result # 8459# res_qnan_1op() - return QNAN result # 8460# res_snan_1op() - return SNAN result # 8461# # 8462# INPUT *************************************************************** # 8463# a0 = pointer to extended precision source operand # 8464# d0 = round prec/mode # 8465# # 8466# OUTPUT ************************************************************** # 8467# fp0 = result # 8468# fp1 = 
#	      EXOP (if exception occurred)				#
#									#
# ALGORITHM *********************************************************** #
#	Handle NANs, infinities, and zeroes as special cases. Divide	#
# norms into extended, single, and double precision.			#
#	Norms can be emulated w/ a regular fmove instruction. For	#
# sgl/dbl, must scale exponent and perform an "fmove". Check to see	#
# if the result would have overflowed/underflowed. If so, use unf_res() #
# or ovf_res() to return the default result. Also return EXOP if	#
# exception is enabled. If no exception, return the default result.	#
#	Unnorms don't pass through here.				#
#									#
#########################################################################

	global		fsin
fsin:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&s_mode*0x10,%d0	# insert sgl precision
	bra.b		fin

	global		fdin
fdin:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&d_mode*0x10,%d0	# insert dbl precision

	global		fin
fin:
	mov.l		%d0,L_SCR3(%a6)		# store rnd info

	mov.b		STAG(%a6),%d1		# fetch src optype tag
	bne.w		fin_not_norm		# optimize on non-norm input

#
# FP MOVE IN: NORMs and DENORMs ONLY!
#
fin_norm:
	andi.b		&0xc0,%d0		# is precision extended?
	bne.w		fin_not_ext		# no, so go handle dbl or sgl

#
# precision selected is extended. so...we cannot get an underflow
# or overflow because of rounding to the correct precision. so...
# skip the scaling and unscaling...
#
	tst.b		SRC_EX(%a0)		# is the operand negative?
	bpl.b		fin_norm_done		# no
	bset		&neg_bit,FPSR_CC(%a6)	# yes, so set 'N' ccode bit
fin_norm_done:
	fmovm.x		SRC(%a0),&0x80		# return result in fp0
	rts

#
# for an extended precision DENORM, the UNFL exception bit is set
# the accrued bit is NOT set in this instance(no inexactness!)
#
fin_denorm:
	andi.b		&0xc0,%d0		# is precision extended?
	bne.w		fin_not_ext		# no, so go handle dbl or sgl

	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
	tst.b		SRC_EX(%a0)		# is the operand negative?
	bpl.b		fin_denorm_done		# no
	bset		&neg_bit,FPSR_CC(%a6)	# yes, so set 'N' ccode bit
fin_denorm_done:
	fmovm.x		SRC(%a0),&0x80		# return result in fp0
	btst		&unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
	bne.b		fin_denorm_unfl_ena	# yes
	rts

#
# the input is an extended DENORM and underflow is enabled in the FPCR.
# normalize the mantissa and add the bias of 0x6000 to the resulting negative
# exponent and insert back into the operand.
#
fin_denorm_unfl_ena:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
	bsr.l		norm			# normalize result
	neg.w		%d0			# new exponent = -(shft val)
	addi.w		&0x6000,%d0		# add new bias to exponent
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch old sign,exp
	andi.w		&0x8000,%d1		# keep old sign
	andi.w		&0x7fff,%d0		# clear sign position
	or.w		%d1,%d0			# concat new exp,old sign
	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	rts

#
# operand is to be rounded to single or double precision
#
fin_not_ext:
	cmpi.b		%d0,&s_mode*0x10	# separate sgl/dbl prec
	bne.b		fin_dbl

#
# operand is to be rounded to single precision
#
fin_sgl:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	bsr.l		scale_to_zero_src	# calculate scale factor

	cmpi.l		%d0,&0x3fff-0x3f80	# will move in underflow?
	bge.w		fin_sd_unfl		# yes; go handle underflow
	cmpi.l		%d0,&0x3fff-0x407e	# will move in overflow?
	beq.w		fin_sd_may_ovfl		# maybe; go check
	blt.w		fin_sd_ovfl		# yes; go handle overflow

#
# operand will NOT overflow or underflow when moved into the fp reg file
#
fin_sd_normal:
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fmov.x		FP_SCR0(%a6),%fp0	# perform move

	fmov.l		%fpsr,%d1		# save FPSR
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

fin_sd_normal_exit:
	mov.l		%d2,-(%sp)		# save d2
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
	mov.w		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	sub.l		%d0,%d1			# add scale factor
	andi.w		&0x8000,%d2		# keep old sign
	or.w		%d1,%d2			# concat old sign,new exponent
	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
	rts

#
# operand is to be rounded to double precision
#
fin_dbl:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	bsr.l		scale_to_zero_src	# calculate scale factor

	cmpi.l		%d0,&0x3fff-0x3c00	# will move in underflow?
	bge.w		fin_sd_unfl		# yes; go handle underflow
	cmpi.l		%d0,&0x3fff-0x43fe	# will move in overflow?
	beq.w		fin_sd_may_ovfl		# maybe; go check
	blt.w		fin_sd_ovfl		# yes; go handle overflow
	bra.w		fin_sd_normal		# no; go handle normalized op

#
# operand WILL underflow when moved in to the fp register file
#
fin_sd_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	tst.b		FP_SCR0_EX(%a6)		# is operand negative?
	bpl.b		fin_sd_unfl_tst
	bset		&neg_bit,FPSR_CC(%a6)	# set 'N' ccode bit

# if underflow or inexact is enabled, then go calculate the EXOP first.
fin_sd_unfl_tst:
	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b		fin_sd_unfl_ena		# yes

fin_sd_unfl_dis:
	lea		FP_SCR0(%a6),%a0	# pass: result addr
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# unf_res may have set 'Z'
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
	rts

#
# operand will underflow AND underflow or inexact is enabled.
# Therefore, we must return the result rounded to extended precision.
#
fin_sd_unfl_ena:
	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent

	mov.l		%d2,-(%sp)		# save d2
	mov.w		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	sub.l		%d0,%d1			# subtract scale factor
	andi.w		&0x8000,%d2		# extract old sign
	addi.l		&0x6000,%d1		# add new bias
	andi.w		&0x7fff,%d1
	or.w		%d1,%d2			# concat old sign,new exp
	mov.w		%d2,FP_SCR1_EX(%a6)	# insert new exponent
	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
	mov.l		(%sp)+,%d2		# restore d2
	bra.b		fin_sd_unfl_dis

#
# operand WILL overflow.
#
fin_sd_ovfl:
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fmov.x		FP_SCR0(%a6),%fp0	# perform move

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# save FPSR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

fin_sd_ovfl_tst:
	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
	bne.b		fin_sd_ovfl_ena		# yes

#
# OVFL is not enabled; therefore, we must create the default result by
# calling ovf_res().
#
fin_sd_ovfl_dis:
	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
	sne		%d1			# set sign param accordingly
	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
	bsr.l		ovf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
	fmovm.x		(%a0),&0x80		# return default result in fp0
	rts

#
# OVFL is enabled.
# the INEX2 bit has already been updated by the round to the correct precision.
# now, round to extended(and don't alter the FPSR).
#
fin_sd_ovfl_ena:
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# add scale factor
	sub.l		&0x6000,%d1		# subtract bias
	andi.w		&0x7fff,%d1
	or.w		%d2,%d1
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.b		fin_sd_ovfl_dis

#
# the move in MAY overflow. so...
#
fin_sd_may_ovfl:
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fmov.x		FP_SCR0(%a6),%fp0	# perform the move

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	fabs.x		%fp0,%fp1		# make a copy of result
	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
	fbge.w		fin_sd_ovfl_tst		# yes; overflow has occurred

# no, it didn't overflow; we have correct result
	bra.w		fin_sd_normal_exit

##########################################################################

#
# operand is not a NORM: check its optype and branch accordingly
#
fin_not_norm:
	cmpi.b		%d1,&DENORM		# weed out DENORM
	beq.w		fin_denorm
	cmpi.b		%d1,&SNAN		# weed out SNANs
	beq.l		res_snan_1op
	cmpi.b		%d1,&QNAN		# weed out QNANs
	beq.l		res_qnan_1op

#
# do the fmove in; at this point, only possible ops are ZERO and INF.
8760# use fmov to determine ccodes. 8761# prec:mode should be zero at this point but it won't affect answer anyways. 8762# 8763 fmov.x SRC(%a0),%fp0 # do fmove in 8764 fmov.l %fpsr,%d0 # no exceptions possible 8765 rol.l &0x8,%d0 # put ccodes in lo byte 8766 mov.b %d0,FPSR_CC(%a6) # insert correct ccodes 8767 rts 8768 8769######################################################################### 8770# XDEF **************************************************************** # 8771# fdiv(): emulates the fdiv instruction # 8772# fsdiv(): emulates the fsdiv instruction # 8773# fddiv(): emulates the fddiv instruction # 8774# # 8775# XREF **************************************************************** # 8776# scale_to_zero_src() - scale src exponent to zero # 8777# scale_to_zero_dst() - scale dst exponent to zero # 8778# unf_res() - return default underflow result # 8779# ovf_res() - return default overflow result # 8780# res_qnan() - return QNAN result # 8781# res_snan() - return SNAN result # 8782# # 8783# INPUT *************************************************************** # 8784# a0 = pointer to extended precision source operand # 8785# a1 = pointer to extended precision destination operand # 8786# d0 rnd prec,mode # 8787# # 8788# OUTPUT ************************************************************** # 8789# fp0 = result # 8790# fp1 = EXOP (if exception occurred) # 8791# # 8792# ALGORITHM *********************************************************** # 8793# Handle NANs, infinities, and zeroes as special cases. Divide # 8794# norms/denorms into ext/sgl/dbl precision. # 8795# For norms/denorms, scale the exponents such that a divide # 8796# instruction won't cause an exception. Use the regular fdiv to # 8797# compute a result. Check if the regular operands would have taken # 8798# an exception. If so, return the default overflow/underflow result # 8799# and return the EXOP if exceptions are enabled. Else, scale the # 8800# result operand to the proper exponent. 
# 8801# # 8802######################################################################### 8803 8804 align 0x10 8805tbl_fdiv_unfl: 8806 long 0x3fff - 0x0000 # ext_unfl 8807 long 0x3fff - 0x3f81 # sgl_unfl 8808 long 0x3fff - 0x3c01 # dbl_unfl 8809 8810tbl_fdiv_ovfl: 8811 long 0x3fff - 0x7ffe # ext overflow exponent 8812 long 0x3fff - 0x407e # sgl overflow exponent 8813 long 0x3fff - 0x43fe # dbl overflow exponent 8814 8815 global fsdiv 8816fsdiv: 8817 andi.b &0x30,%d0 # clear rnd prec 8818 ori.b &s_mode*0x10,%d0 # insert sgl prec 8819 bra.b fdiv 8820 8821 global fddiv 8822fddiv: 8823 andi.b &0x30,%d0 # clear rnd prec 8824 ori.b &d_mode*0x10,%d0 # insert dbl prec 8825 8826 global fdiv 8827fdiv: 8828 mov.l %d0,L_SCR3(%a6) # store rnd info 8829 8830 clr.w %d1 8831 mov.b DTAG(%a6),%d1 8832 lsl.b &0x3,%d1 8833 or.b STAG(%a6),%d1 # combine src tags 8834 8835 bne.w fdiv_not_norm # optimize on non-norm input 8836 8837# 8838# DIVIDE: NORMs and DENORMs ONLY! 8839# 8840fdiv_norm: 8841 mov.w DST_EX(%a1),FP_SCR1_EX(%a6) 8842 mov.l DST_HI(%a1),FP_SCR1_HI(%a6) 8843 mov.l DST_LO(%a1),FP_SCR1_LO(%a6) 8844 8845 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) 8846 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) 8847 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) 8848 8849 bsr.l scale_to_zero_src # scale src exponent 8850 mov.l %d0,-(%sp) # save scale factor 1 8851 8852 bsr.l scale_to_zero_dst # scale dst exponent 8853 8854 neg.l (%sp) # SCALE FACTOR = scale1 - scale2 8855 add.l %d0,(%sp) 8856 8857 mov.w 2+L_SCR3(%a6),%d1 # fetch precision 8858 lsr.b &0x6,%d1 # shift to lo bits 8859 mov.l (%sp)+,%d0 # load S.F. 8860 cmp.l %d0,(tbl_fdiv_ovfl.b,%pc,%d1.w*4) # will result overflow? 8861 ble.w fdiv_may_ovfl # result will overflow 8862 8863 cmp.l %d0,(tbl_fdiv_unfl.w,%pc,%d1.w*4) # will result underflow? 
8864 beq.w fdiv_may_unfl # maybe 8865 bgt.w fdiv_unfl # yes; go handle underflow 8866 8867fdiv_normal: 8868 fmovm.x FP_SCR1(%a6),&0x80 # load dst op 8869 8870 fmov.l L_SCR3(%a6),%fpcr # save FPCR 8871 fmov.l &0x0,%fpsr # clear FPSR 8872 8873 fdiv.x FP_SCR0(%a6),%fp0 # perform divide 8874 8875 fmov.l %fpsr,%d1 # save FPSR 8876 fmov.l &0x0,%fpcr # clear FPCR 8877 8878 or.l %d1,USER_FPSR(%a6) # save INEX2,N 8879 8880fdiv_normal_exit: 8881 fmovm.x &0x80,FP_SCR0(%a6) # store result on stack 8882 mov.l %d2,-(%sp) # store d2 8883 mov.w FP_SCR0_EX(%a6),%d1 # load {sgn,exp} 8884 mov.l %d1,%d2 # make a copy 8885 andi.l &0x7fff,%d1 # strip sign 8886 andi.w &0x8000,%d2 # keep old sign 8887 sub.l %d0,%d1 # add scale factor 8888 or.w %d2,%d1 # concat old sign,new exp 8889 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent 8890 mov.l (%sp)+,%d2 # restore d2 8891 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0 8892 rts 8893 8894tbl_fdiv_ovfl2: 8895 long 0x7fff 8896 long 0x407f 8897 long 0x43ff 8898 8899fdiv_no_ovfl: 8900 mov.l (%sp)+,%d0 # restore scale factor 8901 bra.b fdiv_normal_exit 8902 8903fdiv_may_ovfl: 8904 mov.l %d0,-(%sp) # save scale factor 8905 8906 fmovm.x FP_SCR1(%a6),&0x80 # load dst op 8907 8908 fmov.l L_SCR3(%a6),%fpcr # set FPCR 8909 fmov.l &0x0,%fpsr # set FPSR 8910 8911 fdiv.x FP_SCR0(%a6),%fp0 # execute divide 8912 8913 fmov.l %fpsr,%d0 8914 fmov.l &0x0,%fpcr 8915 8916 or.l %d0,USER_FPSR(%a6) # save INEX,N 8917 8918 fmovm.x &0x01,-(%sp) # save result to stack 8919 mov.w (%sp),%d0 # fetch new exponent 8920 add.l &0xc,%sp # clear result from stack 8921 andi.l &0x7fff,%d0 # strip sign 8922 sub.l (%sp),%d0 # add scale factor 8923 cmp.l %d0,(tbl_fdiv_ovfl2.b,%pc,%d1.w*4) 8924 blt.b fdiv_no_ovfl 8925 mov.l (%sp)+,%d0 8926 8927fdiv_ovfl_tst: 8928 or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex 8929 8930 mov.b FPCR_ENABLE(%a6),%d1 8931 andi.b &0x13,%d1 # is OVFL or INEX enabled? 
8932 bne.b fdiv_ovfl_ena # yes 8933 8934fdiv_ovfl_dis: 8935 btst &neg_bit,FPSR_CC(%a6) # is result negative? 8936 sne %d1 # set sign param accordingly 8937 mov.l L_SCR3(%a6),%d0 # pass prec:rnd 8938 bsr.l ovf_res # calculate default result 8939 or.b %d0,FPSR_CC(%a6) # set INF if applicable 8940 fmovm.x (%a0),&0x80 # return default result in fp0 8941 rts 8942 8943fdiv_ovfl_ena: 8944 mov.l L_SCR3(%a6),%d1 8945 andi.b &0xc0,%d1 # is precision extended? 8946 bne.b fdiv_ovfl_ena_sd # no, do sgl or dbl 8947 8948fdiv_ovfl_ena_cont: 8949 fmovm.x &0x80,FP_SCR0(%a6) # move result to stack 8950 8951 mov.l %d2,-(%sp) # save d2 8952 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp} 8953 mov.w %d1,%d2 # make a copy 8954 andi.l &0x7fff,%d1 # strip sign 8955 sub.l %d0,%d1 # add scale factor 8956 subi.l &0x6000,%d1 # subtract bias 8957 andi.w &0x7fff,%d1 # clear sign bit 8958 andi.w &0x8000,%d2 # keep old sign 8959 or.w %d2,%d1 # concat old sign,new exp 8960 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent 8961 mov.l (%sp)+,%d2 # restore d2 8962 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1 8963 bra.b fdiv_ovfl_dis 8964 8965fdiv_ovfl_ena_sd: 8966 fmovm.x FP_SCR1(%a6),&0x80 # load dst operand 8967 8968 mov.l L_SCR3(%a6),%d1 8969 andi.b &0x30,%d1 # keep rnd mode 8970 fmov.l %d1,%fpcr # set FPCR 8971 8972 fdiv.x FP_SCR0(%a6),%fp0 # execute divide 8973 8974 fmov.l &0x0,%fpcr # clear FPCR 8975 bra.b fdiv_ovfl_ena_cont 8976 8977fdiv_unfl: 8978 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit 8979 8980 fmovm.x FP_SCR1(%a6),&0x80 # load dst op 8981 8982 fmov.l &rz_mode*0x10,%fpcr # set FPCR 8983 fmov.l &0x0,%fpsr # clear FPSR 8984 8985 fdiv.x FP_SCR0(%a6),%fp0 # execute divide 8986 8987 fmov.l %fpsr,%d1 # save status 8988 fmov.l &0x0,%fpcr # clear FPCR 8989 8990 or.l %d1,USER_FPSR(%a6) # save INEX2,N 8991 8992 mov.b FPCR_ENABLE(%a6),%d1 8993 andi.b &0x0b,%d1 # is UNFL or INEX enabled? 
8994 bne.b fdiv_unfl_ena # yes 8995 8996fdiv_unfl_dis: 8997 fmovm.x &0x80,FP_SCR0(%a6) # store out result 8998 8999 lea FP_SCR0(%a6),%a0 # pass: result addr 9000 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode 9001 bsr.l unf_res # calculate default result 9002 or.b %d0,FPSR_CC(%a6) # 'Z' may have been set 9003 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0 9004 rts 9005 9006# 9007# UNFL is enabled. 9008# 9009fdiv_unfl_ena: 9010 fmovm.x FP_SCR1(%a6),&0x40 # load dst op 9011 9012 mov.l L_SCR3(%a6),%d1 9013 andi.b &0xc0,%d1 # is precision extended? 9014 bne.b fdiv_unfl_ena_sd # no, sgl or dbl 9015 9016 fmov.l L_SCR3(%a6),%fpcr # set FPCR 9017 9018fdiv_unfl_ena_cont: 9019 fmov.l &0x0,%fpsr # clear FPSR 9020 9021 fdiv.x FP_SCR0(%a6),%fp1 # execute divide 9022 9023 fmov.l &0x0,%fpcr # clear FPCR 9024 9025 fmovm.x &0x40,FP_SCR0(%a6) # save result to stack 9026 mov.l %d2,-(%sp) # save d2 9027 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp} 9028 mov.l %d1,%d2 # make a copy 9029 andi.l &0x7fff,%d1 # strip sign 9030 andi.w &0x8000,%d2 # keep old sign 9031 sub.l %d0,%d1 # add scale factoer 9032 addi.l &0x6000,%d1 # add bias 9033 andi.w &0x7fff,%d1 9034 or.w %d2,%d1 # concat old sign,new exp 9035 mov.w %d1,FP_SCR0_EX(%a6) # insert new exp 9036 mov.l (%sp)+,%d2 # restore d2 9037 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1 9038 bra.w fdiv_unfl_dis 9039 9040fdiv_unfl_ena_sd: 9041 mov.l L_SCR3(%a6),%d1 9042 andi.b &0x30,%d1 # use only rnd mode 9043 fmov.l %d1,%fpcr # set FPCR 9044 9045 bra.b fdiv_unfl_ena_cont 9046 9047# 9048# the divide operation MAY underflow: 9049# 9050fdiv_may_unfl: 9051 fmovm.x FP_SCR1(%a6),&0x80 # load dst op 9052 9053 fmov.l L_SCR3(%a6),%fpcr # set FPCR 9054 fmov.l &0x0,%fpsr # clear FPSR 9055 9056 fdiv.x FP_SCR0(%a6),%fp0 # execute divide 9057 9058 fmov.l %fpsr,%d1 # save status 9059 fmov.l &0x0,%fpcr # clear FPCR 9060 9061 or.l %d1,USER_FPSR(%a6) # save INEX2,N 9062 9063 fabs.x %fp0,%fp1 # make a copy of result 9064 fcmp.b %fp1,&0x1 # is |result| > 
1.b? 9065 fbgt.w fdiv_normal_exit # no; no underflow occurred 9066 fblt.w fdiv_unfl # yes; underflow occurred 9067 9068# 9069# we still don't know if underflow occurred. result is ~ equal to 1. but, 9070# we don't know if the result was an underflow that rounded up to a 1 9071# or a normalized number that rounded down to a 1. so, redo the entire 9072# operation using RZ as the rounding mode to see what the pre-rounded 9073# result is. this case should be relatively rare. 9074# 9075 fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1 9076 9077 mov.l L_SCR3(%a6),%d1 9078 andi.b &0xc0,%d1 # keep rnd prec 9079 ori.b &rz_mode*0x10,%d1 # insert RZ 9080 9081 fmov.l %d1,%fpcr # set FPCR 9082 fmov.l &0x0,%fpsr # clear FPSR 9083 9084 fdiv.x FP_SCR0(%a6),%fp1 # execute divide 9085 9086 fmov.l &0x0,%fpcr # clear FPCR 9087 fabs.x %fp1 # make absolute value 9088 fcmp.b %fp1,&0x1 # is |result| < 1.b? 9089 fbge.w fdiv_normal_exit # no; no underflow occurred 9090 bra.w fdiv_unfl # yes; underflow occurred 9091 9092############################################################################ 9093 9094# 9095# Divide: inputs are not both normalized; what are they? 
9096# 9097fdiv_not_norm: 9098 mov.w (tbl_fdiv_op.b,%pc,%d1.w*2),%d1 9099 jmp (tbl_fdiv_op.b,%pc,%d1.w*1) 9100 9101 swbeg &48 9102tbl_fdiv_op: 9103 short fdiv_norm - tbl_fdiv_op # NORM / NORM 9104 short fdiv_inf_load - tbl_fdiv_op # NORM / ZERO 9105 short fdiv_zero_load - tbl_fdiv_op # NORM / INF 9106 short fdiv_res_qnan - tbl_fdiv_op # NORM / QNAN 9107 short fdiv_norm - tbl_fdiv_op # NORM / DENORM 9108 short fdiv_res_snan - tbl_fdiv_op # NORM / SNAN 9109 short tbl_fdiv_op - tbl_fdiv_op # 9110 short tbl_fdiv_op - tbl_fdiv_op # 9111 9112 short fdiv_zero_load - tbl_fdiv_op # ZERO / NORM 9113 short fdiv_res_operr - tbl_fdiv_op # ZERO / ZERO 9114 short fdiv_zero_load - tbl_fdiv_op # ZERO / INF 9115 short fdiv_res_qnan - tbl_fdiv_op # ZERO / QNAN 9116 short fdiv_zero_load - tbl_fdiv_op # ZERO / DENORM 9117 short fdiv_res_snan - tbl_fdiv_op # ZERO / SNAN 9118 short tbl_fdiv_op - tbl_fdiv_op # 9119 short tbl_fdiv_op - tbl_fdiv_op # 9120 9121 short fdiv_inf_dst - tbl_fdiv_op # INF / NORM 9122 short fdiv_inf_dst - tbl_fdiv_op # INF / ZERO 9123 short fdiv_res_operr - tbl_fdiv_op # INF / INF 9124 short fdiv_res_qnan - tbl_fdiv_op # INF / QNAN 9125 short fdiv_inf_dst - tbl_fdiv_op # INF / DENORM 9126 short fdiv_res_snan - tbl_fdiv_op # INF / SNAN 9127 short tbl_fdiv_op - tbl_fdiv_op # 9128 short tbl_fdiv_op - tbl_fdiv_op # 9129 9130 short fdiv_res_qnan - tbl_fdiv_op # QNAN / NORM 9131 short fdiv_res_qnan - tbl_fdiv_op # QNAN / ZERO 9132 short fdiv_res_qnan - tbl_fdiv_op # QNAN / INF 9133 short fdiv_res_qnan - tbl_fdiv_op # QNAN / QNAN 9134 short fdiv_res_qnan - tbl_fdiv_op # QNAN / DENORM 9135 short fdiv_res_snan - tbl_fdiv_op # QNAN / SNAN 9136 short tbl_fdiv_op - tbl_fdiv_op # 9137 short tbl_fdiv_op - tbl_fdiv_op # 9138 9139 short fdiv_norm - tbl_fdiv_op # DENORM / NORM 9140 short fdiv_inf_load - tbl_fdiv_op # DENORM / ZERO 9141 short fdiv_zero_load - tbl_fdiv_op # DENORM / INF 9142 short fdiv_res_qnan - tbl_fdiv_op # DENORM / QNAN 9143 short fdiv_norm - tbl_fdiv_op # DENORM 
/ DENORM 9144 short fdiv_res_snan - tbl_fdiv_op # DENORM / SNAN 9145 short tbl_fdiv_op - tbl_fdiv_op # 9146 short tbl_fdiv_op - tbl_fdiv_op # 9147 9148 short fdiv_res_snan - tbl_fdiv_op # SNAN / NORM 9149 short fdiv_res_snan - tbl_fdiv_op # SNAN / ZERO 9150 short fdiv_res_snan - tbl_fdiv_op # SNAN / INF 9151 short fdiv_res_snan - tbl_fdiv_op # SNAN / QNAN 9152 short fdiv_res_snan - tbl_fdiv_op # SNAN / DENORM 9153 short fdiv_res_snan - tbl_fdiv_op # SNAN / SNAN 9154 short tbl_fdiv_op - tbl_fdiv_op # 9155 short tbl_fdiv_op - tbl_fdiv_op # 9156 9157fdiv_res_qnan: 9158 bra.l res_qnan 9159fdiv_res_snan: 9160 bra.l res_snan 9161fdiv_res_operr: 9162 bra.l res_operr 9163 9164 global fdiv_zero_load # global for fsgldiv 9165fdiv_zero_load: 9166 mov.b SRC_EX(%a0),%d0 # result sign is exclusive 9167 mov.b DST_EX(%a1),%d1 # or of input signs. 9168 eor.b %d0,%d1 9169 bpl.b fdiv_zero_load_p # result is positive 9170 fmov.s &0x80000000,%fp0 # load a -ZERO 9171 mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/N 9172 rts 9173fdiv_zero_load_p: 9174 fmov.s &0x00000000,%fp0 # load a +ZERO 9175 mov.b &z_bmask,FPSR_CC(%a6) # set Z 9176 rts 9177 9178# 9179# The destination was In Range and the source was a ZERO. The result, 9180# Therefore, is an INF w/ the proper sign. 9181# So, determine the sign and return a new INF (w/ the j-bit cleared). 9182# 9183 global fdiv_inf_load # global for fsgldiv 9184fdiv_inf_load: 9185 ori.w &dz_mask+adz_mask,2+USER_FPSR(%a6) # no; set DZ/ADZ 9186 mov.b SRC_EX(%a0),%d0 # load both signs 9187 mov.b DST_EX(%a1),%d1 9188 eor.b %d0,%d1 9189 bpl.b fdiv_inf_load_p # result is positive 9190 fmov.s &0xff800000,%fp0 # make result -INF 9191 mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N 9192 rts 9193fdiv_inf_load_p: 9194 fmov.s &0x7f800000,%fp0 # make result +INF 9195 mov.b &inf_bmask,FPSR_CC(%a6) # set INF 9196 rts 9197 9198# 9199# The destination was an INF w/ an In Range or ZERO source, the result is 9200# an INF w/ the proper sign. 
9201# The 68881/882 returns the destination INF w/ the new sign(if the j-bit of the 9202# dst INF is set, then the j-bit of the result INF is also set). 9203# 9204 global fdiv_inf_dst # global for fsgldiv 9205fdiv_inf_dst: 9206 mov.b DST_EX(%a1),%d0 # load both signs 9207 mov.b SRC_EX(%a0),%d1 9208 eor.b %d0,%d1 9209 bpl.b fdiv_inf_dst_p # result is positive 9210 9211 fmovm.x DST(%a1),&0x80 # return result in fp0 9212 fabs.x %fp0 # clear sign bit 9213 fneg.x %fp0 # set sign bit 9214 mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/NEG 9215 rts 9216 9217fdiv_inf_dst_p: 9218 fmovm.x DST(%a1),&0x80 # return result in fp0 9219 fabs.x %fp0 # return positive INF 9220 mov.b &inf_bmask,FPSR_CC(%a6) # set INF 9221 rts 9222 9223######################################################################### 9224# XDEF **************************************************************** # 9225# fneg(): emulates the fneg instruction # 9226# fsneg(): emulates the fsneg instruction # 9227# fdneg(): emulates the fdneg instruction # 9228# # 9229# XREF **************************************************************** # 9230# norm() - normalize a denorm to provide EXOP # 9231# scale_to_zero_src() - scale sgl/dbl source exponent # 9232# ovf_res() - return default overflow result # 9233# unf_res() - return default underflow result # 9234# res_qnan_1op() - return QNAN result # 9235# res_snan_1op() - return SNAN result # 9236# # 9237# INPUT *************************************************************** # 9238# a0 = pointer to extended precision source operand # 9239# d0 = rnd prec,mode # 9240# # 9241# OUTPUT ************************************************************** # 9242# fp0 = result # 9243# fp1 = EXOP (if exception occurred) # 9244# # 9245# ALGORITHM *********************************************************** # 9246# Handle NANs, zeroes, and infinities as special cases. Separate # 9247# norms/denorms into ext/sgl/dbl precisions.
Extended precision can be # 9248# emulated by simply setting sign bit. Sgl/dbl operands must be scaled # 9249# and an actual fneg performed to see if overflow/underflow would have # 9250# occurred. If so, return default underflow/overflow result. Else, # 9251# scale the result exponent and return result. FPSR gets set based on # 9252# the result value. # 9253# # 9254######################################################################### 9255 9256 global fsneg 9257fsneg: 9258 andi.b &0x30,%d0 # clear rnd prec 9259 ori.b &s_mode*0x10,%d0 # insert sgl precision 9260 bra.b fneg 9261 9262 global fdneg 9263fdneg: 9264 andi.b &0x30,%d0 # clear rnd prec 9265 ori.b &d_mode*0x10,%d0 # insert dbl prec 9266 9267 global fneg 9268fneg: 9269 mov.l %d0,L_SCR3(%a6) # store rnd info 9270 mov.b STAG(%a6),%d1 9271 bne.w fneg_not_norm # optimize on non-norm input 9272 9273# 9274# NEGATE SIGN : norms and denorms ONLY! 9275# 9276fneg_norm: 9277 andi.b &0xc0,%d0 # is precision extended? 9278 bne.w fneg_not_ext # no; go handle sgl or dbl 9279 9280# 9281# precision selected is extended. so...we can not get an underflow 9282# or overflow because of rounding to the correct precision. so... 9283# skip the scaling and unscaling... 9284# 9285 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) 9286 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) 9287 mov.w SRC_EX(%a0),%d0 9288 eori.w &0x8000,%d0 # negate sign 9289 bpl.b fneg_norm_load # sign is positive 9290 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit 9291fneg_norm_load: 9292 mov.w %d0,FP_SCR0_EX(%a6) 9293 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0 9294 rts 9295 9296# 9297# for an extended precision DENORM, the UNFL exception bit is set 9298# the accrued bit is NOT set in this instance(no inexactness!) 9299# 9300fneg_denorm: 9301 andi.b &0xc0,%d0 # is precision extended? 
9302 bne.b fneg_not_ext # no; go handle sgl or dbl 9303 9304 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit 9305 9306 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) 9307 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) 9308 mov.w SRC_EX(%a0),%d0 9309 eori.w &0x8000,%d0 # negate sign 9310 bpl.b fneg_denorm_done # no 9311 mov.b &neg_bmask,FPSR_CC(%a6) # yes, set 'N' ccode bit 9312fneg_denorm_done: 9313 mov.w %d0,FP_SCR0_EX(%a6) 9314 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0 9315 9316 btst &unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled? 9317 bne.b fneg_ext_unfl_ena # yes 9318 rts 9319 9320# 9321# the input is an extended DENORM and underflow is enabled in the FPCR. 9322# normalize the mantissa and add the bias of 0x6000 to the resulting negative 9323# exponent and insert back into the operand. 9324# 9325fneg_ext_unfl_ena: 9326 lea FP_SCR0(%a6),%a0 # pass: ptr to operand 9327 bsr.l norm # normalize result 9328 neg.w %d0 # new exponent = -(shft val) 9329 addi.w &0x6000,%d0 # add new bias to exponent 9330 mov.w FP_SCR0_EX(%a6),%d1 # fetch old sign,exp 9331 andi.w &0x8000,%d1 # keep old sign 9332 andi.w &0x7fff,%d0 # clear sign position 9333 or.w %d1,%d0 # concat old sign, new exponent 9334 mov.w %d0,FP_SCR0_EX(%a6) # insert new exponent 9335 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1 9336 rts 9337 9338# 9339# operand is either single or double 9340# 9341fneg_not_ext: 9342 cmpi.b %d0,&s_mode*0x10 # separate sgl/dbl prec 9343 bne.b fneg_dbl 9344 9345# 9346# operand is to be rounded to single precision 9347# 9348fneg_sgl: 9349 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) 9350 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) 9351 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) 9352 bsr.l scale_to_zero_src # calculate scale factor 9353 9354 cmpi.l %d0,&0x3fff-0x3f80 # will move in underflow? 9355 bge.w fneg_sd_unfl # yes; go handle underflow 9356 cmpi.l %d0,&0x3fff-0x407e # will move in overflow? 
9357 beq.w fneg_sd_may_ovfl # maybe; go check 9358 blt.w fneg_sd_ovfl # yes; go handle overflow 9359 9360# 9361# operand will NOT overflow or underflow when moved in to the fp reg file 9362# 9363fneg_sd_normal: 9364 fmov.l &0x0,%fpsr # clear FPSR 9365 fmov.l L_SCR3(%a6),%fpcr # set FPCR 9366 9367 fneg.x FP_SCR0(%a6),%fp0 # perform negation 9368 9369 fmov.l %fpsr,%d1 # save FPSR 9370 fmov.l &0x0,%fpcr # clear FPCR 9371 9372 or.l %d1,USER_FPSR(%a6) # save INEX2,N 9373 9374fneg_sd_normal_exit: 9375 mov.l %d2,-(%sp) # save d2 9376 fmovm.x &0x80,FP_SCR0(%a6) # store out result 9377 mov.w FP_SCR0_EX(%a6),%d1 # load sgn,exp 9378 mov.w %d1,%d2 # make a copy 9379 andi.l &0x7fff,%d1 # strip sign 9380 sub.l %d0,%d1 # add scale factor 9381 andi.w &0x8000,%d2 # keep old sign 9382 or.w %d1,%d2 # concat old sign,new exp 9383 mov.w %d2,FP_SCR0_EX(%a6) # insert new exponent 9384 mov.l (%sp)+,%d2 # restore d2 9385 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0 9386 rts 9387 9388# 9389# operand is to be rounded to double precision 9390# 9391fneg_dbl: 9392 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) 9393 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) 9394 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) 9395 bsr.l scale_to_zero_src # calculate scale factor 9396 9397 cmpi.l %d0,&0x3fff-0x3c00 # will move in underflow? 9398 bge.b fneg_sd_unfl # yes; go handle underflow 9399 cmpi.l %d0,&0x3fff-0x43fe # will move in overflow? 9400 beq.w fneg_sd_may_ovfl # maybe; go check 9401 blt.w fneg_sd_ovfl # yes; go handle overflow 9402 bra.w fneg_sd_normal # no; go handle normalized op 9403 9404# 9405# operand WILL underflow when moved in to the fp register file 9406# 9407fneg_sd_unfl: 9408 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit 9409 9410 eori.b &0x80,FP_SCR0_EX(%a6) # negate sign 9411 bpl.b fneg_sd_unfl_tst 9412 bset &neg_bit,FPSR_CC(%a6) # set 'N' ccode bit 9413 9414# if underflow or inexact is enabled, go calculate EXOP first.
9415fneg_sd_unfl_tst: 9416 mov.b FPCR_ENABLE(%a6),%d1 9417 andi.b &0x0b,%d1 # is UNFL or INEX enabled? 9418 bne.b fneg_sd_unfl_ena # yes 9419 9420fneg_sd_unfl_dis: 9421 lea FP_SCR0(%a6),%a0 # pass: result addr 9422 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode 9423 bsr.l unf_res # calculate default result 9424 or.b %d0,FPSR_CC(%a6) # unf_res may have set 'Z' 9425 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0 9426 rts 9427 9428# 9429# operand will underflow AND underflow is enabled. 9430# Therefore, we must return the result rounded to extended precision. 9431# 9432fneg_sd_unfl_ena: 9433 mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6) 9434 mov.l FP_SCR0_LO(%a6),FP_SCR1_LO(%a6) 9435 mov.w FP_SCR0_EX(%a6),%d1 # load current exponent 9436 9437 mov.l %d2,-(%sp) # save d2 9438 mov.l %d1,%d2 # make a copy 9439 andi.l &0x7fff,%d1 # strip sign 9440 andi.w &0x8000,%d2 # keep old sign 9441 sub.l %d0,%d1 # subtract scale factor 9442 addi.l &0x6000,%d1 # add new bias 9443 andi.w &0x7fff,%d1 9444 or.w %d2,%d1 # concat new sign,new exp 9445 mov.w %d1,FP_SCR1_EX(%a6) # insert new exp 9446 fmovm.x FP_SCR1(%a6),&0x40 # return EXOP in fp1 9447 mov.l (%sp)+,%d2 # restore d2 9448 bra.b fneg_sd_unfl_dis 9449 9450# 9451# operand WILL overflow. 9452# 9453fneg_sd_ovfl: 9454 fmov.l &0x0,%fpsr # clear FPSR 9455 fmov.l L_SCR3(%a6),%fpcr # set FPCR 9456 9457 fneg.x FP_SCR0(%a6),%fp0 # perform negation 9458 9459 fmov.l &0x0,%fpcr # clear FPCR 9460 fmov.l %fpsr,%d1 # save FPSR 9461 9462 or.l %d1,USER_FPSR(%a6) # save INEX2,N 9463 9464fneg_sd_ovfl_tst: 9465 or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex 9466 9467 mov.b FPCR_ENABLE(%a6),%d1 9468 andi.b &0x13,%d1 # is OVFL or INEX enabled? 9469 bne.b fneg_sd_ovfl_ena # yes 9470 9471# 9472# OVFL is not enabled; therefore, we must create the default result by 9473# calling ovf_res(). 9474# 9475fneg_sd_ovfl_dis: 9476 btst &neg_bit,FPSR_CC(%a6) # is result negative? 
9477 sne %d1 # set sign param accordingly 9478 mov.l L_SCR3(%a6),%d0 # pass: prec,mode 9479 bsr.l ovf_res # calculate default result 9480 or.b %d0,FPSR_CC(%a6) # set INF,N if applicable 9481 fmovm.x (%a0),&0x80 # return default result in fp0 9482 rts 9483 9484# 9485# OVFL is enabled. 9486# the INEX2 bit has already been updated by the round to the correct precision. 9487# now, round to extended(and don't alter the FPSR). 9488# 9489fneg_sd_ovfl_ena: 9490 mov.l %d2,-(%sp) # save d2 9491 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp} 9492 mov.l %d1,%d2 # make a copy 9493 andi.l &0x7fff,%d1 # strip sign 9494 andi.w &0x8000,%d2 # keep old sign 9495 sub.l %d0,%d1 # add scale factor 9496 subi.l &0x6000,%d1 # subtract bias 9497 andi.w &0x7fff,%d1 9498 or.w %d2,%d1 # concat sign,exp 9499 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent 9500 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1 9501 mov.l (%sp)+,%d2 # restore d2 9502 bra.b fneg_sd_ovfl_dis 9503 9504# 9505# the move in MAY overflow. so... 9506# 9507fneg_sd_may_ovfl: 9508 fmov.l &0x0,%fpsr # clear FPSR 9509 fmov.l L_SCR3(%a6),%fpcr # set FPCR 9510 9511 fneg.x FP_SCR0(%a6),%fp0 # perform negation 9512 9513 fmov.l %fpsr,%d1 # save status 9514 fmov.l &0x0,%fpcr # clear FPCR 9515 9516 or.l %d1,USER_FPSR(%a6) # save INEX2,N 9517 9518 fabs.x %fp0,%fp1 # make a copy of result 9519 fcmp.b %fp1,&0x2 # is |result| >= 2.b? 9520 fbge.w fneg_sd_ovfl_tst # yes; overflow has occurred 9521 9522# no, it didn't overflow; we have correct result 9523 bra.w fneg_sd_normal_exit 9524 9525########################################################################## 9526 9527# 9528# input is not normalized; what is it? 9529# 9530fneg_not_norm: 9531 cmpi.b %d1,&DENORM # weed out DENORM 9532 beq.w fneg_denorm 9533 cmpi.b %d1,&SNAN # weed out SNAN 9534 beq.l res_snan_1op 9535 cmpi.b %d1,&QNAN # weed out QNAN 9536 beq.l res_qnan_1op 9537 9538# 9539# do the fneg; at this point, only possible ops are ZERO and INF. 9540# use fneg to determine ccodes.
9541# prec:mode should be zero at this point but it won't affect answer anyways. 9542# 9543 fneg.x SRC_EX(%a0),%fp0 # do fneg 9544 fmov.l %fpsr,%d0 9545 rol.l &0x8,%d0 # put ccodes in lo byte 9546 mov.b %d0,FPSR_CC(%a6) # insert correct ccodes 9547 rts 9548 9549######################################################################### 9550# XDEF **************************************************************** # 9551# ftst(): emulates the ftest instruction # 9552# # 9553# XREF **************************************************************** # 9554# res{s,q}nan_1op() - set NAN result for monadic instruction # 9555# # 9556# INPUT *************************************************************** # 9557# a0 = pointer to extended precision source operand # 9558# # 9559# OUTPUT ************************************************************** # 9560# none # 9561# # 9562# ALGORITHM *********************************************************** # 9563# Check the source operand tag (STAG) and set the FPCR according # 9564# to the operand type and sign. # 9565# # 9566######################################################################### 9567 9568 global ftst 9569ftst: 9570 mov.b STAG(%a6),%d1 9571 bne.b ftst_not_norm # optimize on non-norm input 9572 9573# 9574# Norm: 9575# 9576ftst_norm: 9577 tst.b SRC_EX(%a0) # is operand negative? 9578 bmi.b ftst_norm_m # yes 9579 rts 9580ftst_norm_m: 9581 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit 9582 rts 9583 9584# 9585# input is not normalized; what is it? 9586# 9587ftst_not_norm: 9588 cmpi.b %d1,&ZERO # weed out ZERO 9589 beq.b ftst_zero 9590 cmpi.b %d1,&INF # weed out INF 9591 beq.b ftst_inf 9592 cmpi.b %d1,&SNAN # weed out SNAN 9593 beq.l res_snan_1op 9594 cmpi.b %d1,&QNAN # weed out QNAN 9595 beq.l res_qnan_1op 9596 9597# 9598# Denorm: 9599# 9600ftst_denorm: 9601 tst.b SRC_EX(%a0) # is operand negative? 
9602 bmi.b ftst_denorm_m # yes 9603 rts 9604ftst_denorm_m: 9605 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit 9606 rts 9607 9608# 9609# Infinity: 9610# 9611ftst_inf: 9612 tst.b SRC_EX(%a0) # is operand negative? 9613 bmi.b ftst_inf_m # yes 9614ftst_inf_p: 9615 mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit 9616 rts 9617ftst_inf_m: 9618 mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'I','N' ccode bits 9619 rts 9620 9621# 9622# Zero: 9623# 9624ftst_zero: 9625 tst.b SRC_EX(%a0) # is operand negative? 9626 bmi.b ftst_zero_m # yes 9627ftst_zero_p: 9628 mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit 9629 rts 9630ftst_zero_m: 9631 mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits 9632 rts 9633 9634######################################################################### 9635# XDEF **************************************************************** # 9636# fint(): emulates the fint instruction # 9637# # 9638# XREF **************************************************************** # 9639# res_{s,q}nan_1op() - set NAN result for monadic operation # 9640# # 9641# INPUT *************************************************************** # 9642# a0 = pointer to extended precision source operand # 9643# d0 = round precision/mode # 9644# # 9645# OUTPUT ************************************************************** # 9646# fp0 = result # 9647# # 9648# ALGORITHM *********************************************************** # 9649# Separate according to operand type. Unnorms don't pass through # 9650# here. For norms, load the rounding mode/prec, execute a "fint", then # 9651# store the resulting FPSR bits. # 9652# For denorms, force the j-bit to a one and do the same as for # 9653# norms. Denorms are so low that the answer will either be a zero or a # 9654# one. # 9655# For zeroes/infs/NANs, return the same while setting the FPSR # 9656# as appropriate.
# 9657# # 9658######################################################################### 9659 9660 global fint 9661fint: 9662 mov.b STAG(%a6),%d1 9663 bne.b fint_not_norm # optimize on non-norm input 9664 9665# 9666# Norm: 9667# 9668fint_norm: 9669 andi.b &0x30,%d0 # set prec = ext 9670 9671 fmov.l %d0,%fpcr # set FPCR 9672 fmov.l &0x0,%fpsr # clear FPSR 9673 9674 fint.x SRC(%a0),%fp0 # execute fint 9675 9676 fmov.l &0x0,%fpcr # clear FPCR 9677 fmov.l %fpsr,%d0 # save FPSR 9678 or.l %d0,USER_FPSR(%a6) # set exception bits 9679 9680 rts 9681 9682# 9683# input is not normalized; what is it? 9684# 9685fint_not_norm: 9686 cmpi.b %d1,&ZERO # weed out ZERO 9687 beq.b fint_zero 9688 cmpi.b %d1,&INF # weed out INF 9689 beq.b fint_inf 9690 cmpi.b %d1,&DENORM # weed out DENORM 9691 beq.b fint_denorm 9692 cmpi.b %d1,&SNAN # weed out SNAN 9693 beq.l res_snan_1op 9694 bra.l res_qnan_1op # weed out QNAN 9695 9696# 9697# Denorm: 9698# 9699# for DENORMs, the result will be either (+/-)ZERO or (+/-)1. 9700# also, the INEX2 and AINEX exception bits will be set. 9701# so, we could either set these manually or force the DENORM 9702# to a very small NORM and ship it to the NORM routine. 9703# I do the latter. 9704# 9705fint_denorm: 9706 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp 9707 mov.b &0x80,FP_SCR0_HI(%a6) # force DENORM ==> small NORM 9708 lea FP_SCR0(%a6),%a0 9709 bra.b fint_norm 9710 9711# 9712# Zero: 9713# 9714fint_zero: 9715 tst.b SRC_EX(%a0) # is ZERO negative? 9716 bmi.b fint_zero_m # yes 9717fint_zero_p: 9718 fmov.s &0x00000000,%fp0 # return +ZERO in fp0 9719 mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit 9720 rts 9721fint_zero_m: 9722 fmov.s &0x80000000,%fp0 # return -ZERO in fp0 9723 mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits 9724 rts 9725 9726# 9727# Infinity: 9728# 9729fint_inf: 9730 fmovm.x SRC(%a0),&0x80 # return result in fp0 9731 tst.b SRC_EX(%a0) # is INF negative? 
9732 bmi.b fint_inf_m # yes 9733fint_inf_p: 9734 mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit 9735 rts 9736fint_inf_m: 9737 mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits 9738 rts 9739 9740######################################################################### 9741# XDEF **************************************************************** # 9742# fintrz(): emulates the fintrz instruction # 9743# # 9744# XREF **************************************************************** # 9745# res_{s,q}nan_1op() - set NAN result for monadic operation # 9746# # 9747# INPUT *************************************************************** # 9748# a0 = pointer to extended precision source operand # 9749# d0 = round precision/mode # 9750# # 9751# OUTPUT ************************************************************** # 9752# fp0 = result # 9753# # 9754# ALGORITHM *********************************************************** # 9755# Separate according to operand type. Unnorms don't pass through # 9756# here. For norms, load the rounding mode/prec, execute a "fintrz", # 9757# then store the resulting FPSR bits. # 9758# For denorms, force the j-bit to a one and do the same as for # 9759# norms. Denorms are so low that the answer will either be a zero or a # 9760# one. # 9761# For zeroes/infs/NANs, return the same while setting the FPSR # 9762# as appropriate. # 9763# # 9764######################################################################### 9765 9766 global fintrz 9767fintrz: 9768 mov.b STAG(%a6),%d1 9769 bne.b fintrz_not_norm # optimize on non-norm input 9770 9771# 9772# Norm: 9773# 9774fintrz_norm: 9775 fmov.l &0x0,%fpsr # clear FPSR 9776 9777 fintrz.x SRC(%a0),%fp0 # execute fintrz 9778 9779 fmov.l %fpsr,%d0 # save FPSR 9780 or.l %d0,USER_FPSR(%a6) # set exception bits 9781 9782 rts 9783 9784# 9785# input is not normalized; what is it? 
9786# 9787fintrz_not_norm: 9788 cmpi.b %d1,&ZERO # weed out ZERO 9789 beq.b fintrz_zero 9790 cmpi.b %d1,&INF # weed out INF 9791 beq.b fintrz_inf 9792 cmpi.b %d1,&DENORM # weed out DENORM 9793 beq.b fintrz_denorm 9794 cmpi.b %d1,&SNAN # weed out SNAN 9795 beq.l res_snan_1op 9796 bra.l res_qnan_1op # weed out QNAN 9797 9798# 9799# Denorm: 9800# 9801# for DENORMs, the result will be (+/-)ZERO. 9802# also, the INEX2 and AINEX exception bits will be set. 9803# so, we could either set these manually or force the DENORM 9804# to a very small NORM and ship it to the NORM routine. 9805# I do the latter. 9806# 9807fintrz_denorm: 9808 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp 9809 mov.b &0x80,FP_SCR0_HI(%a6) # force DENORM ==> small NORM 9810 lea FP_SCR0(%a6),%a0 9811 bra.b fintrz_norm 9812 9813# 9814# Zero: 9815# 9816fintrz_zero: 9817 tst.b SRC_EX(%a0) # is ZERO negative? 9818 bmi.b fintrz_zero_m # yes 9819fintrz_zero_p: 9820 fmov.s &0x00000000,%fp0 # return +ZERO in fp0 9821 mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit 9822 rts 9823fintrz_zero_m: 9824 fmov.s &0x80000000,%fp0 # return -ZERO in fp0 9825 mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits 9826 rts 9827 9828# 9829# Infinity: 9830# 9831fintrz_inf: 9832 fmovm.x SRC(%a0),&0x80 # return result in fp0 9833 tst.b SRC_EX(%a0) # is INF negative? 
9834 bmi.b fintrz_inf_m # yes 9835fintrz_inf_p: 9836 mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit 9837 rts 9838fintrz_inf_m: 9839 mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits 9840 rts 9841 9842######################################################################### 9843# XDEF **************************************************************** # 9844# fabs(): emulates the fabs instruction # 9845# fsabs(): emulates the fsabs instruction # 9846# fdabs(): emulates the fdabs instruction # 9847# # 9848# XREF **************************************************************** # 9849# norm() - normalize denorm mantissa to provide EXOP # 9850# scale_to_zero_src() - make exponent. = 0; get scale factor # 9851# unf_res() - calculate underflow result # 9852# ovf_res() - calculate overflow result # 9853# res_{s,q}nan_1op() - set NAN result for monadic operation # 9854# # 9855# INPUT *************************************************************** # 9856# a0 = pointer to extended precision source operand # 9857# d0 = rnd precision/mode # 9858# # 9859# OUTPUT ************************************************************** # 9860# fp0 = result # 9861# fp1 = EXOP (if exception occurred) # 9862# # 9863# ALGORITHM *********************************************************** # 9864# Handle NANs, infinities, and zeroes as special cases. Divide # 9865# norms into extended, single, and double precision. # 9866# Simply clear sign for extended precision norm. Ext prec denorm # 9867# gets an EXOP created for it since it's an underflow. # 9868# Double and single precision can overflow and underflow. First, # 9869# scale the operand such that the exponent is zero. Perform an "fabs" # 9870# using the correct rnd mode/prec. Check to see if the original # 9871# exponent would take an exception. If so, use unf_res() or ovf_res() # 9872# to calculate the default result. Also, create the EXOP for the # 9873# exceptional case. 
If no exception should occur, insert the correct # 9874# result exponent and return. # 9875# Unnorms don't pass through here. # 9876# # 9877######################################################################### 9878 9879 global fsabs 9880fsabs: 9881 andi.b &0x30,%d0 # clear rnd prec 9882 ori.b &s_mode*0x10,%d0 # insert sgl precision 9883 bra.b fabs 9884 9885 global fdabs 9886fdabs: 9887 andi.b &0x30,%d0 # clear rnd prec 9888 ori.b &d_mode*0x10,%d0 # insert dbl precision 9889 9890 global fabs 9891fabs: 9892 mov.l %d0,L_SCR3(%a6) # store rnd info 9893 mov.b STAG(%a6),%d1 9894 bne.w fabs_not_norm # optimize on non-norm input 9895 9896# 9897# ABSOLUTE VALUE: norms and denorms ONLY! 9898# 9899fabs_norm: 9900 andi.b &0xc0,%d0 # is precision extended? 9901 bne.b fabs_not_ext # no; go handle sgl or dbl 9902 9903# 9904# precision selected is extended. so...we can not get an underflow 9905# or overflow because of rounding to the correct precision. so... 9906# skip the scaling and unscaling... 9907# 9908 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) 9909 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) 9910 mov.w SRC_EX(%a0),%d1 9911 bclr &15,%d1 # force absolute value 9912 mov.w %d1,FP_SCR0_EX(%a6) # insert exponent 9913 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0 9914 rts 9915 9916# 9917# for an extended precision DENORM, the UNFL exception bit is set 9918# the accrued bit is NOT set in this instance(no inexactness!) 9919# 9920fabs_denorm: 9921 andi.b &0xc0,%d0 # is precision extended? 9922 bne.b fabs_not_ext # no 9923 9924 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit 9925 9926 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) 9927 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) 9928 mov.w SRC_EX(%a0),%d0 9929 bclr &15,%d0 # clear sign 9930 mov.w %d0,FP_SCR0_EX(%a6) # insert exponent 9931 9932 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0 9933 9934 btst &unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled? 
9935 bne.b fabs_ext_unfl_ena 9936 rts 9937 9938# 9939# the input is an extended DENORM and underflow is enabled in the FPCR. 9940# normalize the mantissa and add the bias of 0x6000 to the resulting negative 9941# exponent and insert back into the operand. 9942# 9943fabs_ext_unfl_ena: 9944 lea FP_SCR0(%a6),%a0 # pass: ptr to operand 9945 bsr.l norm # normalize result 9946 neg.w %d0 # new exponent = -(shft val) 9947 addi.w &0x6000,%d0 # add new bias to exponent 9948 mov.w FP_SCR0_EX(%a6),%d1 # fetch old sign,exp 9949 andi.w &0x8000,%d1 # keep old sign 9950 andi.w &0x7fff,%d0 # clear sign position 9951 or.w %d1,%d0 # concat old sign, new exponent 9952 mov.w %d0,FP_SCR0_EX(%a6) # insert new exponent 9953 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1 9954 rts 9955 9956# 9957# operand is either single or double 9958# 9959fabs_not_ext: 9960 cmpi.b %d0,&s_mode*0x10 # separate sgl/dbl prec 9961 bne.b fabs_dbl 9962 9963# 9964# operand is to be rounded to single precision 9965# 9966fabs_sgl: 9967 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) 9968 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) 9969 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) 9970 bsr.l scale_to_zero_src # calculate scale factor 9971 9972 cmpi.l %d0,&0x3fff-0x3f80 # will move in underflow? 9973 bge.w fabs_sd_unfl # yes; go handle underflow 9974 cmpi.l %d0,&0x3fff-0x407e # will move in overflow? 
	beq.w		fabs_sd_may_ovfl	# maybe; go check
	blt.w		fabs_sd_ovfl		# yes; go handle overflow

#
# operand will NOT overflow or underflow when moved in to the fp reg file
#
fabs_sd_normal:
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fabs.x		FP_SCR0(%a6),%fp0	# perform absolute

	fmov.l		%fpsr,%d1		# save FPSR
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

# common exit: undo the earlier scaling by folding the scale factor (d0)
# back into the result's exponent, then return the result in fp0.
fabs_sd_normal_exit:
	mov.l		%d2,-(%sp)		# save d2
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
	mov.w		FP_SCR0_EX(%a6),%d1	# load sgn,exp
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	sub.l		%d0,%d1			# subtract scale factor
	andi.w		&0x8000,%d2		# keep old sign
	or.w		%d1,%d2			# concat old sign,new exp
	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
	rts

#
# operand is to be rounded to double precision
#
fabs_dbl:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	bsr.l		scale_to_zero_src	# calculate scale factor

	cmpi.l		%d0,&0x3fff-0x3c00	# will move in underflow?
	bge.b		fabs_sd_unfl		# yes; go handle underflow
	cmpi.l		%d0,&0x3fff-0x43fe	# will move in overflow?
	beq.w		fabs_sd_may_ovfl	# maybe; go check
	blt.w		fabs_sd_ovfl		# yes; go handle overflow
	bra.w		fabs_sd_normal		# no; go handle normalized op

#
# operand WILL underflow when moved in to the fp register file
#
fabs_sd_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	bclr		&0x7,FP_SCR0_EX(%a6)	# force absolute value

# if underflow or inexact is enabled, go calculate EXOP first.
	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
10033 bne.b fabs_sd_unfl_ena # yes 10034 10035fabs_sd_unfl_dis: 10036 lea FP_SCR0(%a6),%a0 # pass: result addr 10037 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode 10038 bsr.l unf_res # calculate default result 10039 or.b %d0,FPSR_CC(%a6) # set possible 'Z' ccode 10040 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0 10041 rts 10042 10043# 10044# operand will underflow AND underflow is enabled. 10045# Therefore, we must return the result rounded to extended precision. 10046# 10047fabs_sd_unfl_ena: 10048 mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6) 10049 mov.l FP_SCR0_LO(%a6),FP_SCR1_LO(%a6) 10050 mov.w FP_SCR0_EX(%a6),%d1 # load current exponent 10051 10052 mov.l %d2,-(%sp) # save d2 10053 mov.l %d1,%d2 # make a copy 10054 andi.l &0x7fff,%d1 # strip sign 10055 andi.w &0x8000,%d2 # keep old sign 10056 sub.l %d0,%d1 # subtract scale factor 10057 addi.l &0x6000,%d1 # add new bias 10058 andi.w &0x7fff,%d1 10059 or.w %d2,%d1 # concat new sign,new exp 10060 mov.w %d1,FP_SCR1_EX(%a6) # insert new exp 10061 fmovm.x FP_SCR1(%a6),&0x40 # return EXOP in fp1 10062 mov.l (%sp)+,%d2 # restore d2 10063 bra.b fabs_sd_unfl_dis 10064 10065# 10066# operand WILL overflow. 10067# 10068fabs_sd_ovfl: 10069 fmov.l &0x0,%fpsr # clear FPSR 10070 fmov.l L_SCR3(%a6),%fpcr # set FPCR 10071 10072 fabs.x FP_SCR0(%a6),%fp0 # perform absolute 10073 10074 fmov.l &0x0,%fpcr # clear FPCR 10075 fmov.l %fpsr,%d1 # save FPSR 10076 10077 or.l %d1,USER_FPSR(%a6) # save INEX2,N 10078 10079fabs_sd_ovfl_tst: 10080 or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex 10081 10082 mov.b FPCR_ENABLE(%a6),%d1 10083 andi.b &0x13,%d1 # is OVFL or INEX enabled? 10084 bne.b fabs_sd_ovfl_ena # yes 10085 10086# 10087# OVFL is not enabled; therefore, we must create the default result by 10088# calling ovf_res(). 10089# 10090fabs_sd_ovfl_dis: 10091 btst &neg_bit,FPSR_CC(%a6) # is result negative? 
	sne		%d1			# set sign param accordingly
	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
	bsr.l		ovf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
	fmovm.x		(%a0),&0x80		# return default result in fp0
	rts

#
# OVFL is enabled.
# the INEX2 bit has already been updated by the round to the correct precision.
# now, round to extended(and don't alter the FPSR).
#
fabs_sd_ovfl_ena:
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# subtract scale factor
	subi.l		&0x6000,%d1		# subtract bias
	andi.w		&0x7fff,%d1
	or.w		%d2,%d1			# concat sign,exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	mov.l		(%sp)+,%d2		# restore d2
	bra.b		fabs_sd_ovfl_dis

#
# the move in MAY overflow. so...
#
fabs_sd_may_ovfl:
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fabs.x		FP_SCR0(%a6),%fp0	# perform absolute

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	fabs.x		%fp0,%fp1		# make a copy of result
	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
	fbge.w		fabs_sd_ovfl_tst	# yes; overflow has occurred

# no, it didn't overflow; we have correct result
	bra.w		fabs_sd_normal_exit

##########################################################################

#
# input is not normalized; what is it?
10144# 10145fabs_not_norm: 10146 cmpi.b %d1,&DENORM # weed out DENORM 10147 beq.w fabs_denorm 10148 cmpi.b %d1,&SNAN # weed out SNAN 10149 beq.l res_snan_1op 10150 cmpi.b %d1,&QNAN # weed out QNAN 10151 beq.l res_qnan_1op 10152 10153 fabs.x SRC(%a0),%fp0 # force absolute value 10154 10155 cmpi.b %d1,&INF # weed out INF 10156 beq.b fabs_inf 10157fabs_zero: 10158 mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit 10159 rts 10160fabs_inf: 10161 mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit 10162 rts 10163 10164######################################################################### 10165# XDEF **************************************************************** # 10166# fcmp(): fp compare op routine # 10167# # 10168# XREF **************************************************************** # 10169# res_qnan() - return QNAN result # 10170# res_snan() - return SNAN result # 10171# # 10172# INPUT *************************************************************** # 10173# a0 = pointer to extended precision source operand # 10174# a1 = pointer to extended precision destination operand # 10175# d0 = round prec/mode # 10176# # 10177# OUTPUT ************************************************************** # 10178# None # 10179# # 10180# ALGORITHM *********************************************************** # 10181# Handle NANs and denorms as special cases. For everything else, # 10182# just use the actual fcmp instruction to produce the correct condition # 10183# codes. 
# 10184# # 10185######################################################################### 10186 10187 global fcmp 10188fcmp: 10189 clr.w %d1 10190 mov.b DTAG(%a6),%d1 10191 lsl.b &0x3,%d1 10192 or.b STAG(%a6),%d1 10193 bne.b fcmp_not_norm # optimize on non-norm input 10194 10195# 10196# COMPARE FP OPs : NORMs, ZEROs, INFs, and "corrected" DENORMs 10197# 10198fcmp_norm: 10199 fmovm.x DST(%a1),&0x80 # load dst op 10200 10201 fcmp.x %fp0,SRC(%a0) # do compare 10202 10203 fmov.l %fpsr,%d0 # save FPSR 10204 rol.l &0x8,%d0 # extract ccode bits 10205 mov.b %d0,FPSR_CC(%a6) # set ccode bits(no exc bits are set) 10206 10207 rts 10208 10209# 10210# fcmp: inputs are not both normalized; what are they? 10211# 10212fcmp_not_norm: 10213 mov.w (tbl_fcmp_op.b,%pc,%d1.w*2),%d1 10214 jmp (tbl_fcmp_op.b,%pc,%d1.w*1) 10215 10216 swbeg &48 10217tbl_fcmp_op: 10218 short fcmp_norm - tbl_fcmp_op # NORM - NORM 10219 short fcmp_norm - tbl_fcmp_op # NORM - ZERO 10220 short fcmp_norm - tbl_fcmp_op # NORM - INF 10221 short fcmp_res_qnan - tbl_fcmp_op # NORM - QNAN 10222 short fcmp_nrm_dnrm - tbl_fcmp_op # NORM - DENORM 10223 short fcmp_res_snan - tbl_fcmp_op # NORM - SNAN 10224 short tbl_fcmp_op - tbl_fcmp_op # 10225 short tbl_fcmp_op - tbl_fcmp_op # 10226 10227 short fcmp_norm - tbl_fcmp_op # ZERO - NORM 10228 short fcmp_norm - tbl_fcmp_op # ZERO - ZERO 10229 short fcmp_norm - tbl_fcmp_op # ZERO - INF 10230 short fcmp_res_qnan - tbl_fcmp_op # ZERO - QNAN 10231 short fcmp_dnrm_s - tbl_fcmp_op # ZERO - DENORM 10232 short fcmp_res_snan - tbl_fcmp_op # ZERO - SNAN 10233 short tbl_fcmp_op - tbl_fcmp_op # 10234 short tbl_fcmp_op - tbl_fcmp_op # 10235 10236 short fcmp_norm - tbl_fcmp_op # INF - NORM 10237 short fcmp_norm - tbl_fcmp_op # INF - ZERO 10238 short fcmp_norm - tbl_fcmp_op # INF - INF 10239 short fcmp_res_qnan - tbl_fcmp_op # INF - QNAN 10240 short fcmp_dnrm_s - tbl_fcmp_op # INF - DENORM 10241 short fcmp_res_snan - tbl_fcmp_op # INF - SNAN 10242 short tbl_fcmp_op - tbl_fcmp_op # 10243 
short tbl_fcmp_op - tbl_fcmp_op # 10244 10245 short fcmp_res_qnan - tbl_fcmp_op # QNAN - NORM 10246 short fcmp_res_qnan - tbl_fcmp_op # QNAN - ZERO 10247 short fcmp_res_qnan - tbl_fcmp_op # QNAN - INF 10248 short fcmp_res_qnan - tbl_fcmp_op # QNAN - QNAN 10249 short fcmp_res_qnan - tbl_fcmp_op # QNAN - DENORM 10250 short fcmp_res_snan - tbl_fcmp_op # QNAN - SNAN 10251 short tbl_fcmp_op - tbl_fcmp_op # 10252 short tbl_fcmp_op - tbl_fcmp_op # 10253 10254 short fcmp_dnrm_nrm - tbl_fcmp_op # DENORM - NORM 10255 short fcmp_dnrm_d - tbl_fcmp_op # DENORM - ZERO 10256 short fcmp_dnrm_d - tbl_fcmp_op # DENORM - INF 10257 short fcmp_res_qnan - tbl_fcmp_op # DENORM - QNAN 10258 short fcmp_dnrm_sd - tbl_fcmp_op # DENORM - DENORM 10259 short fcmp_res_snan - tbl_fcmp_op # DENORM - SNAN 10260 short tbl_fcmp_op - tbl_fcmp_op # 10261 short tbl_fcmp_op - tbl_fcmp_op # 10262 10263 short fcmp_res_snan - tbl_fcmp_op # SNAN - NORM 10264 short fcmp_res_snan - tbl_fcmp_op # SNAN - ZERO 10265 short fcmp_res_snan - tbl_fcmp_op # SNAN - INF 10266 short fcmp_res_snan - tbl_fcmp_op # SNAN - QNAN 10267 short fcmp_res_snan - tbl_fcmp_op # SNAN - DENORM 10268 short fcmp_res_snan - tbl_fcmp_op # SNAN - SNAN 10269 short tbl_fcmp_op - tbl_fcmp_op # 10270 short tbl_fcmp_op - tbl_fcmp_op # 10271 10272# unlike all other functions for QNAN and SNAN, fcmp does NOT set the 10273# 'N' bit for a negative QNAN or SNAN input so we must squelch it here. 10274fcmp_res_qnan: 10275 bsr.l res_qnan 10276 andi.b &0xf7,FPSR_CC(%a6) 10277 rts 10278fcmp_res_snan: 10279 bsr.l res_snan 10280 andi.b &0xf7,FPSR_CC(%a6) 10281 rts 10282 10283# 10284# DENORMs are a little more difficult. 10285# If you have a 2 DENORMs, then you can just force the j-bit to a one 10286# and use the fcmp_norm routine. 10287# If you have a DENORM and an INF or ZERO, just force the DENORM's j-bit to a one 10288# and use the fcmp_norm routine. 10289# If you have a DENORM and a NORM with opposite signs, then use fcmp_norm, also. 
# But with a DENORM and a NORM of the same sign, the neg bit is set if the
#	(1) signs are (+) and the DENORM is the dst or
#	(2) signs are (-) and the DENORM is the src
#

fcmp_dnrm_s:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),%d0
	bset		&31,%d0			# DENORM src; make into small norm
	mov.l		%d0,FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	lea		FP_SCR0(%a6),%a0
	bra.w		fcmp_norm

fcmp_dnrm_d:
	mov.l		DST_EX(%a1),FP_SCR0_EX(%a6)
	mov.l		DST_HI(%a1),%d0
	bset		&31,%d0			# DENORM dst; make into small norm
	mov.l		%d0,FP_SCR0_HI(%a6)
	mov.l		DST_LO(%a1),FP_SCR0_LO(%a6)
	lea		FP_SCR0(%a6),%a1
	bra.w		fcmp_norm

fcmp_dnrm_sd:
	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		DST_HI(%a1),%d0
	bset		&31,%d0			# DENORM dst; make into small norm
	mov.l		%d0,FP_SCR1_HI(%a6)
	mov.l		SRC_HI(%a0),%d0
	bset		&31,%d0			# DENORM src; make into small norm
	mov.l		%d0,FP_SCR0_HI(%a6)
	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	lea		FP_SCR1(%a6),%a1
	lea		FP_SCR0(%a6),%a0
	bra.w		fcmp_norm

fcmp_nrm_dnrm:
	mov.b		SRC_EX(%a0),%d0		# determine if like signs
	mov.b		DST_EX(%a1),%d1
	eor.b		%d0,%d1
	bmi.w		fcmp_dnrm_s

# signs are the same, so must determine the answer ourselves.
	tst.b		%d0			# is src op negative?
	bmi.b		fcmp_nrm_dnrm_m		# yes
	rts
fcmp_nrm_dnrm_m:
	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
	rts

fcmp_dnrm_nrm:
	mov.b		SRC_EX(%a0),%d0		# determine if like signs
	mov.b		DST_EX(%a1),%d1
	eor.b		%d0,%d1
	bmi.w		fcmp_dnrm_d

# signs are the same, so must determine the answer ourselves.
	tst.b		%d0			# is src op negative?
	bpl.b		fcmp_dnrm_nrm_m		# no
	rts
fcmp_dnrm_nrm_m:
	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
	rts

#########################################################################
# XDEF ****************************************************************	#
#	fsglmul(): emulates the fsglmul instruction			#
#									#
# XREF ****************************************************************	#
#	scale_to_zero_src() - scale src exponent to zero		#
#	scale_to_zero_dst() - scale dst exponent to zero		#
#	unf_res4() - return default underflow result for sglop		#
#	ovf_res() - return default overflow result			#
#	res_qnan() - return QNAN result					#
#	res_snan() - return SNAN result					#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision source operand		#
#	a1 = pointer to extended precision destination operand		#
#	d0  rnd prec,mode						#
#									#
# OUTPUT **************************************************************	#
#	fp0 = result							#
#	fp1 = EXOP (if exception occurred)				#
#									#
# ALGORITHM ***********************************************************	#
#	Handle NANs, infinities, and zeroes as special cases. Divide	#
# norms/denorms into ext/sgl/dbl precision.				#
#	For norms/denorms, scale the exponents such that a multiply	#
# instruction won't cause an exception. Use the regular fsglmul to	#
# compute a result. Check if the regular operands would have taken	#
# an exception. If so, return the default overflow/underflow result	#
# and return the EXOP if exceptions are enabled. Else, scale the	#
# result operand to the proper exponent.
# 10386# # 10387######################################################################### 10388 10389 global fsglmul 10390fsglmul: 10391 mov.l %d0,L_SCR3(%a6) # store rnd info 10392 10393 clr.w %d1 10394 mov.b DTAG(%a6),%d1 10395 lsl.b &0x3,%d1 10396 or.b STAG(%a6),%d1 10397 10398 bne.w fsglmul_not_norm # optimize on non-norm input 10399 10400fsglmul_norm: 10401 mov.w DST_EX(%a1),FP_SCR1_EX(%a6) 10402 mov.l DST_HI(%a1),FP_SCR1_HI(%a6) 10403 mov.l DST_LO(%a1),FP_SCR1_LO(%a6) 10404 10405 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) 10406 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) 10407 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) 10408 10409 bsr.l scale_to_zero_src # scale exponent 10410 mov.l %d0,-(%sp) # save scale factor 1 10411 10412 bsr.l scale_to_zero_dst # scale dst exponent 10413 10414 add.l (%sp)+,%d0 # SCALE_FACTOR = scale1 + scale2 10415 10416 cmpi.l %d0,&0x3fff-0x7ffe # would result ovfl? 10417 beq.w fsglmul_may_ovfl # result may rnd to overflow 10418 blt.w fsglmul_ovfl # result will overflow 10419 10420 cmpi.l %d0,&0x3fff+0x0001 # would result unfl? 
	beq.w		fsglmul_may_unfl	# result may rnd to no unfl
	bgt.w		fsglmul_unfl		# result will underflow

fsglmul_normal:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

# common exit: undo the earlier scaling by folding the scale factor (d0)
# back into the result's exponent, then return the result in fp0.
fsglmul_normal_exit:
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# subtract scale factor
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
	rts

fsglmul_ovfl:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

fsglmul_ovfl_tst:

# save setting this until now because this is where fsglmul_may_ovfl may jump in
	or.l		&ovfl_inx_mask, USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
	bne.b		fsglmul_ovfl_ena	# yes

fsglmul_ovfl_dis:
	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
10475 sne %d1 # set sign param accordingly 10476 mov.l L_SCR3(%a6),%d0 # pass prec:rnd 10477 andi.b &0x30,%d0 # force prec = ext 10478 bsr.l ovf_res # calculate default result 10479 or.b %d0,FPSR_CC(%a6) # set INF,N if applicable 10480 fmovm.x (%a0),&0x80 # return default result in fp0 10481 rts 10482 10483fsglmul_ovfl_ena: 10484 fmovm.x &0x80,FP_SCR0(%a6) # move result to stack 10485 10486 mov.l %d2,-(%sp) # save d2 10487 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp} 10488 mov.l %d1,%d2 # make a copy 10489 andi.l &0x7fff,%d1 # strip sign 10490 sub.l %d0,%d1 # add scale factor 10491 subi.l &0x6000,%d1 # subtract bias 10492 andi.w &0x7fff,%d1 10493 andi.w &0x8000,%d2 # keep old sign 10494 or.w %d2,%d1 # concat old sign,new exp 10495 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent 10496 mov.l (%sp)+,%d2 # restore d2 10497 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1 10498 bra.b fsglmul_ovfl_dis 10499 10500fsglmul_may_ovfl: 10501 fmovm.x FP_SCR1(%a6),&0x80 # load dst op 10502 10503 fmov.l L_SCR3(%a6),%fpcr # set FPCR 10504 fmov.l &0x0,%fpsr # clear FPSR 10505 10506 fsglmul.x FP_SCR0(%a6),%fp0 # execute sgl multiply 10507 10508 fmov.l %fpsr,%d1 # save status 10509 fmov.l &0x0,%fpcr # clear FPCR 10510 10511 or.l %d1,USER_FPSR(%a6) # save INEX2,N 10512 10513 fabs.x %fp0,%fp1 # make a copy of result 10514 fcmp.b %fp1,&0x2 # is |result| >= 2.b? 
10515 fbge.w fsglmul_ovfl_tst # yes; overflow has occurred 10516 10517# no, it didn't overflow; we have correct result 10518 bra.w fsglmul_normal_exit 10519 10520fsglmul_unfl: 10521 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit 10522 10523 fmovm.x FP_SCR1(%a6),&0x80 # load dst op 10524 10525 fmov.l &rz_mode*0x10,%fpcr # set FPCR 10526 fmov.l &0x0,%fpsr # clear FPSR 10527 10528 fsglmul.x FP_SCR0(%a6),%fp0 # execute sgl multiply 10529 10530 fmov.l %fpsr,%d1 # save status 10531 fmov.l &0x0,%fpcr # clear FPCR 10532 10533 or.l %d1,USER_FPSR(%a6) # save INEX2,N 10534 10535 mov.b FPCR_ENABLE(%a6),%d1 10536 andi.b &0x0b,%d1 # is UNFL or INEX enabled? 10537 bne.b fsglmul_unfl_ena # yes 10538 10539fsglmul_unfl_dis: 10540 fmovm.x &0x80,FP_SCR0(%a6) # store out result 10541 10542 lea FP_SCR0(%a6),%a0 # pass: result addr 10543 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode 10544 bsr.l unf_res4 # calculate default result 10545 or.b %d0,FPSR_CC(%a6) # 'Z' bit may have been set 10546 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0 10547 rts 10548 10549# 10550# UNFL is enabled. 
10551# 10552fsglmul_unfl_ena: 10553 fmovm.x FP_SCR1(%a6),&0x40 # load dst op 10554 10555 fmov.l L_SCR3(%a6),%fpcr # set FPCR 10556 fmov.l &0x0,%fpsr # clear FPSR 10557 10558 fsglmul.x FP_SCR0(%a6),%fp1 # execute sgl multiply 10559 10560 fmov.l &0x0,%fpcr # clear FPCR 10561 10562 fmovm.x &0x40,FP_SCR0(%a6) # save result to stack 10563 mov.l %d2,-(%sp) # save d2 10564 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp} 10565 mov.l %d1,%d2 # make a copy 10566 andi.l &0x7fff,%d1 # strip sign 10567 andi.w &0x8000,%d2 # keep old sign 10568 sub.l %d0,%d1 # add scale factor 10569 addi.l &0x6000,%d1 # add bias 10570 andi.w &0x7fff,%d1 10571 or.w %d2,%d1 # concat old sign,new exp 10572 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent 10573 mov.l (%sp)+,%d2 # restore d2 10574 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1 10575 bra.w fsglmul_unfl_dis 10576 10577fsglmul_may_unfl: 10578 fmovm.x FP_SCR1(%a6),&0x80 # load dst op 10579 10580 fmov.l L_SCR3(%a6),%fpcr # set FPCR 10581 fmov.l &0x0,%fpsr # clear FPSR 10582 10583 fsglmul.x FP_SCR0(%a6),%fp0 # execute sgl multiply 10584 10585 fmov.l %fpsr,%d1 # save status 10586 fmov.l &0x0,%fpcr # clear FPCR 10587 10588 or.l %d1,USER_FPSR(%a6) # save INEX2,N 10589 10590 fabs.x %fp0,%fp1 # make a copy of result 10591 fcmp.b %fp1,&0x2 # is |result| > 2.b? 10592 fbgt.w fsglmul_normal_exit # no; no underflow occurred 10593 fblt.w fsglmul_unfl # yes; underflow occurred 10594 10595# 10596# we still don't know if underflow occurred. result is ~ equal to 2. but, 10597# we don't know if the result was an underflow that rounded up to a 2 or 10598# a normalized number that rounded down to a 2. so, redo the entire operation 10599# using RZ as the rounding mode to see what the pre-rounded result is. 10600# this case should be relatively rare. 
#
# The fsglmul result was ~= 2^-16382: it may be an underflow that rounded
# up or a true norm that rounded down. Redo the multiply in round-to-zero
# so the pre-rounded magnitude decides which case it was.
#
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# keep rnd prec
	ori.b		&rz_mode*0x10,%d1	# insert RZ

	fmov.l		%d1,%fpcr		# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsglmul.x	FP_SCR0(%a6),%fp1	# execute sgl multiply

	fmov.l		&0x0,%fpcr		# clear FPCR
	fabs.x		%fp1			# make absolute value
	fcmp.b		%fp1,&0x2		# is |result| < 2.b?
	fbge.w		fsglmul_normal_exit	# no; no underflow occurred
	bra.w		fsglmul_unfl		# yes, underflow occurred

##############################################################################

#
# Single Precision Multiply: inputs are not both normalized; what are they?
# d1 = (DTAG << 3) | STAG on entry; each table entry is a 16-bit offset
# from tbl_fsglmul_op (empty slots point back at the table itself).
#
fsglmul_not_norm:
	mov.w		(tbl_fsglmul_op.b,%pc,%d1.w*2),%d1
	jmp		(tbl_fsglmul_op.b,%pc,%d1.w*1)

	swbeg		&48
tbl_fsglmul_op:
	short		fsglmul_norm		- tbl_fsglmul_op # NORM x NORM
	short		fsglmul_zero		- tbl_fsglmul_op # NORM x ZERO
	short		fsglmul_inf_src		- tbl_fsglmul_op # NORM x INF
	short		fsglmul_res_qnan	- tbl_fsglmul_op # NORM x QNAN
	short		fsglmul_norm		- tbl_fsglmul_op # NORM x DENORM
	short		fsglmul_res_snan	- tbl_fsglmul_op # NORM x SNAN
	short		tbl_fsglmul_op		- tbl_fsglmul_op # (unused)
	short		tbl_fsglmul_op		- tbl_fsglmul_op # (unused)

	short		fsglmul_zero		- tbl_fsglmul_op # ZERO x NORM
	short		fsglmul_zero		- tbl_fsglmul_op # ZERO x ZERO
	short		fsglmul_res_operr	- tbl_fsglmul_op # ZERO x INF
	short		fsglmul_res_qnan	- tbl_fsglmul_op # ZERO x QNAN
	short		fsglmul_zero		- tbl_fsglmul_op # ZERO x DENORM
	short		fsglmul_res_snan	- tbl_fsglmul_op # ZERO x SNAN
	short		tbl_fsglmul_op		- tbl_fsglmul_op # (unused)
	short		tbl_fsglmul_op		- tbl_fsglmul_op # (unused)

	short		fsglmul_inf_dst		- tbl_fsglmul_op # INF x NORM
	short		fsglmul_res_operr	- tbl_fsglmul_op # INF x ZERO
	short		fsglmul_inf_dst		- tbl_fsglmul_op # INF x INF
	short		fsglmul_res_qnan	- tbl_fsglmul_op # INF x QNAN
	short		fsglmul_inf_dst		- tbl_fsglmul_op # INF x DENORM
	short		fsglmul_res_snan	- tbl_fsglmul_op # INF x SNAN
	short		tbl_fsglmul_op		- tbl_fsglmul_op # (unused)
	short		tbl_fsglmul_op		- tbl_fsglmul_op # (unused)

	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x NORM
	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x ZERO
	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x INF
	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x QNAN
	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x DENORM
	short		fsglmul_res_snan	- tbl_fsglmul_op # QNAN x SNAN
	short		tbl_fsglmul_op		- tbl_fsglmul_op # (unused)
	short		tbl_fsglmul_op		- tbl_fsglmul_op # (unused)

	short		fsglmul_norm		- tbl_fsglmul_op # DENORM x NORM
	short		fsglmul_zero		- tbl_fsglmul_op # DENORM x ZERO
	short		fsglmul_inf_src		- tbl_fsglmul_op # DENORM x INF
	short		fsglmul_res_qnan	- tbl_fsglmul_op # DENORM x QNAN
	short		fsglmul_norm		- tbl_fsglmul_op # DENORM x DENORM
	short		fsglmul_res_snan	- tbl_fsglmul_op # DENORM x SNAN
	short		tbl_fsglmul_op		- tbl_fsglmul_op # (unused)
	short		tbl_fsglmul_op		- tbl_fsglmul_op # (unused)

	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x NORM
	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x ZERO
	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x INF
	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x QNAN
	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x DENORM
	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x SNAN
	short		tbl_fsglmul_op		- tbl_fsglmul_op # (unused)
	short		tbl_fsglmul_op		- tbl_fsglmul_op # (unused)

# special-case stubs: tail-branch to the shared fmul/result handlers
fsglmul_res_operr:
	bra.l		res_operr
fsglmul_res_snan:
	bra.l		res_snan
fsglmul_res_qnan:
	bra.l		res_qnan
fsglmul_zero:
	bra.l		fmul_zero
fsglmul_inf_src:
	bra.l		fmul_inf_src
fsglmul_inf_dst:
	bra.l		fmul_inf_dst

#########################################################################
# XDEF ****************************************************************	#
#	fsgldiv(): emulates the fsgldiv instruction			#
#									#
# XREF ****************************************************************	#
#	scale_to_zero_src() - scale src exponent to zero		#
#	scale_to_zero_dst() - scale dst exponent to zero		#
#	unf_res4() - return default underflow result for sglop		#
#	ovf_res() - return default overflow result			#
#	res_qnan() - return QNAN result					#
#	res_snan() - return SNAN result					#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision source operand		#
#	a1 = pointer to extended precision destination operand		#
#	d0 = rnd prec,mode						#
#									#
# OUTPUT **************************************************************	#
#	fp0 = result							#
#	fp1 = EXOP (if exception occurred)				#
#									#
# ALGORITHM ***********************************************************	#
#	Handle NANs, infinities, and zeroes as special cases. Divide	#
# norms/denorms into ext/sgl/dbl precision.				#
#	For norms/denorms, scale the exponents such that a divide	#
# instruction won't cause an exception. Use the regular fsgldiv to	#
# compute a result. Check if the regular operands would have taken	#
# an exception. If so, return the default overflow/underflow result	#
# and return the EXOP if exceptions are enabled. Else, scale the	#
# result operand to the proper exponent.			 	#
#									#
#########################################################################

	global		fsgldiv
fsgldiv:
	mov.l		%d0,L_SCR3(%a6)		# store rnd info

	clr.w		%d1
	mov.b		DTAG(%a6),%d1
	lsl.b		&0x3,%d1
	or.b		STAG(%a6),%d1		# combine src tags

	bne.w		fsgldiv_not_norm	# optimize on non-norm input

#
# DIVIDE: NORMs and DENORMs ONLY!
#
fsgldiv_norm:
	# copy both operands into the scratch frame slots
	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)

	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

	bsr.l		scale_to_zero_src	# calculate scale factor 1
	mov.l		%d0,-(%sp)		# save scale factor 1

	bsr.l		scale_to_zero_dst	# calculate scale factor 2

	neg.l		(%sp)			# S.F. = scale1 - scale2
	add.l		%d0,(%sp)

	mov.w		2+L_SCR3(%a6),%d1	# fetch precision,mode
	lsr.b		&0x6,%d1
	mov.l		(%sp)+,%d0		# d0 = net scale factor
	cmpi.l		%d0,&0x3fff-0x7ffe	# would result exponent be too big?
	ble.w		fsgldiv_may_ovfl

	cmpi.l		%d0,&0x3fff-0x0000	# will result underflow?
	beq.w		fsgldiv_may_unfl	# maybe
	bgt.w		fsgldiv_unfl		# yes; go handle underflow

#
# result is guaranteed in range: do the divide, then re-bias the exponent.
#
fsgldiv_normal:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR (user rnd mode/prec)
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsgldiv.x	FP_SCR0(%a6),%fp0	# perform sgl divide

	fmov.l		%fpsr,%d1		# save FPSR
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

fsgldiv_normal_exit:
	# undo the pre-scaling: subtract the scale factor (d0) from the
	# result exponent, preserving the sign bit.
	fmovm.x		&0x80,FP_SCR0(%a6)	# store result on stack
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# apply scale factor
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
	rts

#
# result could overflow: divide, then check the unbiased exponent.
#
fsgldiv_may_ovfl:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsgldiv.x	FP_SCR0(%a6),%fp0	# execute divide

	fmov.l		%fpsr,%d1
	fmov.l		&0x0,%fpcr

	or.l		%d1,USER_FPSR(%a6)	# save INEX,N

	fmovm.x		&0x01,-(%sp)		# save result to stack
	mov.w		(%sp),%d1		# fetch new exponent
	add.l		&0xc,%sp		# clear result
	andi.l		&0x7fff,%d1		# strip sign
	sub.l		%d0,%d1			# apply scale factor
	cmp.l		%d1,&0x7fff		# did divide overflow?
	blt.b		fsgldiv_normal_exit

fsgldiv_ovfl_tst:
	or.w		&ovfl_inx_mask,2+USER_FPSR(%a6)	# set ovfl/aovfl/ainex

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
	bne.b		fsgldiv_ovfl_ena	# yes

fsgldiv_ovfl_dis:
	btst		&neg_bit,FPSR_CC(%a6)	# is result negative
	sne		%d1			# set sign param accordingly
	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
	andi.b		&0x30,%d0		# kill precision
	bsr.l		ovf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set INF if applicable
	fmovm.x		(%a0),&0x80		# return default result in fp0
	rts

#
# OVFL (or INEX) is enabled: also build the EXOP in fp1 by subtracting
# the 0x6000 exception bias from the exponent.
#
fsgldiv_ovfl_ena:
	fmovm.x		&0x80,FP_SCR0(%a6)	# move result to stack

	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# apply scale factor
	subi.l		&0x6000,%d1		# subtract new bias
	andi.w		&0x7fff,%d1		# clear ms bit
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.b		fsgldiv_ovfl_dis

#
# the divide underflows: compute with RZ so unf_res4 can round correctly.
#
fsgldiv_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsgldiv.x	FP_SCR0(%a6),%fp0	# execute sgl divide

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b		fsgldiv_unfl_ena	# yes

fsgldiv_unfl_dis:
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result

	lea		FP_SCR0(%a6),%a0	# pass: result addr
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res4		# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# 'Z' bit may have been set
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
	rts

#
# UNFL is enabled: also build the EXOP in fp1 (exponent re-biased by +0x6000).
#
fsgldiv_unfl_ena:
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsgldiv.x	FP_SCR0(%a6),%fp1	# execute sgl divide

	fmov.l		&0x0,%fpcr		# clear FPCR

	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# apply scale factor
	addi.l		&0x6000,%d1		# add bias
	andi.w		&0x7fff,%d1		# clear top bit
	or.w		%d2,%d1			# concat old sign, new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.b		fsgldiv_unfl_dis

#
# the divide operation MAY underflow:
#
fsgldiv_may_unfl:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsgldiv.x	FP_SCR0(%a6),%fp0	# execute sgl divide

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	fabs.x		%fp0,%fp1		# make a copy of result
	fcmp.b		%fp1,&0x1		# is |result| > 1.b?
	fbgt.w		fsgldiv_normal_exit	# no; no underflow occurred
	fblt.w		fsgldiv_unfl		# yes; underflow occurred

#
# we still don't know if underflow occurred. result is ~ equal to 1. but,
# we don't know if the result was an underflow that rounded up to a 1
# or a normalized number that rounded down to a 1. so, redo the entire
# operation using RZ as the rounding mode to see what the pre-rounded
# result is. this case should be relatively rare.
#
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into %fp1

	clr.l		%d1			# clear scratch register
	ori.b		&rz_mode*0x10,%d1	# force RZ rnd mode

	fmov.l		%d1,%fpcr		# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsgldiv.x	FP_SCR0(%a6),%fp1	# execute sgl divide

	fmov.l		&0x0,%fpcr		# clear FPCR
	fabs.x		%fp1			# make absolute value
	fcmp.b		%fp1,&0x1		# is |result| < 1.b?
	fbge.w		fsgldiv_normal_exit	# no; no underflow occurred
	bra.w		fsgldiv_unfl		# yes; underflow occurred

############################################################################

#
# Divide: inputs are not both normalized; what are they?
# d1 = (DTAG << 3) | STAG on entry; entries are 16-bit offsets from
# tbl_fsgldiv_op (empty slots point back at the table itself).
#
fsgldiv_not_norm:
	mov.w		(tbl_fsgldiv_op.b,%pc,%d1.w*2),%d1
	jmp		(tbl_fsgldiv_op.b,%pc,%d1.w*1)

	swbeg		&48
tbl_fsgldiv_op:
	short		fsgldiv_norm		- tbl_fsgldiv_op # NORM / NORM
	short		fsgldiv_inf_load	- tbl_fsgldiv_op # NORM / ZERO
	short		fsgldiv_zero_load	- tbl_fsgldiv_op # NORM / INF
	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # NORM / QNAN
	short		fsgldiv_norm		- tbl_fsgldiv_op # NORM / DENORM
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # NORM / SNAN
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op # (unused)
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op # (unused)

	short		fsgldiv_zero_load	- tbl_fsgldiv_op # ZERO / NORM
	short		fsgldiv_res_operr	- tbl_fsgldiv_op # ZERO / ZERO
	short		fsgldiv_zero_load	- tbl_fsgldiv_op # ZERO / INF
	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # ZERO / QNAN
	short		fsgldiv_zero_load	- tbl_fsgldiv_op # ZERO / DENORM
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # ZERO / SNAN
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op # (unused)
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op # (unused)

	short		fsgldiv_inf_dst		- tbl_fsgldiv_op # INF / NORM
	short		fsgldiv_inf_dst		- tbl_fsgldiv_op # INF / ZERO
	short		fsgldiv_res_operr	- tbl_fsgldiv_op # INF / INF
	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # INF / QNAN
	short		fsgldiv_inf_dst		- tbl_fsgldiv_op # INF / DENORM
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # INF / SNAN
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op # (unused)
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op # (unused)

	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / NORM
	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / ZERO
	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / INF
	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / QNAN
	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / DENORM
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # QNAN / SNAN
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op # (unused)
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op # (unused)

	short		fsgldiv_norm		- tbl_fsgldiv_op # DENORM / NORM
	short		fsgldiv_inf_load	- tbl_fsgldiv_op # DENORM / ZERO
	short		fsgldiv_zero_load	- tbl_fsgldiv_op # DENORM / INF
	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # DENORM / QNAN
	short		fsgldiv_norm		- tbl_fsgldiv_op # DENORM / DENORM
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # DENORM / SNAN
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op # (unused)
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op # (unused)

	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / NORM
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / ZERO
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / INF
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / QNAN
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / DENORM
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / SNAN
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op # (unused)
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op # (unused)

# special-case stubs: tail-branch to the shared fdiv/result handlers
fsgldiv_res_qnan:
	bra.l		res_qnan
fsgldiv_res_snan:
	bra.l		res_snan
fsgldiv_res_operr:
	bra.l		res_operr
fsgldiv_inf_load:
	bra.l		fdiv_inf_load
fsgldiv_zero_load:
	bra.l		fdiv_zero_load
fsgldiv_inf_dst:
	bra.l		fdiv_inf_dst

#########################################################################
# XDEF ****************************************************************	#
#	fadd(): emulates the fadd instruction				#
#	fsadd(): emulates the fadd instruction				#
#	fdadd(): emulates the fdadd instruction				#
#									#
# XREF ****************************************************************	#
#	addsub_scaler2() - scale the operands so they won't take exc	#
#	ovf_res() - return default overflow result			#
#	unf_res() - return default underflow result			#
#	res_qnan() - set QNAN result					#
#	res_snan() - set SNAN result					#
#	res_operr() - set OPERR result					#
#	scale_to_zero_src() - set src operand exponent equal to zero	#
#	scale_to_zero_dst() - set dst operand exponent equal to zero	#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision source operand		#
#	a1 = pointer to extended precision destination operand		#
#									#
# OUTPUT **************************************************************	#
#	fp0 = result							#
#	fp1 = EXOP (if exception occurred)				#
#									#
# ALGORITHM ***********************************************************	#
#	Handle NANs, infinities, and zeroes as special cases. Divide	#
# norms into extended, single, and double precision.			#
#	Do addition after scaling exponents such that exception won't	#
# occur. Then, check result exponent to see if exception would have	#
# occurred. If so, return default result and maybe EXOP. Else, insert	#
# the correct result exponent and return. Set FPSR bits as appropriate.	#
#									#
#########################################################################

	global		fsadd
fsadd:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&s_mode*0x10,%d0	# insert sgl prec
	bra.b		fadd

	global		fdadd
fdadd:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&d_mode*0x10,%d0	# insert dbl prec

	global		fadd
fadd:
	mov.l		%d0,L_SCR3(%a6)		# store rnd info

	clr.w		%d1
	mov.b		DTAG(%a6),%d1
	lsl.b		&0x3,%d1
	or.b		STAG(%a6),%d1		# combine src tags

	bne.w		fadd_not_norm		# optimize on non-norm input

#
# ADD: norms and denorms
#
fadd_norm:
	bsr.l		addsub_scaler2		# scale exponents

fadd_zero_entry:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fadd.x		FP_SCR0(%a6),%fp0	# execute add

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# fetch INEX2,N,Z

	or.l		%d1,USER_FPSR(%a6)	# save exc and ccode bits

	fbeq.w		fadd_zero_exit		# if result is zero, end now

	mov.l		%d2,-(%sp)		# save d2

	fmovm.x		&0x01,-(%sp)		# save result to stack

	mov.w		2+L_SCR3(%a6),%d1	# fetch rnd precision
	lsr.b		&0x6,%d1		# shift to lo bits (table index)

	mov.w		(%sp),%d2		# fetch new sign, exp
	andi.l		&0x7fff,%d2		# strip sign
	sub.l		%d0,%d2			# apply scale factor

	cmp.l		%d2,(tbl_fadd_ovfl.b,%pc,%d1.w*4) # is it an overflow?
	bge.b		fadd_ovfl		# yes

	cmp.l		%d2,(tbl_fadd_unfl.b,%pc,%d1.w*4) # is it an underflow?
	blt.w		fadd_unfl		# yes
	beq.w		fadd_may_unfl		# maybe; go find out

fadd_normal:
	mov.w		(%sp),%d1
	andi.w		&0x8000,%d1		# keep sign
	or.w		%d2,%d1			# concat sign,new exp
	mov.w		%d1,(%sp)		# insert new exponent

	fmovm.x		(%sp)+,&0x80		# return result in fp0

	mov.l		(%sp)+,%d2		# restore d2
	rts

fadd_zero_exit:
#	fmov.s		&0x00000000,%fp0	# return zero in fp0
	rts

# per-precision overflow/underflow exponent thresholds, indexed by
# rnd precision (ext/sgl/dbl)
tbl_fadd_ovfl:
	long		0x7fff			# ext ovfl
	long		0x407f			# sgl ovfl
	long		0x43ff			# dbl ovfl

tbl_fadd_unfl:
	long		0x0000			# ext unfl
	long		0x3f81			# sgl unfl
	long		0x3c01			# dbl unfl

fadd_ovfl:
	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
	bne.b		fadd_ovfl_ena		# yes

	add.l		&0xc,%sp		# pop the saved result
fadd_ovfl_dis:
	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
	sne		%d1			# set sign param accordingly
	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
	bsr.l		ovf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
	fmovm.x		(%a0),&0x80		# return default result in fp0
	mov.l		(%sp)+,%d2		# restore d2
	rts

#
# OVFL (or INEX) is enabled: build the EXOP in fp1 too.
#
fadd_ovfl_ena:
	mov.b		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# is precision extended?
	bne.b		fadd_ovfl_ena_sd	# no; prec = sgl or dbl

fadd_ovfl_ena_cont:
	mov.w		(%sp),%d1
	andi.w		&0x8000,%d1		# keep sign
	subi.l		&0x6000,%d2		# subtract exception bias
	andi.w		&0x7fff,%d2
	or.w		%d2,%d1			# concat sign,new exp
	mov.w		%d1,(%sp)		# insert new exponent

	fmovm.x		(%sp)+,&0x40		# return EXOP in fp1
	bra.b		fadd_ovfl_dis

# sgl/dbl precision: redo the add rounding to extended (rnd mode only)
# so the EXOP mantissa is correct.
fadd_ovfl_ena_sd:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0x30,%d1		# keep rnd mode
	fmov.l		%d1,%fpcr		# set FPCR

	fadd.x		FP_SCR0(%a6),%fp0	# execute add

	fmov.l		&0x0,%fpcr		# clear FPCR

	add.l		&0xc,%sp		# pop old result
	fmovm.x		&0x01,-(%sp)		# push re-rounded result
	bra.b		fadd_ovfl_ena_cont

fadd_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	add.l		&0xc,%sp		# pop the saved result

	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR (RZ for unf_res)
	fmov.l		&0x0,%fpsr		# clear FPSR

	fadd.x		FP_SCR0(%a6),%fp0	# execute add

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# save status

	or.l		%d1,USER_FPSR(%a6)	# save INEX,N

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b		fadd_unfl_ena		# yes

fadd_unfl_dis:
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result

	lea		FP_SCR0(%a6),%a0	# pass: result addr
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# 'Z' bit may have been set
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
	mov.l		(%sp)+,%d2		# restore d2
	rts

#
# UNFL (or INEX) is enabled: build the EXOP in fp1 too.
#
fadd_unfl_ena:
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# is precision extended?
	bne.b		fadd_unfl_ena_sd	# no; sgl or dbl

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

fadd_unfl_ena_cont:
	fmov.l		&0x0,%fpsr		# clear FPSR

	fadd.x		FP_SCR0(%a6),%fp1	# execute add

	fmov.l		&0x0,%fpcr		# clear FPCR

	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# apply scale factor
	addi.l		&0x6000,%d1		# add exception bias
	andi.w		&0x7fff,%d1		# clear top bit
	or.w		%d2,%d1			# concat sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.w		fadd_unfl_dis

fadd_unfl_ena_sd:
	mov.l		L_SCR3(%a6),%d1
	andi.b		&0x30,%d1		# use only rnd mode
	fmov.l		%d1,%fpcr		# set FPCR

	bra.b		fadd_unfl_ena_cont

#
# result is equal to the smallest normalized number in the selected precision
# if the precision is extended, this result could not have come from an
# underflow that rounded up.
#
fadd_may_unfl:
	mov.l		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# extended precision?
	beq.w		fadd_normal		# yes; no underflow occurred

	mov.l		0x4(%sp),%d1		# extract hi(man)
	cmpi.l		%d1,&0x80000000		# is hi(man) = 0x80000000?
	bne.w		fadd_normal		# no; no underflow occurred

	tst.l		0x8(%sp)		# is lo(man) = 0x0?
	bne.w		fadd_normal		# no; no underflow occurred

	btst		&inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
	beq.w		fadd_normal		# no; no underflow occurred

#
# ok, so now the result has a exponent equal to the smallest normalized
# exponent for the selected precision. also, the mantissa is equal to
# 0x8000000000000000 and this mantissa is the result of rounding non-zero
# g,r,s.
# now, we must determine whether the pre-rounded result was an underflow
# rounded "up" or a normalized number rounded "down".
# so, we do this be re-executing the add using RZ as the rounding mode and
# seeing if the new result is smaller or equal to the current result.
#
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# keep rnd prec
	ori.b		&rz_mode*0x10,%d1	# insert RZ rnd mode
	fmov.l		%d1,%fpcr		# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fadd.x		FP_SCR0(%a6),%fp1	# execute add

	fmov.l		&0x0,%fpcr		# clear FPCR

	fabs.x		%fp0			# compare absolute values
	fabs.x		%fp1
	fcmp.x		%fp0,%fp1		# is first result > second?

	fbgt.w		fadd_unfl		# yes; it's an underflow
	bra.w		fadd_normal		# no; it's not an underflow

##########################################################################

#
# Add: inputs are not both normalized; what are they?
# d1 = (DTAG << 3) | STAG on entry; entries are 16-bit offsets from
# tbl_fadd_op (empty slots point back at the table itself).
#
fadd_not_norm:
	mov.w		(tbl_fadd_op.b,%pc,%d1.w*2),%d1
	jmp		(tbl_fadd_op.b,%pc,%d1.w*1)

	swbeg		&48
tbl_fadd_op:
	short		fadd_norm	- tbl_fadd_op # NORM + NORM
	short		fadd_zero_src	- tbl_fadd_op # NORM + ZERO
	short		fadd_inf_src	- tbl_fadd_op # NORM + INF
	short		fadd_res_qnan	- tbl_fadd_op # NORM + QNAN
	short		fadd_norm	- tbl_fadd_op # NORM + DENORM
	short		fadd_res_snan	- tbl_fadd_op # NORM + SNAN
	short		tbl_fadd_op	- tbl_fadd_op # (unused)
	short		tbl_fadd_op	- tbl_fadd_op # (unused)

	short		fadd_zero_dst	- tbl_fadd_op # ZERO + NORM
	short		fadd_zero_2	- tbl_fadd_op # ZERO + ZERO
	short		fadd_inf_src	- tbl_fadd_op # ZERO + INF
	short		fadd_res_qnan	- tbl_fadd_op # ZERO + QNAN
	short		fadd_zero_dst	- tbl_fadd_op # ZERO + DENORM
	short		fadd_res_snan	- tbl_fadd_op # ZERO + SNAN
	short		tbl_fadd_op	- tbl_fadd_op # (unused)
	short		tbl_fadd_op	- tbl_fadd_op # (unused)

	short		fadd_inf_dst	- tbl_fadd_op # INF + NORM
	short		fadd_inf_dst	- tbl_fadd_op # INF + ZERO
	short		fadd_inf_2	- tbl_fadd_op # INF + INF
	short		fadd_res_qnan	- tbl_fadd_op # INF + QNAN
	short		fadd_inf_dst	- tbl_fadd_op # INF + DENORM
	short		fadd_res_snan	- tbl_fadd_op # INF + SNAN
	short		tbl_fadd_op	- tbl_fadd_op # (unused)
	short		tbl_fadd_op	- tbl_fadd_op # (unused)

	short		fadd_res_qnan	- tbl_fadd_op # QNAN + NORM
	short		fadd_res_qnan	- tbl_fadd_op # QNAN + ZERO
	short		fadd_res_qnan	- tbl_fadd_op # QNAN + INF
	short		fadd_res_qnan	- tbl_fadd_op # QNAN + QNAN
	short		fadd_res_qnan	- tbl_fadd_op # QNAN + DENORM
	short		fadd_res_snan	- tbl_fadd_op # QNAN + SNAN
	short		tbl_fadd_op	- tbl_fadd_op # (unused)
	short		tbl_fadd_op	- tbl_fadd_op # (unused)

	short		fadd_norm	- tbl_fadd_op # DENORM + NORM
	short		fadd_zero_src	- tbl_fadd_op # DENORM + ZERO
	short		fadd_inf_src	- tbl_fadd_op # DENORM + INF
	short		fadd_res_qnan	- tbl_fadd_op # DENORM + QNAN
	short		fadd_norm	- tbl_fadd_op # DENORM + DENORM
	short		fadd_res_snan	- tbl_fadd_op # DENORM + SNAN
	short		tbl_fadd_op	- tbl_fadd_op # (unused)
	short		tbl_fadd_op	- tbl_fadd_op # (unused)

	short		fadd_res_snan	- tbl_fadd_op # SNAN + NORM
	short		fadd_res_snan	- tbl_fadd_op # SNAN + ZERO
	short		fadd_res_snan	- tbl_fadd_op # SNAN + INF
	short		fadd_res_snan	- tbl_fadd_op # SNAN + QNAN
	short		fadd_res_snan	- tbl_fadd_op # SNAN + DENORM
	short		fadd_res_snan	- tbl_fadd_op # SNAN + SNAN
	short		tbl_fadd_op	- tbl_fadd_op # (unused)
	short		tbl_fadd_op	- tbl_fadd_op # (unused)

fadd_res_qnan:
	bra.l		res_qnan
fadd_res_snan:
	bra.l		res_snan

#
# both operands are ZEROes
#
fadd_zero_2:
	mov.b		SRC_EX(%a0),%d0		# are the signs opposite
	mov.b		DST_EX(%a1),%d1
	eor.b		%d0,%d1
	bmi.w		fadd_zero_2_chk_rm	# weed out (-ZERO)+(+ZERO)

# the signs are the same. so determine whether they are positive or negative
# and return the appropriately signed zero.
	tst.b		%d0			# are ZEROes positive or negative?
	bmi.b		fadd_zero_rm		# negative
	fmov.s		&0x00000000,%fp0	# return +ZERO
	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
	rts

#
# the ZEROes have opposite signs:
# - Therefore, we return +ZERO if the rounding modes are RN,RZ, or RP.
# - -ZERO is returned in the case of RM.
#
fadd_zero_2_chk_rm:
	mov.b		3+L_SCR3(%a6),%d1
	andi.b		&0x30,%d1		# extract rnd mode
	cmpi.b		%d1,&rm_mode*0x10	# is rnd mode == RM?
	beq.b		fadd_zero_rm		# yes
	fmov.s		&0x00000000,%fp0	# return +ZERO
	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
	rts

fadd_zero_rm:
	fmov.s		&0x80000000,%fp0	# return -ZERO
	mov.b		&neg_bmask+z_bmask,FPSR_CC(%a6)	# set NEG/Z
	rts

#
# one operand is a ZERO and the other is a DENORM or NORM. scale
# the DENORM or NORM and jump to the regular fadd routine.
#
fadd_zero_dst:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	bsr.l		scale_to_zero_src	# scale the operand
	clr.w		FP_SCR1_EX(%a6)		# dst = +0.0
	clr.l		FP_SCR1_HI(%a6)
	clr.l		FP_SCR1_LO(%a6)
	bra.w		fadd_zero_entry		# go execute fadd

fadd_zero_src:
	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
	bsr.l		scale_to_zero_dst	# scale the operand
	clr.w		FP_SCR0_EX(%a6)		# src = +0.0
	clr.l		FP_SCR0_HI(%a6)
	clr.l		FP_SCR0_LO(%a6)
	bra.w		fadd_zero_entry		# go execute fadd

#
# both operands are INFs. an OPERR will result if the INFs have
# different signs. else, an INF of the same sign is returned
#
fadd_inf_2:
	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
	mov.b		DST_EX(%a1),%d1
	eor.b		%d1,%d0
	bmi.l		res_operr		# weed out (-INF)+(+INF)

# ok, so it's not an OPERR. but, we do have to remember to return the
# src INF since that's where the 881/882 gets the j-bit from...

#
# operands are INF and one of {ZERO, INF, DENORM, NORM}
#
fadd_inf_src:
	fmovm.x		SRC(%a0),&0x80		# return src INF
	tst.b		SRC_EX(%a0)		# is INF positive?
	bpl.b		fadd_inf_done		# yes; we're done
	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
	rts

#
# operands are INF and one of {ZERO, INF, DENORM, NORM}
#
fadd_inf_dst:
	fmovm.x		DST(%a1),&0x80		# return dst INF
	tst.b		DST_EX(%a1)		# is INF positive?
	bpl.b		fadd_inf_done		# yes; we're done
	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
	rts

fadd_inf_done:
	mov.b		&inf_bmask,FPSR_CC(%a6)	# set INF
	rts

#########################################################################
# XDEF ****************************************************************	#
#	fsub(): emulates the fsub instruction				#
#	fssub(): emulates the fssub instruction				#
#	fdsub(): emulates the fdsub instruction				#
#									#
# XREF ****************************************************************	#
#	addsub_scaler2() - scale the operands so they won't take exc	#
#	ovf_res() - return default overflow result			#
#	unf_res() - return default underflow result			#
#	res_qnan() - set QNAN result					#
#	res_snan() - set SNAN result					#
#	res_operr() - set OPERR result					#
#	scale_to_zero_src() - set src operand exponent equal to zero	#
#	scale_to_zero_dst() - set dst operand exponent equal to zero	#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision source operand		#
#	a1 = pointer to extended precision destination operand		#
#									#
# OUTPUT **************************************************************	#
#	fp0 = result							#
#	fp1 = EXOP (if exception occurred)				#
#									#
# ALGORITHM ***********************************************************	#
#	Handle NANs, infinities, and zeroes as special cases. Divide	#
# norms into extended, single, and double precision.			#
#	Do subtraction after scaling exponents such that exception won't#
# occur. Then, check result exponent to see if exception would have	#
# occurred. If so, return default result and maybe EXOP. Else, insert	#
# the correct result exponent and return. Set FPSR bits as appropriate.	#
#									#
#########################################################################

# fssub/fdsub force single/double rounding precision into d0, then fall
# into the common fsub emulation core below.
	global		fssub
fssub:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&s_mode*0x10,%d0	# insert sgl prec
	bra.b		fsub

	global		fdsub
fdsub:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&d_mode*0x10,%d0	# insert dbl prec

	global		fsub
fsub:
	mov.l		%d0,L_SCR3(%a6)		# store rnd prec,mode

	clr.w		%d1
	mov.b		DTAG(%a6),%d1
	lsl.b		&0x3,%d1
	or.b		STAG(%a6),%d1		# combine src tags

	bne.w		fsub_not_norm		# optimize on non-norm input

#
# SUB: norms and denorms
#
fsub_norm:
	bsr.l		addsub_scaler2		# scale exponents; d0 = scale factor

fsub_zero_entry:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fsub.x		FP_SCR0(%a6),%fp0	# execute subtract

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# fetch INEX2,N,Z

	or.l		%d1,USER_FPSR(%a6)	# save exc and ccode bits

	fbeq.w		fsub_zero_exit		# if result zero, end now

	mov.l		%d2,-(%sp)		# save d2

	fmovm.x		&0x01,-(%sp)		# save result to stack

	mov.w		2+L_SCR3(%a6),%d1	# fetch rnd prec
	lsr.b		&0x6,%d1		# shift prec to bits{1:0} (tbl index)

	mov.w		(%sp),%d2		# fetch new exponent
	andi.l		&0x7fff,%d2		# strip sign
	sub.l		%d0,%d2			# remove scale factor

	cmp.l		%d2,(tbl_fsub_ovfl.b,%pc,%d1.w*4) # is it an overflow?
	bge.b		fsub_ovfl		# yes

	cmp.l		%d2,(tbl_fsub_unfl.b,%pc,%d1.w*4) # is it an underflow?
	blt.w		fsub_unfl		# yes
	beq.w		fsub_may_unfl		# maybe; go find out

fsub_normal:
	mov.w		(%sp),%d1		# fetch {sgn,exp} of result
	andi.w		&0x8000,%d1		# keep sign
	or.w		%d2,%d1			# insert new exponent
	mov.w		%d1,(%sp)		# insert new exponent

	fmovm.x		(%sp)+,&0x80		# return result in fp0

	mov.l		(%sp)+,%d2		# restore d2
	rts

fsub_zero_exit:
#	fmov.s		&0x00000000,%fp0	# return zero in fp0
	rts

# per-precision exponent limits indexed by rnd prec (ext,sgl,dbl)
tbl_fsub_ovfl:
	long		0x7fff			# ext ovfl
	long		0x407f			# sgl ovfl
	long		0x43ff			# dbl ovfl

tbl_fsub_unfl:
	long		0x0000			# ext unfl
	long		0x3f81			# sgl unfl
	long		0x3c01			# dbl unfl

fsub_ovfl:
	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
	bne.b		fsub_ovfl_ena		# yes

	add.l		&0xc,%sp		# pop result from stack
fsub_ovfl_dis:
	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
	sne		%d1			# set sign param accordingly
	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
	bsr.l		ovf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
	fmovm.x		(%a0),&0x80		# return default result in fp0
	mov.l		(%sp)+,%d2		# restore d2
	rts

fsub_ovfl_ena:
	mov.b		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# is precision extended?
	bne.b		fsub_ovfl_ena_sd	# no

fsub_ovfl_ena_cont:
	mov.w		(%sp),%d1		# fetch {sgn,exp}
	andi.w		&0x8000,%d1		# keep sign
	subi.l		&0x6000,%d2		# subtract new bias
	andi.w		&0x7fff,%d2		# clear top bit
	or.w		%d2,%d1			# concat sign,exp
	mov.w		%d1,(%sp)		# insert new exponent

	fmovm.x		(%sp)+,&0x40		# return EXOP in fp1
	bra.b		fsub_ovfl_dis

fsub_ovfl_ena_sd:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0x30,%d1		# clear rnd prec (force extended)
	fmov.l		%d1,%fpcr		# set FPCR

	fsub.x		FP_SCR0(%a6),%fp0	# execute subtract

	fmov.l		&0x0,%fpcr		# clear FPCR

	add.l		&0xc,%sp		# pop old result
	fmovm.x		&0x01,-(%sp)		# push extended-prec result
	bra.b		fsub_ovfl_ena_cont

fsub_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	add.l		&0xc,%sp		# pop result from stack

	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR (round-to-zero)
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsub.x		FP_SCR0(%a6),%fp0	# execute subtract

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# save status

	or.l		%d1,USER_FPSR(%a6)

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b		fsub_unfl_ena		# yes

fsub_unfl_dis:
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result

	lea		FP_SCR0(%a6),%a0	# pass: result addr
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# 'Z' may have been set
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
	mov.l		(%sp)+,%d2		# restore d2
	rts

fsub_unfl_ena:
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# is precision extended?
	bne.b		fsub_unfl_ena_sd	# no

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

fsub_unfl_ena_cont:
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsub.x		FP_SCR0(%a6),%fp1	# execute subtract

	fmov.l		&0x0,%fpcr		# clear FPCR

	fmovm.x		&0x40,FP_SCR0(%a6)	# store result to stack
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# remove scale factor
	addi.l		&0x6000,%d1		# add new bias
	andi.w		&0x7fff,%d1		# clear top bit
	or.w		%d2,%d1			# concat sgn,exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.w		fsub_unfl_dis

fsub_unfl_ena_sd:
	mov.l		L_SCR3(%a6),%d1
	andi.b		&0x30,%d1		# clear rnd prec (force extended)
	fmov.l		%d1,%fpcr		# set FPCR

	bra.b		fsub_unfl_ena_cont

#
# result is equal to the smallest normalized number in the selected precision
# if the precision is extended, this result could not have come from an
# underflow that rounded up.
#
fsub_may_unfl:
	mov.l		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# fetch rnd prec
	beq.w		fsub_normal		# ext prec; no underflow occurred

	mov.l		0x4(%sp),%d1		# extract hi(man)
	cmpi.l		%d1,&0x80000000		# is hi(man) = 0x80000000?
	bne.w		fsub_normal		# no; no underflow occurred

	tst.l		0x8(%sp)		# is lo(man) = 0x0?
	bne.w		fsub_normal		# no; no underflow occurred

	btst		&inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
	beq.w		fsub_normal		# no; no underflow occurred

#
# ok, so now the result has an exponent equal to the smallest normalized
# exponent for the selected precision. also, the mantissa is equal to
# 0x8000000000000000 and this mantissa is the result of rounding non-zero
# g,r,s.
# now, we must determine whether the pre-rounded result was an underflow
# rounded "up" or a normalized number rounded "down".
# so, we do this by re-executing the subtract using RZ as the rounding mode
# and seeing if the new result is smaller or equal to the current result.
#
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# keep rnd prec
	ori.b		&rz_mode*0x10,%d1	# insert rnd mode
	fmov.l		%d1,%fpcr		# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsub.x		FP_SCR0(%a6),%fp1	# execute subtract

	fmov.l		&0x0,%fpcr		# clear FPCR

	fabs.x		%fp0			# compare absolute values
	fabs.x		%fp1
	fcmp.x		%fp0,%fp1		# is first result > second?

	fbgt.w		fsub_unfl		# yes; it's an underflow
	bra.w		fsub_normal		# no; it's not an underflow

##########################################################################

#
# Sub: inputs are not both normalized; dispatch on {dst tag : src tag}.
#
fsub_not_norm:
	mov.w		(tbl_fsub_op.b,%pc,%d1.w*2),%d1
	jmp		(tbl_fsub_op.b,%pc,%d1.w*1)

	swbeg		&48
tbl_fsub_op:
	short		fsub_norm	- tbl_fsub_op # NORM - NORM
	short		fsub_zero_src	- tbl_fsub_op # NORM - ZERO
	short		fsub_inf_src	- tbl_fsub_op # NORM - INF
	short		fsub_res_qnan	- tbl_fsub_op # NORM - QNAN
	short		fsub_norm	- tbl_fsub_op # NORM - DENORM
	short		fsub_res_snan	- tbl_fsub_op # NORM - SNAN
	short		tbl_fsub_op	- tbl_fsub_op #
	short		tbl_fsub_op	- tbl_fsub_op #

	short		fsub_zero_dst	- tbl_fsub_op # ZERO - NORM
	short		fsub_zero_2	- tbl_fsub_op # ZERO - ZERO
	short		fsub_inf_src	- tbl_fsub_op # ZERO - INF
	short		fsub_res_qnan	- tbl_fsub_op # ZERO - QNAN
	short		fsub_zero_dst	- tbl_fsub_op # ZERO - DENORM
	short		fsub_res_snan	- tbl_fsub_op # ZERO - SNAN
	short		tbl_fsub_op	- tbl_fsub_op #
	short		tbl_fsub_op	- tbl_fsub_op #

	short		fsub_inf_dst	- tbl_fsub_op # INF - NORM
	short		fsub_inf_dst	- tbl_fsub_op # INF - ZERO
	short		fsub_inf_2	- tbl_fsub_op # INF - INF
	short		fsub_res_qnan	- tbl_fsub_op # INF - QNAN
	short		fsub_inf_dst	- tbl_fsub_op # INF - DENORM
	short		fsub_res_snan	- tbl_fsub_op # INF - SNAN
	short		tbl_fsub_op	- tbl_fsub_op #
	short		tbl_fsub_op	- tbl_fsub_op #

	short		fsub_res_qnan	- tbl_fsub_op # QNAN - NORM
	short		fsub_res_qnan	- tbl_fsub_op # QNAN - ZERO
	short		fsub_res_qnan	- tbl_fsub_op # QNAN - INF
	short		fsub_res_qnan	- tbl_fsub_op # QNAN - QNAN
	short		fsub_res_qnan	- tbl_fsub_op # QNAN - DENORM
	short		fsub_res_snan	- tbl_fsub_op # QNAN - SNAN
	short		tbl_fsub_op	- tbl_fsub_op #
	short		tbl_fsub_op	- tbl_fsub_op #

	short		fsub_norm	- tbl_fsub_op # DENORM - NORM
	short		fsub_zero_src	- tbl_fsub_op # DENORM - ZERO
	short		fsub_inf_src	- tbl_fsub_op # DENORM - INF
	short		fsub_res_qnan	- tbl_fsub_op # DENORM - QNAN
	short		fsub_norm	- tbl_fsub_op # DENORM - DENORM
	short		fsub_res_snan	- tbl_fsub_op # DENORM - SNAN
	short		tbl_fsub_op	- tbl_fsub_op #
	short		tbl_fsub_op	- tbl_fsub_op #

	short		fsub_res_snan	- tbl_fsub_op # SNAN - NORM
	short		fsub_res_snan	- tbl_fsub_op # SNAN - ZERO
	short		fsub_res_snan	- tbl_fsub_op # SNAN - INF
	short		fsub_res_snan	- tbl_fsub_op # SNAN - QNAN
	short		fsub_res_snan	- tbl_fsub_op # SNAN - DENORM
	short		fsub_res_snan	- tbl_fsub_op # SNAN - SNAN
	short		tbl_fsub_op	- tbl_fsub_op #
	short		tbl_fsub_op	- tbl_fsub_op #

fsub_res_qnan:
	bra.l		res_qnan
fsub_res_snan:
	bra.l		res_snan

#
# both operands are ZEROes
#
fsub_zero_2:
	mov.b		SRC_EX(%a0),%d0		# d0 = src sign
	mov.b		DST_EX(%a1),%d1		# d1 = dst sign
	eor.b		%d1,%d0			# d0 = src sign ^ dst sign
	bpl.b		fsub_zero_2_chk_rm	# signs alike; go check rnd mode

# the signs are opposite, so, return a ZERO w/ the sign of the dst ZERO.
# (fix: test the intact dst sign in d1; d0 now holds the eor result, which
# is always negative on this path and made the +ZERO case unreachable)
	tst.b		%d1			# is dst negative?
	bmi.b		fsub_zero_2_rm		# yes
	fmov.s		&0x00000000,%fp0	# no; return +ZERO
	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
	rts

#
# the ZEROes have the same signs:
# - Therefore, we return +ZERO if the rounding mode is RN,RZ, or RP
# - -ZERO is returned in the case of RM.
#
fsub_zero_2_chk_rm:
	mov.b		3+L_SCR3(%a6),%d1
	andi.b		&0x30,%d1		# extract rnd mode
	cmpi.b		%d1,&rm_mode*0x10	# is rnd mode = RM?
	beq.b		fsub_zero_2_rm		# yes
	fmov.s		&0x00000000,%fp0	# no; return +ZERO
	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
	rts

fsub_zero_2_rm:
	fmov.s		&0x80000000,%fp0	# return -ZERO
	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6)	# set Z/NEG
	rts

#
# one operand is a ZERO and the other is a DENORM or a NORM.
# scale the DENORM or NORM and jump to the regular fsub routine.
#
fsub_zero_dst:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	bsr.l		scale_to_zero_src	# scale the operand
	clr.w		FP_SCR1_EX(%a6)		# dst := +0.0
	clr.l		FP_SCR1_HI(%a6)
	clr.l		FP_SCR1_LO(%a6)
	bra.w		fsub_zero_entry		# go execute fsub

fsub_zero_src:
	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
	bsr.l		scale_to_zero_dst	# scale the operand
	clr.w		FP_SCR0_EX(%a6)		# src := +0.0
	clr.l		FP_SCR0_HI(%a6)
	clr.l		FP_SCR0_LO(%a6)
	bra.w		fsub_zero_entry		# go execute fsub

#
# both operands are INFs. an OPERR will result if the INFs have the
# same signs. else, return the src INF negated.
#
fsub_inf_2:
	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
	mov.b		DST_EX(%a1),%d1
	eor.b		%d1,%d0
	bpl.l		res_operr		# weed out INF-INF of like sign

# ok, so it's not an OPERR. but we do have to remember to return
# the src INF since that's where the 881/882 gets the j-bit.

fsub_inf_src:
	fmovm.x		SRC(%a0),&0x80		# return src INF
	fneg.x		%fp0			# invert sign
	fbge.w		fsub_inf_done		# sign is now positive
	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
	rts

fsub_inf_dst:
	fmovm.x		DST(%a1),&0x80		# return dst INF
	tst.b		DST_EX(%a1)		# is INF negative?
	bpl.b		fsub_inf_done		# no
	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
	rts

fsub_inf_done:
	mov.b		&inf_bmask,FPSR_CC(%a6)	# set INF
	rts

#########################################################################
# XDEF ****************************************************************	#
#	fsqrt(): emulates the fsqrt instruction				#
#	fssqrt(): emulates the fssqrt instruction			#
#	fdsqrt(): emulates the fdsqrt instruction			#
#									#
# XREF ****************************************************************	#
#	scale_sqrt() - scale the source operand				#
#	unf_res() - return default underflow result			#
#	ovf_res() - return default overflow result			#
#	res_qnan_1op() - return QNAN result				#
#	res_snan_1op() - return SNAN result				#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision source operand		#
#	d0 = rnd prec,mode						#
#									#
# OUTPUT **************************************************************	#
#	fp0 = result							#
#	fp1 = EXOP (if exception occurred)				#
#									#
# ALGORITHM ***********************************************************	#
#	Handle NANs, infinities, and zeroes as special cases. Divide	#
# norms/denorms into ext/sgl/dbl precision.				#
#	For norms/denorms, scale the exponents such that a sqrt		#
# instruction won't cause an exception. Use the regular fsqrt to	#
# compute a result. Check if the regular operands would have taken	#
# an exception. If so, return the default overflow/underflow result	#
# and return the EXOP if exceptions are enabled. Else, scale the	#
# result operand to the proper exponent.
#									#
#########################################################################

# fssqrt/fdsqrt force single/double rounding precision into d0, then fall
# into the common fsqrt emulation core below.
	global		fssqrt
fssqrt:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&s_mode*0x10,%d0	# insert sgl precision
	bra.b		fsqrt

	global		fdsqrt
fdsqrt:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&d_mode*0x10,%d0	# insert dbl precision

	global		fsqrt
fsqrt:
	mov.l		%d0,L_SCR3(%a6)		# store rnd info
	clr.w		%d1
	mov.b		STAG(%a6),%d1
	bne.w		fsqrt_not_norm		# optimize on non-norm input

#
# SQUARE ROOT: norms and denorms ONLY!
#
fsqrt_norm:
	tst.b		SRC_EX(%a0)		# is operand negative?
	bmi.l		res_operr		# yes; sqrt of neg is OPERR

	andi.b		&0xc0,%d0		# is precision extended?
	bne.b		fsqrt_not_ext		# no; go handle sgl or dbl

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsqrt.x		(%a0),%fp0		# execute square root

	fmov.l		%fpsr,%d1
	or.l		%d1,USER_FPSR(%a6)	# set N,INEX

	rts

fsqrt_denorm:
	tst.b		SRC_EX(%a0)		# is operand negative?
	bmi.l		res_operr		# yes; sqrt of neg is OPERR

	andi.b		&0xc0,%d0		# is precision extended?
	bne.b		fsqrt_not_ext		# no; go handle sgl or dbl

	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

	bsr.l		scale_sqrt		# calculate scale factor

	bra.w		fsqrt_sd_normal

#
# operand is either single or double
#
fsqrt_not_ext:
	cmpi.b		%d0,&s_mode*0x10	# separate sgl/dbl prec
	bne.w		fsqrt_dbl

#
# operand is to be rounded to single precision
#
fsqrt_sgl:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

	bsr.l		scale_sqrt		# calculate scale factor

	cmpi.l		%d0,&0x3fff-0x3f81	# will move in underflow?
	beq.w		fsqrt_sd_may_unfl	# maybe; go check
	bgt.w		fsqrt_sd_unfl		# yes; go handle underflow
	cmpi.l		%d0,&0x3fff-0x407f	# will move in overflow?
	beq.w		fsqrt_sd_may_ovfl	# maybe; go check
	blt.w		fsqrt_sd_ovfl		# yes; go handle overflow

#
# operand will NOT overflow or underflow when moved in to the fp reg file
#
fsqrt_sd_normal:
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fsqrt.x		FP_SCR0(%a6),%fp0	# execute square root

	fmov.l		%fpsr,%d1		# save FPSR
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

fsqrt_sd_normal_exit:
	mov.l		%d2,-(%sp)		# save d2
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
	mov.w		FP_SCR0_EX(%a6),%d1	# load sgn,exp
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	sub.l		%d0,%d1			# remove scale factor
	andi.w		&0x8000,%d2		# keep old sign
	or.w		%d1,%d2			# concat old sign,new exp
	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
	rts

#
# operand is to be rounded to double precision
#
fsqrt_dbl:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

	bsr.l		scale_sqrt		# calculate scale factor

	cmpi.l		%d0,&0x3fff-0x3c01	# will move in underflow?
	beq.w		fsqrt_sd_may_unfl	# maybe; go check
	bgt.b		fsqrt_sd_unfl		# yes; go handle underflow
	cmpi.l		%d0,&0x3fff-0x43ff	# will move in overflow?
	beq.w		fsqrt_sd_may_ovfl	# maybe; go check
	blt.w		fsqrt_sd_ovfl		# yes; go handle overflow
	bra.w		fsqrt_sd_normal		# no; go handle normalized op

# we're on the line here and the distinguishing characteristic is whether
# the exponent is 0x3fff or 0x3ffe. if it's 0x3ffe, then it's a safe number
# elsewise fall through to underflow.
fsqrt_sd_may_unfl:
	btst		&0x0,1+FP_SCR0_EX(%a6)	# is exponent 0x3fff?
	bne.w		fsqrt_sd_normal		# yes, so no underflow

#
# operand WILL underflow when moved in to the fp register file
#
fsqrt_sd_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR (round-to-zero)
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsqrt.x		FP_SCR0(%a6),%fp0	# execute square root

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

# if underflow or inexact is enabled, go calculate EXOP first.
	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b		fsqrt_sd_unfl_ena	# yes

fsqrt_sd_unfl_dis:
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result

	lea		FP_SCR0(%a6),%a0	# pass: result addr
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set possible 'Z' ccode
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
	rts

#
# operand will underflow AND underflow is enabled.
# Therefore, we must return the result rounded to extended precision.
#
fsqrt_sd_unfl_ena:
	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent

	mov.l		%d2,-(%sp)		# save d2
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# subtract scale factor
	addi.l		&0x6000,%d1		# add new bias
	andi.w		&0x7fff,%d1		# clear top bit
	or.w		%d2,%d1			# concat new sign,new exp
	mov.w		%d1,FP_SCR1_EX(%a6)	# insert new exp
	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
	mov.l		(%sp)+,%d2		# restore d2
	bra.b		fsqrt_sd_unfl_dis

#
# operand WILL overflow.
#
fsqrt_sd_ovfl:
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fsqrt.x		FP_SCR0(%a6),%fp0	# perform square root

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# save FPSR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

fsqrt_sd_ovfl_tst:
	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
	bne.b		fsqrt_sd_ovfl_ena	# yes

#
# OVFL is not enabled; therefore, we must create the default result by
# calling ovf_res().
#
fsqrt_sd_ovfl_dis:
	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
	sne		%d1			# set sign param accordingly
	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
	bsr.l		ovf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
	fmovm.x		(%a0),&0x80		# return default result in fp0
	rts

#
# OVFL is enabled.
# the INEX2 bit has already been updated by the round to the correct precision.
# now, round to extended(and don't alter the FPSR).
#
fsqrt_sd_ovfl_ena:
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# remove scale factor
	subi.l		&0x6000,%d1		# subtract bias
	andi.w		&0x7fff,%d1		# clear top bit
	or.w		%d2,%d1			# concat sign,exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	mov.l		(%sp)+,%d2		# restore d2
	bra.b		fsqrt_sd_ovfl_dis

#
# the move in MAY overflow. so...
#
fsqrt_sd_may_ovfl:
	btst		&0x0,1+FP_SCR0_EX(%a6)	# is exponent 0x3fff?
	bne.w		fsqrt_sd_ovfl		# yes, so overflow

	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fsqrt.x		FP_SCR0(%a6),%fp0	# perform square root

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	fmov.x		%fp0,%fp1		# make a copy of result
	fcmp.b		%fp1,&0x1		# is |result| >= 1.b?
	fbge.w		fsqrt_sd_ovfl_tst	# yes; overflow has occurred

# no, it didn't overflow; we have correct result
	bra.w		fsqrt_sd_normal_exit

##########################################################################

#
# input is not normalized; what is it?
#
fsqrt_not_norm:
	cmpi.b		%d1,&DENORM		# weed out DENORM
	beq.w		fsqrt_denorm
	cmpi.b		%d1,&ZERO		# weed out ZERO
	beq.b		fsqrt_zero
	cmpi.b		%d1,&INF		# weed out INF
	beq.b		fsqrt_inf
	cmpi.b		%d1,&SNAN		# weed out SNAN
	beq.l		res_snan_1op
	bra.l		res_qnan_1op

#
# fsqrt(+0) = +0
# fsqrt(-0) = -0
# fsqrt(+INF) = +INF
# fsqrt(-INF) = OPERR
#
fsqrt_zero:
	tst.b		SRC_EX(%a0)		# is ZERO positive or negative?
	bmi.b		fsqrt_zero_m		# negative
fsqrt_zero_p:
	fmov.s		&0x00000000,%fp0	# return +ZERO
	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
	rts
fsqrt_zero_m:
	fmov.s		&0x80000000,%fp0	# return -ZERO
	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6)	# set 'Z','N' ccode bits
	rts

fsqrt_inf:
	tst.b		SRC_EX(%a0)		# is INF positive or negative?
	bmi.l		res_operr		# negative
fsqrt_inf_p:
	fmovm.x		SRC(%a0),&0x80		# return +INF in fp0
	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
	rts

#########################################################################
# XDEF ****************************************************************	#
#	fetch_dreg(): fetch register according to index in d1		#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	d1 = index of register to fetch from				#
#									#
# OUTPUT **************************************************************	#
#	d0 = value of register fetched					#
#									#
# ALGORITHM ***********************************************************	#
#	According to the index value in d1 which can range from zero	#
# to fifteen, load the corresponding register file value (where		#
# address register indexes start at 8). D0/D1/A0/A1/A6/A7 are on the	#
# stack. The rest should still be in their original places.		#
#									#
#########################################################################

# this routine leaves d1 intact for subsequent store_dreg calls.
	global		fetch_dreg
fetch_dreg:
	mov.w		(tbl_fdreg.b,%pc,%d1.w*2),%d0	# fetch table offset
	jmp		(tbl_fdreg.b,%pc,%d0.w*1)	# jump to fetch stub

tbl_fdreg:
	short		fdreg0 - tbl_fdreg
	short		fdreg1 - tbl_fdreg
	short		fdreg2 - tbl_fdreg
	short		fdreg3 - tbl_fdreg
	short		fdreg4 - tbl_fdreg
	short		fdreg5 - tbl_fdreg
	short		fdreg6 - tbl_fdreg
	short		fdreg7 - tbl_fdreg
	short		fdreg8 - tbl_fdreg
	short		fdreg9 - tbl_fdreg
	short		fdrega - tbl_fdreg
	short		fdregb - tbl_fdreg
	short		fdregc - tbl_fdreg
	short		fdregd - tbl_fdreg
	short		fdrege - tbl_fdreg
	short		fdregf - tbl_fdreg

fdreg0:
	mov.l		EXC_DREGS+0x0(%a6),%d0	# d0 (saved in stack frame)
	rts
fdreg1:
	mov.l		EXC_DREGS+0x4(%a6),%d0	# d1 (saved in stack frame)
	rts
fdreg2:
	mov.l		%d2,%d0
	rts
fdreg3:
	mov.l		%d3,%d0
	rts
fdreg4:
	mov.l		%d4,%d0
	rts
fdreg5:
	mov.l		%d5,%d0
	rts
fdreg6:
	mov.l		%d6,%d0
	rts
fdreg7:
	mov.l		%d7,%d0
	rts
fdreg8:
	mov.l		EXC_DREGS+0x8(%a6),%d0	# a0 (saved in stack frame)
	rts
fdreg9:
	mov.l		EXC_DREGS+0xc(%a6),%d0	# a1 (saved in stack frame)
	rts
fdrega:
	mov.l		%a2,%d0
	rts
fdregb:
	mov.l		%a3,%d0
	rts
fdregc:
	mov.l		%a4,%d0
	rts
fdregd:
	mov.l		%a5,%d0
	rts
fdrege:
	mov.l		(%a6),%d0		# a6 (saved at frame base)
	rts
fdregf:
	mov.l		EXC_A7(%a6),%d0		# a7 (saved in stack frame)
	rts

#########################################################################
# XDEF ****************************************************************	#
#	store_dreg_l(): store longword to data register specified by d1	#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	d0 = longword value to store					#
#	d1 = index of data register to store to				#
#									#
# OUTPUT **************************************************************	#
#	(data register is updated)					#
#									#
# ALGORITHM ***********************************************************	#
#	According to the index value in d1, store the longword value	#
# in d0 to the corresponding data register. D0/D1 are on the stack	#
# while the rest are in their initial places.				#
#									#
#########################################################################

	global		store_dreg_l
store_dreg_l:
	mov.w		(tbl_sdregl.b,%pc,%d1.w*2),%d1	# fetch table offset
	jmp		(tbl_sdregl.b,%pc,%d1.w*1)	# jump to store stub

tbl_sdregl:
	short		sdregl0 - tbl_sdregl
	short		sdregl1 - tbl_sdregl
	short		sdregl2 - tbl_sdregl
	short		sdregl3 - tbl_sdregl
	short		sdregl4 - tbl_sdregl
	short		sdregl5 - tbl_sdregl
	short		sdregl6 - tbl_sdregl
	short		sdregl7 - tbl_sdregl

sdregl0:
	mov.l		%d0,EXC_DREGS+0x0(%a6)	# d0 (saved in stack frame)
	rts
sdregl1:
	mov.l		%d0,EXC_DREGS+0x4(%a6)	# d1 (saved in stack frame)
	rts
sdregl2:
	mov.l		%d0,%d2
	rts
sdregl3:
	mov.l		%d0,%d3
	rts
sdregl4:
	mov.l		%d0,%d4
	rts
sdregl5:
	mov.l		%d0,%d5
	rts
sdregl6:
	mov.l		%d0,%d6
	rts
sdregl7:
	mov.l		%d0,%d7
	rts

#########################################################################
# XDEF ****************************************************************	#
#	store_dreg_w(): store word to data register specified by d1	#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	d0 = word value to store					#
#	d1 = index of data register to store to				#
#									#
# OUTPUT **************************************************************	#
#	(data register is updated)					#
#									#
# ALGORITHM ***********************************************************	#
#	According to the index value in d1, store the word value	#
# in d0 to the corresponding data register. D0/D1 are on the stack	#
# while the rest are in their initial places.				#
#									#
#########################################################################

	global		store_dreg_w
store_dreg_w:
	mov.w		(tbl_sdregw.b,%pc,%d1.w*2),%d1	# fetch table offset
	jmp		(tbl_sdregw.b,%pc,%d1.w*1)	# jump to store stub

tbl_sdregw:
	short		sdregw0 - tbl_sdregw
	short		sdregw1 - tbl_sdregw
	short		sdregw2 - tbl_sdregw
	short		sdregw3 - tbl_sdregw
	short		sdregw4 - tbl_sdregw
	short		sdregw5 - tbl_sdregw
	short		sdregw6 - tbl_sdregw
	short		sdregw7 - tbl_sdregw

# for d0/d1 in the stack frame, write only the low-order word (offset +2)
sdregw0:
	mov.w		%d0,2+EXC_DREGS+0x0(%a6) # d0 (saved in stack frame)
	rts
sdregw1:
	mov.w		%d0,2+EXC_DREGS+0x4(%a6) # d1 (saved in stack frame)
	rts
sdregw2:
	mov.w		%d0,%d2
	rts
sdregw3:
	mov.w		%d0,%d3
	rts
sdregw4:
	mov.w		%d0,%d4
	rts
sdregw5:
	mov.w		%d0,%d5
	rts
sdregw6:
	mov.w		%d0,%d6
	rts
sdregw7:
	mov.w		%d0,%d7
	rts

#########################################################################
# XDEF ****************************************************************	#
#	store_dreg_b(): store byte to data register specified by d1	#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	d0 = byte value to store					#
#	d1 = index of data register to store to				#
#									#
# OUTPUT **************************************************************	#
#	(data register is updated)					#
#									#
# ALGORITHM ***********************************************************	#
#	According to the index value in d1, store the byte value	#
# in d0 to the corresponding data register. D0/D1 are on the stack	#
# while the rest are in their initial places.
#									#
#########################################################################

	global		store_dreg_b
store_dreg_b:
	mov.w		(tbl_sdregb.b,%pc,%d1.w*2),%d1	# fetch table offset
	jmp		(tbl_sdregb.b,%pc,%d1.w*1)	# jump to store stub

tbl_sdregb:
	short		sdregb0 - tbl_sdregb
	short		sdregb1 - tbl_sdregb
	short		sdregb2 - tbl_sdregb
	short		sdregb3 - tbl_sdregb
	short		sdregb4 - tbl_sdregb
	short		sdregb5 - tbl_sdregb
	short		sdregb6 - tbl_sdregb
	short		sdregb7 - tbl_sdregb

# for d0/d1 in the stack frame, write only the low-order byte (offset +3)
sdregb0:
	mov.b		%d0,3+EXC_DREGS+0x0(%a6) # d0 (saved in stack frame)
	rts
sdregb1:
	mov.b		%d0,3+EXC_DREGS+0x4(%a6) # d1 (saved in stack frame)
	rts
sdregb2:
	mov.b		%d0,%d2
	rts
sdregb3:
	mov.b		%d0,%d3
	rts
sdregb4:
	mov.b		%d0,%d4
	rts
sdregb5:
	mov.b		%d0,%d5
	rts
sdregb6:
	mov.b		%d0,%d6
	rts
sdregb7:
	mov.b		%d0,%d7
	rts

#########################################################################
# XDEF ****************************************************************	#
#	inc_areg(): increment an address register by the value in d0	#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	d0 = amount to increment by					#
#	d1 = index of address register to increment			#
#									#
# OUTPUT **************************************************************	#
#	(address register is updated)					#
#									#
# ALGORITHM ***********************************************************	#
#	Typically used for an instruction w/ a post-increment <ea>,	#
# this routine adds the increment value in d0 to the address register	#
# specified by d1. A0/A1/A6/A7 reside on the stack. The rest reside	#
# in their original places.						#
#	For a7, if the increment amount is one, then we have to		#
# increment by two (byte accesses keep a7 word-aligned).
# For any a7 update, set the mia7_flg special-condition flag so that if	#
# an access error exception occurs later in emulation, this address	#
# register update can be undone.					#
#									#
#########################################################################

	global		inc_areg
inc_areg:
	mov.w		(tbl_iareg.b,%pc,%d1.w*2),%d1	# fetch table offset
	jmp		(tbl_iareg.b,%pc,%d1.w*1)	# jump to increment stub

tbl_iareg:
	short		iareg0 - tbl_iareg
	short		iareg1 - tbl_iareg
	short		iareg2 - tbl_iareg
	short		iareg3 - tbl_iareg
	short		iareg4 - tbl_iareg
	short		iareg5 - tbl_iareg
	short		iareg6 - tbl_iareg
	short		iareg7 - tbl_iareg

iareg0:	add.l		%d0,EXC_DREGS+0x8(%a6)	# a0 (saved in stack frame)
	rts
iareg1:	add.l		%d0,EXC_DREGS+0xc(%a6)	# a1 (saved in stack frame)
	rts
iareg2:	add.l		%d0,%a2
	rts
iareg3:	add.l		%d0,%a3
	rts
iareg4:	add.l		%d0,%a4
	rts
iareg5:	add.l		%d0,%a5
	rts
iareg6:	add.l		%d0,(%a6)		# a6 (saved at frame base)
	rts
iareg7:	mov.b		&mia7_flg,SPCOND_FLG(%a6) # flag a7 update for undo
	cmpi.b		%d0,&0x1		# byte increment?
	beq.b		iareg7b			# yes; bump by 2 instead
	add.l		%d0,EXC_A7(%a6)
	rts
iareg7b:
	addq.l		&0x2,EXC_A7(%a6)	# keep a7 word-aligned
	rts

#########################################################################
# XDEF ****************************************************************	#
#	dec_areg(): decrement an address register by the value in d0	#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	d0 = amount to decrement by					#
#	d1 = index of address register to decrement			#
#									#
# OUTPUT **************************************************************	#
#	(address register is updated)					#
#									#
# ALGORITHM ***********************************************************	#
#	Typically used for an instruction w/ a pre-decrement <ea>,	#
# this routine subtracts the decrement value in d0 from the address	#
# register								#
# specified by d1. A0/A1/A6/A7 reside on the stack. The rest reside	#
# in their original places.						#
#	For a7, if the decrement amount is one, then we have to	#
# decrement by two. For any a7 update, set the mda7_flag so that if	#
# an access error exception occurs later in emulation, this address	#
# register update can be undone.					#
#									#
#########################################################################

	global		dec_areg
dec_areg:
	mov.w		(tbl_dareg.b,%pc,%d1.w*2),%d1	# fetch jump offset for reg index
	jmp		(tbl_dareg.b,%pc,%d1.w*1)	# dispatch

tbl_dareg:
	short		dareg0 - tbl_dareg
	short		dareg1 - tbl_dareg
	short		dareg2 - tbl_dareg
	short		dareg3 - tbl_dareg
	short		dareg4 - tbl_dareg
	short		dareg5 - tbl_dareg
	short		dareg6 - tbl_dareg
	short		dareg7 - tbl_dareg

# a0/a1 were saved to the exception frame; update the saved images.
dareg0:	sub.l		%d0,EXC_DREGS+0x8(%a6)
	rts
dareg1:	sub.l		%d0,EXC_DREGS+0xc(%a6)
	rts
dareg2:	sub.l		%d0,%a2
	rts
dareg3:	sub.l		%d0,%a3
	rts
dareg4:	sub.l		%d0,%a4
	rts
dareg5:	sub.l		%d0,%a5
	rts
# a6 is the frame pointer; its saved image is at offset 0 off the frame.
dareg6:	sub.l		%d0,(%a6)
	rts
# a7: record the update (mda7_flg) so it can be undone if an access
# error occurs later; a byte decrement of the stack pointer is rounded
# up to two to keep a7 word-aligned.
dareg7:	mov.b		&mda7_flg,SPCOND_FLG(%a6)
	cmpi.b		%d0,&0x1		# decrement amount of one?
	beq.b		dareg7b			# yes; drop by two instead
	sub.l		%d0,EXC_A7(%a6)
	rts
dareg7b:
	subq.l		&0x2,EXC_A7(%a6)
	rts

##############################################################################

#########################################################################
# XDEF **************************************************************** #
#	load_fpn1(): load FP register value into FP_SRC(a6).
#									#
# XREF **************************************************************** #
#	None								#
#									#
# INPUT *************************************************************** #
#	d0 = index of FP register to load				#
#									#
# OUTPUT ************************************************************** #
#	FP_SRC(a6) = value loaded from FP register file			#
#	a0 = pointer to FP_SRC(a6)					#
#									#
# ALGORITHM *********************************************************** #
#	Using the index in d0, load FP_SRC(a6) with a number from the	#
# FP register file. fp0/fp1 were saved to the exception frame, so	#
# they are copied from EXC_FP0/EXC_FP1; fp2-fp7 are still live and	#
# are stored via fmovm (bit-image move; raises no FP exception).	#
#									#
#########################################################################

	global		load_fpn1
load_fpn1:
	mov.w		(tbl_load_fpn1.b,%pc,%d0.w*2), %d0	# fetch jump offset
	jmp		(tbl_load_fpn1.b,%pc,%d0.w*1)		# dispatch

tbl_load_fpn1:
	short		load_fpn1_0 - tbl_load_fpn1
	short		load_fpn1_1 - tbl_load_fpn1
	short		load_fpn1_2 - tbl_load_fpn1
	short		load_fpn1_3 - tbl_load_fpn1
	short		load_fpn1_4 - tbl_load_fpn1
	short		load_fpn1_5 - tbl_load_fpn1
	short		load_fpn1_6 - tbl_load_fpn1
	short		load_fpn1_7 - tbl_load_fpn1

load_fpn1_0:
	mov.l		0+EXC_FP0(%a6), 0+FP_SRC(%a6)	# copy saved fp0 image
	mov.l		4+EXC_FP0(%a6), 4+FP_SRC(%a6)
	mov.l		8+EXC_FP0(%a6), 8+FP_SRC(%a6)
	lea		FP_SRC(%a6), %a0
	rts
load_fpn1_1:
	mov.l		0+EXC_FP1(%a6), 0+FP_SRC(%a6)	# copy saved fp1 image
	mov.l		4+EXC_FP1(%a6), 4+FP_SRC(%a6)
	mov.l		8+EXC_FP1(%a6), 8+FP_SRC(%a6)
	lea		FP_SRC(%a6), %a0
	rts
load_fpn1_2:
	fmovm.x		&0x20, FP_SRC(%a6)		# store fp2
	lea		FP_SRC(%a6), %a0
	rts
load_fpn1_3:
	fmovm.x		&0x10, FP_SRC(%a6)		# store fp3
	lea		FP_SRC(%a6), %a0
	rts
load_fpn1_4:
	fmovm.x		&0x08, FP_SRC(%a6)		# store fp4
	lea		FP_SRC(%a6), %a0
	rts
load_fpn1_5:
	fmovm.x		&0x04, FP_SRC(%a6)		# store fp5
	lea		FP_SRC(%a6), %a0
	rts
load_fpn1_6:
	fmovm.x		&0x02, FP_SRC(%a6)		# store fp6
	lea		FP_SRC(%a6), %a0
	rts
load_fpn1_7:
	fmovm.x		&0x01, FP_SRC(%a6)		# store fp7
	lea		FP_SRC(%a6), %a0
	rts

#############################################################################

#########################################################################
# XDEF **************************************************************** #
#	load_fpn2(): load FP register value into FP_DST(a6).		#
#									#
# XREF **************************************************************** #
#	None								#
#									#
# INPUT *************************************************************** #
#	d0 = index of FP register to load				#
#									#
# OUTPUT ************************************************************** #
#	FP_DST(a6) = value loaded from FP register file			#
#	a0 = pointer to FP_DST(a6)					#
#									#
# ALGORITHM *********************************************************** #
#	Using the index in d0, load FP_DST(a6) with a number from the	#
# FP register file. fp0/fp1 were saved to the exception frame, so	#
# they are copied from EXC_FP0/EXC_FP1; fp2-fp7 are still live and	#
# are stored via fmovm (bit-image move; raises no FP exception).	#
#									#
#########################################################################

	global		load_fpn2
load_fpn2:
	mov.w		(tbl_load_fpn2.b,%pc,%d0.w*2), %d0	# fetch jump offset
	jmp		(tbl_load_fpn2.b,%pc,%d0.w*1)		# dispatch

tbl_load_fpn2:
	short		load_fpn2_0 - tbl_load_fpn2
	short		load_fpn2_1 - tbl_load_fpn2
	short		load_fpn2_2 - tbl_load_fpn2
	short		load_fpn2_3 - tbl_load_fpn2
	short		load_fpn2_4 - tbl_load_fpn2
	short		load_fpn2_5 - tbl_load_fpn2
	short		load_fpn2_6 - tbl_load_fpn2
	short		load_fpn2_7 - tbl_load_fpn2

load_fpn2_0:
	mov.l		0+EXC_FP0(%a6), 0+FP_DST(%a6)	# copy saved fp0 image
	mov.l		4+EXC_FP0(%a6), 4+FP_DST(%a6)
	mov.l		8+EXC_FP0(%a6), 8+FP_DST(%a6)
	lea		FP_DST(%a6), %a0
	rts
load_fpn2_1:
	mov.l		0+EXC_FP1(%a6), 0+FP_DST(%a6)	# copy saved fp1 image
	mov.l		4+EXC_FP1(%a6), 4+FP_DST(%a6)
	mov.l		8+EXC_FP1(%a6), 8+FP_DST(%a6)
	lea		FP_DST(%a6), %a0
	rts
load_fpn2_2:
	fmovm.x		&0x20, FP_DST(%a6)		# store fp2
	lea		FP_DST(%a6), %a0
	rts
load_fpn2_3:
	fmovm.x		&0x10, FP_DST(%a6)		# store fp3
	lea		FP_DST(%a6), %a0
	rts
load_fpn2_4:
	fmovm.x		&0x08, FP_DST(%a6)		# store fp4
	lea		FP_DST(%a6), %a0
	rts
load_fpn2_5:
	fmovm.x		&0x04, FP_DST(%a6)		# store fp5
	lea		FP_DST(%a6), %a0
	rts
load_fpn2_6:
	fmovm.x		&0x02, FP_DST(%a6)		# store fp6
	lea		FP_DST(%a6), %a0
	rts
load_fpn2_7:
	fmovm.x		&0x01, FP_DST(%a6)		# store fp7
	lea		FP_DST(%a6), %a0
	rts

#############################################################################

#########################################################################
# XDEF **************************************************************** #
#	store_fpreg(): store an fp value to the fpreg designated d0.	#
#									#
# XREF **************************************************************** #
#	None								#
#									#
# INPUT *************************************************************** #
#	fp0 = extended precision value to store				#
#	d0 = index of floating-point register				#
#									#
# OUTPUT ************************************************************** #
#	None								#
#									#
# ALGORITHM *********************************************************** #
#	Store the value in fp0 to the FP register designated by the	#
# value in d0. The FP number can be DENORM or SNAN so we have to be	#
# careful that we don't take an exception here.
#									#
#########################################################################

	global		store_fpreg
store_fpreg:
	mov.w		(tbl_store_fpreg.b,%pc,%d0.w*2), %d0	# fetch jump offset
	jmp		(tbl_store_fpreg.b,%pc,%d0.w*1)		# dispatch

tbl_store_fpreg:
	short		store_fpreg_0 - tbl_store_fpreg
	short		store_fpreg_1 - tbl_store_fpreg
	short		store_fpreg_2 - tbl_store_fpreg
	short		store_fpreg_3 - tbl_store_fpreg
	short		store_fpreg_4 - tbl_store_fpreg
	short		store_fpreg_5 - tbl_store_fpreg
	short		store_fpreg_6 - tbl_store_fpreg
	short		store_fpreg_7 - tbl_store_fpreg

# fp0/fp1 are saved in the exception frame; update the saved images.
store_fpreg_0:
	fmovm.x		&0x80, EXC_FP0(%a6)
	rts
store_fpreg_1:
	fmovm.x		&0x80, EXC_FP1(%a6)
	rts
# fp2-fp7 are live: bounce fp0 through the stack using fmovm, which
# moves the bit image without arithmetic so a DENORM or SNAN cannot
# raise an exception here.
store_fpreg_2:
	fmovm.x		&0x01, -(%sp)
	fmovm.x		(%sp)+, &0x20
	rts
store_fpreg_3:
	fmovm.x		&0x01, -(%sp)
	fmovm.x		(%sp)+, &0x10
	rts
store_fpreg_4:
	fmovm.x		&0x01, -(%sp)
	fmovm.x		(%sp)+, &0x08
	rts
store_fpreg_5:
	fmovm.x		&0x01, -(%sp)
	fmovm.x		(%sp)+, &0x04
	rts
store_fpreg_6:
	fmovm.x		&0x01, -(%sp)
	fmovm.x		(%sp)+, &0x02
	rts
store_fpreg_7:
	fmovm.x		&0x01, -(%sp)
	fmovm.x		(%sp)+, &0x01
	rts

#########################################################################
# XDEF **************************************************************** #
#	get_packed(): fetch a packed operand from memory and then	#
#		      convert it to a floating-point binary number.
#									#
# XREF **************************************************************** #
#	_dcalc_ea() - calculate the correct <ea>			#
#	_dmem_read() - fetch the packed operand from memory		#
#	facc_in_x() - the fetch failed so jump to special exit code	#
#	decbin() - convert packed to binary extended precision		#
#									#
# INPUT *************************************************************** #
#	None								#
#									#
# OUTPUT ************************************************************** #
#	If no failure on _dmem_read():					#
#		FP_SRC(a6) = packed operand now as a binary FP number	#
#									#
# ALGORITHM *********************************************************** #
#	Get the correct <ea> which is the value on the exception stack	#
# frame w/ maybe a correction factor if the <ea> is -(an) or (an)+.	#
# Then, fetch the operand from memory. If the fetch fails, exit	#
# through facc_in_x().							#
#	If the packed operand is a ZERO,NAN, or INF, convert it to	#
# its binary representation here. Else, call decbin() which will	#
# convert the packed value to an extended precision binary value.	#
#									#
#########################################################################

# the stacked <ea> for packed is correct except for -(An).
# the base reg must be updated for both -(An) and (An)+.
	global		get_packed
get_packed:
	mov.l		&0xc,%d0		# packed is 12 bytes
	bsr.l		_dcalc_ea		# fetch <ea>; correct An

	lea		FP_SRC(%a6),%a1		# pass: ptr to super dst
	mov.l		&0xc,%d0		# pass: 12 bytes
	bsr.l		_dmem_read		# read packed operand

	tst.l		%d1			# did dfetch fail?
	bne.l		facc_in_x		# yes

# The packed operand is an INF or a NAN if the exponent field is all ones.
	bfextu		FP_SRC(%a6){&1:&15},%d0	# get exp (sans sign bit)
	cmpi.w		%d0,&0x7fff		# INF or NAN?
	bne.b		gp_try_zero		# no
	rts					# operand is an INF or NAN; its packed
						# bit pattern is already valid binary

# The packed operand is a zero if the mantissa is all zero, else it's
# a normal packed op.
gp_try_zero:
	mov.b		3+FP_SRC(%a6),%d0	# get byte 4 (holds the integer digit)
	andi.b		&0x0f,%d0		# clear all but last nybble
	bne.b		gp_not_spec		# not a zero
	tst.l		FP_SRC_HI(%a6)		# is lw 2 zero?
	bne.b		gp_not_spec		# not a zero
	tst.l		FP_SRC_LO(%a6)		# is lw 3 zero?
	bne.b		gp_not_spec		# not a zero
	rts					# operand is a ZERO
gp_not_spec:
	lea		FP_SRC(%a6),%a0		# pass: ptr to packed op
	bsr.l		decbin			# convert to extended
	fmovm.x		&0x80,FP_SRC(%a6)	# make this the srcop
	rts

#########################################################################
# decbin(): Converts normalized packed bcd value pointed to by register #
#	    a0 to extended-precision value in fp0.			#
#									#
# INPUT *************************************************************** #
#	a0 = pointer to normalized packed bcd value			#
#									#
# OUTPUT ************************************************************** #
#	fp0 = exact fp representation of the packed bcd value.		#
#									#
# ALGORITHM *********************************************************** #
#	Expected is a normal bcd (i.e. non-exceptional; all inf, zero,	#
#	and NaN operands are dispatched without entering this routine)	#
#	value in 68881/882 format at location (a0).			#
#									#
#	A1. Convert the bcd exponent to binary by successive adds and	#
#	muls. Set the sign according to SE. Subtract 16 to compensate	#
#	for the mantissa which is to be interpreted as 17 integer	#
#	digits, rather than 1 integer and 16 fraction digits.		#
#	Note: this operation can never overflow.			#
#									#
#	A2. Convert the bcd mantissa to binary by successive		#
#	adds and muls in FP0. Set the sign according to SM.
#	The mantissa digits will be converted with the decimal point	#
#	assumed following the least-significant digit.			#
#	Note: this operation can never overflow.			#
#									#
#	A3. Count the number of leading/trailing zeros in the		#
#	bcd string. If SE is positive, count the leading zeros;		#
#	if negative, count the trailing zeros. Set the adjusted	#
#	exponent equal to the exponent from A1 and the zero count	#
#	added if SM = 1 and subtracted if SM = 0. Scale the		#
#	mantissa the equivalent of forcing in the bcd value:		#
#									#
#	SM = 0	a non-zero digit in the integer position		#
#	SM = 1	a non-zero digit in Mant0, lsd of the fraction		#
#									#
#	this will ensure that any value, regardless of its		#
#	representation (ex. 0.1E2, 1E1, 10E0, 100E-1), is converted	#
#	consistently.							#
#									#
#	A4. Calculate the factor 10^exp in FP1 using a table of	#
#	10^(2^n) values. To reduce the error in forming factors	#
#	greater than 10^27, a directed rounding scheme is used with	#
#	tables rounded to RN, RM, and RP, according to the table	#
#	in the comments of the pwrten section.				#
#									#
#	A5. Form the final binary number by scaling the mantissa by	#
#	the exponent factor. This is done by multiplying the		#
#	mantissa in FP0 by the factor in FP1 if the adjusted		#
#	exponent sign is positive, and dividing FP0 by FP1 if		#
#	it is negative.							#
#									#
#	Clean up and return. Check if the final mul or div was		#
#	inexact. If so, set INEX1 in USER_FPSR.			#
#									#
#########################################################################

#
# PTENRN, PTENRM, and PTENRP are arrays of powers of 10 rounded
# to nearest, minus, and plus, respectively. The tables include
# 10**{1,2,4,8,16,32,64,128,256,512,1024,2048,4096}. No rounding
# is required until the power is greater than 27, however, all
# tables include the first 5 for ease of indexing.
#
# RTABLE maps the index {FPCR[6],FPCR[5],SM,SE} to the rounding mode
# used when building the power-of-ten factor (see pwrten below).
RTABLE:
	byte		0,0,0,0
	byte		2,3,2,3
	byte		2,3,3,2
	byte		3,2,2,3

	set		FNIBS,7		# digits per mantissa lword, minus 1 (dbf count)
	set		FSTRT,0		# bit offset of first mantissa digit in a lword

	set		ESTRT,4		# bit offset of first exponent digit
	set		EDIGITS,2	# exponent digits, minus 1 (dbf count)

	global		decbin
decbin:
	mov.l		0x0(%a0),FP_SCR0_EX(%a6) # make a copy of input
	mov.l		0x4(%a0),FP_SCR0_HI(%a6) # so we don't alter it
	mov.l		0x8(%a0),FP_SCR0_LO(%a6)

	lea		FP_SCR0(%a6),%a0	# a0 now points at the working copy

	movm.l		&0x3c00,-(%sp)		# save d2-d5
	fmovm.x		&0x1,-(%sp)		# save fp1
#
# Calculate exponent:
#  1. Copy bcd value in memory for use as a working copy.
#  2. Calculate absolute value of exponent in d1 by mul and add.
#  3. Correct for exponent sign.
#  4. Subtract 16 to compensate for interpreting the mant as all integer digits.
#     (i.e., all digits assumed left of the decimal point.)
#
# Register usage:
#
#  calc_e:
#	(*) d0: temp digit storage
#	(*) d1: accumulator for binary exponent
#	(*) d2: digit count
#	(*) d3: offset pointer
#	( ) d4: first word of bcd
#	( ) a0: pointer to working bcd value
#	( ) a6: pointer to original bcd value
#	(*) FP_SCR1: working copy of original bcd value
#	(*) L_SCR1: copy of original exponent word
#
calc_e:
	mov.l		&EDIGITS,%d2		# # of nibbles (digits) in fraction part
	mov.l		&ESTRT,%d3		# counter to pick up digits
	mov.l		(%a0),%d4		# get first word of bcd
	clr.l		%d1			# zero d1 for accumulator
e_gd:
	mulu.l		&0xa,%d1		# mul partial product by one digit place
	bfextu		%d4{%d3:&4},%d0		# get the digit and zero extend into d0
	add.l		%d0,%d1			# d1 = d1 + d0
	addq.b		&4,%d3			# advance d3 to the next digit
	dbf.w		%d2,e_gd		# loop until all 3 exponent digits done
	btst		&30,%d4			# get SE
	beq.b		e_pos			# don't negate if pos
	neg.l		%d1			# negate before subtracting
e_pos:
	sub.l		&16,%d1			# sub to compensate for shift of mant
	bge.b		e_save			# if still pos, do not neg
	neg.l		%d1			# now negative, make pos and set SE
	or.l		&0x40000000,%d4		# set SE in d4,
	or.l		&0x40000000,(%a0)	# and in working bcd
e_save:
	mov.l		%d1,-(%sp)		# save exp on stack (popped in end_dec)
#
# Calculate mantissa:
#  1. Calculate absolute value of mantissa in fp0 by mul and add.
#  2. Correct for mantissa sign.
#     (i.e., all digits assumed left of the decimal point.)
#
# Register usage:
#
#  calc_m:
#	(*) d0: temp digit storage
#	(*) d1: lword counter
#	(*) d2: digit count
#	(*) d3: offset pointer
#	( ) d4: words 2 and 3 of bcd
#	( ) a0: pointer to working bcd value
#	( ) a6: pointer to original bcd value
#	(*) fp0: mantissa accumulator
#	( ) FP_SCR1: working copy of original bcd value
#	( ) L_SCR1: copy of original exponent word
#
calc_m:
	mov.l		&1,%d1			# word counter, init to 1
	fmov.s		&0x00000000,%fp0	# accumulator
#
# Since the packed number has a long word between the first & second parts,
# get the integer digit then skip down & get the rest of the
# mantissa. We will unroll the loop once.
#
	bfextu		(%a0){&28:&4},%d0	# integer part is ls digit in long word
	fadd.b		%d0,%fp0		# add digit to sum in fp0
#
# Get the rest of the mantissa.
#
loadlw:
	mov.l		(%a0,%d1.L*4),%d4	# load mantissa longword into d4
	mov.l		&FSTRT,%d3		# counter to pick up digits
	mov.l		&FNIBS,%d2		# reset number of digits per a0 ptr
md2b:
	fmul.s		&0x41200000,%fp0	# fp0 = fp0 * 10
	bfextu		%d4{%d3:&4},%d0		# get the digit and zero extend
	fadd.b		%d0,%fp0		# fp0 = fp0 + digit
#
# If all the digits (8) in that long word have been converted (d2=0),
# then inc d1 (=2) to point to the next long word and reset d3 to 0
# to initialize the digit offset, and set d2 to 7 for the digit count;
# else continue with this long word.
#
	addq.b		&4,%d3			# advance d3 to the next digit
	dbf.w		%d2,md2b		# check for last digit in this lw
nextlw:
	addq.l		&1,%d1			# inc lw pointer in mantissa
	cmp.l		%d1,&2			# test for last lw
	ble.b		loadlw			# if not, get last one
#
# Check the sign of the mant and make the value in fp0 the same sign.
#
m_sign:
	btst		&31,(%a0)		# test sign of the mantissa
	beq.b		ap_st_z			# if clear, go to append/strip zeros
	fneg.x		%fp0			# if set, negate fp0
#
# Append/strip zeros:
#
# For adjusted exponents which have an absolute value greater than 27*,
# this routine calculates the amount needed to normalize the mantissa
# for the adjusted exponent. That number is subtracted from the exp
# if the exp was positive, and added if it was negative. The purpose
# of this is to reduce the value of the exponent and the possibility
# of error in calculation of pwrten.
#
#  1. Branch on the sign of the adjusted exponent.
#  2p.(positive exp)
#   2. Check M16 and the digits in lwords 2 and 3 in descending order.
#   3. Add one for each zero encountered until a non-zero digit.
#   4. Subtract the count from the exp.
#   5. Check if the exp has crossed zero in #3 above; make the exp abs
#      and set SE.
#   6. Multiply the mantissa by 10**count.
#  2n.(negative exp)
#   2. Check the digits in lwords 3 and 2 in descending order.
#   3. Add one for each zero encountered until a non-zero digit.
#   4. Add the count to the exp.
#   5. Check if the exp has crossed zero in #3 above; clear SE.
#   6. Divide the mantissa by 10**count.
#
# *Why 27? If the adjusted exponent is within -28 < expA < 28, then
# any adjustment due to append/strip zeros will drive the resultant
# exponent towards zero. Since all pwrten constants with a power
# of 27 or less are exact, there is no need to use this routine to
# attempt to lessen the resultant exponent.
#
# Register usage:
#
#  ap_st_z:
#	(*) d0: temp digit storage
#	(*) d1: zero count
#	(*) d2: digit count
#	(*) d3: offset pointer
#	( ) d4: first word of bcd
#	(*) d5: lword counter
#	( ) a0: pointer to working bcd value
#	( ) FP_SCR1: working copy of original bcd value
#	( ) L_SCR1: copy of original exponent word
#
# First check the absolute value of the exponent to see if this
# routine is necessary. If so, then check the sign of the exponent
# and do append (+) or strip (-) zeros accordingly.
# This section handles a positive adjusted exponent.
#
ap_st_z:
	mov.l		(%sp),%d1		# load expA for range test
	cmp.l		%d1,&27			# test is with 27
	ble.w		pwrten			# if abs(expA) <28, skip ap/st zeros
	btst		&30,(%a0)		# check sign of exp
	bne.b		ap_st_n			# if neg, go to neg side
	clr.l		%d1			# zero count reg
	mov.l		(%a0),%d4		# load lword 1 to d4
	bfextu		%d4{&28:&4},%d0		# get M16 in d0
	bne.b		ap_p_fx			# if M16 is non-zero, go fix exp
	addq.l		&1,%d1			# inc zero count
	mov.l		&1,%d5			# init lword counter
	mov.l		(%a0,%d5.L*4),%d4	# get lword 2 to d4
	bne.b		ap_p_cl			# if lw 2 is non-zero, skip ahead
	addq.l		&8,%d1			# lw 2 all zero: inc count by 8
	addq.l		&1,%d5			# inc lword counter
	mov.l		(%a0,%d5.L*4),%d4	# get lword 3 to d4
ap_p_cl:
	clr.l		%d3			# init offset reg
	mov.l		&7,%d2			# init digit counter
ap_p_gd:
	bfextu		%d4{%d3:&4},%d0		# get digit
	bne.b		ap_p_fx			# if non-zero, go to fix exp
	addq.l		&4,%d3			# point to next digit
	addq.l		&1,%d1			# inc digit counter
	dbf.w		%d2,ap_p_gd		# get next digit
ap_p_fx:
	mov.l		%d1,%d0			# copy counter to d0
	mov.l		(%sp),%d1		# get adjusted exp from memory
	sub.l		%d0,%d1			# subtract count from exp
	bge.b		ap_p_fm			# if still pos, go to pwrten
	neg.l		%d1			# now its neg; get abs
	mov.l		(%a0),%d4		# load lword 1 to d4
	or.l		&0x40000000,%d4		# and set SE in d4
	or.l		&0x40000000,(%a0)	# and in memory
#
# Calculate the mantissa multiplier to compensate for the stripping of
# zeros from the mantissa.
#
ap_p_fm:
	lea.l		PTENRN(%pc),%a1		# get address of power-of-ten table
	clr.l		%d3			# init table index
	fmov.s		&0x3f800000,%fp1	# init fp1 to 1
	mov.l		&3,%d2			# init d2 to count bits in counter
ap_p_el:
	asr.l		&1,%d0			# shift lsb into carry
	bcc.b		ap_p_en			# bit was 0: skip this table entry
	fmul.x		(%a1,%d3),%fp1		# bit was 1: mul by 10**(d3_bit_no)
ap_p_en:
	add.l		&12,%d3			# inc d3 to next table entry
	tst.l		%d0			# check if d0 is zero
	bne.b		ap_p_el			# if not, get next bit
	fmul.x		%fp1,%fp0		# mul mantissa by 10**(no_bits_shifted)
	bra.b		pwrten			# go calc pwrten
#
# This section handles a negative adjusted exponent.
#
ap_st_n:
	clr.l		%d1			# clr counter
	mov.l		&2,%d5			# set up d5 to point to lword 3
	mov.l		(%a0,%d5.L*4),%d4	# get lword 3
	bne.b		ap_n_cl			# if not zero, check digits
	sub.l		&1,%d5			# dec d5 to point to lword 2
	addq.l		&8,%d1			# inc counter by 8
	mov.l		(%a0,%d5.L*4),%d4	# get lword 2
ap_n_cl:
	mov.l		&28,%d3			# point to last digit
	mov.l		&7,%d2			# init digit counter
ap_n_gd:
	bfextu		%d4{%d3:&4},%d0		# get digit
	bne.b		ap_n_fx			# if non-zero, go to exp fix
	subq.l		&4,%d3			# point to previous digit
	addq.l		&1,%d1			# inc digit counter
	dbf.w		%d2,ap_n_gd		# get next digit
ap_n_fx:
	mov.l		%d1,%d0			# copy counter to d0
	mov.l		(%sp),%d1		# get adjusted exp from memory
	sub.l		%d0,%d1			# subtract count from exp
	bgt.b		ap_n_fm			# if still pos, go fix mantissa
	neg.l		%d1			# take abs of exp and clr SE
	mov.l		(%a0),%d4		# load lword 1 to d4
	and.l		&0xbfffffff,%d4		# and clr SE in d4
	and.l		&0xbfffffff,(%a0)	# and in memory
#
# Calculate the mantissa multiplier to compensate for the appending of
# zeros to the mantissa.
#
ap_n_fm:
	lea.l		PTENRN(%pc),%a1		# get address of power-of-ten table
	clr.l		%d3			# init table index
	fmov.s		&0x3f800000,%fp1	# init fp1 to 1
	mov.l		&3,%d2			# init d2 to count bits in counter
ap_n_el:
	asr.l		&1,%d0			# shift lsb into carry
	bcc.b		ap_n_en			# bit was 0: skip this table entry
	fmul.x		(%a1,%d3),%fp1		# bit was 1: mul by 10**(d3_bit_no)
ap_n_en:
	add.l		&12,%d3			# inc d3 to next table entry
	tst.l		%d0			# check if d0 is zero
	bne.b		ap_n_el			# if not, get next bit
	fdiv.x		%fp1,%fp0		# div mantissa by 10**(no_bits_shifted)
#
# Calculate power-of-ten factor from adjusted and shifted exponent.
#
# Register usage:
#
#  pwrten:
#	(*) d0: temp
#	( ) d1: exponent
#	(*) d2: {FPCR[6:5],SM,SE} as index in RTABLE; temp
#	(*) d3: FPCR work copy
#	( ) d4: first word of bcd
#	(*) a1: RTABLE pointer
#  calc_p:
#	(*) d0: temp
#	( ) d1: exponent
#	(*) d3: PWRTxx table index
#	( ) a0: pointer to working copy of bcd
#	(*) a1: PWRTxx pointer
#	(*) fp1: power-of-ten accumulator
#
# Pwrten calculates the exponent factor in the selected rounding mode
# according to the following table:
#
#	Sign of Mant  Sign of Exp  Rounding Mode  PWRTEN Rounding Mode
#
#	ANY	      ANY	   RN		  RN
#
#	+	      +		   RP		  RP
#	-	      +		   RP		  RM
#	+	      -		   RP		  RM
#	-	      -		   RP		  RP
#
#	+	      +		   RM		  RM
#	-	      +		   RM		  RP
#	+	      -		   RM		  RP
#	-	      -		   RM		  RM
#
#	+	      +		   RZ		  RM
#	-	      +		   RZ		  RM
#	+	      -		   RZ		  RP
#	-	      -		   RZ		  RP
#
pwrten:
	mov.l		USER_FPCR(%a6),%d3	# get user's FPCR
	bfextu		%d3{&26:&2},%d2		# isolate rounding mode bits
	mov.l		(%a0),%d4		# reload 1st bcd word to d4
	asl.l		&2,%d2			# format d2 to be
	bfextu		%d4{&0:&2},%d0		# {FPCR[6],FPCR[5],SM,SE}
	add.l		%d0,%d2			# in d2 as index into RTABLE
	lea.l		RTABLE(%pc),%a1		# load rtable base
	mov.b		(%a1,%d2),%d0		# load new rounding bits from table
	clr.l		%d3			# clear d3 to force no exc and extended
	bfins		%d0,%d3{&26:&2}		# stuff new rounding bits in FPCR
	fmov.l		%d3,%fpcr		# write new FPCR
	asr.l		&1,%d0			# select correct PTENxx table
	bcc.b		not_rp			# to a1
	lea.l		PTENRP(%pc),%a1		# it is RP
	bra.b		calc_p			# go to init section
not_rp:
	asr.l		&1,%d0			# keep checking
	bcc.b		not_rm
	lea.l		PTENRM(%pc),%a1		# it is RM
	bra.b		calc_p			# go to init section
not_rm:
	lea.l		PTENRN(%pc),%a1		# it is RN
calc_p:
	mov.l		%d1,%d0			# copy exp to d0;use d0
	bpl.b		no_neg			# if exp is negative,
	neg.l		%d0			# invert it
	or.l		&0x40000000,(%a0)	# and set SE bit
no_neg:
	clr.l		%d3			# table index
	fmov.s		&0x3f800000,%fp1	# init fp1 to 1
e_loop:
	asr.l		&1,%d0			# shift next bit into carry
	bcc.b		e_next			# if zero, skip the mul
	fmul.x		(%a1,%d3),%fp1		# mul by 10**(d3_bit_no)
e_next:
	add.l		&12,%d3			# inc d3 to next table entry
	tst.l		%d0			# check if d0 is zero
	bne.b		e_loop			# not zero, continue shifting
#
# Check the sign of the adjusted exp and make the value in fp0 the
# same sign. If the exp was pos then multiply fp1*fp0;
# else divide fp0/fp1.
#
# Register Usage:
#  norm:
#	( ) a0: pointer to working bcd value
#	(*) fp0: mantissa accumulator
#	( ) fp1: scaling factor - 10**(abs(exp))
#
pnorm:
	btst		&30,(%a0)		# test the sign of the exponent
	beq.b		mul			# if clear, go to multiply
div:
	fdiv.x		%fp1,%fp0		# exp is negative, so divide mant by exp
	bra.b		end_dec
mul:
	fmul.x		%fp1,%fp0		# exp is positive, so multiply by exp
#
# Clean up and return with result in fp0.
#
# If the final mul/div in decbin incurred an inex exception,
# it will be inex2, but will be reported as inex1 by get_op.
#
end_dec:
	fmov.l		%fpsr,%d0		# get status register
	bclr		&inex2_bit+8,%d0	# test for inex2 and clear it
	beq.b		no_exc			# skip this if no exc
	ori.w		&inx1a_mask,2+USER_FPSR(%a6) # set INEX1/AINEX
no_exc:
	add.l		&0x4,%sp		# pop the exponent saved in e_save
	fmovm.x		(%sp)+,&0x40		# restore fp1
	movm.l		(%sp)+,&0x3c		# restore d2-d5
	fmov.l		&0x0,%fpcr		# clear working FPCR
	fmov.l		&0x0,%fpsr		# and FPSR before returning
	rts

#########################################################################
# bindec(): Converts an input in extended precision format to bcd format#
#									#
# INPUT *************************************************************** #
#	a0 = pointer to the input extended precision value in memory.	#
#	     the input may be either normalized, unnormalized, or	#
#	     denormalized.						#
#	d0 = contains the k-factor sign-extended to 32-bits.		#
#									#
# OUTPUT ************************************************************** #
#	FP_SCR0(a6) = bcd format result on the stack.			#
#									#
# ALGORITHM *********************************************************** #
#									#
#	A1. Set RM and size ext; Set SIGMA = sign of input.		#
#	    The k-factor is saved for use in d7. Clear the		#
#	    BINDEC_FLG for separating normalized/denormalized		#
#	    input. If input is unnormalized or denormalized,		#
#	    normalize it.						#
#									#
#	A2. Set X = abs(input).					#
#									#
#	A3. Compute ILOG.						#
#	    ILOG is the log base 10 of the input value. It is		#
#	    approximated by adding e + 0.f when the original		#
#	    value is viewed as 2^^e * 1.f in extended precision.	#
#	    This value is stored in d6.					#
#									#
#	A4. Clr INEX bit.						#
#	    The operation in A3 above may have set INEX2.		#
#									#
#	A5. Set ICTR = 0;						#
#	    ICTR is a flag used in A13. It must be set before the	#
#	    loop entry A6.						#
#									#
#	A6. Calculate LEN.						#
#	    LEN is the number of digits to be displayed.
The # 13458# k-factor can dictate either the total number of digits, # 13459# if it is a positive number, or the number of digits # 13460# after the decimal point which are to be included as # 13461# significant. See the 68882 manual for examples. # 13462# If LEN is computed to be greater than 17, set OPERR in # 13463# USER_FPSR. LEN is stored in d4. # 13464# # 13465# A7. Calculate SCALE. # 13466# SCALE is equal to 10^ISCALE, where ISCALE is the number # 13467# of decimal places needed to insure LEN integer digits # 13468# in the output before conversion to bcd. LAMBDA is the # 13469# sign of ISCALE, used in A9. Fp1 contains # 13470# 10^^(abs(ISCALE)) using a rounding mode which is a # 13471# function of the original rounding mode and the signs # 13472# of ISCALE and X. A table is given in the code. # 13473# # 13474# A8. Clr INEX; Force RZ. # 13475# The operation in A3 above may have set INEX2. # 13476# RZ mode is forced for the scaling operation to insure # 13477# only one rounding error. The grs bits are collected in # 13478# the INEX flag for use in A10. # 13479# # 13480# A9. Scale X -> Y. # 13481# The mantissa is scaled to the desired number of # 13482# significant digits. The excess digits are collected # 13483# in INEX2. # 13484# # 13485# A10. Or in INEX. # 13486# If INEX is set, round error occurred. This is # 13487# compensated for by 'or-ing' in the INEX2 flag to # 13488# the lsb of Y. # 13489# # 13490# A11. Restore original FPCR; set size ext. # 13491# Perform FINT operation in the user's rounding mode. # 13492# Keep the size to extended. # 13493# # 13494# A12. Calculate YINT = FINT(Y) according to user's rounding # 13495# mode. The FPSP routine sintd0 is used. The output # 13496# is in fp0. # 13497# # 13498# A13. Check for LEN digits. # 13499# If the int operation results in more than LEN digits, # 13500# or less than LEN -1 digits, adjust ILOG and repeat from # 13501# A6. This test occurs only on the first pass. 
If the							#
#	result is exactly 10^LEN, decrement ILOG and divide	#
#	the mantissa by 10.					#
#								#
# A14. Convert the mantissa to bcd.				#
#	The binstr routine is used to convert the LEN digit	#
#	mantissa to bcd in memory. The input to binstr is	#
#	to be a fraction; i.e. (mantissa)/10^LEN and adjusted	#
#	such that the decimal point is to the left of bit 63.	#
#	The bcd digits are stored in the correct position in	#
#	the final string area in memory.			#
#								#
# A15. Convert the exponent to bcd.				#
#	As in A14 above, the exp is converted to bcd and the	#
#	digits are stored in the final string.			#
#	Test the length of the final exponent string. If the	#
#	length is 4, set operr.					#
#								#
# A16. Write sign bits to final string.				#
#								#
#########################################################################

# BINDEC_FLG: non-zero => original input was a DENORM (set in A1,
# tested in A3/A9/A13/A15). Aliased onto the EXC_TEMP scratch slot.
	set	BINDEC_FLG, EXC_TEMP		# DENORM flag

# Constants in extended precision.
# PLOG2 / PLOG2UP1 are log10(2) rounded down / up by one ulp; A3 picks
# one of the two depending on the sign of (e + 0.f) so the ILOG
# approximation errs toward zero.
PLOG2:
	long	0x3FFD0000,0x9A209A84,0xFBCFF798,0x00000000
PLOG2UP1:
	long	0x3FFD0000,0x9A209A84,0xFBCFF799,0x00000000

# Constants in single precision (padded to 16 bytes each):
# 1.0, 2.0, 10.0 and 4933.0 (largest packed-decimal exponent magnitude).
FONE:
	long	0x3F800000,0x00000000,0x00000000,0x00000000
FTWO:
	long	0x40000000,0x00000000,0x00000000,0x00000000
FTEN:
	long	0x41200000,0x00000000,0x00000000,0x00000000
F4933:
	long	0x459A2800,0x00000000,0x00000000,0x00000000

# RBDTBL: rounding-mode remap table used in A7. Indexed by
# {FPCR rmode, LAMBDA, sign(X)} packed into bits 3:0; each byte is the
# FPCR rounding-mode code (0=RN,2=RM,3=RP) to use while computing
# 10^|ISCALE| (see the table in the A7 header comment below).
RBDTBL:
	byte	0,0,0,0
	byte	3,3,2,2
	byte	3,2,2,3
	byte	2,3,3,2

# Implementation Notes:
#
# The registers are used as follows:
#
#	d0: scratch; LEN input to binstr
#	d1: scratch
#	d2: upper 32-bits of mantissa for binstr
#	d3: scratch;lower 32-bits of mantissa for binstr
#	d4: LEN
#	d5: LAMBDA/ICTR
#	d6: ILOG
#	d7: k-factor
#	a0: ptr for original operand/final result
#	a1: scratch pointer
#	a2: pointer to FP_X; abs(original value) in ext
#	fp0: scratch
#	fp1: scratch
#	fp2: scratch
#	F_SCR1:
#	F_SCR2:
#	L_SCR1:
#	L_SCR2:

# bindec(): extended-precision -> packed BCD conversion (steps A1-A16).
# In:  a0 = ptr to ext-prec input (NORM/UNNORM/DENORM), d0 = k-factor.
# Out: FP_SCR0(%a6) = packed BCD result.
	global	bindec
bindec:
	movm.l	&0x3f20,-(%sp)		# {%d2-%d7/%a2}
	fmovm.x	&0x7,-(%sp)		# {%fp0-%fp2}

# A1. Set RM and size ext. Set SIGMA = sign input;
#	The k-factor is saved for use in d7. Clear BINDEC_FLG for
#	separating normalized/denormalized input. If the input
#	is a denormalized number, set the BINDEC_FLG memory word
#	to signal denorm. If the input is unnormalized, normalize
#	the input and test for denormalized result.
#
	fmov.l	&rm_mode*0x10,%fpcr	# set RM and ext
	mov.l	(%a0),L_SCR2(%a6)	# save exponent for sign check
	mov.l	%d0,%d7			# move k-factor to d7

	clr.b	BINDEC_FLG(%a6)		# clr norm/denorm flag
	cmpi.b	STAG(%a6),&DENORM	# is input a DENORM?
	bne.w	A2_str			# no; input is a NORM

#
# Normalize the denorm: shift the 64-bit mantissa d1:d2 left until
# the msb (bit 63) is set, decrementing the exponent in d0 each step.
#
un_de_norm:
	mov.w	(%a0),%d0		# fetch sign/exponent word
	and.w	&0x7fff,%d0		# strip sign of normalized exp
	mov.l	4(%a0),%d1		# hi mantissa lword
	mov.l	8(%a0),%d2		# lo mantissa lword
norm_loop:
	sub.w	&1,%d0			# one less shift => exp - 1
	lsl.l	&1,%d2			# shift d1:d2 left one bit;
	roxl.l	&1,%d1			# carry from d2 rotates into d1
	tst.l	%d1
	bge.b	norm_loop		# loop until bit 63 (msb of d1) set
#
# Test if the normalized input is denormalized
#
	tst.w	%d0
	bgt.b	pos_exp			# if greater than zero, it is a norm
	st	BINDEC_FLG(%a6)		# set flag for denorm
pos_exp:
	and.w	&0x7fff,%d0		# strip sign of normalized exp
	mov.w	%d0,(%a0)		# write back normalized operand
	mov.l	%d1,4(%a0)
	mov.l	%d2,8(%a0)

# A2. Set X = abs(input).
#
A2_str:
	mov.l	(%a0),FP_SCR1(%a6)	# move input to work space
	mov.l	4(%a0),FP_SCR1+4(%a6)	# move input to work space
	mov.l	8(%a0),FP_SCR1+8(%a6)	# move input to work space
	and.l	&0x7fffffff,FP_SCR1(%a6)	# create abs(X)

# A3. Compute ILOG.
#	ILOG is the log base 10 of the input value. It is approx-
#	imated by adding e + 0.f when the original value is viewed
#	as 2^^e * 1.f in extended precision. This value is stored
#	in d6.
#
# Register usage:
#	Input/Output
#	d0: k-factor/exponent
#	d2: x/x
#	d3: x/x
#	d4: x/x
#	d5: x/x
#	d6: x/ILOG
#	d7: k-factor/Unchanged
#	a0: ptr for original operand/final result
#	a1: x/x
#	a2: x/x
#	fp0: x/float(ILOG)
#	fp1: x/x
#	fp2: x/x
#	F_SCR1:x/x
#	F_SCR2:Abs(X)/Abs(X) with $3fff exponent
#	L_SCR1:x/x
#	L_SCR2:first word of X packed/Unchanged

	tst.b	BINDEC_FLG(%a6)		# check for denorm
	beq.b	A3_cont			# if clr, continue with norm
	mov.l	&-4933,%d6		# force ILOG = -4933 (denorm case)
	bra.b	A4_str
A3_cont:
	mov.w	FP_SCR1(%a6),%d0	# move exp to d0
	mov.w	&0x3fff,FP_SCR1(%a6)	# replace exponent with 0x3fff
	fmov.x	FP_SCR1(%a6),%fp0	# now fp0 has 1.f
	sub.w	&0x3fff,%d0		# strip off bias
	fadd.w	%d0,%fp0		# add in exp => fp0 = e + 1.f
	fsub.s	FONE(%pc),%fp0		# subtract off 1.0 => e + 0.f
	fbge.w	pos_res			# if pos, branch
	fmul.x	PLOG2UP1(%pc),%fp0	# if neg, mul by LOG2UP1 (round up ulp)
	fmov.l	%fp0,%d6		# put ILOG in d6 as a lword
	bra.b	A4_str			# go move out ILOG
pos_res:
	fmul.x	PLOG2(%pc),%fp0		# if pos, mul by LOG2 (round down ulp)
	fmov.l	%fp0,%d6		# put ILOG in d6 as a lword


# A4. Clr INEX bit.
#	The operation in A3 above may have set INEX2.

A4_str:
	fmov.l	&0,%fpsr		# zero all of fpsr - nothing needed


# A5. Set ICTR = 0;
#	ICTR is a flag used in A13. It must be set before the
#	loop entry A6. The lower word of d5 is used for ICTR.

	clr.w	%d5			# clear ICTR

# A6. Calculate LEN.
#	LEN is the number of digits to be displayed. The k-factor
#	can dictate either the total number of digits, if it is
#	a positive number, or the number of digits after the
#	original decimal point which are to be included as
#	significant. See the 68882 manual for examples.
#	If LEN is computed to be greater than 17, set OPERR in
#	USER_FPSR. LEN is stored in d4.
#
# Register usage:
#	Input/Output
#	d0: exponent/Unchanged
#	d2: x/x/scratch
#	d3: x/x
#	d4: exc picture/LEN
#	d5: ICTR/Unchanged
#	d6: ILOG/Unchanged
#	d7: k-factor/Unchanged
#	a0: ptr for original operand/final result
#	a1: x/x
#	a2: x/x
#	fp0: float(ILOG)/Unchanged
#	fp1: x/x
#	fp2: x/x
#	F_SCR1:x/x
#	F_SCR2:Abs(X) with $3fff exponent/Unchanged
#	L_SCR1:x/x
#	L_SCR2:first word of X packed/Unchanged

A6_str:
	tst.l	%d7			# branch on sign of k
	ble.b	k_neg			# if k <= 0, LEN = ILOG + 1 - k
	mov.l	%d7,%d4			# if k > 0, LEN = k
	bra.b	len_ck			# skip to LEN check
k_neg:
	mov.l	%d6,%d4			# first load ILOG to d4
	sub.l	%d7,%d4			# subtract off k
	addq.l	&1,%d4			# add in the 1
len_ck:
	tst.l	%d4			# LEN check: branch on sign of LEN
	ble.b	LEN_ng			# if neg, set LEN = 1
	cmp.l	%d4,&17			# test if LEN > 17
	ble.b	A7_str			# if not, forget it
	mov.l	&17,%d4			# set max LEN = 17
	tst.l	%d7			# if k negative, never set OPERR
	ble.b	A7_str			# (only k > 0 requesting >17 digits errs)
	or.l	&opaop_mask,USER_FPSR(%a6)	# set OPERR & AIOP in USER_FPSR
	bra.b	A7_str			# finished here
LEN_ng:
	mov.l	&1,%d4			# min LEN is 1


# A7. Calculate SCALE.
#	SCALE is equal to 10^ISCALE, where ISCALE is the number
#	of decimal places needed to insure LEN integer digits
#	in the output before conversion to bcd. LAMBDA is the sign
#	of ISCALE, used in A9. Fp1 contains 10^^(abs(ISCALE)) using
#	the rounding mode as given in the following table (see
#	Coonen, p. 7.23 as ref.; however, the SCALE variable is
#	of opposite sign in bindec.sa from Coonen).
#
#	Initial					USE
#	FPCR[6:5]	LAMBDA	SIGN(X)		FPCR[6:5]
#	----------------------------------------------
#	RN	00	   0	   0		00/0	RN
#	RN	00	   0	   1		00/0	RN
#	RN	00	   1	   0		00/0	RN
#	RN	00	   1	   1		00/0	RN
#	RZ	01	   0	   0		11/3	RP
#	RZ	01	   0	   1		11/3	RP
#	RZ	01	   1	   0		10/2	RM
#	RZ	01	   1	   1		10/2	RM
#	RM	10	   0	   0		11/3	RP
#	RM	10	   0	   1		10/2	RM
#	RM	10	   1	   0		10/2	RM
#	RM	10	   1	   1		11/3	RP
#	RP	11	   0	   0		10/2	RM
#	RP	11	   0	   1		11/3	RP
#	RP	11	   1	   0		11/3	RP
#	RP	11	   1	   1		10/2	RM
#
# Register usage:
#	Input/Output
#	d0: exponent/scratch - final is 0
#	d2: x/0 or 24 for A9
#	d3: x/scratch - offset ptr into PTENRM array
#	d4: LEN/Unchanged
#	d5: 0/ICTR:LAMBDA
#	d6: ILOG/ILOG or k if ((k<=0)&(ILOG<k))
#	d7: k-factor/Unchanged
#	a0: ptr for original operand/final result
#	a1: x/ptr to PTENRM array
#	a2: x/x
#	fp0: float(ILOG)/Unchanged
#	fp1: x/10^ISCALE
#	fp2: x/x
#	F_SCR1:x/x
#	F_SCR2:Abs(X) with $3fff exponent/Unchanged
#	L_SCR1:x/x
#	L_SCR2:first word of X packed/Unchanged

A7_str:
	tst.l	%d7			# test sign of k
	bgt.b	k_pos			# if pos and > 0, skip this
	cmp.l	%d7,%d6			# test k - ILOG
	blt.b	k_pos			# if ILOG >= k, skip this
	mov.l	%d7,%d6			# if ((k<0) & (ILOG < k)) ILOG = k
k_pos:
	mov.l	%d6,%d0			# calc ILOG + 1 - LEN in d0
	addq.l	&1,%d0			# add the 1
	sub.l	%d4,%d0			# sub off LEN
	swap	%d5			# use upper word of d5 for LAMBDA
	clr.w	%d5			# set it zero initially
	clr.w	%d2			# set up d2 for very small case
	tst.l	%d0			# test sign of ISCALE
	bge.b	iscale			# if pos, skip next inst
	addq.w	&1,%d5			# if neg, set LAMBDA true
	cmp.l	%d0,&0xffffecd4		# test iscale <= -4908
	bgt.b	no_inf			# if false, skip rest
	add.l	&24,%d0			# add in 24 to iscale
	mov.l	&24,%d2			# put 24 in d2 for A9 (extra 10^24 mul)
no_inf:
	neg.l	%d0			# and take abs of ISCALE
iscale:
	fmov.s	FONE(%pc),%fp1		# init fp1 to 1
	bfextu	USER_FPCR(%a6){&26:&2},%d1	# get initial rmode bits
	lsl.w	&1,%d1			# put them in bits 2:1
	add.w	%d5,%d1			# add in LAMBDA
	lsl.w	&1,%d1			# put them in bits 3:1
	tst.l	L_SCR2(%a6)		# test sign of original x
	bge.b	x_pos			# if pos, don't set bit 0
	addq.l	&1,%d1			# if neg, set bit 0
x_pos:
	lea.l	RBDTBL(%pc),%a2		# load rbdtbl base
	mov.b	(%a2,%d1),%d3		# load d3 with new rmode
	lsl.l	&4,%d3			# put bits in proper position
	fmov.l	%d3,%fpcr		# load bits into fpu
	lsr.l	&4,%d3			# put bits in proper position
	tst.b	%d3			# decode new rmode for pten table
	bne.b	not_rn			# non-zero => not RN, decode further
	lea.l	PTENRN(%pc),%a1		# load a1 with RN table base
	bra.b	rmode			# exit decode
not_rn:
	lsr.b	&1,%d3			# get lsb in carry
	bcc.b	not_rp2			# if carry clear, it is RM
	lea.l	PTENRP(%pc),%a1		# load a1 with RP table base
	bra.b	rmode			# exit decode
not_rp2:
	lea.l	PTENRM(%pc),%a1		# load a1 with RM table base
rmode:
	clr.l	%d3			# clr table index
# binary-exponentiation loop: fp1 = 10^|ISCALE| built from table powers
e_loop2:
	lsr.l	&1,%d0			# shift next bit into carry
	bcc.b	e_next2			# if zero, skip the mul
	fmul.x	(%a1,%d3),%fp1		# mul by 10**(2^(d3/12))
e_next2:
	add.l	&12,%d3			# inc d3 to next pwrten table entry
	tst.l	%d0			# test if ISCALE is zero
	bne.b	e_loop2			# if not, loop

# A8. Clr INEX; Force RZ.
#	The operation in A3 above may have set INEX2.
#	RZ mode is forced for the scaling operation to insure
#	only one rounding error. The grs bits are collected in
#	the INEX flag for use in A10.
#
# Register usage:
#	Input/Output

	fmov.l	&0,%fpsr		# clr INEX
	fmov.l	&rz_mode*0x10,%fpcr	# set RZ rounding mode

# A9. Scale X -> Y.
#	The mantissa is scaled to the desired number of significant
#	digits. The excess digits are collected in INEX2. If mul,
#	Check d2 for excess 10 exponential value. If not zero,
#	the iscale value would have caused the pwrten calculation
#	to overflow. Only a negative iscale can cause this, so
#	multiply by 10^(d2), which is now only allowed to be 24,
#	with a multiply by 10^8 and 10^16, which is exact since
#	10^24 is exact. If the input was denormalized, we must
#	create a busy stack frame with the mul command and the
#	two operands, and allow the fpu to complete the multiply.
#
# Register usage:
#	Input/Output
#	d0: FPCR with RZ mode/Unchanged
#	d2: 0 or 24/unchanged
#	d3: x/x
#	d4: LEN/Unchanged
#	d5: ICTR:LAMBDA
#	d6: ILOG/Unchanged
#	d7: k-factor/Unchanged
#	a0: ptr for original operand/final result
#	a1: ptr to PTENRM array/Unchanged
#	a2: x/x
#	fp0: float(ILOG)/X adjusted for SCALE (Y)
#	fp1: 10^ISCALE/Unchanged
#	fp2: x/x
#	F_SCR1:x/x
#	F_SCR2:Abs(X) with $3fff exponent/Unchanged
#	L_SCR1:x/x
#	L_SCR2:first word of X packed/Unchanged

A9_str:
	fmov.x	(%a0),%fp0		# load X from memory
	fabs.x	%fp0			# use abs(X)
	tst.w	%d5			# LAMBDA is in lower word of d5
	bne.b	sc_mul			# if neg (LAMBDA = 1), scale by mul
	fdiv.x	%fp1,%fp0		# calculate X / SCALE -> Y to fp0
	bra.w	A10_st			# branch to A10

sc_mul:
	tst.b	BINDEC_FLG(%a6)		# check for denorm
	beq.w	A9_norm			# if norm, continue with mul

# for DENORM, we must calculate:
#	fp0 = input_op * 10^ISCALE * 10^24
# since the input operand is a DENORM, we can't multiply it directly.
# so, we do the multiplication of the exponents and mantissas separately.
# in this way, we avoid underflow on intermediate stages of the
# multiplication and guarantee a result without exception.
	fmovm.x	&0x2,-(%sp)		# save 10^ISCALE to stack

# sum the exponents of: 10^ISCALE, the DENORM input (negated),
# 10^8 and 10^16 (table offsets 36/48), removing one BIAS per add
	mov.w	(%sp),%d3		# grab exponent
	andi.w	&0x7fff,%d3		# clear sign
	ori.w	&0x8000,(%a0)		# make DENORM exp negative
	add.w	(%a0),%d3		# add DENORM exp to 10^ISCALE exp
	subi.w	&0x3fff,%d3		# subtract BIAS
	add.w	36(%a1),%d3		# add in exponent of 10^8
	subi.w	&0x3fff,%d3		# subtract BIAS
	add.w	48(%a1),%d3		# add in exponent of 10^16
	subi.w	&0x3fff,%d3		# subtract BIAS

	bmi.w	sc_mul_err		# if result is DENORM, punt!!!

	andi.w	&0x8000,(%sp)		# keep sign
	or.w	%d3,(%sp)		# insert new exponent
	andi.w	&0x7fff,(%a0)		# clear sign bit on DENORM again
	mov.l	0x8(%a0),-(%sp)		# put input op mantissa on stk
	mov.l	0x4(%a0),-(%sp)
	mov.l	&0x3fff0000,-(%sp)	# force exp to zero
	fmovm.x	(%sp)+,&0x80		# load normalized DENORM into fp0
	fmul.x	(%sp)+,%fp0		# * 10^ISCALE (with combined exponent)

# multiply the mantissas of 10^8 and 10^16 with exponents forced to
# zero (their true exponents were already folded into d3 above):
#	fmul.x	36(%a1),%fp0	# multiply fp0 by 10^8
#	fmul.x	48(%a1),%fp0	# multiply fp0 by 10^16
	mov.l	36+8(%a1),-(%sp)	# get 10^8 mantissa
	mov.l	36+4(%a1),-(%sp)
	mov.l	&0x3fff0000,-(%sp)	# force exp to zero
	mov.l	48+8(%a1),-(%sp)	# get 10^16 mantissa
	mov.l	48+4(%a1),-(%sp)
	mov.l	&0x3fff0000,-(%sp)	# force exp to zero
	fmul.x	(%sp)+,%fp0		# multiply fp0 by 10^8
	fmul.x	(%sp)+,%fp0		# multiply fp0 by 10^16
	bra.b	A10_st

sc_mul_err:
	bra.b	sc_mul_err		# deliberate hang: "can't happen" path

A9_norm:
	tst.w	%d2			# test for small exp case
	beq.b	A9_con			# if zero, continue as normal
	fmul.x	36(%a1),%fp0		# multiply fp0 by 10^8
	fmul.x	48(%a1),%fp0		# multiply fp0 by 10^16
A9_con:
	fmul.x	%fp1,%fp0		# calculate X * SCALE -> Y to fp0

# A10. Or in INEX.
#	If INEX is set, round error occurred. This is compensated
#	for by 'or-ing' in the INEX2 flag to the lsb of Y.
#
# Register usage:
#	Input/Output
#	d0: FPCR with RZ mode/FPSR with INEX2 isolated
#	d2: x/x
#	d3: x/x
#	d4: LEN/Unchanged
#	d5: ICTR:LAMBDA
#	d6: ILOG/Unchanged
#	d7: k-factor/Unchanged
#	a0: ptr for original operand/final result
#	a1: ptr to PTENxx array/Unchanged
#	a2: x/ptr to FP_SCR1(a6)
#	fp0: Y/Y with lsb adjusted
#	fp1: 10^ISCALE/Unchanged
#	fp2: x/x

A10_st:
	fmov.l	%fpsr,%d0		# get FPSR
	fmov.x	%fp0,FP_SCR1(%a6)	# move Y to memory
	lea.l	FP_SCR1(%a6),%a2	# load a2 with ptr to FP_SCR1
	btst	&9,%d0			# check if INEX2 set
	beq.b	A11_st			# if clear, skip rest
	or.l	&1,8(%a2)		# or in 1 to lsb of mantissa
	fmov.x	FP_SCR1(%a6),%fp0	# write adjusted Y back to fpu


# A11. Restore original FPCR; set size ext.
#	Perform FINT operation in the user's rounding mode. Keep
#	the size to extended. The sintdo entry point in the sint
#	routine expects the FPCR value to be in USER_FPCR for
#	mode and precision. The original FPCR is saved in L_SCR1.

A11_st:
	mov.l	USER_FPCR(%a6),L_SCR1(%a6)	# save it for later
	and.l	&0x00000030,USER_FPCR(%a6)	# set size to ext,
#					;block exceptions


# A12. Calculate YINT = FINT(Y) according to user's rounding mode.
#	The FPSP routine sintdo is used. The output is in fp0.
#
# Register usage:
#	Input/Output
#	d0: FPSR with AINEX cleared/FPCR with size set to ext
#	d2: x/x/scratch
#	d3: x/x
#	d4: LEN/Unchanged
#	d5: ICTR:LAMBDA/Unchanged
#	d6: ILOG/Unchanged
#	d7: k-factor/Unchanged
#	a0: ptr for original operand/src ptr for sintdo
#	a1: ptr to PTENxx array/Unchanged
#	a2: ptr to FP_SCR1(a6)/Unchanged
#	a6: temp pointer to FP_SCR1(a6) - orig value saved and restored
#	fp0: Y/YINT
#	fp1: 10^ISCALE/Unchanged
#	fp2: x/x
#	F_SCR1:x/x
#	F_SCR2:Y adjusted for inex/Y with original exponent
#	L_SCR1:x/original USER_FPCR
#	L_SCR2:first word of X packed/Unchanged

A12_st:
	movm.l	&0xc0c0,-(%sp)		# save regs used by sintdo {%d0-%d1/%a0-%a1}
	mov.l	L_SCR1(%a6),-(%sp)
	mov.l	L_SCR2(%a6),-(%sp)

	lea.l	FP_SCR1(%a6),%a0	# a0 is ptr to FP_SCR1(a6)
	fmov.x	%fp0,(%a0)		# move Y to memory at FP_SCR1(a6)
	tst.l	L_SCR2(%a6)		# test sign of original operand
	bge.b	do_fint12		# if pos, use Y
	or.l	&0x80000000,(%a0)	# if neg, use -Y
do_fint12:
	mov.l	USER_FPSR(%a6),-(%sp)	# preserve FPSR byte around fint
#	bsr	sintdo			# sint routine returns int in fp0

	fmov.l	USER_FPCR(%a6),%fpcr	# user rounding mode, ext precision
	fmov.l	&0x0,%fpsr		# clear the AEXC bits!!!
##	mov.l	USER_FPCR(%a6),%d0	# ext prec/keep rnd mode
##	andi.l	&0x00000030,%d0
##	fmov.l	%d0,%fpcr
	fint.x	FP_SCR1(%a6),%fp0	# do fint()
	fmov.l	%fpsr,%d0		# collect exceptions raised by fint
	or.w	%d0,FPSR_EXCEPT(%a6)	# and accumulate them for the user
##	fmov.l	&0x0,%fpcr
##	fmov.l	%fpsr,%d0		# don't keep ccodes
##	or.w	%d0,FPSR_EXCEPT(%a6)

	mov.b	(%sp),USER_FPSR(%a6)	# restore saved FPSR byte
	add.l	&4,%sp

	mov.l	(%sp)+,L_SCR2(%a6)
	mov.l	(%sp)+,L_SCR1(%a6)
	movm.l	(%sp)+,&0x303		# restore regs used by sint {%d0-%d1/%a0-%a1}

	mov.l	L_SCR2(%a6),FP_SCR1(%a6)	# restore original exponent
	mov.l	L_SCR1(%a6),USER_FPCR(%a6)	# restore user's FPCR

# A13. Check for LEN digits.
#	If the int operation results in more than LEN digits,
#	or less than LEN -1 digits, adjust ILOG and repeat from
#	A6. This test occurs only on the first pass. If the
#	result is exactly 10^LEN, decrement ILOG and divide
#	the mantissa by 10. The calculation of 10^LEN cannot
#	be inexact, since all powers of ten up to 10^27 are exact
#	in extended precision, so the use of a previous power-of-ten
#	table will introduce no error.
#
#
# Register usage:
#	Input/Output
#	d0: FPCR with size set to ext/scratch final = 0
#	d2: x/x
#	d3: x/scratch final = x
#	d4: LEN/LEN adjusted
#	d5: ICTR:LAMBDA/LAMBDA:ICTR
#	d6: ILOG/ILOG adjusted
#	d7: k-factor/Unchanged
#	a0: pointer into memory for packed bcd string formation
#	a1: ptr to PTENxx array/Unchanged
#	a2: ptr to FP_SCR1(a6)/Unchanged
#	fp0: int portion of Y/abs(YINT) adjusted
#	fp1: 10^ISCALE/Unchanged
#	fp2: x/10^LEN
#	F_SCR1:x/x
#	F_SCR2:Y with original exponent/Unchanged
#	L_SCR1:original USER_FPCR/Unchanged
#	L_SCR2:first word of X packed/Unchanged

A13_st:
	swap	%d5			# put ICTR in lower word of d5
	tst.w	%d5			# check if ICTR = 0
	bne	not_zr			# if non-zero, go to second test
#
# Compute 10^(LEN-1) by binary exponentiation from the pwrten table
#
	fmov.s	FONE(%pc),%fp2		# init fp2 to 1.0
	mov.l	%d4,%d0			# put LEN in d0
	subq.l	&1,%d0			# d0 = LEN -1
	clr.l	%d3			# clr table index
l_loop:
	lsr.l	&1,%d0			# shift next bit into carry
	bcc.b	l_next			# if zero, skip the mul
	fmul.x	(%a1,%d3),%fp2		# mul by 10**(2^(d3/12))
l_next:
	add.l	&12,%d3			# inc d3 to next pwrten table entry
	tst.l	%d0			# test if LEN is zero
	bne.b	l_loop			# if not, loop
#
# 10^LEN-1 is computed for this test and A14. If the input was
# denormalized, check only the case in which YINT > 10^LEN.
#
	tst.b	BINDEC_FLG(%a6)		# check if input was norm
	beq.b	A13_con			# if norm, continue with checking
	fabs.x	%fp0			# take abs of YINT
	bra	test_2
#
# Compare abs(YINT) to 10^(LEN-1) and 10^LEN
#
A13_con:
	fabs.x	%fp0			# take abs of YINT
	fcmp.x	%fp0,%fp2		# compare abs(YINT) with 10^(LEN-1)
	fbge.w	test_2			# if greater, do next test
	subq.l	&1,%d6			# too few digits: subtract 1 from ILOG
	mov.w	&1,%d5			# set ICTR (only one retry allowed)
	fmov.l	&rm_mode*0x10,%fpcr	# set rmode to RM
	fmul.s	FTEN(%pc),%fp2		# compute 10^LEN
	bra.w	A6_str			# return to A6 and recompute YINT
test_2:
	fmul.s	FTEN(%pc),%fp2		# compute 10^LEN
	fcmp.x	%fp0,%fp2		# compare abs(YINT) with 10^LEN
	fblt.w	A14_st			# if less, all is ok, go to A14
	fbgt.w	fix_ex			# if greater, fix and redo
	fdiv.s	FTEN(%pc),%fp0		# if equal, divide by 10
	addq.l	&1,%d6			# and inc ILOG
	bra.b	A14_st			# and continue elsewhere
fix_ex:
	addq.l	&1,%d6			# too many digits: increment ILOG by 1
	mov.w	&1,%d5			# set ICTR (only one retry allowed)
	fmov.l	&rm_mode*0x10,%fpcr	# set rmode to RM
	bra.w	A6_str			# return to A6 and recompute YINT
#
# Since ICTR <> 0, we have already been through one adjustment,
# and shouldn't have another; this is to check if abs(YINT) = 10^LEN
# 10^LEN is again computed using whatever table is in a1 since the
# value calculated cannot be inexact.
#
not_zr:
	fmov.s	FONE(%pc),%fp2		# init fp2 to 1.0
	mov.l	%d4,%d0			# put LEN in d0
	clr.l	%d3			# clr table index
z_loop:
	lsr.l	&1,%d0			# shift next bit into carry
	bcc.b	z_next			# if zero, skip the mul
	fmul.x	(%a1,%d3),%fp2		# mul by 10**(2^(d3/12))
z_next:
	add.l	&12,%d3			# inc d3 to next pwrten table entry
	tst.l	%d0			# test if LEN is zero
	bne.b	z_loop			# if not, loop
	fabs.x	%fp0			# get abs(YINT)
	fcmp.x	%fp0,%fp2		# check if abs(YINT) = 10^LEN
	fbneq.w	A14_st			# if not, skip this
	fdiv.s	FTEN(%pc),%fp0		# divide abs(YINT) by 10
	addq.l	&1,%d6			# and inc ILOG by 1
	addq.l	&1,%d4			# and inc LEN
	fmul.s	FTEN(%pc),%fp2		# LEN was bumped, so recompute 10^LEN

# A14. Convert the mantissa to bcd.
#	The binstr routine is used to convert the LEN digit
#	mantissa to bcd in memory. The input to binstr is
#	to be a fraction; i.e. (mantissa)/10^LEN and adjusted
#	such that the decimal point is to the left of bit 63.
#	The bcd digits are stored in the correct position in
#	the final string area in memory.
#
#
# Register usage:
#	Input/Output
#	d0: x/LEN call to binstr - final is 0
#	d1: x/0
#	d2: x/ms 32-bits of mant of abs(YINT)
#	d3: x/ls 32-bits of mant of abs(YINT)
#	d4: LEN/Unchanged
#	d5: ICTR:LAMBDA/LAMBDA:ICTR
#	d6: ILOG
#	d7: k-factor/Unchanged
#	a0: pointer into memory for packed bcd string formation
#	    /ptr to first mantissa byte in result string
#	a1: ptr to PTENxx array/Unchanged
#	a2: ptr to FP_SCR1(a6)/Unchanged
#	fp0: int portion of Y/abs(YINT) adjusted
#	fp1: 10^ISCALE/Unchanged
#	fp2: 10^LEN/Unchanged
#	F_SCR1:x/Work area for final result
#	F_SCR2:Y with original exponent/Unchanged
#	L_SCR1:original USER_FPCR/Unchanged
#	L_SCR2:first word of X packed/Unchanged

A14_st:
	fmov.l	&rz_mode*0x10,%fpcr	# force rz for conversion
	fdiv.x	%fp2,%fp0		# divide abs(YINT) by 10^LEN
	lea.l	FP_SCR0(%a6),%a0	# a0 -> final result work area
	fmov.x	%fp0,(%a0)		# move abs(YINT)/10^LEN to memory
	mov.l	4(%a0),%d2		# move 2nd word of FP_RES to d2
	mov.l	8(%a0),%d3		# move 3rd word of FP_RES to d3
	clr.l	4(%a0)			# zero word 2 of FP_RES
	clr.l	8(%a0)			# zero word 3 of FP_RES
	mov.l	(%a0),%d0		# move exponent to d0
	swap	%d0			# put exponent in lower word
	beq.b	no_sft			# if zero, don't shift
	sub.l	&0x3ffd,%d0		# sub bias less 2 to make fract
	tst.l	%d0			# check if > 1
	bgt.b	no_sft			# if so, don't shift
	neg.l	%d0			# make exp positive
m_loop:
	lsr.l	&1,%d2			# shift d2:d3 right, add 0s
	roxr.l	&1,%d3			# the number of places
	dbf.w	%d0,m_loop		# given in d0
no_sft:
	tst.l	%d2			# check for mantissa of zero
	bne.b	no_zr			# if not, go on
	tst.l	%d3			# continue zero check
	beq.b	zer_m			# if zero, go directly to binstr
no_zr:
	clr.l	%d1			# put zero in d1 for addx
	add.l	&0x00000080,%d3		# round: inc at bit 7
	addx.l	%d1,%d2			# continue inc (propagate carry)
	and.l	&0xffffff80,%d3		# strip off lsb not used by 882
zer_m:
	mov.l	%d4,%d0			# put LEN in d0 for binstr call
	addq.l	&3,%a0			# a0 points to M16 byte in result
	bsr	binstr			# call binstr to convert mant


# A15. Convert the exponent to bcd.
#	As in A14 above, the exp is converted to bcd and the
#	digits are stored in the final string.
#
#	Digits are stored in L_SCR1(a6) on return from BINDEC as:
#
#	 32  16 15  0
#	-----------------------------------------
#	|  0 | e3 | e2 | e1 | e4 |  X |  X |  X |
#	-----------------------------------------
#
#	And are moved into their proper places in FP_SCR0. If digit e4
#	is non-zero, OPERR is signaled. In all cases, all 4 digits are
#	written as specified in the 881/882 manual for packed decimal.
#
# Register usage:
#	Input/Output
#	d0: x/LEN call to binstr - final is 0
#	d1: x/scratch (0);shift count for final exponent packing
#	d2: x/ms 32-bits of exp fraction/scratch
#	d3: x/ls 32-bits of exp fraction
#	d4: LEN/Unchanged
#	d5: ICTR:LAMBDA/LAMBDA:ICTR
#	d6: ILOG
#	d7: k-factor/Unchanged
#	a0: ptr to result string/ptr to L_SCR1(a6)
#	a1: ptr to PTENxx array/Unchanged
#	a2: ptr to FP_SCR1(a6)/Unchanged
#	fp0: abs(YINT) adjusted/float(ILOG)
#	fp1: 10^ISCALE/Unchanged
#	fp2: 10^LEN/Unchanged
#	F_SCR1:Work area for final result/BCD result
#	F_SCR2:Y with original exponent/ILOG/10^4
#	L_SCR1:original USER_FPCR/Exponent digits on return from binstr
#	L_SCR2:first word of X packed/Unchanged

A15_st:
	tst.b	BINDEC_FLG(%a6)		# check for denorm
	beq.b	not_denorm
	ftest.x	%fp0			# test for zero
	fbeq.w	den_zero		# if zero, use k-factor or 4933
	fmov.l	%d6,%fp0		# float ILOG
	fabs.x	%fp0			# get abs of ILOG
	bra.b	convrt
den_zero:
	tst.l	%d7			# check sign of the k-factor
	blt.b	use_ilog		# if negative, use ILOG
	fmov.s	F4933(%pc),%fp0		# force exponent to 4933
	bra.b	convrt			# do it
use_ilog:
	fmov.l	%d6,%fp0		# float ILOG
	fabs.x	%fp0			# get abs of ILOG
	bra.b	convrt
not_denorm:
	ftest.x	%fp0			# test for zero
	fbneq.w	not_zero		# if zero, force exponent
	fmov.s	FONE(%pc),%fp0		# force exponent to 1
	bra.b	convrt			# do it
not_zero:
	fmov.l	%d6,%fp0		# float ILOG
	fabs.x	%fp0			# get abs of ILOG
convrt:
	fdiv.x	24(%a1),%fp0		# compute ILOG/10^4
	fmov.x	%fp0,FP_SCR1(%a6)	# store fp0 in memory
	mov.l	4(%a2),%d2		# move word 2 to d2
	mov.l	8(%a2),%d3		# move word 3 to d3
	mov.w	(%a2),%d0		# move exp to d0
	beq.b	x_loop_fin		# if zero, skip the shift
	sub.w	&0x3ffd,%d0		# subtract off bias
	neg.w	%d0			# make exp positive
x_loop:
	lsr.l	&1,%d2			# shift d2:d3 right
	roxr.l	&1,%d3			# the number of places
	dbf.w	%d0,x_loop		# given in d0
x_loop_fin:
	clr.l	%d1			# put zero in d1 for addx
	add.l	&0x00000080,%d3		# round: inc at bit 7
	addx.l	%d1,%d2			# continue inc (propagate carry)
	and.l	&0xffffff80,%d3		# strip off lsb not used by 882
	mov.l	&4,%d0			# put 4 in d0 for binstr call
	lea.l	L_SCR1(%a6),%a0		# a0 is ptr to L_SCR1 for exp digits
	bsr	binstr			# call binstr to convert exp
	mov.l	L_SCR1(%a6),%d0		# load L_SCR1 lword to d0
	mov.l	&12,%d1			# use d1 for shift count
	lsr.l	%d1,%d0			# shift d0 right by 12
	bfins	%d0,FP_SCR0(%a6){&4:&12}	# put e3:e2:e1 in FP_SCR0
	lsr.l	%d1,%d0			# shift d0 right by 12
	bfins	%d0,FP_SCR0(%a6){&16:&4}	# put e4 in FP_SCR0
	tst.b	%d0			# check if e4 is zero
	beq.b	A16_st			# if zero, skip rest
	or.l	&opaop_mask,USER_FPSR(%a6)	# set OPERR & AIOP in USER_FPSR


# A16. Write sign bits to final string.
#	Sigma is bit 31 of initial value; RHO is bit 31 of d6 (ILOG).
#
# Register usage:
#	Input/Output
#	d0: x/scratch - final is x
#	d2: x/x
#	d3: x/x
#	d4: LEN/Unchanged
#	d5: ICTR:LAMBDA/LAMBDA:ICTR
#	d6: ILOG/ILOG adjusted
#	d7: k-factor/Unchanged
#	a0: ptr to L_SCR1(a6)/Unchanged
#	a1: ptr to PTENxx array/Unchanged
#	a2: ptr to FP_SCR1(a6)/Unchanged
#	fp0: float(ILOG)/Unchanged
#	fp1: 10^ISCALE/Unchanged
#	fp2: 10^LEN/Unchanged
#	F_SCR1:BCD result with correct signs
#	F_SCR2:ILOG/10^4
#	L_SCR1:Exponent digits on return from binstr
#	L_SCR2:first word of X packed/Unchanged

A16_st:
	clr.l	%d0			# clr d0 for collection of signs
	and.b	&0x0f,FP_SCR0(%a6)	# clear first nibble of FP_SCR0
	tst.l	L_SCR2(%a6)		# check sign of original mantissa
	bge.b	mant_p			# if pos, don't set SM
	mov.l	&2,%d0			# move 2 in to d0 for SM (bit 1)
mant_p:
	tst.l	%d6			# check sign of ILOG
	bge.b	wr_sgn			# if pos, don't set SE
	addq.l	&1,%d0			# set bit 0 in d0 for SE
wr_sgn:
	bfins	%d0,FP_SCR0(%a6){&0:&2}	# insert SM and SE into FP_SCR0

# Clean up and restore all registers used.

	fmov.l	&0,%fpsr		# clear possible inex2/ainex bits
	fmovm.x	(%sp)+,&0xe0		# {%fp0-%fp2}
	movm.l	(%sp)+,&0x4fc		# {%d2-%d7/%a2}
	rts

# Powers-of-ten tables, one 12-byte extended-precision entry per
# power 10^(2^i), i = 0..12. Three copies, differing only in the
# last mantissa digits: rounded-to-nearest, rounded-up (RP) and
# rounded-down (RM) images, selected in A7 via RBDTBL.
	global	PTENRN
PTENRN:
	long	0x40020000,0xA0000000,0x00000000	# 10 ^ 1
	long	0x40050000,0xC8000000,0x00000000	# 10 ^ 2
	long	0x400C0000,0x9C400000,0x00000000	# 10 ^ 4
	long	0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8
	long	0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16
	long	0x40690000,0x9DC5ADA8,0x2B70B59E	# 10 ^ 32
	long	0x40D30000,0xC2781F49,0xFFCFA6D5	# 10 ^ 64
	long	0x41A80000,0x93BA47C9,0x80E98CE0	# 10 ^ 128
	long	0x43510000,0xAA7EEBFB,0x9DF9DE8E	# 10 ^ 256
	long	0x46A30000,0xE319A0AE,0xA60E91C7	# 10 ^ 512
	long	0x4D480000,0xC9767586,0x81750C17	# 10 ^ 1024
	long	0x5A920000,0x9E8B3B5D,0xC53D5DE5	# 10 ^ 2048
	long	0x75250000,0xC4605202,0x8A20979B	# 10 ^ 4096

	global	PTENRP
PTENRP:
	long	0x40020000,0xA0000000,0x00000000	# 10 ^ 1
	long	0x40050000,0xC8000000,0x00000000	# 10 ^ 2
	long	0x400C0000,0x9C400000,0x00000000	# 10 ^ 4
	long	0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8
	long	0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16
	long	0x40690000,0x9DC5ADA8,0x2B70B59E	# 10 ^ 32
	long	0x40D30000,0xC2781F49,0xFFCFA6D6	# 10 ^ 64
	long	0x41A80000,0x93BA47C9,0x80E98CE0	# 10 ^ 128
	long	0x43510000,0xAA7EEBFB,0x9DF9DE8E	# 10 ^ 256
	long	0x46A30000,0xE319A0AE,0xA60E91C7	# 10 ^ 512
	long	0x4D480000,0xC9767586,0x81750C18	# 10 ^ 1024
	long	0x5A920000,0x9E8B3B5D,0xC53D5DE5	# 10 ^ 2048
	long	0x75250000,0xC4605202,0x8A20979B	# 10 ^ 4096

	global	PTENRM
PTENRM:
	long	0x40020000,0xA0000000,0x00000000	# 10 ^ 1
	long	0x40050000,0xC8000000,0x00000000	# 10 ^ 2
	long	0x400C0000,0x9C400000,0x00000000	# 10 ^ 4
	long	0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8
	long	0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16
	long	0x40690000,0x9DC5ADA8,0x2B70B59D	# 10 ^ 32
	long	0x40D30000,0xC2781F49,0xFFCFA6D5	# 10 ^ 64
	long	0x41A80000,0x93BA47C9,0x80E98CDF	# 10 ^ 128
	long	0x43510000,0xAA7EEBFB,0x9DF9DE8D	# 10 ^ 256
	long	0x46A30000,0xE319A0AE,0xA60E91C6	# 10 ^ 512
	long	0x4D480000,0xC9767586,0x81750C17	# 10 ^ 1024
	long	0x5A920000,0x9E8B3B5D,0xC53D5DE4	# 10 ^ 2048
	long	0x75250000,0xC4605202,0x8A20979A	# 10 ^ 4096

#########################################################################
# binstr(): Converts a 64-bit binary integer to bcd.			#
#									#
# INPUT *************************************************************** #
#	d2:d3 = 64-bit binary integer					#
#	d0 = desired length (LEN)					#
#	a0 = pointer to start in memory for bcd characters		#
#	     (This pointer must point to byte 4 of the first		#
#	     lword of the packed decimal memory string.)		#
#									#
# OUTPUT ************************************************************** #
#	a0 = pointer to LEN bcd digits representing the 64-bit integer.	#
#									#
# ALGORITHM *********************************************************** #
#	The 64-bit binary is assumed to have a decimal point before	#
#	bit 63. The fraction is multiplied by 10 using a mul by 2	#
#	shift and a mul by 8 shift. The bits shifted out of the		#
#	msb form a decimal digit. This process is iterated until	#
#	LEN digits are formed.						#
#									#
# A1. Init d7 to 1. D7 is the byte digit counter, and if 1, the		#
#	digit formed will be assumed the least significant. This is	#
#	to force the first byte formed to have a 0 in the upper 4 bits.	#
#									#
# A2. Beginning of the loop:						#
#	Copy the fraction in d2:d3 to d4:d5.				#
#									#
# A3. Multiply the fraction in d2:d3 by 8 using bit-field		#
#	extracts and shifts. The three msbs from d2 will go into d1.	#
#									#
# A4. Multiply the fraction in d4:d5 by 2 using shifts. The msb		#
#	will be collected by the carry.
#									#
#	A5. Add using the carry the 64-bit quantities in d2:d3 and	#
#	    d4:d5 into d2:d3.  D1 will contain the bcd digit formed.	#
#									#
#	A6. Test d7.  If zero, the digit formed is the ms digit.  If	#
#	    non-zero, it is the ls digit.  Put the digit in its place	#
#	    in the upper nibble of d7's low byte.  If it is the ls	#
#	    digit, write the byte built in d7 to memory.		#
#	    (NOTE(review): legacy comment said the digit word was d0;	#
#	    the code below builds and stores it from d7.)		#
#									#
#	A7. Decrement d0 (LEN counter) and repeat the loop until	#
#	    zero.  (NOTE(review): legacy comment said d6; the dbf	#
#	    below counts with d0.)					#
#									#
#########################################################################

# Implementation Notes:
#
# The registers are used as follows:
#
#	d0: LEN counter
#	d1: temp used to form the digit
#	d2: upper 32-bits of fraction for mul by 8
#	d3: lower 32-bits of fraction for mul by 8
#	d4: upper 32-bits of fraction for mul by 2
#	d5: lower 32-bits of fraction for mul by 2
#	d6: temp for bit-field extracts
#	d7: byte digit formation word;digit count {0,1}
#	a0: pointer into memory for packed bcd string formation
#

	global		binstr
binstr:
	movm.l		&0xff00,-(%sp)		# save {%d0-%d7}

#
# A1: Init d7
#
	mov.l		&1,%d7			# init d7 for second digit
	subq.l		&1,%d0			# for dbf d0 would have LEN+1 passes
#
# A2. Copy d2:d3 to d4:d5.  Start loop.
#
loop:
	mov.l		%d2,%d4			# copy the fraction before muls
	mov.l		%d3,%d5			# to d4:d5
#
# A3. Multiply d2:d3 by 8; extract msbs into d1.
#
	bfextu		%d2{&0:&3},%d1		# copy 3 msbs of d2 into d1
	asl.l		&3,%d2			# shift d2 left by 3 places
	bfextu		%d3{&0:&3},%d6		# copy 3 msbs of d3 into d6
	asl.l		&3,%d3			# shift d3 left by 3 places
	or.l		%d6,%d2			# or in msbs from d3 into d2
#
# A4. Multiply d4:d5 by 2; add carry out to d1.
#
	asl.l		&1,%d5			# mul d5 by 2
	roxl.l		&1,%d4			# mul d4 by 2 (rotate carry in)
	swap		%d6			# put 0 in d6 lower word
	addx.w		%d6,%d1			# add in extend from mul by 2
#
# A5. Add mul by 8 to mul by 2.  D1 contains the digit formed.
# (The nops are deliberate 68060 silicon workarounds -- do not remove.)
#
	add.l		%d5,%d3			# add lower 32 bits
	nop					# ERRATA FIX #13 (Rev. 1.2 6/6/90)
	addx.l		%d4,%d2			# add with extend upper 32 bits
	nop					# ERRATA FIX #13 (Rev. 1.2 6/6/90)
	addx.w		%d6,%d1			# add in extend from add to d1
	swap		%d6			# with d6 = 0; put 0 in upper word
#
# A6. Test d7 and branch.
#
	tst.w		%d7			# if zero, store digit & to loop
	beq.b		first_d			# if non-zero, form byte & write
sec_d:
	swap		%d7			# bring first digit to word d7b
	asl.w		&4,%d7			# first digit in upper 4 bits d7b
	add.w		%d1,%d7			# add in ls digit to d7b
	mov.b		%d7,(%a0)+		# store d7b byte in memory
	swap		%d7			# put LEN counter in word d7a
	clr.w		%d7			# set d7a to signal no digits done
	dbf.w		%d0,loop		# do loop some more!
	bra.b		end_bstr		# finished, so exit
first_d:
	swap		%d7			# put digit word in d7b
	mov.w		%d1,%d7			# put new digit in d7b
	swap		%d7			# put LEN counter in word d7a
	addq.w		&1,%d7			# set d7a to signal first digit done
	dbf.w		%d0,loop		# do loop some more!
	swap		%d7			# put last digit in string
	lsl.w		&4,%d7			# move it to upper 4 bits
	mov.b		%d7,(%a0)+		# store it in memory string
#
# Clean up and return.  The LEN bcd digits have been written through
# a0.  (NOTE(review): legacy comment claimed "result in fp0"; no FP
# register is touched by this routine.)
#
end_bstr:
	movm.l		(%sp)+,&0xff		# restore {%d0-%d7}
	rts

#########################################################################
# XDEF **************************************************************** #
#	facc_in_b(): dmem_read_byte failed				#
#	facc_in_w(): dmem_read_word failed				#
#	facc_in_l(): dmem_read_long failed				#
#	facc_in_d(): dmem_read of dbl prec failed			#
#	facc_in_x(): dmem_read of ext prec failed			#
#									#
#	facc_out_b(): dmem_write_byte failed				#
#	facc_out_w(): dmem_write_word failed				#
#	facc_out_l(): dmem_write_long failed				#
#	facc_out_d(): dmem_write of dbl prec failed			#
#	facc_out_x(): dmem_write of ext prec failed			#
#									#
# XREF **************************************************************** #
#	_real_access() - exit through access error handler		#
#									#
# INPUT *************************************************************** #
#	None								#
#									#
# OUTPUT ************************************************************** #
#	None								#
#									#
# ALGORITHM *********************************************************** #
#	Flow jumps here when an FP data fetch call gets an error	#
# result.  This means the operating system wants an access error	#
# frame made out of the current exception stack frame.			#
#	So, we first call restore() which makes sure that any updated	#
# -(an)+ register gets returned to its pre-exception value and then	#
# we change the stack to an access error stack frame.
#									#
#########################################################################

# Each stub below loads %d0 with the size in bytes of the failed
# access (restore() uses it to back out any (An)+ / -(An) update),
# calls restore(), records the appropriate 68060 FSLW value for that
# access in EXC_VOFF, and joins the common frame-building code at
# facc_finish.

facc_in_b:
	movq.l		&0x1,%d0		# one byte
	bsr.w		restore			# fix An

	mov.w		&0x0121,EXC_VOFF(%a6)	# set FSLW
	bra.w		facc_finish		# word branch: target too far for bra.b

facc_in_w:
	movq.l		&0x2,%d0		# two bytes
	bsr.w		restore			# fix An

	mov.w		&0x0141,EXC_VOFF(%a6)	# set FSLW
	bra.b		facc_finish

facc_in_l:
	movq.l		&0x4,%d0		# four bytes
	bsr.w		restore			# fix An

	mov.w		&0x0101,EXC_VOFF(%a6)	# set FSLW
	bra.b		facc_finish

facc_in_d:
	movq.l		&0x8,%d0		# eight bytes
	bsr.w		restore			# fix An

	mov.w		&0x0161,EXC_VOFF(%a6)	# set FSLW
	bra.b		facc_finish

facc_in_x:
	movq.l		&0xc,%d0		# twelve bytes
	bsr.w		restore			# fix An

	mov.w		&0x0161,EXC_VOFF(%a6)	# set FSLW
	bra.b		facc_finish

################################################################

facc_out_b:
	movq.l		&0x1,%d0		# one byte
	bsr.w		restore			# restore An

	mov.w		&0x00a1,EXC_VOFF(%a6)	# set FSLW
	bra.b		facc_finish

facc_out_w:
	movq.l		&0x2,%d0		# two bytes
	bsr.w		restore			# restore An

	mov.w		&0x00c1,EXC_VOFF(%a6)	# set FSLW
	bra.b		facc_finish

facc_out_l:
	movq.l		&0x4,%d0		# four bytes
	bsr.w		restore			# restore An

	mov.w		&0x0081,EXC_VOFF(%a6)	# set FSLW
	bra.b		facc_finish

facc_out_d:
	movq.l		&0x8,%d0		# eight bytes
	bsr.w		restore			# restore An

	mov.w		&0x00e1,EXC_VOFF(%a6)	# set FSLW
	bra.b		facc_finish

facc_out_x:
	movq.l		&0xc,%d0		# twelve bytes
						# (was mov.l; movq.l is the shorter
						# moveq encoding used by every other
						# stub -- 0xc fits the signed 8-bit
						# immediate, result is identical)
	bsr.w		restore			# restore An

	mov.w		&0x00e1,EXC_VOFF(%a6)	# set FSLW
						# falls through to facc_finish

# here's where we actually create the access error frame from the
# current exception stack frame.
# facc_finish: rewrite the current exception stack frame, in place,
# into a 68060 access error frame, then exit through the host OS
# access error handler (_real_access).
facc_finish:
	mov.l		USER_FPIAR(%a6),EXC_PC(%a6) # store current PC

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6

# Grow the frame by one lword and shuffle its fields down.  The moves
# below overlap in memory, so their order is significant -- do not
# reorder.  FSLW low word is seeded from the saved voff word (size
# bits), then the voff slot is overwritten with 0x4008 (access error).
	mov.l		(%sp),-(%sp)		# store SR, hi(PC)
	mov.l		0x8(%sp),0x4(%sp)	# store lo(PC)
	mov.l		0xc(%sp),0x8(%sp)	# store EA
	mov.l		&0x00000001,0xc(%sp)	# store FSLW
	mov.w		0x6(%sp),0xc(%sp)	# fix FSLW (size)
	mov.w		&0x4008,0x6(%sp)	# store voff

	btst		&0x5,(%sp)		# supervisor or user mode?
	beq.b		facc_out2		# user
	bset		&0x2,0xd(%sp)		# set supervisor TM bit

facc_out2:
	bra.l		_real_access

##################################################################

# if the effective addressing mode was predecrement or postincrement,
# the emulation has already changed its value to the correct post-
# instruction value.  but since we're exiting to the access error
# handler, then AN must be returned to its pre-instruction value.
# we do that here.
#
# INPUT: d0 = size in bytes of the address adjustment to undo.
restore:
	mov.b		EXC_OPWORD+0x1(%a6),%d1
	andi.b		&0x38,%d1		# extract opmode
	cmpi.b		%d1,&0x18		# postinc?
	beq.w		rest_inc
	cmpi.b		%d1,&0x20		# predec?
	beq.w		rest_dec
	rts					# neither mode; nothing to undo

rest_inc:
	mov.b		EXC_OPWORD+0x1(%a6),%d1
	andi.w		&0x0007,%d1		# fetch An

# pc-relative dispatch: read the 16-bit offset for register An from
# tbl_rest_inc, then jump to the matching fix-up stub.
	mov.w		(tbl_rest_inc.b,%pc,%d1.w*2),%d1
	jmp		(tbl_rest_inc.b,%pc,%d1.w*1)

tbl_rest_inc:
	short		ri_a0 - tbl_rest_inc
	short		ri_a1 - tbl_rest_inc
	short		ri_a2 - tbl_rest_inc
	short		ri_a3 - tbl_rest_inc
	short		ri_a4 - tbl_rest_inc
	short		ri_a5 - tbl_rest_inc
	short		ri_a6 - tbl_rest_inc
	short		ri_a7 - tbl_rest_inc

# a0/a1 were saved in the exception frame, so their stacked copies are
# fixed; a2-a5 are still live in the registers; a6 is the frame pointer
# so its saved value sits at (%a6).
ri_a0:
	sub.l		%d0,EXC_DREGS+0x8(%a6)	# fix stacked a0
	rts
ri_a1:
	sub.l		%d0,EXC_DREGS+0xc(%a6)	# fix stacked a1
	rts
ri_a2:
	sub.l		%d0,%a2			# fix a2
	rts
ri_a3:
	sub.l		%d0,%a3			# fix a3
	rts
ri_a4:
	sub.l		%d0,%a4			# fix a4
	rts
ri_a5:
	sub.l		%d0,%a5			# fix a5
	rts
ri_a6:
	sub.l		%d0,(%a6)		# fix stacked a6
	rts
# if it's a fmove out instruction, we don't have to fix a7
# because we hadn't changed it yet.  if it's an opclass two
# instruction (data moved in) and the exception was in supervisor
# mode, then a7 also wasn't updated.  if it was user mode, then
# restore the correct a7 which is in the USP currently.
ri_a7:
	cmpi.b		EXC_VOFF(%a6),&0x30	# move in or out?
	bne.b		ri_a7_done		# out

	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
	bne.b		ri_a7_done		# supervisor
	movc		%usp,%a0		# restore USP
	sub.l		%d0,%a0
	movc		%a0,%usp
ri_a7_done:
	rts

# need to invert adjustment value if the <ea> was predec
rest_dec:
	neg.l		%d0
	bra.b		rest_inc