cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

pfpsp.S (462530B)


      1~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      2MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
      3M68000 Hi-Performance Microprocessor Division
      4M68060 Software Package
      5Production Release P1.00 -- October 10, 1994
      6
      7M68060 Software Package Copyright © 1993, 1994 Motorola Inc.  All rights reserved.
      8
      9THE SOFTWARE is provided on an "AS IS" basis and without warranty.
     10To the maximum extent permitted by applicable law,
     11MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
     12INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE
     13and any warranty against infringement with regard to the SOFTWARE
     14(INCLUDING ANY MODIFIED VERSIONS THEREOF) and any accompanying written materials.
     15
     16To the maximum extent permitted by applicable law,
     17IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
     18(INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS,
     19BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS)
     20ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE.
     21Motorola assumes no responsibility for the maintenance and support of the SOFTWARE.
     22
     23You are hereby granted a copyright license to use, modify, and distribute the SOFTWARE
     24so long as this entire notice is retained without alteration in any modified and/or
     25redistributed versions, and that such modified versions are clearly identified as such.
     26No licenses are granted by implication, estoppel or otherwise under any patents
     27or trademarks of Motorola, Inc.
     28~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     29# freal.s:
     30#	This file is appended to the top of the 060FPSP package
     31# and contains the entry points into the package. The user, in
     32# effect, branches to one of the branch table entries located
     33# after _060FPSP_TABLE.
     34#	Also, subroutine stubs exist in this file (_fpsp_done for
     35# example) that are referenced by the FPSP package itself in order
     36# to call a given routine. The stub routine actually performs the
     37# callout. The FPSP code does a "bsr" to the stub routine. This
     38# extra layer of hierarchy adds a slight performance penalty but
      39# it makes the FPSP code easier to read and more maintainable.
     40#
     41
     42set	_off_bsun,	0x00
     43set	_off_snan,	0x04
     44set	_off_operr,	0x08
     45set	_off_ovfl,	0x0c
     46set	_off_unfl,	0x10
     47set	_off_dz,	0x14
     48set	_off_inex,	0x18
     49set	_off_fline,	0x1c
     50set	_off_fpu_dis,	0x20
     51set	_off_trap,	0x24
     52set	_off_trace,	0x28
     53set	_off_access,	0x2c
     54set	_off_done,	0x30
     55
     56set	_off_imr,	0x40
     57set	_off_dmr,	0x44
     58set	_off_dmw,	0x48
     59set	_off_irw,	0x4c
     60set	_off_irl,	0x50
     61set	_off_drb,	0x54
     62set	_off_drw,	0x58
     63set	_off_drl,	0x5c
     64set	_off_dwb,	0x60
     65set	_off_dww,	0x64
     66set	_off_dwl,	0x68
     67
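# Note on the offsets above: they index a 128-byte call-out area that the
# host operating system is expected to place immediately BEFORE
# _060FPSP_TABLE. Each slot holds a 32-bit displacement from the start of
# that area to the corresponding OS routine; the stub routines further
# below fetch the longword at (_060FPSP_TABLE-0x80+_off_xxx) and branch to
# (_060FPSP_TABLE-0x80) plus that value. A minimal C-style sketch of how
# host glue code might build such a table (the os_* names and the
# callout_base symbol are hypothetical, not part of this package):
#
#	extern char callout_base[];	/* == _060FPSP_TABLE - 0x80 */
#	#define CALLOUT(fn)	((long)(fn) - (long)callout_base)
#
#	long callout_table[0x80 / 4] = {
#		[0x00 / 4] = CALLOUT(os_bsun),		/* _off_bsun */
#		[0x04 / 4] = CALLOUT(os_snan),		/* _off_snan */
#		/* ... one entry per _off_* slot ... */
#		[0x30 / 4] = CALLOUT(os_done),		/* _off_done */
#		[0x40 / 4] = CALLOUT(os_imem_read),	/* _off_imr  */
#		[0x68 / 4] = CALLOUT(os_dmem_write_long), /* _off_dwl */
#	};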
     68_060FPSP_TABLE:
     69
     70###############################################################
     71
     72# Here's the table of ENTRY POINTS for those linking the package.
     73	bra.l		_fpsp_snan
     74	short		0x0000
     75	bra.l		_fpsp_operr
     76	short		0x0000
     77	bra.l		_fpsp_ovfl
     78	short		0x0000
     79	bra.l		_fpsp_unfl
     80	short		0x0000
     81	bra.l		_fpsp_dz
     82	short		0x0000
     83	bra.l		_fpsp_inex
     84	short		0x0000
     85	bra.l		_fpsp_fline
     86	short		0x0000
     87	bra.l		_fpsp_unsupp
     88	short		0x0000
     89	bra.l		_fpsp_effadd
     90	short		0x0000
     91
     92	space		56
     93
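# Each entry above is a 6-byte bra.l followed by a 2-byte pad, so the
# externally visible entry points sit at fixed 8-byte offsets from
# _060FPSP_TABLE:
#
#	+0x00 _fpsp_snan	+0x08 _fpsp_operr	+0x10 _fpsp_ovfl
#	+0x18 _fpsp_unfl	+0x20 _fpsp_dz		+0x28 _fpsp_inex
#	+0x30 _fpsp_fline	+0x38 _fpsp_unsupp	+0x40 _fpsp_effadd
#
# and the "space 56" pads the branch table out to 0x80 bytes. A hedged
# C-style sketch of pointing one CPU exception vector at an entry point
# (the vbr_table variable is an assumption about the host OS; 0xd4 is the
# FP overflow vector offset, OVFL_VEC, defined later in this file):
#
#	extern char _060FPSP_TABLE[];
#	void (**vbr_table)(void);	/* base of the CPU vector table */
#	vbr_table[0xd4 / 4] = (void (*)(void))&_060FPSP_TABLE[0x10];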
     94###############################################################
     95	global		_fpsp_done
     96_fpsp_done:
     97	mov.l		%d0,-(%sp)
     98	mov.l		(_060FPSP_TABLE-0x80+_off_done,%pc),%d0
     99	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
    100	mov.l		0x4(%sp),%d0
    101	rtd		&0x4
    102
    103	global		_real_ovfl
    104_real_ovfl:
    105	mov.l		%d0,-(%sp)
    106	mov.l		(_060FPSP_TABLE-0x80+_off_ovfl,%pc),%d0
    107	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
    108	mov.l		0x4(%sp),%d0
    109	rtd		&0x4
    110
    111	global		_real_unfl
    112_real_unfl:
    113	mov.l		%d0,-(%sp)
    114	mov.l		(_060FPSP_TABLE-0x80+_off_unfl,%pc),%d0
    115	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
    116	mov.l		0x4(%sp),%d0
    117	rtd		&0x4
    118
    119	global		_real_inex
    120_real_inex:
    121	mov.l		%d0,-(%sp)
    122	mov.l		(_060FPSP_TABLE-0x80+_off_inex,%pc),%d0
    123	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
    124	mov.l		0x4(%sp),%d0
    125	rtd		&0x4
    126
    127	global		_real_bsun
    128_real_bsun:
    129	mov.l		%d0,-(%sp)
    130	mov.l		(_060FPSP_TABLE-0x80+_off_bsun,%pc),%d0
    131	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
    132	mov.l		0x4(%sp),%d0
    133	rtd		&0x4
    134
    135	global		_real_operr
    136_real_operr:
    137	mov.l		%d0,-(%sp)
    138	mov.l		(_060FPSP_TABLE-0x80+_off_operr,%pc),%d0
    139	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
    140	mov.l		0x4(%sp),%d0
    141	rtd		&0x4
    142
    143	global		_real_snan
    144_real_snan:
    145	mov.l		%d0,-(%sp)
    146	mov.l		(_060FPSP_TABLE-0x80+_off_snan,%pc),%d0
    147	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
    148	mov.l		0x4(%sp),%d0
    149	rtd		&0x4
    150
    151	global		_real_dz
    152_real_dz:
    153	mov.l		%d0,-(%sp)
    154	mov.l		(_060FPSP_TABLE-0x80+_off_dz,%pc),%d0
    155	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
    156	mov.l		0x4(%sp),%d0
    157	rtd		&0x4
    158
    159	global		_real_fline
    160_real_fline:
    161	mov.l		%d0,-(%sp)
    162	mov.l		(_060FPSP_TABLE-0x80+_off_fline,%pc),%d0
    163	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
    164	mov.l		0x4(%sp),%d0
    165	rtd		&0x4
    166
    167	global		_real_fpu_disabled
    168_real_fpu_disabled:
    169	mov.l		%d0,-(%sp)
    170	mov.l		(_060FPSP_TABLE-0x80+_off_fpu_dis,%pc),%d0
    171	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
    172	mov.l		0x4(%sp),%d0
    173	rtd		&0x4
    174
    175	global		_real_trap
    176_real_trap:
    177	mov.l		%d0,-(%sp)
    178	mov.l		(_060FPSP_TABLE-0x80+_off_trap,%pc),%d0
    179	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
    180	mov.l		0x4(%sp),%d0
    181	rtd		&0x4
    182
    183	global		_real_trace
    184_real_trace:
    185	mov.l		%d0,-(%sp)
    186	mov.l		(_060FPSP_TABLE-0x80+_off_trace,%pc),%d0
    187	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
    188	mov.l		0x4(%sp),%d0
    189	rtd		&0x4
    190
    191	global		_real_access
    192_real_access:
    193	mov.l		%d0,-(%sp)
    194	mov.l		(_060FPSP_TABLE-0x80+_off_access,%pc),%d0
    195	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
    196	mov.l		0x4(%sp),%d0
    197	rtd		&0x4
    198
    199#######################################
    200
    201	global		_imem_read
    202_imem_read:
    203	mov.l		%d0,-(%sp)
    204	mov.l		(_060FPSP_TABLE-0x80+_off_imr,%pc),%d0
    205	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
    206	mov.l		0x4(%sp),%d0
    207	rtd		&0x4
    208
    209	global		_dmem_read
    210_dmem_read:
    211	mov.l		%d0,-(%sp)
    212	mov.l		(_060FPSP_TABLE-0x80+_off_dmr,%pc),%d0
    213	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
    214	mov.l		0x4(%sp),%d0
    215	rtd		&0x4
    216
    217	global		_dmem_write
    218_dmem_write:
    219	mov.l		%d0,-(%sp)
    220	mov.l		(_060FPSP_TABLE-0x80+_off_dmw,%pc),%d0
    221	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
    222	mov.l		0x4(%sp),%d0
    223	rtd		&0x4
    224
    225	global		_imem_read_word
    226_imem_read_word:
    227	mov.l		%d0,-(%sp)
    228	mov.l		(_060FPSP_TABLE-0x80+_off_irw,%pc),%d0
    229	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
    230	mov.l		0x4(%sp),%d0
    231	rtd		&0x4
    232
    233	global		_imem_read_long
    234_imem_read_long:
    235	mov.l		%d0,-(%sp)
    236	mov.l		(_060FPSP_TABLE-0x80+_off_irl,%pc),%d0
    237	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
    238	mov.l		0x4(%sp),%d0
    239	rtd		&0x4
    240
    241	global		_dmem_read_byte
    242_dmem_read_byte:
    243	mov.l		%d0,-(%sp)
    244	mov.l		(_060FPSP_TABLE-0x80+_off_drb,%pc),%d0
    245	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
    246	mov.l		0x4(%sp),%d0
    247	rtd		&0x4
    248
    249	global		_dmem_read_word
    250_dmem_read_word:
    251	mov.l		%d0,-(%sp)
    252	mov.l		(_060FPSP_TABLE-0x80+_off_drw,%pc),%d0
    253	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
    254	mov.l		0x4(%sp),%d0
    255	rtd		&0x4
    256
    257	global		_dmem_read_long
    258_dmem_read_long:
    259	mov.l		%d0,-(%sp)
    260	mov.l		(_060FPSP_TABLE-0x80+_off_drl,%pc),%d0
    261	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
    262	mov.l		0x4(%sp),%d0
    263	rtd		&0x4
    264
    265	global		_dmem_write_byte
    266_dmem_write_byte:
    267	mov.l		%d0,-(%sp)
    268	mov.l		(_060FPSP_TABLE-0x80+_off_dwb,%pc),%d0
    269	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
    270	mov.l		0x4(%sp),%d0
    271	rtd		&0x4
    272
    273	global		_dmem_write_word
    274_dmem_write_word:
    275	mov.l		%d0,-(%sp)
    276	mov.l		(_060FPSP_TABLE-0x80+_off_dww,%pc),%d0
    277	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
    278	mov.l		0x4(%sp),%d0
    279	rtd		&0x4
    280
    281	global		_dmem_write_long
    282_dmem_write_long:
    283	mov.l		%d0,-(%sp)
    284	mov.l		(_060FPSP_TABLE-0x80+_off_dwl,%pc),%d0
    285	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
    286	mov.l		0x4(%sp),%d0
    287	rtd		&0x4
    288
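# All of the stub routines above share one shape: look up a longword in
# the call-out area at _060FPSP_TABLE-0x80, restore the caller's d0, and
# transfer control to (_060FPSP_TABLE-0x80) plus that longword, leaving
# the original return address on top of the stack so the OS routine
# returns straight to the FPSP caller. Roughly, in C-style pseudocode
# (a sketch only; "_off_xxx" stands for the relevant offset constant):
#
#	void stub(void)
#	{
#		char *base   = (char *)_060FPSP_TABLE - 0x80;
#		long  target = *(long *)(base + _off_xxx);
#		((void (*)(void))(base + target))();	/* really a tail-jump */
#	}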
    289#
    290# This file contains a set of define statements for constants
    291# in order to promote readability within the corecode itself.
    292#
    293
    294set LOCAL_SIZE,		192			# stack frame size(bytes)
    295set LV,			-LOCAL_SIZE		# stack offset
    296
    297set EXC_SR,		0x4			# stack status register
    298set EXC_PC,		0x6			# stack pc
    299set EXC_VOFF,		0xa			# stacked vector offset
    300set EXC_EA,		0xc			# stacked <ea>
    301
    302set EXC_FP,		0x0			# frame pointer
    303
    304set EXC_AREGS,		-68			# offset of all address regs
    305set EXC_DREGS,		-100			# offset of all data regs
    306set EXC_FPREGS,		-36			# offset of all fp regs
    307
    308set EXC_A7,		EXC_AREGS+(7*4)		# offset of saved a7
    309set OLD_A7,		EXC_AREGS+(6*4)		# extra copy of saved a7
    310set EXC_A6,		EXC_AREGS+(6*4)		# offset of saved a6
    311set EXC_A5,		EXC_AREGS+(5*4)
    312set EXC_A4,		EXC_AREGS+(4*4)
    313set EXC_A3,		EXC_AREGS+(3*4)
    314set EXC_A2,		EXC_AREGS+(2*4)
    315set EXC_A1,		EXC_AREGS+(1*4)
    316set EXC_A0,		EXC_AREGS+(0*4)
    317set EXC_D7,		EXC_DREGS+(7*4)
    318set EXC_D6,		EXC_DREGS+(6*4)
    319set EXC_D5,		EXC_DREGS+(5*4)
    320set EXC_D4,		EXC_DREGS+(4*4)
    321set EXC_D3,		EXC_DREGS+(3*4)
    322set EXC_D2,		EXC_DREGS+(2*4)
    323set EXC_D1,		EXC_DREGS+(1*4)
    324set EXC_D0,		EXC_DREGS+(0*4)
    325
    326set EXC_FP0,		EXC_FPREGS+(0*12)	# offset of saved fp0
    327set EXC_FP1,		EXC_FPREGS+(1*12)	# offset of saved fp1
    328set EXC_FP2,		EXC_FPREGS+(2*12)	# offset of saved fp2 (not used)
    329
    330set FP_SCR1,		LV+80			# fp scratch 1
    331set FP_SCR1_EX,		FP_SCR1+0
    332set FP_SCR1_SGN,	FP_SCR1+2
    333set FP_SCR1_HI,		FP_SCR1+4
    334set FP_SCR1_LO,		FP_SCR1+8
    335
    336set FP_SCR0,		LV+68			# fp scratch 0
    337set FP_SCR0_EX,		FP_SCR0+0
    338set FP_SCR0_SGN,	FP_SCR0+2
    339set FP_SCR0_HI,		FP_SCR0+4
    340set FP_SCR0_LO,		FP_SCR0+8
    341
    342set FP_DST,		LV+56			# fp destination operand
    343set FP_DST_EX,		FP_DST+0
    344set FP_DST_SGN,		FP_DST+2
    345set FP_DST_HI,		FP_DST+4
    346set FP_DST_LO,		FP_DST+8
    347
    348set FP_SRC,		LV+44			# fp source operand
    349set FP_SRC_EX,		FP_SRC+0
    350set FP_SRC_SGN,		FP_SRC+2
    351set FP_SRC_HI,		FP_SRC+4
    352set FP_SRC_LO,		FP_SRC+8
    353
    354set USER_FPIAR,		LV+40			# FP instr address register
    355
    356set USER_FPSR,		LV+36			# FP status register
    357set FPSR_CC,		USER_FPSR+0		# FPSR condition codes
     358set FPSR_QBYTE,	USER_FPSR+1		# FPSR quotient byte
    359set FPSR_EXCEPT,	USER_FPSR+2		# FPSR exception status byte
    360set FPSR_AEXCEPT,	USER_FPSR+3		# FPSR accrued exception byte
    361
    362set USER_FPCR,		LV+32			# FP control register
    363set FPCR_ENABLE,	USER_FPCR+2		# FPCR exception enable
    364set FPCR_MODE,		USER_FPCR+3		# FPCR rounding mode control
    365
    366set L_SCR3,		LV+28			# integer scratch 3
    367set L_SCR2,		LV+24			# integer scratch 2
    368set L_SCR1,		LV+20			# integer scratch 1
    369
     370set STORE_FLG,		LV+19			# flag: operand store (i.e. not fcmp/ftst)
    371
    372set EXC_TEMP2,		LV+24			# temporary space
    373set EXC_TEMP,		LV+16			# temporary space
    374
    375set DTAG,		LV+15			# destination operand type
    376set STAG,		LV+14			# source operand type
    377
    378set SPCOND_FLG,		LV+10			# flag: special case (see below)
    379
    380set EXC_CC,		LV+8			# saved condition codes
    381set EXC_EXTWPTR,	LV+4			# saved current PC (active)
    382set EXC_EXTWORD,	LV+2			# saved extension word
    383set EXC_CMDREG,		LV+2			# saved extension word
    384set EXC_OPWORD,		LV+0			# saved operation word
    385
    386################################
    387
    388# Helpful macros
    389
    390set FTEMP,		0			# offsets within an
    391set FTEMP_EX,		0			# extended precision
    392set FTEMP_SGN,		2			# value saved in memory.
    393set FTEMP_HI,		4
    394set FTEMP_LO,		8
    395set FTEMP_GRS,		12
    396
    397set LOCAL,		0			# offsets within an
    398set LOCAL_EX,		0			# extended precision
    399set LOCAL_SGN,		2			# value saved in memory.
    400set LOCAL_HI,		4
    401set LOCAL_LO,		8
    402set LOCAL_GRS,		12
    403
    404set DST,		0			# offsets within an
    405set DST_EX,		0			# extended precision
    406set DST_HI,		4			# value saved in memory.
    407set DST_LO,		8
    408
    409set SRC,		0			# offsets within an
    410set SRC_EX,		0			# extended precision
    411set SRC_HI,		4			# value saved in memory.
    412set SRC_LO,		8
    413
    414set SGL_LO,		0x3f81			# min sgl prec exponent
    415set SGL_HI,		0x407e			# max sgl prec exponent
    416set DBL_LO,		0x3c01			# min dbl prec exponent
    417set DBL_HI,		0x43fe			# max dbl prec exponent
    418set EXT_LO,		0x0			# min ext prec exponent
    419set EXT_HI,		0x7ffe			# max ext prec exponent
    420
    421set EXT_BIAS,		0x3fff			# extended precision bias
    422set SGL_BIAS,		0x007f			# single precision bias
    423set DBL_BIAS,		0x03ff			# double precision bias
    424
    425set NORM,		0x00			# operand type for STAG/DTAG
    426set ZERO,		0x01			# operand type for STAG/DTAG
    427set INF,		0x02			# operand type for STAG/DTAG
    428set QNAN,		0x03			# operand type for STAG/DTAG
    429set DENORM,		0x04			# operand type for STAG/DTAG
    430set SNAN,		0x05			# operand type for STAG/DTAG
    431set UNNORM,		0x06			# operand type for STAG/DTAG
    432
    433##################
    434# FPSR/FPCR bits #
    435##################
    436set neg_bit,		0x3			# negative result
    437set z_bit,		0x2			# zero result
    438set inf_bit,		0x1			# infinite result
    439set nan_bit,		0x0			# NAN result
    440
    441set q_sn_bit,		0x7			# sign bit of quotient byte
    442
    443set bsun_bit,		7			# branch on unordered
    444set snan_bit,		6			# signalling NAN
    445set operr_bit,		5			# operand error
    446set ovfl_bit,		4			# overflow
    447set unfl_bit,		3			# underflow
    448set dz_bit,		2			# divide by zero
    449set inex2_bit,		1			# inexact result 2
    450set inex1_bit,		0			# inexact result 1
    451
    452set aiop_bit,		7			# accrued inexact operation bit
    453set aovfl_bit,		6			# accrued overflow bit
    454set aunfl_bit,		5			# accrued underflow bit
    455set adz_bit,		4			# accrued dz bit
    456set ainex_bit,		3			# accrued inexact bit
    457
    458#############################
    459# FPSR individual bit masks #
    460#############################
    461set neg_mask,		0x08000000		# negative bit mask (lw)
    462set inf_mask,		0x02000000		# infinity bit mask (lw)
    463set z_mask,		0x04000000		# zero bit mask (lw)
    464set nan_mask,		0x01000000		# nan bit mask (lw)
    465
    466set neg_bmask,		0x08			# negative bit mask (byte)
    467set inf_bmask,		0x02			# infinity bit mask (byte)
    468set z_bmask,		0x04			# zero bit mask (byte)
    469set nan_bmask,		0x01			# nan bit mask (byte)
    470
    471set bsun_mask,		0x00008000		# bsun exception mask
    472set snan_mask,		0x00004000		# snan exception mask
    473set operr_mask,		0x00002000		# operr exception mask
    474set ovfl_mask,		0x00001000		# overflow exception mask
    475set unfl_mask,		0x00000800		# underflow exception mask
    476set dz_mask,		0x00000400		# dz exception mask
    477set inex2_mask,		0x00000200		# inex2 exception mask
    478set inex1_mask,		0x00000100		# inex1 exception mask
    479
    480set aiop_mask,		0x00000080		# accrued illegal operation
    481set aovfl_mask,		0x00000040		# accrued overflow
    482set aunfl_mask,		0x00000020		# accrued underflow
    483set adz_mask,		0x00000010		# accrued divide by zero
    484set ainex_mask,		0x00000008		# accrued inexact
    485
    486######################################
    487# FPSR combinations used in the FPSP #
    488######################################
    489set dzinf_mask,		inf_mask+dz_mask+adz_mask
    490set opnan_mask,		nan_mask+operr_mask+aiop_mask
    491set nzi_mask,		0x01ffffff		#clears N, Z, and I
    492set unfinx_mask,	unfl_mask+inex2_mask+aunfl_mask+ainex_mask
    493set unf2inx_mask,	unfl_mask+inex2_mask+ainex_mask
    494set ovfinx_mask,	ovfl_mask+inex2_mask+aovfl_mask+ainex_mask
    495set inx1a_mask,		inex1_mask+ainex_mask
    496set inx2a_mask,		inex2_mask+ainex_mask
    497set snaniop_mask,	nan_mask+snan_mask+aiop_mask
    498set snaniop2_mask,	snan_mask+aiop_mask
    499set naniop_mask,	nan_mask+aiop_mask
    500set neginf_mask,	neg_mask+inf_mask
    501set infaiop_mask,	inf_mask+aiop_mask
    502set negz_mask,		neg_mask+z_mask
    503set opaop_mask,		operr_mask+aiop_mask
    504set unfl_inx_mask,	unfl_mask+aunfl_mask+ainex_mask
    505set ovfl_inx_mask,	ovfl_mask+aovfl_mask+ainex_mask
    506
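# These combinations let the handlers update several FPSR fields with a
# single instruction. For example, in C-style pseudocode (a sketch; the
# user_fpsr variable stands for the USER_FPSR image in the stack frame):
#
#	/* default result of x/0.0 is +/-inf: set the I condition code,
#	 * the DZ exception bit, and the accrued DZ bit in one operation */
#	user_fpsr |= dzinf_mask;	/* == inf_mask | dz_mask | adz_mask */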
    507#########
    508# misc. #
    509#########
    510set rnd_stky_bit,	29			# stky bit pos in longword
    511
    512set sign_bit,		0x7			# sign bit
    513set signan_bit,		0x6			# signalling nan bit
    514
    515set sgl_thresh,		0x3f81			# minimum sgl exponent
    516set dbl_thresh,		0x3c01			# minimum dbl exponent
    517
    518set x_mode,		0x0			# extended precision
    519set s_mode,		0x4			# single precision
    520set d_mode,		0x8			# double precision
    521
    522set rn_mode,		0x0			# round-to-nearest
    523set rz_mode,		0x1			# round-to-zero
     524set rm_mode,		0x2			# round-to-minus-infinity
    525set rp_mode,		0x3			# round-to-plus-infinity
    526
    527set mantissalen,	64			# length of mantissa in bits
    528
    529set BYTE,		1			# len(byte) == 1 byte
    530set WORD,		2			# len(word) == 2 bytes
     531set LONG,		4			# len(longword) == 4 bytes
    532
    533set BSUN_VEC,		0xc0			# bsun    vector offset
    534set INEX_VEC,		0xc4			# inexact vector offset
    535set DZ_VEC,		0xc8			# dz      vector offset
    536set UNFL_VEC,		0xcc			# unfl    vector offset
    537set OPERR_VEC,		0xd0			# operr   vector offset
    538set OVFL_VEC,		0xd4			# ovfl    vector offset
    539set SNAN_VEC,		0xd8			# snan    vector offset
    540
    541###########################
    542# SPecial CONDition FLaGs #
    543###########################
    544set ftrapcc_flg,	0x01			# flag bit: ftrapcc exception
    545set fbsun_flg,		0x02			# flag bit: bsun exception
    546set mia7_flg,		0x04			# flag bit: (a7)+ <ea>
    547set mda7_flg,		0x08			# flag bit: -(a7) <ea>
    548set fmovm_flg,		0x40			# flag bit: fmovm instruction
    549set immed_flg,		0x80			# flag bit: &<data> <ea>
    550
    551set ftrapcc_bit,	0x0
    552set fbsun_bit,		0x1
    553set mia7_bit,		0x2
    554set mda7_bit,		0x3
    555set immed_bit,		0x7
    556
    557##################################
    558# TRANSCENDENTAL "LAST-OP" FLAGS #
    559##################################
    560set FMUL_OP,		0x0			# fmul instr performed last
    561set FDIV_OP,		0x1			# fdiv performed last
    562set FADD_OP,		0x2			# fadd performed last
    563set FMOV_OP,		0x3			# fmov performed last
    564
    565#############
    566# CONSTANTS #
    567#############
    568T1:	long		0x40C62D38,0xD3D64634	# 16381 LOG2 LEAD
    569T2:	long		0x3D6F90AE,0xB1E75CC7	# 16381 LOG2 TRAIL
    570
    571PI:	long		0x40000000,0xC90FDAA2,0x2168C235,0x00000000
    572PIBY2:	long		0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000
    573
    574TWOBYPI:
    575	long		0x3FE45F30,0x6DC9C883
    576
    577#########################################################################
    578# XDEF ****************************************************************	#
    579#	_fpsp_ovfl(): 060FPSP entry point for FP Overflow exception.	#
    580#									#
    581#	This handler should be the first code executed upon taking the	#
    582#	FP Overflow exception in an operating system.			#
    583#									#
    584# XREF ****************************************************************	#
    585#	_imem_read_long() - read instruction longword			#
    586#	fix_skewed_ops() - adjust src operand in fsave frame		#
    587#	set_tag_x() - determine optype of src/dst operands		#
    588#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
    589#	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
    590#	load_fpn2() - load dst operand from FP regfile			#
    591#	fout() - emulate an opclass 3 instruction			#
     592#	tbl_unsupp - addr of table of emulation routines for opclass 0,2	#
    593#	_fpsp_done() - "callout" for 060FPSP exit (all work done!)	#
    594#	_real_ovfl() - "callout" for Overflow exception enabled code	#
    595#	_real_inex() - "callout" for Inexact exception enabled code	#
    596#	_real_trace() - "callout" for Trace exception code		#
    597#									#
    598# INPUT ***************************************************************	#
    599#	- The system stack contains the FP Ovfl exception stack frame	#
    600#	- The fsave frame contains the source operand			#
    601#									#
    602# OUTPUT **************************************************************	#
    603#	Overflow Exception enabled:					#
    604#	- The system stack is unchanged					#
    605#	- The fsave frame contains the adjusted src op for opclass 0,2	#
    606#	Overflow Exception disabled:					#
    607#	- The system stack is unchanged					#
    608#	- The "exception present" flag in the fsave frame is cleared	#
    609#									#
    610# ALGORITHM ***********************************************************	#
    611#	On the 060, if an FP overflow is present as the result of any	#
    612# instruction, the 060 will take an overflow exception whether the	#
    613# exception is enabled or disabled in the FPCR. For the disabled case,	#
     614# this handler emulates the instruction to determine what the correct	#
    615# default result should be for the operation. This default result is	#
    616# then stored in either the FP regfile, data regfile, or memory.	#
    617# Finally, the handler exits through the "callout" _fpsp_done()		#
    618# denoting that no exceptional conditions exist within the machine.	#
    619#	If the exception is enabled, then this handler must create the	#
     620# exceptional operand and place it in the fsave state frame, and store	#
    621# the default result (only if the instruction is opclass 3). For	#
    622# exceptions enabled, this handler must exit through the "callout"	#
    623# _real_ovfl() so that the operating system enabled overflow handler	#
    624# can handle this case.							#
    625#	Two other conditions exist. First, if overflow was disabled	#
    626# but the inexact exception was enabled, this handler must exit		#
    627# through the "callout" _real_inex() regardless of whether the result	#
    628# was inexact.								#
    629#	Also, in the case of an opclass three instruction where		#
    630# overflow was disabled and the trace exception was enabled, this	#
    631# handler must exit through the "callout" _real_trace().		#
    632#									#
    633#########################################################################
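#
# In C-style pseudocode, the exit logic implemented below is roughly
# (a sketch only, condensed from the ALGORITHM notes above):
#
#	emulate the instruction and store the default result;
#	if (FPCR overflow enabled)		exit via _real_ovfl();	/* EXOP left in fsave frame */
#	else if (FPCR inexact enabled)		exit via _real_inex();	/* even if the result was exact */
#	else if (opclass 3 && trace pending)	exit via _real_trace();
#	else					exit via _fpsp_done();
#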
    634
    635	global		_fpsp_ovfl
    636_fpsp_ovfl:
    637
    638#$#	sub.l		&24,%sp			# make room for src/dst
    639
    640	link.w		%a6,&-LOCAL_SIZE	# init stack frame
    641
    642	fsave		FP_SRC(%a6)		# grab the "busy" frame
    643
    644	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
    645	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
    646	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
    647
    648# the FPIAR holds the "current PC" of the faulting instruction
    649	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
    650	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
    651	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
    652	bsr.l		_imem_read_long		# fetch the instruction words
    653	mov.l		%d0,EXC_OPWORD(%a6)
    654
    655##############################################################################
    656
    657	btst		&0x5,EXC_CMDREG(%a6)	# is instr an fmove out?
    658	bne.w		fovfl_out
    659
    660
    661	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
    662	bsr.l		fix_skewed_ops		# fix src op
    663
    664# since, I believe, only NORMs and DENORMs can come through here,
    665# maybe we can avoid the subroutine call.
    666	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
    667	bsr.l		set_tag_x		# tag the operand type
    668	mov.b		%d0,STAG(%a6)		# maybe NORM,DENORM
    669
    670# bit five of the fp extension word separates the monadic and dyadic operations
    671# that can pass through fpsp_ovfl(). remember that fcmp, ftst, and fsincos
    672# will never take this exception.
    673	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
    674	beq.b		fovfl_extract		# monadic
    675
    676	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
    677	bsr.l		load_fpn2		# load dst into FP_DST
    678
    679	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
    680	bsr.l		set_tag_x		# tag the operand type
    681	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
    682	bne.b		fovfl_op2_done		# no
    683	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
    684fovfl_op2_done:
    685	mov.b		%d0,DTAG(%a6)		# save dst optype tag
    686
    687fovfl_extract:
    688
    689#$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
    690#$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
    691#$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
    692#$#	mov.l		FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
    693#$#	mov.l		FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
    694#$#	mov.l		FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)
    695
    696	clr.l		%d0
    697	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode
    698
    699	mov.b		1+EXC_CMDREG(%a6),%d1
    700	andi.w		&0x007f,%d1		# extract extension
    701
     702	andi.l		&0x00ff01ff,USER_FPSR(%a6) # zero all but accrued field
    703
    704	fmov.l		&0x0,%fpcr		# zero current control regs
    705	fmov.l		&0x0,%fpsr
    706
    707	lea		FP_SRC(%a6),%a0
    708	lea		FP_DST(%a6),%a1
    709
    710# maybe we can make these entry points ONLY the OVFL entry points of each routine.
    711	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
    712	jsr		(tbl_unsupp.l,%pc,%d1.l*1)
    713
    714# the operation has been emulated. the result is in fp0.
    715# the EXOP, if an exception occurred, is in fp1.
    716# we must save the default result regardless of whether
    717# traps are enabled or disabled.
    718	bfextu		EXC_CMDREG(%a6){&6:&3},%d0
    719	bsr.l		store_fpreg
    720
    721# the exceptional possibilities we have left ourselves with are ONLY overflow
    722# and inexact. and, the inexact is such that overflow occurred and was disabled
    723# but inexact was enabled.
    724	btst		&ovfl_bit,FPCR_ENABLE(%a6)
    725	bne.b		fovfl_ovfl_on
    726
    727	btst		&inex2_bit,FPCR_ENABLE(%a6)
    728	bne.b		fovfl_inex_on
    729
    730	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
    731	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
    732	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
    733
    734	unlk		%a6
    735#$#	add.l		&24,%sp
    736	bra.l		_fpsp_done
    737
    738# overflow is enabled AND overflow, of course, occurred. so, we have the EXOP
    739# in fp1. now, simply jump to _real_ovfl()!
    740fovfl_ovfl_on:
    741	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP (fp1) to stack
    742
    743	mov.w		&0xe005,2+FP_SRC(%a6)	# save exc status
    744
    745	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
    746	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
    747	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
    748
    749	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!
    750
    751	unlk		%a6
    752
    753	bra.l		_real_ovfl
    754
    755# overflow occurred but is disabled. meanwhile, inexact is enabled. Therefore,
    756# we must jump to real_inex().
    757fovfl_inex_on:
    758
    759	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP (fp1) to stack
    760
    761	mov.b		&0xc4,1+EXC_VOFF(%a6)	# vector offset = 0xc4
    762	mov.w		&0xe001,2+FP_SRC(%a6)	# save exc status
    763
    764	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
    765	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
    766	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
    767
    768	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!
    769
    770	unlk		%a6
    771
    772	bra.l		_real_inex
    773
    774########################################################################
    775fovfl_out:
    776
    777
    778#$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
    779#$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
    780#$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
    781
    782# the src operand is definitely a NORM(!), so tag it as such
    783	mov.b		&NORM,STAG(%a6)		# set src optype tag
    784
    785	clr.l		%d0
    786	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode
    787
     788	and.l		&0xffff00ff,USER_FPSR(%a6) # zero all but accrued field
    789
    790	fmov.l		&0x0,%fpcr		# zero current control regs
    791	fmov.l		&0x0,%fpsr
    792
    793	lea		FP_SRC(%a6),%a0		# pass ptr to src operand
    794
    795	bsr.l		fout
    796
    797	btst		&ovfl_bit,FPCR_ENABLE(%a6)
    798	bne.w		fovfl_ovfl_on
    799
    800	btst		&inex2_bit,FPCR_ENABLE(%a6)
    801	bne.w		fovfl_inex_on
    802
    803	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
    804	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
    805	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
    806
    807	unlk		%a6
    808#$#	add.l		&24,%sp
    809
    810	btst		&0x7,(%sp)		# is trace on?
    811	beq.l		_fpsp_done		# no
    812
    813	fmov.l		%fpiar,0x8(%sp)		# "Current PC" is in FPIAR
    814	mov.w		&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x024
    815	bra.l		_real_trace
    816
    817#########################################################################
    818# XDEF ****************************************************************	#
    819#	_fpsp_unfl(): 060FPSP entry point for FP Underflow exception.	#
    820#									#
    821#	This handler should be the first code executed upon taking the	#
    822#	FP Underflow exception in an operating system.			#
    823#									#
    824# XREF ****************************************************************	#
    825#	_imem_read_long() - read instruction longword			#
    826#	fix_skewed_ops() - adjust src operand in fsave frame		#
    827#	set_tag_x() - determine optype of src/dst operands		#
    828#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
    829#	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
    830#	load_fpn2() - load dst operand from FP regfile			#
    831#	fout() - emulate an opclass 3 instruction			#
     832#	tbl_unsupp - addr of table of emulation routines for opclass 0,2	#
    833#	_fpsp_done() - "callout" for 060FPSP exit (all work done!)	#
     834#	_real_unfl() - "callout" for Underflow exception enabled code	#
    835#	_real_inex() - "callout" for Inexact exception enabled code	#
    836#	_real_trace() - "callout" for Trace exception code		#
    837#									#
    838# INPUT ***************************************************************	#
    839#	- The system stack contains the FP Unfl exception stack frame	#
    840#	- The fsave frame contains the source operand			#
    841#									#
    842# OUTPUT **************************************************************	#
    843#	Underflow Exception enabled:					#
    844#	- The system stack is unchanged					#
    845#	- The fsave frame contains the adjusted src op for opclass 0,2	#
    846#	Underflow Exception disabled:					#
    847#	- The system stack is unchanged					#
    848#	- The "exception present" flag in the fsave frame is cleared	#
    849#									#
    850# ALGORITHM ***********************************************************	#
    851#	On the 060, if an FP underflow is present as the result of any	#
    852# instruction, the 060 will take an underflow exception whether the	#
    853# exception is enabled or disabled in the FPCR. For the disabled case,	#
     854# this handler emulates the instruction to determine what the correct	#
    855# default result should be for the operation. This default result is	#
    856# then stored in either the FP regfile, data regfile, or memory.	#
    857# Finally, the handler exits through the "callout" _fpsp_done()		#
    858# denoting that no exceptional conditions exist within the machine.	#
    859#	If the exception is enabled, then this handler must create the	#
     860# exceptional operand and place it in the fsave state frame, and store	#
    861# the default result (only if the instruction is opclass 3). For	#
    862# exceptions enabled, this handler must exit through the "callout"	#
     863# _real_unfl() so that the operating system enabled underflow handler	#
    864# can handle this case.							#
    865#	Two other conditions exist. First, if underflow was disabled	#
     866# but the inexact exception was enabled and the result of the		#
     867# operation was inexact, this handler must exit through the		#
     868# "callout" _real_inex().						#
    869#	Also, in the case of an opclass three instruction where		#
    870# underflow was disabled and the trace exception was enabled, this	#
    871# handler must exit through the "callout" _real_trace().		#
    872#									#
    873#########################################################################
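#
# The exit logic below mirrors _fpsp_ovfl() with two differences: the
# inexact call-out is taken only if the emulated result really was
# inexact, and (for the opclass 0/2 path) an enabled-underflow exit is
# taken only if the emulation confirms that underflow occurred, because
# the '060 multiplier can flag a bogus underflow for the smallest
# normalized result (see the comments inside the handler). Roughly, in
# C-style pseudocode (a sketch only; the fmove-out path skips the FPSR
# checks):
#
#	emulate the instruction and store the default result;
#	if (FPCR underflow enabled && FPSR underflow set)	exit via _real_unfl();
#	else if (FPCR inexact enabled && FPSR inexact set)	exit via _real_inex();
#	else if (opclass 3 && trace pending)			exit via _real_trace();
#	else							exit via _fpsp_done();
#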
    874
    875	global		_fpsp_unfl
    876_fpsp_unfl:
    877
    878#$#	sub.l		&24,%sp			# make room for src/dst
    879
    880	link.w		%a6,&-LOCAL_SIZE	# init stack frame
    881
    882	fsave		FP_SRC(%a6)		# grab the "busy" frame
    883
    884	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
    885	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
    886	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
    887
    888# the FPIAR holds the "current PC" of the faulting instruction
    889	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
    890	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
    891	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
    892	bsr.l		_imem_read_long		# fetch the instruction words
    893	mov.l		%d0,EXC_OPWORD(%a6)
    894
    895##############################################################################
    896
    897	btst		&0x5,EXC_CMDREG(%a6)	# is instr an fmove out?
    898	bne.w		funfl_out
    899
    900
    901	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
    902	bsr.l		fix_skewed_ops		# fix src op
    903
    904	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
    905	bsr.l		set_tag_x		# tag the operand type
    906	mov.b		%d0,STAG(%a6)		# maybe NORM,DENORM
    907
    908# bit five of the fp ext word separates the monadic and dyadic operations
     909# that can pass through fpsp_unfl(). remember that fcmp and ftst
    910# will never take this exception.
    911	btst		&0x5,1+EXC_CMDREG(%a6)	# is op monadic or dyadic?
    912	beq.b		funfl_extract		# monadic
    913
    914# now, what's left that's not dyadic is fsincos. we can distinguish it
     915# from all dyadics by the 0110xxx bit pattern
    916	btst		&0x4,1+EXC_CMDREG(%a6)	# is op an fsincos?
    917	bne.b		funfl_extract		# yes
    918
    919	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
    920	bsr.l		load_fpn2		# load dst into FP_DST
    921
    922	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
    923	bsr.l		set_tag_x		# tag the operand type
    924	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
    925	bne.b		funfl_op2_done		# no
    926	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
    927funfl_op2_done:
    928	mov.b		%d0,DTAG(%a6)		# save dst optype tag
    929
    930funfl_extract:
    931
    932#$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
    933#$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
    934#$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
    935#$#	mov.l		FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
    936#$#	mov.l		FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
    937#$#	mov.l		FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)
    938
    939	clr.l		%d0
    940	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode
    941
    942	mov.b		1+EXC_CMDREG(%a6),%d1
    943	andi.w		&0x007f,%d1		# extract extension
    944
    945	andi.l		&0x00ff01ff,USER_FPSR(%a6)
    946
    947	fmov.l		&0x0,%fpcr		# zero current control regs
    948	fmov.l		&0x0,%fpsr
    949
    950	lea		FP_SRC(%a6),%a0
    951	lea		FP_DST(%a6),%a1
    952
     953# maybe we can make these entry points ONLY the UNFL entry points of each routine.
    954	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
    955	jsr		(tbl_unsupp.l,%pc,%d1.l*1)
    956
    957	bfextu		EXC_CMDREG(%a6){&6:&3},%d0
    958	bsr.l		store_fpreg
    959
    960# The `060 FPU multiplier hardware is such that if the result of a
    961# multiply operation is the smallest possible normalized number
    962# (0x00000000_80000000_00000000), then the machine will take an
    963# underflow exception. Since this is incorrect, we need to check
    964# if our emulation, after re-doing the operation, decided that
    965# no underflow was called for. We do these checks only in
    966# funfl_{unfl,inex}_on() because w/ both exceptions disabled, this
    967# special case will simply exit gracefully with the correct result.
    968
     969# the exceptional possibilities we have left ourselves with are ONLY underflow
     970# and inexact. and, the inexact is such that underflow occurred and was disabled
     971# but inexact was enabled.
    972	btst		&unfl_bit,FPCR_ENABLE(%a6)
    973	bne.b		funfl_unfl_on
    974
    975funfl_chkinex:
    976	btst		&inex2_bit,FPCR_ENABLE(%a6)
    977	bne.b		funfl_inex_on
    978
    979funfl_exit:
    980	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
    981	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
    982	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
    983
    984	unlk		%a6
    985#$#	add.l		&24,%sp
    986	bra.l		_fpsp_done
    987
     988# underflow is enabled AND underflow, of course, occurred. so, we have the EXOP
     989# in fp1 (don't forget to save fp0). what to do now?
     990# well, we simply have to go to _real_unfl()!
    991funfl_unfl_on:
    992
    993# The `060 FPU multiplier hardware is such that if the result of a
    994# multiply operation is the smallest possible normalized number
    995# (0x00000000_80000000_00000000), then the machine will take an
    996# underflow exception. Since this is incorrect, we check here to see
    997# if our emulation, after re-doing the operation, decided that
    998# no underflow was called for.
    999	btst		&unfl_bit,FPSR_EXCEPT(%a6)
   1000	beq.w		funfl_chkinex
   1001
   1002funfl_unfl_on2:
   1003	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP (fp1) to stack
   1004
   1005	mov.w		&0xe003,2+FP_SRC(%a6)	# save exc status
   1006
   1007	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
   1008	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   1009	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   1010
   1011	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!
   1012
   1013	unlk		%a6
   1014
   1015	bra.l		_real_unfl
   1016
   1017# underflow occurred but is disabled. meanwhile, inexact is enabled. Therefore,
   1018# we must jump to real_inex().
   1019funfl_inex_on:
   1020
   1021# The `060 FPU multiplier hardware is such that if the result of a
   1022# multiply operation is the smallest possible normalized number
   1023# (0x00000000_80000000_00000000), then the machine will take an
   1024# underflow exception.
   1025# But, whether bogus or not, if inexact is enabled AND it occurred,
   1026# then we have to branch to real_inex.
   1027
   1028	btst		&inex2_bit,FPSR_EXCEPT(%a6)
   1029	beq.w		funfl_exit
   1030
   1031funfl_inex_on2:
   1032
   1033	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP to stack
   1034
   1035	mov.b		&0xc4,1+EXC_VOFF(%a6)	# vector offset = 0xc4
   1036	mov.w		&0xe001,2+FP_SRC(%a6)	# save exc status
   1037
   1038	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
   1039	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   1040	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   1041
   1042	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!
   1043
   1044	unlk		%a6
   1045
   1046	bra.l		_real_inex
   1047
   1048#######################################################################
   1049funfl_out:
   1050
   1051
   1052#$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
   1053#$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
   1054#$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
   1055
   1056# the src operand is definitely a NORM(!), so tag it as such
   1057	mov.b		&NORM,STAG(%a6)		# set src optype tag
   1058
   1059	clr.l		%d0
   1060	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode
   1061
    1062	and.l		&0xffff00ff,USER_FPSR(%a6) # zero all but accrued field
   1063
   1064	fmov.l		&0x0,%fpcr		# zero current control regs
   1065	fmov.l		&0x0,%fpsr
   1066
   1067	lea		FP_SRC(%a6),%a0		# pass ptr to src operand
   1068
   1069	bsr.l		fout
   1070
   1071	btst		&unfl_bit,FPCR_ENABLE(%a6)
   1072	bne.w		funfl_unfl_on2
   1073
   1074	btst		&inex2_bit,FPCR_ENABLE(%a6)
   1075	bne.w		funfl_inex_on2
   1076
   1077	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
   1078	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   1079	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   1080
   1081	unlk		%a6
   1082#$#	add.l		&24,%sp
   1083
   1084	btst		&0x7,(%sp)		# is trace on?
   1085	beq.l		_fpsp_done		# no
   1086
   1087	fmov.l		%fpiar,0x8(%sp)		# "Current PC" is in FPIAR
   1088	mov.w		&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x024
   1089	bra.l		_real_trace
   1090
   1091#########################################################################
   1092# XDEF ****************************************************************	#
   1093#	_fpsp_unsupp(): 060FPSP entry point for FP "Unimplemented	#
   1094#		        Data Type" exception.				#
   1095#									#
   1096#	This handler should be the first code executed upon taking the	#
   1097#	FP Unimplemented Data Type exception in an operating system.	#
   1098#									#
   1099# XREF ****************************************************************	#
   1100#	_imem_read_{word,long}() - read instruction word/longword	#
   1101#	fix_skewed_ops() - adjust src operand in fsave frame		#
   1102#	set_tag_x() - determine optype of src/dst operands		#
   1103#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
   1104#	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
   1105#	load_fpn2() - load dst operand from FP regfile			#
   1106#	load_fpn1() - load src operand from FP regfile			#
   1107#	fout() - emulate an opclass 3 instruction			#
    1108#	tbl_unsupp - addr of table of emulation routines for opclass 0,2	#
   1109#	_real_inex() - "callout" to operating system inexact handler	#
   1110#	_fpsp_done() - "callout" for exit; work all done		#
   1111#	_real_trace() - "callout" for Trace enabled exception		#
   1112#	funimp_skew() - adjust fsave src ops to "incorrect" value	#
   1113#	_real_snan() - "callout" for SNAN exception			#
   1114#	_real_operr() - "callout" for OPERR exception			#
   1115#	_real_ovfl() - "callout" for OVFL exception			#
   1116#	_real_unfl() - "callout" for UNFL exception			#
   1117#	get_packed() - fetch packed operand from memory			#
   1118#									#
   1119# INPUT ***************************************************************	#
   1120#	- The system stack contains the "Unimp Data Type" stk frame	#
   1121#	- The fsave frame contains the ssrc op (for UNNORM/DENORM)	#
   1122#									#
   1123# OUTPUT **************************************************************	#
   1124#	If Inexact exception (opclass 3):				#
   1125#	- The system stack is changed to an Inexact exception stk frame	#
   1126#	If SNAN exception (opclass 3):					#
   1127#	- The system stack is changed to an SNAN exception stk frame	#
   1128#	If OPERR exception (opclass 3):					#
   1129#	- The system stack is changed to an OPERR exception stk frame	#
   1130#	If OVFL exception (opclass 3):					#
   1131#	- The system stack is changed to an OVFL exception stk frame	#
   1132#	If UNFL exception (opclass 3):					#
   1133#	- The system stack is changed to an UNFL exception stack frame	#
   1134#	If Trace exception enabled:					#
   1135#	- The system stack is changed to a Trace exception stack frame	#
   1136#	Else: (normal case)						#
   1137#	- Correct result has been stored as appropriate			#
   1138#									#
   1139# ALGORITHM ***********************************************************	#
   1140#	Two main instruction types can enter here: (1) DENORM or UNNORM	#
   1141# unimplemented data types. These can be either opclass 0,2 or 3	#
   1142# instructions, and (2) PACKED unimplemented data format instructions	#
   1143# also of opclasses 0,2, or 3.						#
   1144#	For UNNORM/DENORM opclass 0 and 2, the handler fetches the src	#
   1145# operand from the fsave state frame and the dst operand (if dyadic)	#
   1146# from the FP register file. The instruction is then emulated by	#
   1147# choosing an emulation routine from a table of routines indexed by	#
   1148# instruction type. Once the instruction has been emulated and result	#
   1149# saved, then we check to see if any enabled exceptions resulted from	#
   1150# instruction emulation. If none, then we exit through the "callout"	#
   1151# _fpsp_done(). If there is an enabled FP exception, then we insert	#
   1152# this exception into the FPU in the fsave state frame and then exit	#
   1153# through _fpsp_done().							#
   1154#	PACKED opclass 0 and 2 is similar in how the instruction is	#
   1155# emulated and exceptions handled. The differences occur in how the	#
   1156# handler loads the packed op (by calling get_packed() routine) and	#
   1157# by the fact that a Trace exception could be pending for PACKED ops.	#
   1158# If a Trace exception is pending, then the current exception stack	#
   1159# frame is changed to a Trace exception stack frame and an exit is	#
   1160# made through _real_trace().						#
   1161#	For UNNORM/DENORM opclass 3, the actual move out to memory is	#
   1162# performed by calling the routine fout(). If no exception should occur	#
   1163# as the result of emulation, then an exit either occurs through	#
   1164# _fpsp_done() or through _real_trace() if a Trace exception is pending	#
   1165# (a Trace stack frame must be created here, too). If an FP exception	#
   1166# should occur, then we must create an exception stack frame of that	#
   1167# type and jump to either _real_snan(), _real_operr(), _real_inex(),	#
   1168# _real_unfl(), or _real_ovfl() as appropriate. PACKED opclass 3	#
   1169# emulation is performed in a similar manner.				#
   1170#									#
   1171#########################################################################
   1172
   1173#
   1174# (1) DENORM and UNNORM (unimplemented) data types:
   1175#
   1176#				post-instruction
   1177#				*****************
   1178#				*      EA	*
   1179#	 pre-instruction	*		*
   1180#	*****************	*****************
   1181#	* 0x0 *  0x0dc  *	* 0x3 *  0x0dc  *
   1182#	*****************	*****************
   1183#	*     Next	*	*     Next	*
   1184#	*      PC	*	*      PC	*
   1185#	*****************	*****************
   1186#	*      SR	*	*      SR	*
   1187#	*****************	*****************
   1188#
   1189# (2) PACKED format (unsupported) opclasses two and three:
   1190#	*****************
   1191#	*      EA	*
   1192#	*		*
   1193#	*****************
   1194#	* 0x2 *  0x0dc	*
   1195#	*****************
   1196#	*     Next	*
   1197#	*      PC	*
   1198#	*****************
   1199#	*      SR	*
   1200#	*****************
   1201#
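# In C-style pseudocode, the overall flow of this handler is roughly
# (a sketch only, condensed from the ALGORITHM notes above):
#
#	if (opclass 3)			/* fmove out */
#		emulate via fout() and exit (possibly via _real_trace() or
#		one of the _real_*() exception call-outs);
#	else if (src format is packed)
#		fetch the operand via get_packed() and emulate;
#	else {
#		fix/tag the src (and dst, if dyadic); emulate via tbl_unsupp;
#		if (no enabled exception resulted)
#			store the result and exit via _fpsp_done();
#		else
#			insert the exception into the fsave frame and exit via
#			_fpsp_done() (one inexact corner case exits via
#			_real_inex(); see the inline comments below);
#	}
#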
   1202	global		_fpsp_unsupp
   1203_fpsp_unsupp:
   1204
   1205	link.w		%a6,&-LOCAL_SIZE	# init stack frame
   1206
   1207	fsave		FP_SRC(%a6)		# save fp state
   1208
   1209	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
   1210	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
   1211	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
   1212
   1213	btst		&0x5,EXC_SR(%a6)	# user or supervisor mode?
   1214	bne.b		fu_s
   1215fu_u:
   1216	mov.l		%usp,%a0		# fetch user stack pointer
   1217	mov.l		%a0,EXC_A7(%a6)		# save on stack
   1218	bra.b		fu_cont
   1219# if the exception is an opclass zero or two unimplemented data type
   1220# exception, then the a7' calculated here is wrong since it doesn't
   1221# stack an ea. however, we don't need an a7' for this case anyways.
   1222fu_s:
   1223	lea		0x4+EXC_EA(%a6),%a0	# load old a7'
   1224	mov.l		%a0,EXC_A7(%a6)		# save on stack
   1225
   1226fu_cont:
   1227
   1228# the FPIAR holds the "current PC" of the faulting instruction
   1229# the FPIAR should be set correctly for ALL exceptions passing through
   1230# this point.
   1231	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
   1232	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   1233	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
   1234	bsr.l		_imem_read_long		# fetch the instruction words
   1235	mov.l		%d0,EXC_OPWORD(%a6)	# store OPWORD and EXTWORD
   1236
   1237############################
   1238
   1239	clr.b		SPCOND_FLG(%a6)		# clear special condition flag
   1240
   1241# Separate opclass three (fpn-to-mem) ops since they have a different
   1242# stack frame and protocol.
   1243	btst		&0x5,EXC_CMDREG(%a6)	# is it an fmove out?
   1244	bne.w		fu_out			# yes
   1245
   1246# Separate packed opclass two instructions.
   1247	bfextu		EXC_CMDREG(%a6){&0:&6},%d0
   1248	cmpi.b		%d0,&0x13
   1249	beq.w		fu_in_pack
   1250
   1251
   1252# I'm not sure at this point what FPSR bits are valid for this instruction.
   1253# so, since the emulation routines re-create them anyways, zero exception field
   1254	andi.l		&0x00ff00ff,USER_FPSR(%a6) # zero exception field
   1255
   1256	fmov.l		&0x0,%fpcr		# zero current control regs
   1257	fmov.l		&0x0,%fpsr
   1258
   1259# Opclass two w/ memory-to-fpn operation will have an incorrect extended
   1260# precision format if the src format was single or double and the
   1261# source data type was an INF, NAN, DENORM, or UNNORM
   1262	lea		FP_SRC(%a6),%a0		# pass ptr to input
   1263	bsr.l		fix_skewed_ops
   1264
   1265# we don't know whether the src operand or the dst operand (or both) is the
   1266# UNNORM or DENORM. call the function that tags the operand type. if the
   1267# input is an UNNORM, then convert it to a NORM, DENORM, or ZERO.
   1268	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
   1269	bsr.l		set_tag_x		# tag the operand type
   1270	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
   1271	bne.b		fu_op2			# no
   1272	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
   1273
   1274fu_op2:
   1275	mov.b		%d0,STAG(%a6)		# save src optype tag
   1276
   1277	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
   1278
   1279# bit five of the fp extension word separates the monadic and dyadic operations
   1280# at this point
   1281	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
   1282	beq.b		fu_extract		# monadic
   1283	cmpi.b		1+EXC_CMDREG(%a6),&0x3a	# is operation an ftst?
   1284	beq.b		fu_extract		# yes, so it's monadic, too
   1285
   1286	bsr.l		load_fpn2		# load dst into FP_DST
   1287
   1288	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
   1289	bsr.l		set_tag_x		# tag the operand type
   1290	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
   1291	bne.b		fu_op2_done		# no
   1292	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
   1293fu_op2_done:
   1294	mov.b		%d0,DTAG(%a6)		# save dst optype tag
   1295
   1296fu_extract:
   1297	clr.l		%d0
   1298	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode/prec
   1299
   1300	bfextu		1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension
   1301
   1302	lea		FP_SRC(%a6),%a0
   1303	lea		FP_DST(%a6),%a1
   1304
   1305	mov.l		(tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr
   1306	jsr		(tbl_unsupp.l,%pc,%d1.l*1)
   1307
   1308#
   1309# Exceptions in order of precedence:
   1310#	BSUN	: none
   1311#	SNAN	: all dyadic ops
   1312#	OPERR	: fsqrt(-NORM)
   1313#	OVFL	: all except ftst,fcmp
   1314#	UNFL	: all except ftst,fcmp
   1315#	DZ	: fdiv
   1316#	INEX2	: all except ftst,fcmp
   1317#	INEX1	: none (packed doesn't go through here)
   1318#
   1319
   1320# we determine the highest priority exception(if any) set by the
   1321# emulation routine that has also been enabled by the user.
   1322	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions set
   1323	bne.b		fu_in_ena		# some are enabled
   1324
   1325fu_in_cont:
   1326# fcmp and ftst do not store any result.
   1327	mov.b		1+EXC_CMDREG(%a6),%d0	# fetch extension
   1328	andi.b		&0x38,%d0		# extract bits 3-5
   1329	cmpi.b		%d0,&0x38		# is instr fcmp or ftst?
   1330	beq.b		fu_in_exit		# yes
   1331
   1332	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
   1333	bsr.l		store_fpreg		# store the result
   1334
   1335fu_in_exit:
   1336
   1337	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   1338	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   1339	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   1340
   1341	unlk		%a6
   1342
   1343	bra.l		_fpsp_done
   1344
   1345fu_in_ena:
   1346	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled
   1347	bfffo		%d0{&24:&8},%d0		# find highest priority exception
   1348	bne.b		fu_in_exc		# there is at least one set
   1349
   1350#
   1351# No exceptions occurred that were also enabled. Now:
   1352#
   1353#	if (OVFL && ovfl_disabled && inexact_enabled) {
   1354#	    branch to _real_inex() (even if the result was exact!);
   1355#	} else {
   1356#	    save the result in the proper fp reg (unless the op is fcmp or ftst);
   1357#	    return;
   1358#	}
   1359#
   1360	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
   1361	beq.b		fu_in_cont		# no
   1362
   1363fu_in_ovflchk:
   1364	btst		&inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
   1365	beq.b		fu_in_cont		# no
   1366	bra.w		fu_in_exc_ovfl		# go insert overflow frame
   1367
   1368#
   1369# An exception occurred and that exception was enabled:
   1370#
   1371#	shift enabled exception field into lo byte of d0;
   1372#	if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
   1373#	    ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
   1374#		/*
   1375#		 * this is the case where we must call _real_inex() now or else
   1376#		 * there will be no other way to pass it the exceptional operand
   1377#		 */
   1378#		call _real_inex();
   1379#	} else {
   1380#		restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
   1381#	}
   1382#
   1383fu_in_exc:
   1384	subi.l		&24,%d0			# fix offset to be 0-8
   1385	cmpi.b		%d0,&0x6		# is exception INEX? (6)
   1386	bne.b		fu_in_exc_exit		# no
   1387
   1388# the enabled exception was inexact
   1389	btst		&unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur?
   1390	bne.w		fu_in_exc_unfl		# yes
   1391	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur?
   1392	bne.w		fu_in_exc_ovfl		# yes
   1393
   1394# here, we insert the correct fsave status value into the fsave frame for the
   1395# corresponding exception. the operand in the fsave frame should be the original
   1396# src operand.
   1397fu_in_exc_exit:
   1398	mov.l		%d0,-(%sp)		# save d0
   1399	bsr.l		funimp_skew		# skew sgl or dbl inputs
   1400	mov.l		(%sp)+,%d0		# restore d0
   1401
   1402	mov.w		(tbl_except.b,%pc,%d0.w*2),2+FP_SRC(%a6) # create exc status
   1403
   1404	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   1405	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   1406	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   1407
   1408	frestore	FP_SRC(%a6)		# restore src op
   1409
   1410	unlk		%a6
   1411
   1412	bra.l		_fpsp_done
   1413
   1414tbl_except:
   1415	short		0xe000,0xe006,0xe004,0xe005
   1416	short		0xe003,0xe002,0xe001,0xe001
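# (the entries above appear to be the fsave status words stuffed for each
# priority index: 0=BSUN($e000), 1=SNAN($e006), 2=OPERR($e004), 3=OVFL($e005),
# 4=UNFL($e003), 5=DZ($e002), 6=INEX2($e001), 7=INEX1($e001) -- the same
# values written explicitly by fu_snan, fu_operr, fu_ovfl, etc. below.)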
   1417
   1418fu_in_exc_unfl:
   1419	mov.w		&0x4,%d0
   1420	bra.b		fu_in_exc_exit
   1421fu_in_exc_ovfl:
   1422	mov.w		&0x03,%d0
   1423	bra.b		fu_in_exc_exit
   1424
   1425# If the input operand to this operation was opclass two and a single
   1426# or double precision denorm, inf, or nan, the operand needs to be
   1427# "corrected" in order to have the proper equivalent extended precision
   1428# number.
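# in rough C-like terms, the single precision case below amounts to:
#
#	exp = LOCAL_EX & 0x7fff;
#	if (exp == 0x3f80) {			/* skewed denorm or zero */
#		LOCAL_HI &= 0x7fffffff;		/* clear j-bit */
#		if (mantissa == 0)
#			LOCAL_EX &= 0x8000;	/* true zero */
#		else				/* renormalize */
#			LOCAL_EX = sign | (0x3f81 - norm_shift);
#	} else if (exp == 0x407f) {		/* skewed inf/nan */
#		clear j-bit; LOCAL_EX |= 0x7fff;
#	}
#
# the double precision case is the same with $3c00/$43ff/$3c01 instead.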
   1429	global		fix_skewed_ops
   1430fix_skewed_ops:
   1431	bfextu		EXC_CMDREG(%a6){&0:&6},%d0 # extract opclass,src fmt
   1432	cmpi.b		%d0,&0x11		# is class = 2 & fmt = sgl?
   1433	beq.b		fso_sgl			# yes
   1434	cmpi.b		%d0,&0x15		# is class = 2 & fmt = dbl?
   1435	beq.b		fso_dbl			# yes
   1436	rts					# no
   1437
   1438fso_sgl:
   1439	mov.w		LOCAL_EX(%a0),%d0	# fetch src exponent
   1440	andi.w		&0x7fff,%d0		# strip sign
   1441	cmpi.w		%d0,&0x3f80		# is |exp| == $3f80?
   1442	beq.b		fso_sgl_dnrm_zero	# yes
   1443	cmpi.w		%d0,&0x407f		# no; is |exp| == $407f?
   1444	beq.b		fso_infnan		# yes
   1445	rts					# no
   1446
   1447fso_sgl_dnrm_zero:
   1448	andi.l		&0x7fffffff,LOCAL_HI(%a0) # clear j-bit
   1449	beq.b		fso_zero		# it's a skewed zero
   1450fso_sgl_dnrm:
   1451# here, we count on norm not to alter a0...
   1452	bsr.l		norm			# normalize mantissa
   1453	neg.w		%d0			# -shft amt
   1454	addi.w		&0x3f81,%d0		# adjust new exponent
   1455	andi.w		&0x8000,LOCAL_EX(%a0)	# clear old exponent
   1456	or.w		%d0,LOCAL_EX(%a0)	# insert new exponent
   1457	rts
   1458
   1459fso_zero:
   1460	andi.w		&0x8000,LOCAL_EX(%a0)	# clear bogus exponent
   1461	rts
   1462
   1463fso_infnan:
   1464	andi.b		&0x7f,LOCAL_HI(%a0)	# clear j-bit
   1465	ori.w		&0x7fff,LOCAL_EX(%a0)	# make exponent = $7fff
   1466	rts
   1467
   1468fso_dbl:
   1469	mov.w		LOCAL_EX(%a0),%d0	# fetch src exponent
   1470	andi.w		&0x7fff,%d0		# strip sign
   1471	cmpi.w		%d0,&0x3c00		# is |exp| == $3c00?
   1472	beq.b		fso_dbl_dnrm_zero	# yes
   1473	cmpi.w		%d0,&0x43ff		# no; is |exp| == $43ff?
   1474	beq.b		fso_infnan		# yes
   1475	rts					# no
   1476
   1477fso_dbl_dnrm_zero:
   1478	andi.l		&0x7fffffff,LOCAL_HI(%a0) # clear j-bit
   1479	bne.b		fso_dbl_dnrm		# it's a skewed denorm
   1480	tst.l		LOCAL_LO(%a0)		# is it a zero?
   1481	beq.b		fso_zero		# yes
   1482fso_dbl_dnrm:
   1483# here, we count on norm not to alter a0...
   1484	bsr.l		norm			# normalize mantissa
   1485	neg.w		%d0			# -shft amt
   1486	addi.w		&0x3c01,%d0		# adjust new exponent
   1487	andi.w		&0x8000,LOCAL_EX(%a0)	# clear old exponent
   1488	or.w		%d0,LOCAL_EX(%a0)	# insert new exponent
   1489	rts
   1490
   1491#################################################################
   1492
   1493# fmove out took an unimplemented data type exception.
   1494# the src operand is in FP_SRC. Call _fout() to write out the result and
   1495# to determine which exceptions, if any, to take.
   1496fu_out:
   1497
   1498# Separate packed move outs from the UNNORM and DENORM move outs.
   1499	bfextu		EXC_CMDREG(%a6){&3:&3},%d0
   1500	cmpi.b		%d0,&0x3
   1501	beq.w		fu_out_pack
   1502	cmpi.b		%d0,&0x7
   1503	beq.w		fu_out_pack
   1504
   1505
   1506# I'm not sure at this point what FPSR bits are valid for this instruction.
   1507# so, since the emulation routines re-create them anyways, zero exception field.
   1508# fmove out doesn't affect ccodes.
   1509	and.l		&0xffff00ff,USER_FPSR(%a6) # zero exception field
   1510
   1511	fmov.l		&0x0,%fpcr		# zero current control regs
   1512	fmov.l		&0x0,%fpsr
   1513
   1514# the src can ONLY be a DENORM or an UNNORM! so, don't make any big subroutine
   1515# call here. just figure out what it is...
   1516	mov.w		FP_SRC_EX(%a6),%d0	# get exponent
   1517	andi.w		&0x7fff,%d0		# strip sign
   1518	beq.b		fu_out_denorm		# it's a DENORM
   1519
   1520	lea		FP_SRC(%a6),%a0
   1521	bsr.l		unnorm_fix		# yes; fix it
   1522
   1523	mov.b		%d0,STAG(%a6)
   1524
   1525	bra.b		fu_out_cont
   1526fu_out_denorm:
   1527	mov.b		&DENORM,STAG(%a6)
   1528fu_out_cont:
   1529
   1530	clr.l		%d0
   1531	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode/prec
   1532
   1533	lea		FP_SRC(%a6),%a0		# pass ptr to src operand
   1534
   1535	mov.l		(%a6),EXC_A6(%a6)	# in case a6 changes
   1536	bsr.l		fout			# call fmove out routine
   1537
   1538# Exceptions in order of precedence:
   1539#	BSUN	: none
   1540#	SNAN	: none
   1541#	OPERR	: fmove.{b,w,l} out of large UNNORM
   1542#	OVFL	: fmove.{s,d}
   1543#	UNFL	: fmove.{s,d,x}
   1544#	DZ	: none
   1545#	INEX2	: all
   1546#	INEX1	: none (packed doesn't travel through here)
   1547
   1548# determine the highest priority exception(if any) set by the
   1549# emulation routine that has also been enabled by the user.
   1550	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
   1551	bne.w		fu_out_ena		# some are enabled
   1552
   1553fu_out_done:
   1554
   1555	mov.l		EXC_A6(%a6),(%a6)	# in case a6 changed
   1556
   1557# on extended precision opclass three instructions using pre-decrement or
# post-increment addressing mode, the address register is not updated. if the
   1559# address register was the stack pointer used from user mode, then let's update
   1560# it here. if it was used from supervisor mode, then we have to handle this
   1561# as a special case.
   1562	btst		&0x5,EXC_SR(%a6)
   1563	bne.b		fu_out_done_s
   1564
   1565	mov.l		EXC_A7(%a6),%a0		# restore a7
   1566	mov.l		%a0,%usp
   1567
   1568fu_out_done_cont:
   1569	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   1570	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   1571	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   1572
   1573	unlk		%a6
   1574
   1575	btst		&0x7,(%sp)		# is trace on?
   1576	bne.b		fu_out_trace		# yes
   1577
   1578	bra.l		_fpsp_done
   1579
   1580# is the ea mode pre-decrement of the stack pointer from supervisor mode?
# ("fmov.x fpm,-(a7)") if so, the frame must be shifted "down" and the
# result stored where the frame used to be.
   1582fu_out_done_s:
   1583	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
   1584	bne.b		fu_out_done_cont
   1585
   1586# the extended precision result is still in fp0. but, we need to save it
   1587# somewhere on the stack until we can copy it to its final resting place.
   1588# here, we're counting on the top of the stack to be the old place-holders
   1589# for fp0/fp1 which have already been restored. that way, we can write
   1590# over those destinations with the shifted stack frame.
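# (in effect, the exception frame slides "down" 12 bytes and the extended
# precision result in FP_SRC_EX/HI/LO is copied into the 12 bytes the frame
# used to occupy -- the same strategy spelled out for the packed -(a7) cases
# further below.)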
   1591	fmovm.x		&0x80,FP_SRC(%a6)	# put answer on stack
   1592
   1593	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   1594	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   1595	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   1596
   1597	mov.l		(%a6),%a6		# restore frame pointer
   1598
   1599	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
   1600	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
   1601
   1602# now, copy the result to the proper place on the stack
   1603	mov.l		LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
   1604	mov.l		LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
   1605	mov.l		LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
   1606
   1607	add.l		&LOCAL_SIZE-0x8,%sp
   1608
   1609	btst		&0x7,(%sp)
   1610	bne.b		fu_out_trace
   1611
   1612	bra.l		_fpsp_done
   1613
   1614fu_out_ena:
   1615	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled
   1616	bfffo		%d0{&24:&8},%d0		# find highest priority exception
   1617	bne.b		fu_out_exc		# there is at least one set
   1618
   1619# no exceptions were set.
# if a disabled overflow occurred and inexact was enabled, then a branch to
# _real_inex() is made (even if the result was exact).
   1622	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
   1623	beq.w		fu_out_done		# no
   1624
   1625fu_out_ovflchk:
   1626	btst		&inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
   1627	beq.w		fu_out_done		# no
   1628	bra.w		fu_inex			# yes
   1629
   1630#
   1631# The fp move out that took the "Unimplemented Data Type" exception was
   1632# being traced. Since the stack frames are similar, get the "current" PC
   1633# from FPIAR and put it in the trace stack frame then jump to _real_trace().
   1634#
   1635#		  UNSUPP FRAME		   TRACE FRAME
   1636#		*****************	*****************
   1637#		*      EA	*	*    Current	*
   1638#		*		*	*      PC	*
   1639#		*****************	*****************
   1640#		* 0x3 *  0x0dc	*	* 0x2 *  0x024	*
   1641#		*****************	*****************
   1642#		*     Next	*	*     Next	*
   1643#		*      PC	*	*      PC	*
   1644#		*****************	*****************
   1645#		*      SR	*	*      SR	*
   1646#		*****************	*****************
   1647#
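# (note: only two fields actually change -- the format/vector word at 0x6(%sp)
# becomes $2024 (format $2, vector offset $024), and the <EA> longword at
# 0x8(%sp) is overwritten with the FPIAR, which holds the "Current PC".)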
   1648fu_out_trace:
   1649	mov.w		&0x2024,0x6(%sp)
   1650	fmov.l		%fpiar,0x8(%sp)
   1651	bra.l		_real_trace
   1652
   1653# an exception occurred and that exception was enabled.
   1654fu_out_exc:
   1655	subi.l		&24,%d0			# fix offset to be 0-8
   1656
   1657# we don't mess with the existing fsave frame. just re-insert it and
   1658# jump to the "_real_{}()" handler...
   1659	mov.w		(tbl_fu_out.b,%pc,%d0.w*2),%d0
   1660	jmp		(tbl_fu_out.b,%pc,%d0.w*1)
   1661
   1662	swbeg		&0x8
   1663tbl_fu_out:
   1664	short		tbl_fu_out	- tbl_fu_out	# BSUN can't happen
   1665	short		tbl_fu_out	- tbl_fu_out	# SNAN can't happen
   1666	short		fu_operr	- tbl_fu_out	# OPERR
   1667	short		fu_ovfl		- tbl_fu_out	# OVFL
   1668	short		fu_unfl		- tbl_fu_out	# UNFL
   1669	short		tbl_fu_out	- tbl_fu_out	# DZ can't happen
   1670	short		fu_inex		- tbl_fu_out	# INEX2
   1671	short		tbl_fu_out	- tbl_fu_out	# INEX1 won't make it here
   1672
   1673# for snan,operr,ovfl,unfl, src op is still in FP_SRC so just
   1674# frestore it.
   1675fu_snan:
   1676	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   1677	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   1678	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   1679
   1680	mov.w		&0x30d8,EXC_VOFF(%a6)	# vector offset = 0xd8
   1681	mov.w		&0xe006,2+FP_SRC(%a6)
   1682
   1683	frestore	FP_SRC(%a6)
   1684
   1685	unlk		%a6
   1686
   1687
   1688	bra.l		_real_snan
   1689
   1690fu_operr:
   1691	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   1692	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   1693	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   1694
   1695	mov.w		&0x30d0,EXC_VOFF(%a6)	# vector offset = 0xd0
   1696	mov.w		&0xe004,2+FP_SRC(%a6)
   1697
   1698	frestore	FP_SRC(%a6)
   1699
   1700	unlk		%a6
   1701
   1702
   1703	bra.l		_real_operr
   1704
   1705fu_ovfl:
   1706	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP to the stack
   1707
   1708	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   1709	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   1710	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   1711
   1712	mov.w		&0x30d4,EXC_VOFF(%a6)	# vector offset = 0xd4
   1713	mov.w		&0xe005,2+FP_SRC(%a6)
   1714
   1715	frestore	FP_SRC(%a6)		# restore EXOP
   1716
   1717	unlk		%a6
   1718
   1719	bra.l		_real_ovfl
   1720
   1721# underflow can happen for extended precision. extended precision opclass
   1722# three instruction exceptions don't update the stack pointer. so, if the
   1723# exception occurred from user mode, then simply update a7 and exit normally.
# if the exception occurred from supervisor mode, check if the <ea> mode was
# -(a7); if so, the exception frame must be shifted "down" and the result
# stored where the frame used to be.
   1725fu_unfl:
   1726	mov.l		EXC_A6(%a6),(%a6)	# restore a6
   1727
   1728	btst		&0x5,EXC_SR(%a6)
   1729	bne.w		fu_unfl_s
   1730
   1731	mov.l		EXC_A7(%a6),%a0		# restore a7 whether we need
   1732	mov.l		%a0,%usp		# to or not...
   1733
   1734fu_unfl_cont:
   1735	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP to the stack
   1736
   1737	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   1738	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   1739	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   1740
   1741	mov.w		&0x30cc,EXC_VOFF(%a6)	# vector offset = 0xcc
   1742	mov.w		&0xe003,2+FP_SRC(%a6)
   1743
   1744	frestore	FP_SRC(%a6)		# restore EXOP
   1745
   1746	unlk		%a6
   1747
   1748	bra.l		_real_unfl
   1749
   1750fu_unfl_s:
   1751	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was the <ea> mode -(sp)?
   1752	bne.b		fu_unfl_cont
   1753
   1754# the extended precision result is still in fp0. but, we need to save it
   1755# somewhere on the stack until we can copy it to its final resting place
   1756# (where the exc frame is currently). make sure it's not at the top of the
   1757# frame or it will get overwritten when the exc stack frame is shifted "down".
   1758	fmovm.x		&0x80,FP_SRC(%a6)	# put answer on stack
   1759	fmovm.x		&0x40,FP_DST(%a6)	# put EXOP on stack
   1760
   1761	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   1762	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   1763	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   1764
   1765	mov.w		&0x30cc,EXC_VOFF(%a6)	# vector offset = 0xcc
   1766	mov.w		&0xe003,2+FP_DST(%a6)
   1767
   1768	frestore	FP_DST(%a6)		# restore EXOP
   1769
   1770	mov.l		(%a6),%a6		# restore frame pointer
   1771
   1772	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
   1773	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
   1774	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
   1775
   1776# now, copy the result to the proper place on the stack
   1777	mov.l		LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
   1778	mov.l		LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
   1779	mov.l		LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
   1780
   1781	add.l		&LOCAL_SIZE-0x8,%sp
   1782
   1783	bra.l		_real_unfl
   1784
   1785# fmove in and out enter here.
   1786fu_inex:
   1787	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP to the stack
   1788
   1789	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   1790	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   1791	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   1792
   1793	mov.w		&0x30c4,EXC_VOFF(%a6)	# vector offset = 0xc4
   1794	mov.w		&0xe001,2+FP_SRC(%a6)
   1795
   1796	frestore	FP_SRC(%a6)		# restore EXOP
   1797
   1798	unlk		%a6
   1799
   1800
   1801	bra.l		_real_inex
   1802
   1803#########################################################################
   1804#########################################################################
   1805fu_in_pack:
   1806
   1807
   1808# I'm not sure at this point what FPSR bits are valid for this instruction.
   1809# so, since the emulation routines re-create them anyways, zero exception field
   1810	andi.l		&0x0ff00ff,USER_FPSR(%a6) # zero exception field
   1811
   1812	fmov.l		&0x0,%fpcr		# zero current control regs
   1813	fmov.l		&0x0,%fpsr
   1814
   1815	bsr.l		get_packed		# fetch packed src operand
   1816
   1817	lea		FP_SRC(%a6),%a0		# pass ptr to src
   1818	bsr.l		set_tag_x		# set src optype tag
   1819
   1820	mov.b		%d0,STAG(%a6)		# save src optype tag
   1821
   1822	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
   1823
   1824# bit five of the fp extension word separates the monadic and dyadic operations
   1825# at this point
   1826	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
   1827	beq.b		fu_extract_p		# monadic
   1828	cmpi.b		1+EXC_CMDREG(%a6),&0x3a	# is operation an ftst?
   1829	beq.b		fu_extract_p		# yes, so it's monadic, too
   1830
   1831	bsr.l		load_fpn2		# load dst into FP_DST
   1832
   1833	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
   1834	bsr.l		set_tag_x		# tag the operand type
   1835	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
   1836	bne.b		fu_op2_done_p		# no
   1837	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
   1838fu_op2_done_p:
   1839	mov.b		%d0,DTAG(%a6)		# save dst optype tag
   1840
   1841fu_extract_p:
   1842	clr.l		%d0
   1843	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode/prec
   1844
   1845	bfextu		1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension
   1846
   1847	lea		FP_SRC(%a6),%a0
   1848	lea		FP_DST(%a6),%a1
   1849
   1850	mov.l		(tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr
   1851	jsr		(tbl_unsupp.l,%pc,%d1.l*1)
   1852
   1853#
   1854# Exceptions in order of precedence:
   1855#	BSUN	: none
   1856#	SNAN	: all dyadic ops
   1857#	OPERR	: fsqrt(-NORM)
   1858#	OVFL	: all except ftst,fcmp
   1859#	UNFL	: all except ftst,fcmp
   1860#	DZ	: fdiv
   1861#	INEX2	: all except ftst,fcmp
   1862#	INEX1	: all
   1863#
   1864
   1865# we determine the highest priority exception(if any) set by the
   1866# emulation routine that has also been enabled by the user.
   1867	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
   1868	bne.w		fu_in_ena_p		# some are enabled
   1869
   1870fu_in_cont_p:
   1871# fcmp and ftst do not store any result.
   1872	mov.b		1+EXC_CMDREG(%a6),%d0	# fetch extension
   1873	andi.b		&0x38,%d0		# extract bits 3-5
   1874	cmpi.b		%d0,&0x38		# is instr fcmp or ftst?
   1875	beq.b		fu_in_exit_p		# yes
   1876
   1877	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
   1878	bsr.l		store_fpreg		# store the result
   1879
   1880fu_in_exit_p:
   1881
   1882	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
   1883	bne.w		fu_in_exit_s_p		# supervisor
   1884
   1885	mov.l		EXC_A7(%a6),%a0		# update user a7
   1886	mov.l		%a0,%usp
   1887
   1888fu_in_exit_cont_p:
   1889	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   1890	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   1891	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   1892
   1893	unlk		%a6			# unravel stack frame
   1894
   1895	btst		&0x7,(%sp)		# is trace on?
   1896	bne.w		fu_trace_p		# yes
   1897
   1898	bra.l		_fpsp_done		# exit to os
   1899
   1900# the exception occurred in supervisor mode. check to see if the
   1901# addressing mode was (a7)+. if so, we'll need to shift the
   1902# stack frame "up".
   1903fu_in_exit_s_p:
   1904	btst		&mia7_bit,SPCOND_FLG(%a6) # was ea mode (a7)+
   1905	beq.b		fu_in_exit_cont_p	# no
   1906
   1907	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   1908	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   1909	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   1910
   1911	unlk		%a6			# unravel stack frame
   1912
   1913# shift the stack frame "up". we don't really care about the <ea> field.
   1914	mov.l		0x4(%sp),0x10(%sp)
   1915	mov.l		0x0(%sp),0xc(%sp)
   1916	add.l		&0xc,%sp
   1917
   1918	btst		&0x7,(%sp)		# is trace on?
   1919	bne.w		fu_trace_p		# yes
   1920
   1921	bra.l		_fpsp_done		# exit to os
   1922
   1923fu_in_ena_p:
   1924	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled & set
   1925	bfffo		%d0{&24:&8},%d0		# find highest priority exception
   1926	bne.b		fu_in_exc_p		# at least one was set
   1927
   1928#
   1929# No exceptions occurred that were also enabled. Now:
   1930#
   1931#	if (OVFL && ovfl_disabled && inexact_enabled) {
   1932#	    branch to _real_inex() (even if the result was exact!);
   1933#	} else {
   1934#	    save the result in the proper fp reg (unless the op is fcmp or ftst);
   1935#	    return;
   1936#	}
   1937#
   1938	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
   1939	beq.w		fu_in_cont_p		# no
   1940
   1941fu_in_ovflchk_p:
   1942	btst		&inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
   1943	beq.w		fu_in_cont_p		# no
   1944	bra.w		fu_in_exc_ovfl_p	# do _real_inex() now
   1945
   1946#
   1947# An exception occurred and that exception was enabled:
   1948#
   1949#	shift enabled exception field into lo byte of d0;
   1950#	if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
   1951#	    ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
   1952#		/*
   1953#		 * this is the case where we must call _real_inex() now or else
   1954#		 * there will be no other way to pass it the exceptional operand
   1955#		 */
   1956#		call _real_inex();
   1957#	} else {
   1958#		restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
   1959#	}
   1960#
   1961fu_in_exc_p:
   1962	subi.l		&24,%d0			# fix offset to be 0-8
   1963	cmpi.b		%d0,&0x6		# is exception INEX? (6 or 7)
   1964	blt.b		fu_in_exc_exit_p	# no
   1965
   1966# the enabled exception was inexact
   1967	btst		&unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur?
   1968	bne.w		fu_in_exc_unfl_p	# yes
   1969	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur?
   1970	bne.w		fu_in_exc_ovfl_p	# yes
   1971
   1972# here, we insert the correct fsave status value into the fsave frame for the
   1973# corresponding exception. the operand in the fsave frame should be the original
   1974# src operand.
   1975# as a reminder for future predicted pain and agony, we are passing in fsave the
   1976# "non-skewed" operand for cases of sgl and dbl src INFs,NANs, and DENORMs.
# this is INCORRECT for enabled SNAN, which would give the user the skewed SNAN!!!
   1978fu_in_exc_exit_p:
   1979	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
   1980	bne.w		fu_in_exc_exit_s_p	# supervisor
   1981
   1982	mov.l		EXC_A7(%a6),%a0		# update user a7
   1983	mov.l		%a0,%usp
   1984
   1985fu_in_exc_exit_cont_p:
   1986	mov.w		(tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6)
   1987
   1988	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   1989	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   1990	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   1991
   1992	frestore	FP_SRC(%a6)		# restore src op
   1993
   1994	unlk		%a6
   1995
   1996	btst		&0x7,(%sp)		# is trace enabled?
   1997	bne.w		fu_trace_p		# yes
   1998
   1999	bra.l		_fpsp_done
   2000
   2001tbl_except_p:
   2002	short		0xe000,0xe006,0xe004,0xe005
   2003	short		0xe003,0xe002,0xe001,0xe001
   2004
   2005fu_in_exc_ovfl_p:
   2006	mov.w		&0x3,%d0
   2007	bra.w		fu_in_exc_exit_p
   2008
   2009fu_in_exc_unfl_p:
   2010	mov.w		&0x4,%d0
   2011	bra.w		fu_in_exc_exit_p
   2012
   2013fu_in_exc_exit_s_p:
   2014	btst		&mia7_bit,SPCOND_FLG(%a6)
   2015	beq.b		fu_in_exc_exit_cont_p
   2016
   2017	mov.w		(tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6)
   2018
   2019	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   2020	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   2021	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   2022
   2023	frestore	FP_SRC(%a6)		# restore src op
   2024
   2025	unlk		%a6			# unravel stack frame
   2026
   2027# shift stack frame "up". who cares about <ea> field.
   2028	mov.l		0x4(%sp),0x10(%sp)
   2029	mov.l		0x0(%sp),0xc(%sp)
   2030	add.l		&0xc,%sp
   2031
   2032	btst		&0x7,(%sp)		# is trace on?
   2033	bne.b		fu_trace_p		# yes
   2034
   2035	bra.l		_fpsp_done		# exit to os
   2036
   2037#
   2038# The opclass two PACKED instruction that took an "Unimplemented Data Type"
   2039# exception was being traced. Make the "current" PC the FPIAR and put it in the
   2040# trace stack frame then jump to _real_trace().
   2041#
   2042#		  UNSUPP FRAME		   TRACE FRAME
   2043#		*****************	*****************
   2044#		*      EA	*	*    Current	*
   2045#		*		*	*      PC	*
   2046#		*****************	*****************
   2047#		* 0x2 *	0x0dc	*	* 0x2 *  0x024	*
   2048#		*****************	*****************
   2049#		*     Next	*	*     Next	*
   2050#		*      PC	*	*      PC	*
   2051#		*****************	*****************
   2052#		*      SR	*	*      SR	*
   2053#		*****************	*****************
   2054fu_trace_p:
   2055	mov.w		&0x2024,0x6(%sp)
   2056	fmov.l		%fpiar,0x8(%sp)
   2057
   2058	bra.l		_real_trace
   2059
   2060#########################################################
   2061#########################################################
   2062fu_out_pack:
   2063
   2064
   2065# I'm not sure at this point what FPSR bits are valid for this instruction.
   2066# so, since the emulation routines re-create them anyways, zero exception field.
   2067# fmove out doesn't affect ccodes.
   2068	and.l		&0xffff00ff,USER_FPSR(%a6) # zero exception field
   2069
   2070	fmov.l		&0x0,%fpcr		# zero current control regs
   2071	fmov.l		&0x0,%fpsr
   2072
   2073	bfextu		EXC_CMDREG(%a6){&6:&3},%d0
   2074	bsr.l		load_fpn1
   2075
# unlike other opclass 3 unimplemented data type exceptions, packed must be
   2077# able to detect all operand types.
   2078	lea		FP_SRC(%a6),%a0
   2079	bsr.l		set_tag_x		# tag the operand type
   2080	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
   2081	bne.b		fu_op2_p		# no
   2082	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
   2083
   2084fu_op2_p:
   2085	mov.b		%d0,STAG(%a6)		# save src optype tag
   2086
   2087	clr.l		%d0
   2088	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode/prec
   2089
   2090	lea		FP_SRC(%a6),%a0		# pass ptr to src operand
   2091
   2092	mov.l		(%a6),EXC_A6(%a6)	# in case a6 changes
   2093	bsr.l		fout			# call fmove out routine
   2094
   2095# Exceptions in order of precedence:
   2096#	BSUN	: no
   2097#	SNAN	: yes
   2098#	OPERR	: if ((k_factor > +17) || (dec. exp exceeds 3 digits))
   2099#	OVFL	: no
   2100#	UNFL	: no
   2101#	DZ	: no
   2102#	INEX2	: yes
   2103#	INEX1	: no
   2104
   2105# determine the highest priority exception(if any) set by the
   2106# emulation routine that has also been enabled by the user.
   2107	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
   2108	bne.w		fu_out_ena_p		# some are enabled
   2109
   2110fu_out_exit_p:
   2111	mov.l		EXC_A6(%a6),(%a6)	# restore a6
   2112
   2113	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
   2114	bne.b		fu_out_exit_s_p		# supervisor
   2115
   2116	mov.l		EXC_A7(%a6),%a0		# update user a7
   2117	mov.l		%a0,%usp
   2118
   2119fu_out_exit_cont_p:
   2120	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   2121	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   2122	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   2123
   2124	unlk		%a6			# unravel stack frame
   2125
   2126	btst		&0x7,(%sp)		# is trace on?
   2127	bne.w		fu_trace_p		# yes
   2128
   2129	bra.l		_fpsp_done		# exit to os
   2130
   2131# the exception occurred in supervisor mode. check to see if the
   2132# addressing mode was -(a7). if so, we'll need to shift the
   2133# stack frame "down".
   2134fu_out_exit_s_p:
   2135	btst		&mda7_bit,SPCOND_FLG(%a6) # was ea mode -(a7)
   2136	beq.b		fu_out_exit_cont_p	# no
   2137
   2138	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   2139	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   2140	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   2141
   2142	mov.l		(%a6),%a6		# restore frame pointer
   2143
   2144	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
   2145	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
   2146
   2147# now, copy the result to the proper place on the stack
   2148	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
   2149	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
   2150	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
   2151
   2152	add.l		&LOCAL_SIZE-0x8,%sp
   2153
   2154	btst		&0x7,(%sp)
   2155	bne.w		fu_trace_p
   2156
   2157	bra.l		_fpsp_done
   2158
   2159fu_out_ena_p:
   2160	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled
   2161	bfffo		%d0{&24:&8},%d0		# find highest priority exception
   2162	beq.w		fu_out_exit_p
   2163
   2164	mov.l		EXC_A6(%a6),(%a6)	# restore a6
   2165
   2166# an exception occurred and that exception was enabled.
# the only exceptions possible on a packed move out are INEX, OPERR, and SNAN.
   2168fu_out_exc_p:
   2169	cmpi.b		%d0,&0x1a
   2170	bgt.w		fu_inex_p2
   2171	beq.w		fu_operr_p
   2172
   2173fu_snan_p:
   2174	btst		&0x5,EXC_SR(%a6)
   2175	bne.b		fu_snan_s_p
   2176
   2177	mov.l		EXC_A7(%a6),%a0
   2178	mov.l		%a0,%usp
   2179	bra.w		fu_snan
   2180
   2181fu_snan_s_p:
   2182	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
   2183	bne.w		fu_snan
   2184
   2185# the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
   2186# the strategy is to move the exception frame "down" 12 bytes. then, we
   2187# can store the default result where the exception frame was.
   2188	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   2189	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   2190	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   2191
	mov.w		&0x30d8,EXC_VOFF(%a6)	# vector offset = 0xd8
   2193	mov.w		&0xe006,2+FP_SRC(%a6)	# set fsave status
   2194
   2195	frestore	FP_SRC(%a6)		# restore src operand
   2196
   2197	mov.l		(%a6),%a6		# restore frame pointer
   2198
   2199	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
   2200	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
   2201	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
   2202
   2203# now, we copy the default result to its proper location
   2204	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
   2205	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
   2206	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
   2207
   2208	add.l		&LOCAL_SIZE-0x8,%sp
   2209
   2210
   2211	bra.l		_real_snan
   2212
   2213fu_operr_p:
   2214	btst		&0x5,EXC_SR(%a6)
   2215	bne.w		fu_operr_p_s
   2216
   2217	mov.l		EXC_A7(%a6),%a0
   2218	mov.l		%a0,%usp
   2219	bra.w		fu_operr
   2220
   2221fu_operr_p_s:
   2222	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
   2223	bne.w		fu_operr
   2224
   2225# the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
   2226# the strategy is to move the exception frame "down" 12 bytes. then, we
   2227# can store the default result where the exception frame was.
   2228	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   2229	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   2230	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   2231
   2232	mov.w		&0x30d0,EXC_VOFF(%a6)	# vector offset = 0xd0
   2233	mov.w		&0xe004,2+FP_SRC(%a6)	# set fsave status
   2234
   2235	frestore	FP_SRC(%a6)		# restore src operand
   2236
   2237	mov.l		(%a6),%a6		# restore frame pointer
   2238
   2239	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
   2240	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
   2241	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
   2242
   2243# now, we copy the default result to its proper location
   2244	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
   2245	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
   2246	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
   2247
   2248	add.l		&LOCAL_SIZE-0x8,%sp
   2249
   2250
   2251	bra.l		_real_operr
   2252
   2253fu_inex_p2:
   2254	btst		&0x5,EXC_SR(%a6)
   2255	bne.w		fu_inex_s_p2
   2256
   2257	mov.l		EXC_A7(%a6),%a0
   2258	mov.l		%a0,%usp
   2259	bra.w		fu_inex
   2260
   2261fu_inex_s_p2:
   2262	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
   2263	bne.w		fu_inex
   2264
   2265# the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
   2266# the strategy is to move the exception frame "down" 12 bytes. then, we
   2267# can store the default result where the exception frame was.
   2268	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   2269	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   2270	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   2271
   2272	mov.w		&0x30c4,EXC_VOFF(%a6)	# vector offset = 0xc4
   2273	mov.w		&0xe001,2+FP_SRC(%a6)	# set fsave status
   2274
   2275	frestore	FP_SRC(%a6)		# restore src operand
   2276
   2277	mov.l		(%a6),%a6		# restore frame pointer
   2278
   2279	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
   2280	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
   2281	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
   2282
   2283# now, we copy the default result to its proper location
   2284	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
   2285	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
   2286	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
   2287
   2288	add.l		&LOCAL_SIZE-0x8,%sp
   2289
   2290
   2291	bra.l		_real_inex
   2292
   2293#########################################################################
   2294
   2295#
   2296# if we're stuffing a source operand back into an fsave frame then we
# have to make sure that, for single or double source operands, the
   2298# format stuffed is as weird as the hardware usually makes it.
   2299#
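# for a single precision source, the "skew" re-created below is roughly:
#
#	if (0 < exp && exp <= 0x3f80) {		/* sgl denorm range */
#		shift = 0x3f81 - exp;
#		FP_SRC_HI = (FP_SRC_HI >> shift) | 0x80000000;	/* j-bit set */
#		FP_SRC_EX = sign | 0x3f80;	/* "skewed" exponent */
#	}
#
# i.e. the inverse of the correction that fix_skewed_ops applies on the way
# in; the double precision path does the same around $3c00/$3c01 via dnrm_lp.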
   2300	global		funimp_skew
   2301funimp_skew:
   2302	bfextu		EXC_EXTWORD(%a6){&3:&3},%d0 # extract src specifier
   2303	cmpi.b		%d0,&0x1		# was src sgl?
   2304	beq.b		funimp_skew_sgl		# yes
   2305	cmpi.b		%d0,&0x5		# was src dbl?
   2306	beq.b		funimp_skew_dbl		# yes
   2307	rts
   2308
   2309funimp_skew_sgl:
   2310	mov.w		FP_SRC_EX(%a6),%d0	# fetch DENORM exponent
   2311	andi.w		&0x7fff,%d0		# strip sign
   2312	beq.b		funimp_skew_sgl_not
   2313	cmpi.w		%d0,&0x3f80
   2314	bgt.b		funimp_skew_sgl_not
   2315	neg.w		%d0			# make exponent negative
   2316	addi.w		&0x3f81,%d0		# find amt to shift
   2317	mov.l		FP_SRC_HI(%a6),%d1	# fetch DENORM hi(man)
   2318	lsr.l		%d0,%d1			# shift it
   2319	bset		&31,%d1			# set j-bit
   2320	mov.l		%d1,FP_SRC_HI(%a6)	# insert new hi(man)
   2321	andi.w		&0x8000,FP_SRC_EX(%a6)	# clear old exponent
   2322	ori.w		&0x3f80,FP_SRC_EX(%a6)	# insert new "skewed" exponent
   2323funimp_skew_sgl_not:
   2324	rts
   2325
   2326funimp_skew_dbl:
   2327	mov.w		FP_SRC_EX(%a6),%d0	# fetch DENORM exponent
   2328	andi.w		&0x7fff,%d0		# strip sign
   2329	beq.b		funimp_skew_dbl_not
   2330	cmpi.w		%d0,&0x3c00
   2331	bgt.b		funimp_skew_dbl_not
   2332
   2333	tst.b		FP_SRC_EX(%a6)		# make "internal format"
   2334	smi.b		0x2+FP_SRC(%a6)
   2335	mov.w		%d0,FP_SRC_EX(%a6)	# insert exponent with cleared sign
   2336	clr.l		%d0			# clear g,r,s
   2337	lea		FP_SRC(%a6),%a0		# pass ptr to src op
   2338	mov.w		&0x3c01,%d1		# pass denorm threshold
   2339	bsr.l		dnrm_lp			# denorm it
   2340	mov.w		&0x3c00,%d0		# new exponent
   2341	tst.b		0x2+FP_SRC(%a6)		# is sign set?
   2342	beq.b		fss_dbl_denorm_done	# no
   2343	bset		&15,%d0			# set sign
   2344fss_dbl_denorm_done:
   2345	bset		&0x7,FP_SRC_HI(%a6)	# set j-bit
   2346	mov.w		%d0,FP_SRC_EX(%a6)	# insert new exponent
   2347funimp_skew_dbl_not:
   2348	rts
   2349
   2350#########################################################################
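# _mem_write2():
# if the exception came from user mode, control passes straight through to
# the normal _dmem_write() "callout"; from supervisor mode, the 12 bytes at
# (%a0) are instead parked in FP_DST(%a6) and d1 is cleared, presumably so
# the caller sees a successful (non-faulting) write.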
   2351	global		_mem_write2
   2352_mem_write2:
   2353	btst		&0x5,EXC_SR(%a6)
   2354	beq.l		_dmem_write
   2355	mov.l		0x0(%a0),FP_DST_EX(%a6)
   2356	mov.l		0x4(%a0),FP_DST_HI(%a6)
   2357	mov.l		0x8(%a0),FP_DST_LO(%a6)
   2358	clr.l		%d1
   2359	rts
   2360
   2361#########################################################################
   2362# XDEF ****************************************************************	#
   2363#	_fpsp_effadd(): 060FPSP entry point for FP "Unimplemented	#
   2364#			effective address" exception.			#
   2365#									#
   2366#	This handler should be the first code executed upon taking the	#
   2367#	FP Unimplemented Effective Address exception in an operating	#
   2368#	system.								#
   2369#									#
   2370# XREF ****************************************************************	#
   2371#	_imem_read_long() - read instruction longword			#
   2372#	fix_skewed_ops() - adjust src operand in fsave frame		#
   2373#	set_tag_x() - determine optype of src/dst operands		#
   2374#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
   2375#	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
   2376#	load_fpn2() - load dst operand from FP regfile			#
#	tbl_unsupp - address of table of emulation routines for opclass 0,2	#
   2378#	decbin() - convert packed data to FP binary data		#
   2379#	_real_fpu_disabled() - "callout" for "FPU disabled" exception	#
   2380#	_real_access() - "callout" for access error exception		#
   2381#	_mem_read() - read extended immediate operand from memory	#
   2382#	_fpsp_done() - "callout" for exit; work all done		#
   2383#	_real_trace() - "callout" for Trace enabled exception		#
   2384#	fmovm_dynamic() - emulate dynamic fmovm instruction		#
   2385#	fmovm_ctrl() - emulate fmovm control instruction		#
   2386#									#
   2387# INPUT ***************************************************************	#
   2388#	- The system stack contains the "Unimplemented <ea>" stk frame	#
   2389#									#
   2390# OUTPUT **************************************************************	#
   2391#	If access error:						#
   2392#	- The system stack is changed to an access error stack frame	#
   2393#	If FPU disabled:						#
   2394#	- The system stack is changed to an FPU disabled stack frame	#
   2395#	If Trace exception enabled:					#
   2396#	- The system stack is changed to a Trace exception stack frame	#
   2397#	Else: (normal case)						#
   2398#	- None (correct result has been stored as appropriate)		#
   2399#									#
   2400# ALGORITHM ***********************************************************	#
   2401#	This exception handles 3 types of operations:			#
   2402# (1) FP Instructions using extended precision or packed immediate	#
   2403#     addressing mode.							#
   2404# (2) The "fmovm.x" instruction w/ dynamic register specification.	#
   2405# (3) The "fmovm.l" instruction w/ 2 or 3 control registers.		#
   2406#									#
   2407#	For immediate data operations, the data is read in w/ a		#
   2408# _mem_read() "callout", converted to FP binary (if packed), and used	#
   2409# as the source operand to the instruction specified by the instruction	#
# word. If no FP exception should be reported as a result of the	#
   2411# emulation, then the result is stored to the destination register and	#
   2412# the handler exits through _fpsp_done(). If an enabled exc has been	#
   2413# signalled as a result of emulation, then an fsave state frame		#
   2414# corresponding to the FP exception type must be entered into the 060	#
   2415# FPU before exiting. In either the enabled or disabled cases, we	#
   2416# must also check if a Trace exception is pending, in which case, we	#
   2417# must create a Trace exception stack frame from the current exception	#
   2418# stack frame. If no Trace is pending, we simply exit through		#
   2419# _fpsp_done().								#
   2420#	For "fmovm.x", call the routine fmovm_dynamic() which will	#
   2421# decode and emulate the instruction. No FP exceptions can be pending	#
   2422# as a result of this operation emulation. A Trace exception can be	#
   2423# pending, though, which means the current stack frame must be changed	#
   2424# to a Trace stack frame and an exit made through _real_trace().	#
   2425# For the case of "fmovm.x Dn,-(a7)", where the offending instruction	#
   2426# was executed from supervisor mode, this handler must store the FP	#
   2427# register file values to the system stack by itself since		#
   2428# fmovm_dynamic() can't handle this. A normal exit is made through	#
# _fpsp_done().								#
   2430#	For "fmovm.l", fmovm_ctrl() is used to emulate the instruction.	#
   2431# Again, a Trace exception may be pending and an exit made through	#
   2432# _real_trace(). Else, a normal exit is made through _fpsp_done().	#
   2433#									#
   2434#	Before any of the above is attempted, it must be checked to	#
   2435# see if the FPU is disabled. Since the "Unimp <ea>" exception is taken	#
   2436# before the "FPU disabled" exception, but the "FPU disabled" exception	#
   2437# has higher priority, we check the disabled bit in the PCR. If set,	#
   2438# then we must create an 8 word "FPU disabled" exception stack frame	#
   2439# from the current 4 word exception stack frame. This includes		#
   2440# reproducing the effective address of the instruction to put on the	#
   2441# new stack frame.							#
   2442#									#
   2443#	In the process of all emulation work, if a _mem_read()		#
   2444# "callout" returns a failing result indicating an access error, then	#
   2445# we must create an access error stack frame from the current stack	#
   2446# frame. This information includes a faulting address and a fault-	#
   2447# status-longword. These are created within this handler.		#
   2448#									#
   2449#########################################################################
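# at a glance, the flow below is roughly:
#
#	if (PCR FPU-disable bit set)
#		goto iea_disabled;		/* build Line F style frame */
#	fetch opword/extword;
#	if (extension word bit 15 set)		/* fmovm */
#		bit 14 set ? emulate fmovm data : emulate fmovm control;
#	else
#		read the 12-byte immediate (convert if packed), emulate the
#		op via tbl_unsupp, then store/frestore/trace-exit as needed;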
   2450
   2451	global		_fpsp_effadd
   2452_fpsp_effadd:
   2453
   2454# This exception type takes priority over the "Line F Emulator"
   2455# exception. Therefore, the FPU could be disabled when entering here.
   2456# So, we must check to see if it's disabled and handle that case separately.
   2457	mov.l		%d0,-(%sp)		# save d0
   2458	movc		%pcr,%d0		# load proc cr
   2459	btst		&0x1,%d0		# is FPU disabled?
   2460	bne.w		iea_disabled		# yes
   2461	mov.l		(%sp)+,%d0		# restore d0
   2462
   2463	link		%a6,&-LOCAL_SIZE	# init stack frame
   2464
   2465	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
   2466	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
   2467	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
   2468
   2469# PC of instruction that took the exception is the PC in the frame
   2470	mov.l		EXC_PC(%a6),EXC_EXTWPTR(%a6)
   2471
   2472	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   2473	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
   2474	bsr.l		_imem_read_long		# fetch the instruction words
   2475	mov.l		%d0,EXC_OPWORD(%a6)	# store OPWORD and EXTWORD
   2476
   2477#########################################################################
   2478
   2479	tst.w		%d0			# is operation fmovem?
   2480	bmi.w		iea_fmovm		# yes
   2481
   2482#
   2483# here, we will have:
   2484#	fabs	fdabs	fsabs		facos		fmod
   2485#	fadd	fdadd	fsadd		fasin		frem
   2486#	fcmp				fatan		fscale
   2487#	fdiv	fddiv	fsdiv		fatanh		fsin
   2488#	fint				fcos		fsincos
   2489#	fintrz				fcosh		fsinh
   2490#	fmove	fdmove	fsmove		fetox		ftan
   2491#	fmul	fdmul	fsmul		fetoxm1		ftanh
   2492#	fneg	fdneg	fsneg		fgetexp		ftentox
   2493#	fsgldiv				fgetman		ftwotox
   2494#	fsglmul				flog10
   2495#	fsqrt				flog2
   2496#	fsub	fdsub	fssub		flogn
   2497#	ftst				flognp1
   2498# which can all use f<op>.{x,p}
   2499# so, now it's immediate data extended precision AND PACKED FORMAT!
   2500#
   2501iea_op:
   2502	andi.l		&0x00ff00ff,USER_FPSR(%a6)
   2503
   2504	btst		&0xa,%d0		# is src fmt x or p?
   2505	bne.b		iea_op_pack		# packed
   2506
   2507
   2508	mov.l		EXC_EXTWPTR(%a6),%a0	# pass: ptr to #<data>
   2509	lea		FP_SRC(%a6),%a1		# pass: ptr to super addr
   2510	mov.l		&0xc,%d0		# pass: 12 bytes
   2511	bsr.l		_imem_read		# read extended immediate
   2512
   2513	tst.l		%d1			# did ifetch fail?
   2514	bne.w		iea_iacc		# yes
   2515
   2516	bra.b		iea_op_setsrc
   2517
   2518iea_op_pack:
   2519
   2520	mov.l		EXC_EXTWPTR(%a6),%a0	# pass: ptr to #<data>
   2521	lea		FP_SRC(%a6),%a1		# pass: ptr to super dst
   2522	mov.l		&0xc,%d0		# pass: 12 bytes
   2523	bsr.l		_imem_read		# read packed operand
   2524
   2525	tst.l		%d1			# did ifetch fail?
   2526	bne.w		iea_iacc		# yes
   2527
   2528# The packed operand is an INF or a NAN if the exponent field is all ones.
   2529	bfextu		FP_SRC(%a6){&1:&15},%d0	# get exp
   2530	cmpi.w		%d0,&0x7fff		# INF or NAN?
   2531	beq.b		iea_op_setsrc		# operand is an INF or NAN
   2532
   2533# The packed operand is a zero if the mantissa is all zero, else it's
   2534# a normal packed op.
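# i.e., roughly:
#
#	if ((byte4 & 0x0f) == 0 && FP_SRC_HI == 0 && FP_SRC_LO == 0)
#		the operand is a ZERO -- skip the conversion;
#	else
#		decbin() converts the packed BCD operand to extended precision
#		and the result is copied back into FP_SRC as the source operand;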
   2535	mov.b		3+FP_SRC(%a6),%d0	# get byte 4
   2536	andi.b		&0x0f,%d0		# clear all but last nybble
   2537	bne.b		iea_op_gp_not_spec	# not a zero
   2538	tst.l		FP_SRC_HI(%a6)		# is lw 2 zero?
   2539	bne.b		iea_op_gp_not_spec	# not a zero
   2540	tst.l		FP_SRC_LO(%a6)		# is lw 3 zero?
   2541	beq.b		iea_op_setsrc		# operand is a ZERO
   2542iea_op_gp_not_spec:
   2543	lea		FP_SRC(%a6),%a0		# pass: ptr to packed op
   2544	bsr.l		decbin			# convert to extended
   2545	fmovm.x		&0x80,FP_SRC(%a6)	# make this the srcop
   2546
   2547iea_op_setsrc:
   2548	addi.l		&0xc,EXC_EXTWPTR(%a6)	# update extension word pointer
   2549
   2550# FP_SRC now holds the src operand.
   2551	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
   2552	bsr.l		set_tag_x		# tag the operand type
   2553	mov.b		%d0,STAG(%a6)		# could be ANYTHING!!!
   2554	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
   2555	bne.b		iea_op_getdst		# no
   2556	bsr.l		unnorm_fix		# yes; convert to NORM/DENORM/ZERO
   2557	mov.b		%d0,STAG(%a6)		# set new optype tag
   2558iea_op_getdst:
   2559	clr.b		STORE_FLG(%a6)		# clear "store result" boolean
   2560
   2561	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
   2562	beq.b		iea_op_extract		# monadic
   2563	btst		&0x4,1+EXC_CMDREG(%a6)	# is operation fsincos,ftst,fcmp?
   2564	bne.b		iea_op_spec		# yes
   2565
   2566iea_op_loaddst:
   2567	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno
   2568	bsr.l		load_fpn2		# load dst operand
   2569
   2570	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
   2571	bsr.l		set_tag_x		# tag the operand type
   2572	mov.b		%d0,DTAG(%a6)		# could be ANYTHING!!!
   2573	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
   2574	bne.b		iea_op_extract		# no
   2575	bsr.l		unnorm_fix		# yes; convert to NORM/DENORM/ZERO
   2576	mov.b		%d0,DTAG(%a6)		# set new optype tag
   2577	bra.b		iea_op_extract
   2578
   2579# the operation is fsincos, ftst, or fcmp. only fcmp is dyadic
   2580iea_op_spec:
   2581	btst		&0x3,1+EXC_CMDREG(%a6)	# is operation fsincos?
   2582	beq.b		iea_op_extract		# yes
   2583# now, we're left with ftst and fcmp. so, first let's tag them so that they don't
   2584# store a result. then, only fcmp will branch back and pick up a dst operand.
   2585	st		STORE_FLG(%a6)		# don't store a final result
   2586	btst		&0x1,1+EXC_CMDREG(%a6)	# is operation fcmp?
   2587	beq.b		iea_op_loaddst		# yes
   2588
   2589iea_op_extract:
   2590	clr.l		%d0
   2591	mov.b		FPCR_MODE(%a6),%d0	# pass: rnd mode,prec
   2592
   2593	mov.b		1+EXC_CMDREG(%a6),%d1
   2594	andi.w		&0x007f,%d1		# extract extension
   2595
   2596	fmov.l		&0x0,%fpcr
   2597	fmov.l		&0x0,%fpsr
   2598
   2599	lea		FP_SRC(%a6),%a0
   2600	lea		FP_DST(%a6),%a1
   2601
   2602	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
   2603	jsr		(tbl_unsupp.l,%pc,%d1.l*1)
   2604
   2605#
   2606# Exceptions in order of precedence:
   2607#	BSUN	: none
   2608#	SNAN	: all operations
   2609#	OPERR	: all reg-reg or mem-reg operations that can normally operr
   2610#	OVFL	: same as OPERR
   2611#	UNFL	: same as OPERR
   2612#	DZ	: same as OPERR
   2613#	INEX2	: same as OPERR
   2614#	INEX1	: all packed immediate operations
   2615#
   2616
   2617# we determine the highest priority exception(if any) set by the
   2618# emulation routine that has also been enabled by the user.
   2619	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
   2620	bne.b		iea_op_ena		# some are enabled
   2621
   2622# now, we save the result, unless, of course, the operation was ftst or fcmp.
   2623# these don't save results.
   2624iea_op_save:
   2625	tst.b		STORE_FLG(%a6)		# does this op store a result?
   2626	bne.b		iea_op_exit1		# exit with no frestore
   2627
   2628iea_op_store:
   2629	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno
   2630	bsr.l		store_fpreg		# store the result
   2631
   2632iea_op_exit1:
   2633	mov.l		EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC"
   2634	mov.l		EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame
   2635
   2636	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
   2637	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   2638	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   2639
   2640	unlk		%a6			# unravel the frame
   2641
   2642	btst		&0x7,(%sp)		# is trace on?
   2643	bne.w		iea_op_trace		# yes
   2644
   2645	bra.l		_fpsp_done		# exit to os
   2646
   2647iea_op_ena:
   2648	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enable and set
   2649	bfffo		%d0{&24:&8},%d0		# find highest priority exception
   2650	bne.b		iea_op_exc		# at least one was set
   2651
   2652# no exception occurred. now, did a disabled, exact overflow occur with inexact
   2653# enabled? if so, then we have to stuff an overflow frame into the FPU.
   2654	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
   2655	beq.b		iea_op_save
   2656
   2657iea_op_ovfl:
   2658	btst		&inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
   2659	beq.b		iea_op_store		# no
   2660	bra.b		iea_op_exc_ovfl		# yes
   2661
   2662# an enabled exception occurred. we have to insert the exception type back into
   2663# the machine.
   2664iea_op_exc:
   2665	subi.l		&24,%d0			# fix offset to be 0-8
   2666	cmpi.b		%d0,&0x6		# is exception INEX?
   2667	bne.b		iea_op_exc_force	# no
   2668
   2669# the enabled exception was inexact. so, if it occurs with an overflow
   2670# or underflow that was disabled, then we have to force an overflow or
   2671# underflow frame.
   2672	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
   2673	bne.b		iea_op_exc_ovfl		# yes
   2674	btst		&unfl_bit,FPSR_EXCEPT(%a6) # did underflow occur?
   2675	bne.b		iea_op_exc_unfl		# yes
   2676
   2677iea_op_exc_force:
   2678	mov.w		(tbl_iea_except.b,%pc,%d0.w*2),2+FP_SRC(%a6)
   2679	bra.b		iea_op_exit2		# exit with frestore
   2680
   2681tbl_iea_except:
   2682	short		0xe002, 0xe006, 0xe004, 0xe005
   2683	short		0xe003, 0xe002, 0xe001, 0xe001
   2684
   2685iea_op_exc_ovfl:
   2686	mov.w		&0xe005,2+FP_SRC(%a6)
   2687	bra.b		iea_op_exit2
   2688
   2689iea_op_exc_unfl:
   2690	mov.w		&0xe003,2+FP_SRC(%a6)
   2691
   2692iea_op_exit2:
   2693	mov.l		EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC"
   2694	mov.l		EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame
   2695
   2696	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
   2697	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   2698	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   2699
   2700	frestore	FP_SRC(%a6)		# restore exceptional state
   2701
   2702	unlk		%a6			# unravel the frame
   2703
   2704	btst		&0x7,(%sp)		# is trace on?
   2705	bne.b		iea_op_trace		# yes
   2706
   2707	bra.l		_fpsp_done		# exit to os
   2708
   2709#
   2710# The opclass two instruction that took an "Unimplemented Effective Address"
   2711# exception was being traced. Make the "current" PC the FPIAR and put it in
   2712# the trace stack frame then jump to _real_trace().
   2713#
   2714#		 UNIMP EA FRAME		   TRACE FRAME
   2715#		*****************	*****************
   2716#		* 0x0 *  0x0f0	*	*    Current	*
   2717#		*****************	*      PC	*
   2718#		*    Current	*	*****************
   2719#		*      PC	*	* 0x2 *  0x024	*
   2720#		*****************	*****************
   2721#		*      SR	*	*     Next	*
   2722#		*****************	*      PC	*
   2723#					*****************
   2724#					*      SR	*
   2725#					*****************
   2726iea_op_trace:
   2727	mov.l		(%sp),-(%sp)		# shift stack frame "down"
   2728	mov.w		0x8(%sp),0x4(%sp)
   2729	mov.w		&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x024
   2730	fmov.l		%fpiar,0x8(%sp)		# "Current PC" is in FPIAR
   2731
   2732	bra.l		_real_trace
   2733
   2734#########################################################################
   2735iea_fmovm:
   2736	btst		&14,%d0			# ctrl or data reg
   2737	beq.w		iea_fmovm_ctrl
   2738
   2739iea_fmovm_data:
   2740
   2741	btst		&0x5,EXC_SR(%a6)	# user or supervisor mode
   2742	bne.b		iea_fmovm_data_s
   2743
   2744iea_fmovm_data_u:
   2745	mov.l		%usp,%a0
   2746	mov.l		%a0,EXC_A7(%a6)		# store current a7
   2747	bsr.l		fmovm_dynamic		# do dynamic fmovm
   2748	mov.l		EXC_A7(%a6),%a0		# load possibly new a7
   2749	mov.l		%a0,%usp		# update usp
   2750	bra.w		iea_fmovm_exit
   2751
   2752iea_fmovm_data_s:
   2753	clr.b		SPCOND_FLG(%a6)
   2754	lea		0x2+EXC_VOFF(%a6),%a0
   2755	mov.l		%a0,EXC_A7(%a6)
   2756	bsr.l		fmovm_dynamic		# do dynamic fmovm
   2757
   2758	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
   2759	beq.w		iea_fmovm_data_predec
   2760	cmpi.b		SPCOND_FLG(%a6),&mia7_flg
   2761	bne.w		iea_fmovm_exit
   2762
   2763# right now, d0 = the size.
   2764# the data has been fetched from the supervisor stack, but we have not
   2765# incremented the stack pointer by the appropriate number of bytes.
   2766# do it here.
   2767iea_fmovm_data_postinc:
   2768	btst		&0x7,EXC_SR(%a6)
   2769	bne.b		iea_fmovm_data_pi_trace
   2770
   2771	mov.w		EXC_SR(%a6),(EXC_SR,%a6,%d0)
   2772	mov.l		EXC_EXTWPTR(%a6),(EXC_PC,%a6,%d0)
   2773	mov.w		&0x00f0,(EXC_VOFF,%a6,%d0)
   2774
   2775	lea		(EXC_SR,%a6,%d0),%a0
   2776	mov.l		%a0,EXC_SR(%a6)
   2777
   2778	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
   2779	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   2780	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   2781
   2782	unlk		%a6
   2783	mov.l		(%sp)+,%sp
   2784	bra.l		_fpsp_done
   2785
   2786iea_fmovm_data_pi_trace:
   2787	mov.w		EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0)
   2788	mov.l		EXC_EXTWPTR(%a6),(EXC_PC-0x4,%a6,%d0)
   2789	mov.w		&0x2024,(EXC_VOFF-0x4,%a6,%d0)
   2790	mov.l		EXC_PC(%a6),(EXC_VOFF+0x2-0x4,%a6,%d0)
   2791
   2792	lea		(EXC_SR-0x4,%a6,%d0),%a0
   2793	mov.l		%a0,EXC_SR(%a6)
   2794
   2795	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
   2796	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   2797	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   2798
   2799	unlk		%a6
   2800	mov.l		(%sp)+,%sp
   2801	bra.l		_real_trace
   2802
# right now, d0 = the size and d1 = the strg.
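# the fm_1..fm_end chain further down effectively walks the strg mask
# MSB-first (bit 7 <-> fp0 ... bit 0 <-> fp7), roughly:
#
#	d0 = -size;
#	for (n = 0; n < 8; n++, strg <<= 1)
#		if (strg & 0x80) {
#			store fpn (12 bytes) at 0xc(%a6,%d0);
#			d0 += 0xc;
#		}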
   2804iea_fmovm_data_predec:
   2805	mov.b		%d1,EXC_VOFF(%a6)	# store strg
   2806	mov.b		%d0,0x1+EXC_VOFF(%a6)	# store size
   2807
   2808	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
   2809	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   2810	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   2811
   2812	mov.l		(%a6),-(%sp)		# make a copy of a6
   2813	mov.l		%d0,-(%sp)		# save d0
   2814	mov.l		%d1,-(%sp)		# save d1
   2815	mov.l		EXC_EXTWPTR(%a6),-(%sp)	# make a copy of Next PC
   2816
   2817	clr.l		%d0
   2818	mov.b		0x1+EXC_VOFF(%a6),%d0	# fetch size
   2819	neg.l		%d0			# get negative of size
   2820
   2821	btst		&0x7,EXC_SR(%a6)	# is trace enabled?
   2822	beq.b		iea_fmovm_data_p2
   2823
   2824	mov.w		EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0)
   2825	mov.l		EXC_PC(%a6),(EXC_VOFF-0x2,%a6,%d0)
   2826	mov.l		(%sp)+,(EXC_PC-0x4,%a6,%d0)
   2827	mov.w		&0x2024,(EXC_VOFF-0x4,%a6,%d0)
   2828
   2829	pea		(%a6,%d0)		# create final sp
   2830	bra.b		iea_fmovm_data_p3
   2831
   2832iea_fmovm_data_p2:
   2833	mov.w		EXC_SR(%a6),(EXC_SR,%a6,%d0)
   2834	mov.l		(%sp)+,(EXC_PC,%a6,%d0)
   2835	mov.w		&0x00f0,(EXC_VOFF,%a6,%d0)
   2836
   2837	pea		(0x4,%a6,%d0)		# create final sp
   2838
   2839iea_fmovm_data_p3:
   2840	clr.l		%d1
   2841	mov.b		EXC_VOFF(%a6),%d1	# fetch strg
   2842
   2843	tst.b		%d1
   2844	bpl.b		fm_1
   2845	fmovm.x		&0x80,(0x4+0x8,%a6,%d0)
   2846	addi.l		&0xc,%d0
   2847fm_1:
   2848	lsl.b		&0x1,%d1
   2849	bpl.b		fm_2
   2850	fmovm.x		&0x40,(0x4+0x8,%a6,%d0)
   2851	addi.l		&0xc,%d0
   2852fm_2:
   2853	lsl.b		&0x1,%d1
   2854	bpl.b		fm_3
   2855	fmovm.x		&0x20,(0x4+0x8,%a6,%d0)
   2856	addi.l		&0xc,%d0
   2857fm_3:
   2858	lsl.b		&0x1,%d1
   2859	bpl.b		fm_4
   2860	fmovm.x		&0x10,(0x4+0x8,%a6,%d0)
   2861	addi.l		&0xc,%d0
   2862fm_4:
   2863	lsl.b		&0x1,%d1
   2864	bpl.b		fm_5
   2865	fmovm.x		&0x08,(0x4+0x8,%a6,%d0)
   2866	addi.l		&0xc,%d0
   2867fm_5:
   2868	lsl.b		&0x1,%d1
   2869	bpl.b		fm_6
   2870	fmovm.x		&0x04,(0x4+0x8,%a6,%d0)
   2871	addi.l		&0xc,%d0
   2872fm_6:
   2873	lsl.b		&0x1,%d1
   2874	bpl.b		fm_7
   2875	fmovm.x		&0x02,(0x4+0x8,%a6,%d0)
   2876	addi.l		&0xc,%d0
   2877fm_7:
   2878	lsl.b		&0x1,%d1
   2879	bpl.b		fm_end
   2880	fmovm.x		&0x01,(0x4+0x8,%a6,%d0)
   2881fm_end:
   2882	mov.l		0x4(%sp),%d1
   2883	mov.l		0x8(%sp),%d0
   2884	mov.l		0xc(%sp),%a6
   2885	mov.l		(%sp)+,%sp
   2886
   2887	btst		&0x7,(%sp)		# is trace enabled?
   2888	beq.l		_fpsp_done
   2889	bra.l		_real_trace
   2890
   2891#########################################################################
   2892iea_fmovm_ctrl:
   2893
   2894	bsr.l		fmovm_ctrl		# load ctrl regs
   2895
   2896iea_fmovm_exit:
   2897	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
   2898	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   2899	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   2900
   2901	btst		&0x7,EXC_SR(%a6)	# is trace on?
   2902	bne.b		iea_fmovm_trace		# yes
   2903
   2904	mov.l		EXC_EXTWPTR(%a6),EXC_PC(%a6) # set Next PC
   2905
   2906	unlk		%a6			# unravel the frame
   2907
   2908	bra.l		_fpsp_done		# exit to os
   2909
   2910#
   2911# The control reg instruction that took an "Unimplemented Effective Address"
   2912# exception was being traced. The "Current PC" for the trace frame is the
   2913# PC stacked for Unimp EA. The "Next PC" is in EXC_EXTWPTR.
   2914# After fixing the stack frame, jump to _real_trace().
   2915#
   2916#		 UNIMP EA FRAME		   TRACE FRAME
   2917#		*****************	*****************
   2918#		* 0x0 *  0x0f0	*	*    Current	*
   2919#		*****************	*      PC	*
   2920#		*    Current	*	*****************
   2921#		*      PC	*	* 0x2 *  0x024	*
   2922#		*****************	*****************
   2923#		*      SR	*	*     Next	*
   2924#		*****************	*      PC	*
   2925#					*****************
   2926#					*      SR	*
   2927#					*****************
   2928# this ain't a pretty solution, but it works:
   2929# -restore a6 (not with unlk)
   2930# -shift stack frame down over where old a6 used to be
   2931# -add LOCAL_SIZE to stack pointer
   2932iea_fmovm_trace:
   2933	mov.l		(%a6),%a6		# restore frame pointer
   2934	mov.w		EXC_SR+LOCAL_SIZE(%sp),0x0+LOCAL_SIZE(%sp)
   2935	mov.l		EXC_PC+LOCAL_SIZE(%sp),0x8+LOCAL_SIZE(%sp)
   2936	mov.l		EXC_EXTWPTR+LOCAL_SIZE(%sp),0x2+LOCAL_SIZE(%sp)
   2937	mov.w		&0x2024,0x6+LOCAL_SIZE(%sp) # stk fmt = 0x2; voff = 0x024
   2938	add.l		&LOCAL_SIZE,%sp		# clear stack frame
   2939
   2940	bra.l		_real_trace
   2941
   2942#########################################################################
   2943# The FPU is disabled and so we should really have taken the "Line
   2944# F Emulator" exception. So, here we create an 8-word stack frame
   2945# from our 4-word stack frame. This means we must calculate the length
    2946# of the faulting instruction to get the "next PC". This is trivial for
   2947# immediate operands but requires some extra work for fmovm dynamic
   2948# which can use most addressing modes.
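#
# example (comments only): an "fadd.x &<imm>,%fp0" taken with the FPU
# disabled uses the 16-byte immediate form, so d0 gets 0x10 below and the
# "next PC" placed in the new format-0x4 frame is the stacked PC + 0x10.
#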
   2949iea_disabled:
   2950	mov.l		(%sp)+,%d0		# restore d0
   2951
   2952	link		%a6,&-LOCAL_SIZE	# init stack frame
   2953
   2954	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
   2955
   2956# PC of instruction that took the exception is the PC in the frame
   2957	mov.l		EXC_PC(%a6),EXC_EXTWPTR(%a6)
   2958	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   2959	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
   2960	bsr.l		_imem_read_long		# fetch the instruction words
   2961	mov.l		%d0,EXC_OPWORD(%a6)	# store OPWORD and EXTWORD
   2962
   2963	tst.w		%d0			# is instr fmovm?
   2964	bmi.b		iea_dis_fmovm		# yes
   2965# instruction is using an extended precision immediate operand. Therefore,
   2966# the total instruction length is 16 bytes.
   2967iea_dis_immed:
   2968	mov.l		&0x10,%d0		# 16 bytes of instruction
   2969	bra.b		iea_dis_cont
   2970iea_dis_fmovm:
   2971	btst		&0xe,%d0		# is instr fmovm ctrl
   2972	bne.b		iea_dis_fmovm_data	# no
   2973# the instruction is a fmovm.l with 2 or 3 registers.
   2974	bfextu		%d0{&19:&3},%d1
   2975	mov.l		&0xc,%d0
   2976	cmpi.b		%d1,&0x7		# move all regs?
   2977	bne.b		iea_dis_cont
   2978	addq.l		&0x4,%d0
   2979	bra.b		iea_dis_cont
   2980# the instruction is an fmovm.x dynamic which can use many addressing
   2981# modes and thus can have several different total instruction lengths.
   2982# call fmovm_calc_ea which will go through the ea calc process and,
   2983# as a by-product, will tell us how long the instruction is.
   2984iea_dis_fmovm_data:
   2985	clr.l		%d0
   2986	bsr.l		fmovm_calc_ea
   2987	mov.l		EXC_EXTWPTR(%a6),%d0
   2988	sub.l		EXC_PC(%a6),%d0
   2989iea_dis_cont:
   2990	mov.w		%d0,EXC_VOFF(%a6)	# store stack shift value
   2991
   2992	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   2993
   2994	unlk		%a6
   2995
   2996# here, we actually create the 8-word frame from the 4-word frame,
   2997# with the "next PC" as additional info.
    2998# the <ea> field is left undefined.
   2999	subq.l		&0x8,%sp		# make room for new stack
   3000	mov.l		%d0,-(%sp)		# save d0
   3001	mov.w		0xc(%sp),0x4(%sp)	# move SR
   3002	mov.l		0xe(%sp),0x6(%sp)	# move Current PC
   3003	clr.l		%d0
   3004	mov.w		0x12(%sp),%d0
   3005	mov.l		0x6(%sp),0x10(%sp)	# move Current PC
   3006	add.l		%d0,0x6(%sp)		# make Next PC
   3007	mov.w		&0x402c,0xa(%sp)	# insert offset,frame format
   3008	mov.l		(%sp)+,%d0		# restore d0
   3009
   3010	bra.l		_real_fpu_disabled
   3011
   3012##########
   3013
   3014iea_iacc:
   3015	movc		%pcr,%d0
   3016	btst		&0x1,%d0
   3017	bne.b		iea_iacc_cont
   3018	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   3019	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1 on stack
   3020iea_iacc_cont:
   3021	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   3022
   3023	unlk		%a6
   3024
   3025	subq.w		&0x8,%sp		# make stack frame bigger
   3026	mov.l		0x8(%sp),(%sp)		# store SR,hi(PC)
   3027	mov.w		0xc(%sp),0x4(%sp)	# store lo(PC)
   3028	mov.w		&0x4008,0x6(%sp)	# store voff
   3029	mov.l		0x2(%sp),0x8(%sp)	# store ea
   3030	mov.l		&0x09428001,0xc(%sp)	# store fslw
   3031
   3032iea_acc_done:
   3033	btst		&0x5,(%sp)		# user or supervisor mode?
   3034	beq.b		iea_acc_done2		# user
   3035	bset		&0x2,0xd(%sp)		# set supervisor TM bit
   3036
   3037iea_acc_done2:
   3038	bra.l		_real_access
   3039
   3040iea_dacc:
   3041	lea		-LOCAL_SIZE(%a6),%sp
   3042
   3043	movc		%pcr,%d1
   3044	btst		&0x1,%d1
   3045	bne.b		iea_dacc_cont
   3046	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1 on stack
   3047	fmovm.l		LOCAL_SIZE+USER_FPCR(%sp),%fpcr,%fpsr,%fpiar # restore ctrl regs
   3048iea_dacc_cont:
   3049	mov.l		(%a6),%a6
   3050
   3051	mov.l		0x4+LOCAL_SIZE(%sp),-0x8+0x4+LOCAL_SIZE(%sp)
   3052	mov.w		0x8+LOCAL_SIZE(%sp),-0x8+0x8+LOCAL_SIZE(%sp)
   3053	mov.w		&0x4008,-0x8+0xa+LOCAL_SIZE(%sp)
   3054	mov.l		%a0,-0x8+0xc+LOCAL_SIZE(%sp)
   3055	mov.w		%d0,-0x8+0x10+LOCAL_SIZE(%sp)
   3056	mov.w		&0x0001,-0x8+0x12+LOCAL_SIZE(%sp)
   3057
   3058	movm.l		LOCAL_SIZE+EXC_DREGS(%sp),&0x0303 # restore d0-d1/a0-a1
   3059	add.w		&LOCAL_SIZE-0x4,%sp
   3060
   3061	bra.b		iea_acc_done
   3062
   3063#########################################################################
   3064# XDEF ****************************************************************	#
   3065#	_fpsp_operr(): 060FPSP entry point for FP Operr exception.	#
   3066#									#
   3067#	This handler should be the first code executed upon taking the	#
   3068#	FP Operand Error exception in an operating system.		#
   3069#									#
   3070# XREF ****************************************************************	#
   3071#	_imem_read_long() - read instruction longword			#
   3072#	fix_skewed_ops() - adjust src operand in fsave frame		#
   3073#	_real_operr() - "callout" to operating system operr handler	#
   3074#	_dmem_write_{byte,word,long}() - store data to mem (opclass 3)	#
   3075#	store_dreg_{b,w,l}() - store data to data regfile (opclass 3)	#
   3076#	facc_out_{b,w,l}() - store to memory took access error (opcl 3)	#
   3077#									#
   3078# INPUT ***************************************************************	#
   3079#	- The system stack contains the FP Operr exception frame	#
   3080#	- The fsave frame contains the source operand			#
   3081#									#
   3082# OUTPUT **************************************************************	#
   3083#	No access error:						#
   3084#	- The system stack is unchanged					#
   3085#	- The fsave frame contains the adjusted src op for opclass 0,2	#
   3086#									#
   3087# ALGORITHM ***********************************************************	#
   3088#	In a system where the FP Operr exception is enabled, the goal	#
   3089# is to get to the handler specified at _real_operr(). But, on the 060,	#
    3090# for opclass zero and two instructions taking this exception, the	#
   3091# input operand in the fsave frame may be incorrect for some cases	#
   3092# and needs to be corrected. This handler calls fix_skewed_ops() to	#
   3093# do just this and then exits through _real_operr().			#
   3094#	For opclass 3 instructions, the 060 doesn't store the default	#
   3095# operr result out to memory or data register file as it should.	#
   3096# This code must emulate the move out before finally exiting through	#
    3097# _real_operr(). The move out, if to memory, is performed using		#
   3098# _mem_write() "callout" routines that may return a failing result.	#
   3099# In this special case, the handler must exit through facc_out()	#
   3100# which creates an access error stack frame from the current operr	#
   3101# stack frame.								#
   3102#									#
   3103#########################################################################
   3104
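#
# illustrative example (comments only): opclass 3 move-outs that would
# land in foperr_out below; the register choices are arbitrary.
#
#	fmove.w		%fp0,(%a0)	# fp0 too large for a word: operr is
#					# taken and the handler writes 0x7fff
#					# (0x8000 if fp0 is negative) to (a0)
#	fmove.l		%fp1,%d3	# fp1 = +inf: store_dreg_l() puts
#					# 0x7fffffff into d3
#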
   3105	global		_fpsp_operr
   3106_fpsp_operr:
   3107
   3108	link.w		%a6,&-LOCAL_SIZE	# init stack frame
   3109
   3110	fsave		FP_SRC(%a6)		# grab the "busy" frame
   3111
   3112	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
   3113	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
   3114	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
   3115
   3116# the FPIAR holds the "current PC" of the faulting instruction
   3117	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
   3118
   3119	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   3120	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
   3121	bsr.l		_imem_read_long		# fetch the instruction words
   3122	mov.l		%d0,EXC_OPWORD(%a6)
   3123
   3124##############################################################################
   3125
   3126	btst		&13,%d0			# is instr an fmove out?
   3127	bne.b		foperr_out		# fmove out
   3128
   3129
   3130# here, we simply see if the operand in the fsave frame needs to be "unskewed".
   3131# this would be the case for opclass two operations with a source infinity or
   3132# denorm operand in the sgl or dbl format. NANs also become skewed, but can't
   3133# cause an operr so we don't need to check for them here.
   3134	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
   3135	bsr.l		fix_skewed_ops		# fix src op
   3136
   3137foperr_exit:
   3138	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
   3139	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   3140	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   3141
   3142	frestore	FP_SRC(%a6)
   3143
   3144	unlk		%a6
   3145	bra.l		_real_operr
   3146
   3147########################################################################
   3148
   3149#
   3150# the hardware does not save the default result to memory on enabled
   3151# operand error exceptions. we do this here before passing control to
   3152# the user operand error handler.
   3153#
   3154# byte, word, and long destination format operations can pass
   3155# through here. we simply need to test the sign of the src
   3156# operand and save the appropriate minimum or maximum integer value
   3157# to the effective address as pointed to by the stacked effective address.
   3158#
   3159# although packed opclass three operations can take operand error
   3160# exceptions, they won't pass through here since they are caught
   3161# first by the unsupported data format exception handler. that handler
   3162# sends them directly to _real_operr() if necessary.
   3163#
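# worked example (comments only): a positive source too large for the
# destination yields 0x7f / 0x7fff / 0x7fffffff for byte / word / long;
# a negative source yields 0x80 / 0x8000 / 0x80000000. L_SCR1 below
# holds the longword value and each handler picks off its upper bits.
#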
   3164foperr_out:
   3165
   3166	mov.w		FP_SRC_EX(%a6),%d1	# fetch exponent
   3167	andi.w		&0x7fff,%d1
   3168	cmpi.w		%d1,&0x7fff
   3169	bne.b		foperr_out_not_qnan
   3170# the operand is either an infinity or a QNAN.
   3171	tst.l		FP_SRC_LO(%a6)
   3172	bne.b		foperr_out_qnan
   3173	mov.l		FP_SRC_HI(%a6),%d1
   3174	andi.l		&0x7fffffff,%d1
   3175	beq.b		foperr_out_not_qnan
   3176foperr_out_qnan:
   3177	mov.l		FP_SRC_HI(%a6),L_SCR1(%a6)
   3178	bra.b		foperr_out_jmp
   3179
   3180foperr_out_not_qnan:
   3181	mov.l		&0x7fffffff,%d1
   3182	tst.b		FP_SRC_EX(%a6)
   3183	bpl.b		foperr_out_not_qnan2
   3184	addq.l		&0x1,%d1
   3185foperr_out_not_qnan2:
   3186	mov.l		%d1,L_SCR1(%a6)
   3187
   3188foperr_out_jmp:
   3189	bfextu		%d0{&19:&3},%d0		# extract dst format field
   3190	mov.b		1+EXC_OPWORD(%a6),%d1	# extract <ea> mode,reg
   3191	mov.w		(tbl_operr.b,%pc,%d0.w*2),%a0
   3192	jmp		(tbl_operr.b,%pc,%a0)
   3193
   3194tbl_operr:
   3195	short		foperr_out_l - tbl_operr # long word integer
   3196	short		tbl_operr    - tbl_operr # sgl prec shouldn't happen
   3197	short		tbl_operr    - tbl_operr # ext prec shouldn't happen
   3198	short		foperr_exit  - tbl_operr # packed won't enter here
   3199	short		foperr_out_w - tbl_operr # word integer
   3200	short		tbl_operr    - tbl_operr # dbl prec shouldn't happen
   3201	short		foperr_out_b - tbl_operr # byte integer
   3202	short		tbl_operr    - tbl_operr # packed won't enter here
   3203
   3204foperr_out_b:
   3205	mov.b		L_SCR1(%a6),%d0		# load positive default result
   3206	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
   3207	ble.b		foperr_out_b_save_dn	# yes
   3208	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
   3209	bsr.l		_dmem_write_byte	# write the default result
   3210
   3211	tst.l		%d1			# did dstore fail?
   3212	bne.l		facc_out_b		# yes
   3213
   3214	bra.w		foperr_exit
   3215foperr_out_b_save_dn:
   3216	andi.w		&0x0007,%d1
   3217	bsr.l		store_dreg_b		# store result to regfile
   3218	bra.w		foperr_exit
   3219
   3220foperr_out_w:
   3221	mov.w		L_SCR1(%a6),%d0		# load positive default result
   3222	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
   3223	ble.b		foperr_out_w_save_dn	# yes
   3224	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
   3225	bsr.l		_dmem_write_word	# write the default result
   3226
   3227	tst.l		%d1			# did dstore fail?
   3228	bne.l		facc_out_w		# yes
   3229
   3230	bra.w		foperr_exit
   3231foperr_out_w_save_dn:
   3232	andi.w		&0x0007,%d1
   3233	bsr.l		store_dreg_w		# store result to regfile
   3234	bra.w		foperr_exit
   3235
   3236foperr_out_l:
   3237	mov.l		L_SCR1(%a6),%d0		# load positive default result
   3238	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
   3239	ble.b		foperr_out_l_save_dn	# yes
   3240	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
   3241	bsr.l		_dmem_write_long	# write the default result
   3242
   3243	tst.l		%d1			# did dstore fail?
   3244	bne.l		facc_out_l		# yes
   3245
   3246	bra.w		foperr_exit
   3247foperr_out_l_save_dn:
   3248	andi.w		&0x0007,%d1
   3249	bsr.l		store_dreg_l		# store result to regfile
   3250	bra.w		foperr_exit
   3251
   3252#########################################################################
   3253# XDEF ****************************************************************	#
   3254#	_fpsp_snan(): 060FPSP entry point for FP SNAN exception.	#
   3255#									#
   3256#	This handler should be the first code executed upon taking the	#
   3257#	FP Signalling NAN exception in an operating system.		#
   3258#									#
   3259# XREF ****************************************************************	#
   3260#	_imem_read_long() - read instruction longword			#
   3261#	fix_skewed_ops() - adjust src operand in fsave frame		#
   3262#	_real_snan() - "callout" to operating system SNAN handler	#
   3263#	_dmem_write_{byte,word,long}() - store data to mem (opclass 3)	#
   3264#	store_dreg_{b,w,l}() - store data to data regfile (opclass 3)	#
   3265#	facc_out_{b,w,l,d,x}() - store to mem took acc error (opcl 3)	#
   3266#	_calc_ea_fout() - fix An if <ea> is -() or ()+; also get <ea>	#
   3267#									#
   3268# INPUT ***************************************************************	#
   3269#	- The system stack contains the FP SNAN exception frame		#
   3270#	- The fsave frame contains the source operand			#
   3271#									#
   3272# OUTPUT **************************************************************	#
   3273#	No access error:						#
   3274#	- The system stack is unchanged					#
   3275#	- The fsave frame contains the adjusted src op for opclass 0,2	#
   3276#									#
   3277# ALGORITHM ***********************************************************	#
   3278#	In a system where the FP SNAN exception is enabled, the goal	#
   3279# is to get to the handler specified at _real_snan(). But, on the 060,	#
   3280# for opclass zero and two instructions taking this exception, the	#
   3281# input operand in the fsave frame may be incorrect for some cases	#
   3282# and needs to be corrected. This handler calls fix_skewed_ops() to	#
   3283# do just this and then exits through _real_snan().			#
   3284#	For opclass 3 instructions, the 060 doesn't store the default	#
   3285# SNAN result out to memory or data register file as it should.		#
   3286# This code must emulate the move out before finally exiting through	#
   3287# _real_snan(). The move out, if to memory, is performed using		#
   3288# _mem_write() "callout" routines that may return a failing result.	#
   3289# In this special case, the handler must exit through facc_out()	#
   3290# which creates an access error stack frame from the current SNAN	#
   3291# stack frame.								#
   3292#	For the case of an extended precision opclass 3 instruction,	#
   3293# if the effective addressing mode was -() or ()+, then the address	#
   3294# register must get updated by calling _calc_ea_fout(). If the <ea>	#
   3295# was -(a7) from supervisor mode, then the exception frame currently	#
   3296# on the system stack must be carefully moved "down" to make room	#
   3297# for the operand being moved.						#
   3298#									#
   3299#########################################################################
   3300
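#
# illustrative example (comments only): opclass 3 move-outs of an SNAN
# that would land in fsnan_out below; register choices are arbitrary.
#
#	fmove.l		%fp2,(%a1)	# fp2 = SNAN: the upper 32 mantissa
#					# bits, with bit 30 set, go to (a1)
#	fmove.x		%fp2,-(%a7)	# same from supervisor mode: the
#					# special -(a7) case shifts the
#					# exception frame down 12 bytes
#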
   3301	global		_fpsp_snan
   3302_fpsp_snan:
   3303
   3304	link.w		%a6,&-LOCAL_SIZE	# init stack frame
   3305
   3306	fsave		FP_SRC(%a6)		# grab the "busy" frame
   3307
   3308	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
   3309	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
   3310	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
   3311
   3312# the FPIAR holds the "current PC" of the faulting instruction
   3313	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
   3314
   3315	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   3316	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
   3317	bsr.l		_imem_read_long		# fetch the instruction words
   3318	mov.l		%d0,EXC_OPWORD(%a6)
   3319
   3320##############################################################################
   3321
   3322	btst		&13,%d0			# is instr an fmove out?
   3323	bne.w		fsnan_out		# fmove out
   3324
   3325
   3326# here, we simply see if the operand in the fsave frame needs to be "unskewed".
   3327# this would be the case for opclass two operations with a source infinity or
   3328# denorm operand in the sgl or dbl format. NANs also become skewed and must be
   3329# fixed here.
   3330	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
   3331	bsr.l		fix_skewed_ops		# fix src op
   3332
   3333fsnan_exit:
   3334	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
   3335	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   3336	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   3337
   3338	frestore	FP_SRC(%a6)
   3339
   3340	unlk		%a6
   3341	bra.l		_real_snan
   3342
   3343########################################################################
   3344
   3345#
   3346# the hardware does not save the default result to memory on enabled
   3347# snan exceptions. we do this here before passing control to
   3348# the user snan handler.
   3349#
    3350# byte, word, long, single, double, extended, and packed destination
    3351# format operations can pass through here. since packed format operations
    3352# were already handled by fpsp_unsupp(), we need to do nothing else for
    3353# them here. for the other formats, we write the SNAN (converted to the
    3354# destination format, with its SNAN bit set) to the address pointed to
    3355# by the stacked <ea>, or to the data register for mode 0 <ea>s.
   3356#
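# worked example (comments only): for an SNAN whose upper mantissa
# longword is 0xa2345678, the stored defaults are 0xe2 (byte, bit 6
# set), 0xe234 (word, bit 14 set), and 0xe2345678 (long, bit 30 set).
#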
   3357fsnan_out:
   3358
   3359	bfextu		%d0{&19:&3},%d0		# extract dst format field
   3360	mov.b		1+EXC_OPWORD(%a6),%d1	# extract <ea> mode,reg
   3361	mov.w		(tbl_snan.b,%pc,%d0.w*2),%a0
   3362	jmp		(tbl_snan.b,%pc,%a0)
   3363
   3364tbl_snan:
   3365	short		fsnan_out_l - tbl_snan # long word integer
    3366	short		fsnan_out_s - tbl_snan # sgl prec
    3367	short		fsnan_out_x - tbl_snan # ext prec
   3368	short		tbl_snan    - tbl_snan # packed needs no help
   3369	short		fsnan_out_w - tbl_snan # word integer
    3370	short		fsnan_out_d - tbl_snan # dbl prec
   3371	short		fsnan_out_b - tbl_snan # byte integer
   3372	short		tbl_snan    - tbl_snan # packed needs no help
   3373
   3374fsnan_out_b:
   3375	mov.b		FP_SRC_HI(%a6),%d0	# load upper byte of SNAN
   3376	bset		&6,%d0			# set SNAN bit
   3377	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
   3378	ble.b		fsnan_out_b_dn		# yes
   3379	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
   3380	bsr.l		_dmem_write_byte	# write the default result
   3381
   3382	tst.l		%d1			# did dstore fail?
   3383	bne.l		facc_out_b		# yes
   3384
   3385	bra.w		fsnan_exit
   3386fsnan_out_b_dn:
   3387	andi.w		&0x0007,%d1
   3388	bsr.l		store_dreg_b		# store result to regfile
   3389	bra.w		fsnan_exit
   3390
   3391fsnan_out_w:
   3392	mov.w		FP_SRC_HI(%a6),%d0	# load upper word of SNAN
   3393	bset		&14,%d0			# set SNAN bit
   3394	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
   3395	ble.b		fsnan_out_w_dn		# yes
   3396	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
   3397	bsr.l		_dmem_write_word	# write the default result
   3398
   3399	tst.l		%d1			# did dstore fail?
   3400	bne.l		facc_out_w		# yes
   3401
   3402	bra.w		fsnan_exit
   3403fsnan_out_w_dn:
   3404	andi.w		&0x0007,%d1
   3405	bsr.l		store_dreg_w		# store result to regfile
   3406	bra.w		fsnan_exit
   3407
   3408fsnan_out_l:
   3409	mov.l		FP_SRC_HI(%a6),%d0	# load upper longword of SNAN
   3410	bset		&30,%d0			# set SNAN bit
   3411	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
   3412	ble.b		fsnan_out_l_dn		# yes
   3413	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
   3414	bsr.l		_dmem_write_long	# write the default result
   3415
   3416	tst.l		%d1			# did dstore fail?
   3417	bne.l		facc_out_l		# yes
   3418
   3419	bra.w		fsnan_exit
   3420fsnan_out_l_dn:
   3421	andi.w		&0x0007,%d1
   3422	bsr.l		store_dreg_l		# store result to regfile
   3423	bra.w		fsnan_exit
   3424
   3425fsnan_out_s:
   3426	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
   3427	ble.b		fsnan_out_d_dn		# yes
   3428	mov.l		FP_SRC_EX(%a6),%d0	# fetch SNAN sign
   3429	andi.l		&0x80000000,%d0		# keep sign
   3430	ori.l		&0x7fc00000,%d0		# insert new exponent,SNAN bit
   3431	mov.l		FP_SRC_HI(%a6),%d1	# load mantissa
   3432	lsr.l		&0x8,%d1		# shift mantissa for sgl
   3433	or.l		%d1,%d0			# create sgl SNAN
   3434	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
   3435	bsr.l		_dmem_write_long	# write the default result
   3436
   3437	tst.l		%d1			# did dstore fail?
   3438	bne.l		facc_out_l		# yes
   3439
   3440	bra.w		fsnan_exit
   3441fsnan_out_d_dn:
   3442	mov.l		FP_SRC_EX(%a6),%d0	# fetch SNAN sign
   3443	andi.l		&0x80000000,%d0		# keep sign
   3444	ori.l		&0x7fc00000,%d0		# insert new exponent,SNAN bit
   3445	mov.l		%d1,-(%sp)
   3446	mov.l		FP_SRC_HI(%a6),%d1	# load mantissa
   3447	lsr.l		&0x8,%d1		# shift mantissa for sgl
   3448	or.l		%d1,%d0			# create sgl SNAN
   3449	mov.l		(%sp)+,%d1
   3450	andi.w		&0x0007,%d1
   3451	bsr.l		store_dreg_l		# store result to regfile
   3452	bra.w		fsnan_exit
   3453
   3454fsnan_out_d:
   3455	mov.l		FP_SRC_EX(%a6),%d0	# fetch SNAN sign
   3456	andi.l		&0x80000000,%d0		# keep sign
   3457	ori.l		&0x7ff80000,%d0		# insert new exponent,SNAN bit
   3458	mov.l		FP_SRC_HI(%a6),%d1	# load hi mantissa
   3459	mov.l		%d0,FP_SCR0_EX(%a6)	# store to temp space
   3460	mov.l		&11,%d0			# load shift amt
   3461	lsr.l		%d0,%d1
   3462	or.l		%d1,FP_SCR0_EX(%a6)	# create dbl hi
   3463	mov.l		FP_SRC_HI(%a6),%d1	# load hi mantissa
   3464	andi.l		&0x000007ff,%d1
   3465	ror.l		%d0,%d1
   3466	mov.l		%d1,FP_SCR0_HI(%a6)	# store to temp space
   3467	mov.l		FP_SRC_LO(%a6),%d1	# load lo mantissa
   3468	lsr.l		%d0,%d1
   3469	or.l		%d1,FP_SCR0_HI(%a6)	# create dbl lo
   3470	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
   3471	mov.l		EXC_EA(%a6),%a1		# pass: dst addr
   3472	movq.l		&0x8,%d0		# pass: size of 8 bytes
   3473	bsr.l		_dmem_write		# write the default result
   3474
   3475	tst.l		%d1			# did dstore fail?
   3476	bne.l		facc_out_d		# yes
   3477
   3478	bra.w		fsnan_exit
   3479
   3480# for extended precision, if the addressing mode is pre-decrement or
   3481# post-increment, then the address register did not get updated.
   3482# in addition, for pre-decrement, the stacked <ea> is incorrect.
   3483fsnan_out_x:
   3484	clr.b		SPCOND_FLG(%a6)		# clear special case flag
   3485
   3486	mov.w		FP_SRC_EX(%a6),FP_SCR0_EX(%a6)
   3487	clr.w		2+FP_SCR0(%a6)
   3488	mov.l		FP_SRC_HI(%a6),%d0
   3489	bset		&30,%d0
   3490	mov.l		%d0,FP_SCR0_HI(%a6)
   3491	mov.l		FP_SRC_LO(%a6),FP_SCR0_LO(%a6)
   3492
   3493	btst		&0x5,EXC_SR(%a6)	# supervisor mode exception?
   3494	bne.b		fsnan_out_x_s		# yes
   3495
   3496	mov.l		%usp,%a0		# fetch user stack pointer
   3497	mov.l		%a0,EXC_A7(%a6)		# save on stack for calc_ea()
   3498	mov.l		(%a6),EXC_A6(%a6)
   3499
   3500	bsr.l		_calc_ea_fout		# find the correct ea,update An
   3501	mov.l		%a0,%a1
   3502	mov.l		%a0,EXC_EA(%a6)		# stack correct <ea>
   3503
   3504	mov.l		EXC_A7(%a6),%a0
   3505	mov.l		%a0,%usp		# restore user stack pointer
   3506	mov.l		EXC_A6(%a6),(%a6)
   3507
   3508fsnan_out_x_save:
   3509	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
   3510	movq.l		&0xc,%d0		# pass: size of extended
   3511	bsr.l		_dmem_write		# write the default result
   3512
   3513	tst.l		%d1			# did dstore fail?
   3514	bne.l		facc_out_x		# yes
   3515
   3516	bra.w		fsnan_exit
   3517
   3518fsnan_out_x_s:
   3519	mov.l		(%a6),EXC_A6(%a6)
   3520
   3521	bsr.l		_calc_ea_fout		# find the correct ea,update An
   3522	mov.l		%a0,%a1
   3523	mov.l		%a0,EXC_EA(%a6)		# stack correct <ea>
   3524
   3525	mov.l		EXC_A6(%a6),(%a6)
   3526
   3527	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
   3528	bne.b		fsnan_out_x_save	# no
   3529
   3530# the operation was "fmove.x SNAN,-(a7)" from supervisor mode.
   3531	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
   3532	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   3533	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   3534
   3535	frestore	FP_SRC(%a6)
   3536
   3537	mov.l		EXC_A6(%a6),%a6		# restore frame pointer
   3538
   3539	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
   3540	mov.l		LOCAL_SIZE+EXC_PC+0x2(%sp),LOCAL_SIZE+EXC_PC+0x2-0xc(%sp)
   3541	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
   3542
   3543	mov.l		LOCAL_SIZE+FP_SCR0_EX(%sp),LOCAL_SIZE+EXC_SR(%sp)
   3544	mov.l		LOCAL_SIZE+FP_SCR0_HI(%sp),LOCAL_SIZE+EXC_PC+0x2(%sp)
   3545	mov.l		LOCAL_SIZE+FP_SCR0_LO(%sp),LOCAL_SIZE+EXC_EA(%sp)
   3546
   3547	add.l		&LOCAL_SIZE-0x8,%sp
   3548
   3549	bra.l		_real_snan
   3550
   3551#########################################################################
   3552# XDEF ****************************************************************	#
   3553#	_fpsp_inex(): 060FPSP entry point for FP Inexact exception.	#
   3554#									#
   3555#	This handler should be the first code executed upon taking the	#
   3556#	FP Inexact exception in an operating system.			#
   3557#									#
   3558# XREF ****************************************************************	#
   3559#	_imem_read_long() - read instruction longword			#
   3560#	fix_skewed_ops() - adjust src operand in fsave frame		#
   3561#	set_tag_x() - determine optype of src/dst operands		#
   3562#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
   3563#	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
   3564#	load_fpn2() - load dst operand from FP regfile			#
   3565#	smovcr() - emulate an "fmovcr" instruction			#
   3566#	fout() - emulate an opclass 3 instruction			#
    3567#	tbl_unsupp - address of opclass 0,2 emulation routine table	#
   3568#	_real_inex() - "callout" to operating system inexact handler	#
   3569#									#
   3570# INPUT ***************************************************************	#
   3571#	- The system stack contains the FP Inexact exception frame	#
   3572#	- The fsave frame contains the source operand			#
   3573#									#
   3574# OUTPUT **************************************************************	#
   3575#	- The system stack is unchanged					#
   3576#	- The fsave frame contains the adjusted src op for opclass 0,2	#
   3577#									#
   3578# ALGORITHM ***********************************************************	#
   3579#	In a system where the FP Inexact exception is enabled, the goal	#
   3580# is to get to the handler specified at _real_inex(). But, on the 060,	#
    3581# for opclass zero and two instructions taking this exception, the	#
   3582# hardware doesn't store the correct result to the destination FP	#
   3583# register as did the '040 and '881/2. This handler must emulate the	#
   3584# instruction in order to get this value and then store it to the	#
   3585# correct register before calling _real_inex().				#
   3586#	For opclass 3 instructions, the 060 doesn't store the default	#
   3587# inexact result out to memory or data register file as it should.	#
   3588# This code must emulate the move out by calling fout() before finally	#
   3589# exiting through _real_inex().						#
   3590#									#
   3591#########################################################################
   3592
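#
# illustrative example (comments only): with the inexact exception
# enabled in the FPCR, an opclass 2 instruction such as
#
#	fadd.s		(%a0),%fp3	# result needs rounding, but the 060
#					# does not update fp3 itself
#
# is re-executed below through tbl_unsupp and the rounded result is
# written to fp3 by store_fpreg() before exiting through _real_inex().
#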
   3593	global		_fpsp_inex
   3594_fpsp_inex:
   3595
   3596	link.w		%a6,&-LOCAL_SIZE	# init stack frame
   3597
   3598	fsave		FP_SRC(%a6)		# grab the "busy" frame
   3599
   3600	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
   3601	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
   3602	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
   3603
   3604# the FPIAR holds the "current PC" of the faulting instruction
   3605	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
   3606
   3607	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   3608	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
   3609	bsr.l		_imem_read_long		# fetch the instruction words
   3610	mov.l		%d0,EXC_OPWORD(%a6)
   3611
   3612##############################################################################
   3613
   3614	btst		&13,%d0			# is instr an fmove out?
   3615	bne.w		finex_out		# fmove out
   3616
   3617
   3618# the hardware, for "fabs" and "fneg" w/ a long source format, puts the
   3619# longword integer directly into the upper longword of the mantissa along
   3620# w/ an exponent value of 0x401e. we convert this to extended precision here.
   3621	bfextu		%d0{&19:&3},%d0		# fetch instr size
   3622	bne.b		finex_cont		# instr size is not long
   3623	cmpi.w		FP_SRC_EX(%a6),&0x401e	# is exponent 0x401e?
   3624	bne.b		finex_cont		# no
   3625	fmov.l		&0x0,%fpcr
   3626	fmov.l		FP_SRC_HI(%a6),%fp0	# load integer src
   3627	fmov.x		%fp0,FP_SRC(%a6)	# store integer as extended precision
   3628	mov.w		&0xe001,0x2+FP_SRC(%a6)
   3629
   3630finex_cont:
   3631	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
   3632	bsr.l		fix_skewed_ops		# fix src op
   3633
   3634# Here, we zero the ccode and exception byte field since we're going to
   3635# emulate the whole instruction. Notice, though, that we don't kill the
   3636# INEX1 bit. This is because a packed op has long since been converted
   3637# to extended before arriving here. Therefore, we need to retain the
   3638# INEX1 bit from when the operand was first converted.
    3639	andi.l		&0x00ff01ff,USER_FPSR(%a6) # zero all but accrued field
   3640
   3641	fmov.l		&0x0,%fpcr		# zero current control regs
   3642	fmov.l		&0x0,%fpsr
   3643
   3644	bfextu		EXC_EXTWORD(%a6){&0:&6},%d1 # extract upper 6 of cmdreg
   3645	cmpi.b		%d1,&0x17		# is op an fmovecr?
   3646	beq.w		finex_fmovcr		# yes
   3647
   3648	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
   3649	bsr.l		set_tag_x		# tag the operand type
   3650	mov.b		%d0,STAG(%a6)		# maybe NORM,DENORM
   3651
   3652# bits four and five of the fp extension word separate the monadic and dyadic
   3653# operations that can pass through fpsp_inex(). remember that fcmp and ftst
   3654# will never take this exception, but fsincos will.
   3655	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
   3656	beq.b		finex_extract		# monadic
   3657
   3658	btst		&0x4,1+EXC_CMDREG(%a6)	# is operation an fsincos?
   3659	bne.b		finex_extract		# yes
   3660
   3661	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
   3662	bsr.l		load_fpn2		# load dst into FP_DST
   3663
   3664	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
   3665	bsr.l		set_tag_x		# tag the operand type
   3666	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
   3667	bne.b		finex_op2_done		# no
   3668	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
   3669finex_op2_done:
   3670	mov.b		%d0,DTAG(%a6)		# save dst optype tag
   3671
   3672finex_extract:
   3673	clr.l		%d0
   3674	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode
   3675
   3676	mov.b		1+EXC_CMDREG(%a6),%d1
   3677	andi.w		&0x007f,%d1		# extract extension
   3678
   3679	lea		FP_SRC(%a6),%a0
   3680	lea		FP_DST(%a6),%a1
   3681
   3682	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
   3683	jsr		(tbl_unsupp.l,%pc,%d1.l*1)
   3684
   3685# the operation has been emulated. the result is in fp0.
   3686finex_save:
   3687	bfextu		EXC_CMDREG(%a6){&6:&3},%d0
   3688	bsr.l		store_fpreg
   3689
   3690finex_exit:
   3691	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
   3692	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   3693	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   3694
   3695	frestore	FP_SRC(%a6)
   3696
   3697	unlk		%a6
   3698	bra.l		_real_inex
   3699
   3700finex_fmovcr:
   3701	clr.l		%d0
   3702	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec,mode
   3703	mov.b		1+EXC_CMDREG(%a6),%d1
   3704	andi.l		&0x0000007f,%d1		# pass rom offset
   3705	bsr.l		smovcr
   3706	bra.b		finex_save
   3707
   3708########################################################################
   3709
   3710#
   3711# the hardware does not save the default result to memory on enabled
   3712# inexact exceptions. we do this here before passing control to
   3713# the user inexact handler.
   3714#
   3715# byte, word, and long destination format operations can pass
   3716# through here. so can double and single precision.
   3717# although packed opclass three operations can take inexact
   3718# exceptions, they won't pass through here since they are caught
   3719# first by the unsupported data format exception handler. that handler
   3720# sends them directly to _real_inex() if necessary.
   3721#
   3722finex_out:
   3723
   3724	mov.b		&NORM,STAG(%a6)		# src is a NORM
   3725
   3726	clr.l		%d0
   3727	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec,mode
   3728
   3729	andi.l		&0xffff00ff,USER_FPSR(%a6) # zero exception field
   3730
   3731	lea		FP_SRC(%a6),%a0		# pass ptr to src operand
   3732
   3733	bsr.l		fout			# store the default result
   3734
   3735	bra.b		finex_exit
   3736
   3737#########################################################################
   3738# XDEF ****************************************************************	#
   3739#	_fpsp_dz(): 060FPSP entry point for FP DZ exception.		#
   3740#									#
   3741#	This handler should be the first code executed upon taking	#
   3742#	the FP DZ exception in an operating system.			#
   3743#									#
   3744# XREF ****************************************************************	#
   3745#	_imem_read_long() - read instruction longword from memory	#
   3746#	fix_skewed_ops() - adjust fsave operand				#
   3747#	_real_dz() - "callout" exit point from FP DZ handler		#
   3748#									#
   3749# INPUT ***************************************************************	#
   3750#	- The system stack contains the FP DZ exception stack.		#
   3751#	- The fsave frame contains the source operand.			#
   3752#									#
   3753# OUTPUT **************************************************************	#
   3754#	- The system stack contains the FP DZ exception stack.		#
   3755#	- The fsave frame contains the adjusted source operand.		#
   3756#									#
   3757# ALGORITHM ***********************************************************	#
   3758#	In a system where the DZ exception is enabled, the goal is to	#
   3759# get to the handler specified at _real_dz(). But, on the 060, when the	#
   3760# exception is taken, the input operand in the fsave state frame may	#
   3761# be incorrect for some cases and need to be adjusted. So, this package	#
   3762# adjusts the operand using fix_skewed_ops() and then branches to	#
   3763# _real_dz().								#
   3764#									#
   3765#########################################################################
   3766
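#
# sketch (comments only) of one possible minimal _real_dz() callout for
# an OS that simply wants to clear the pending exception and continue;
# the three-instruction clear follows the pattern used by the example
# skeleton shipped with the 060SP, but frame handling is OS-specific
# and should be verified against your own conventions.
#
#	_real_dz:
#		fsave		-(%sp)		# capture the exception frame
#		mov.b		&0x0,0x2(%sp)	# mark it as a NULL frame
#		frestore	(%sp)+		# clears the pending exception
#		rte
#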
   3767	global		_fpsp_dz
   3768_fpsp_dz:
   3769
   3770	link.w		%a6,&-LOCAL_SIZE	# init stack frame
   3771
   3772	fsave		FP_SRC(%a6)		# grab the "busy" frame
   3773
   3774	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
   3775	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
   3776	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
   3777
   3778# the FPIAR holds the "current PC" of the faulting instruction
   3779	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
   3780
   3781	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   3782	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
   3783	bsr.l		_imem_read_long		# fetch the instruction words
   3784	mov.l		%d0,EXC_OPWORD(%a6)
   3785
   3786##############################################################################
   3787
   3788
   3789# here, we simply see if the operand in the fsave frame needs to be "unskewed".
   3790# this would be the case for opclass two operations with a source zero
   3791# in the sgl or dbl format.
   3792	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
   3793	bsr.l		fix_skewed_ops		# fix src op
   3794
   3795fdz_exit:
   3796	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
   3797	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   3798	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   3799
   3800	frestore	FP_SRC(%a6)
   3801
   3802	unlk		%a6
   3803	bra.l		_real_dz
   3804
   3805#########################################################################
   3806# XDEF ****************************************************************	#
   3807#	_fpsp_fline(): 060FPSP entry point for "Line F emulator"	#
   3808#		       exception when the "reduced" version of the	#
   3809#		       FPSP is implemented that does not emulate	#
   3810#		       FP unimplemented instructions.			#
   3811#									#
   3812#	This handler should be the first code executed upon taking a	#
   3813#	"Line F Emulator" exception in an operating system integrating	#
   3814#	the reduced version of 060FPSP.					#
   3815#									#
   3816# XREF ****************************************************************	#
   3817#	_real_fpu_disabled() - Handle "FPU disabled" exceptions		#
   3818#	_real_fline() - Handle all other cases (treated equally)	#
   3819#									#
   3820# INPUT ***************************************************************	#
   3821#	- The system stack contains a "Line F Emulator" exception	#
   3822#	  stack frame.							#
   3823#									#
   3824# OUTPUT **************************************************************	#
   3825#	- The system stack is unchanged.				#
   3826#									#
   3827# ALGORITHM ***********************************************************	#
   3828#	When a "Line F Emulator" exception occurs in a system where	#
   3829# "FPU Unimplemented" instructions will not be emulated, the exception	#
    3830# can occur because the FPU is disabled or the instruction is to be	#
    3831# classified as "Line F". This module determines which case exists and	#
   3832# calls the appropriate "callout".					#
   3833#									#
   3834#########################################################################
   3835
   3836	global		_fpsp_fline
   3837_fpsp_fline:
   3838
   3839# check to see if the FPU is disabled. if so, jump to the OS entry
   3840# point for that condition.
   3841	cmpi.w		0x6(%sp),&0x402c
   3842	beq.l		_real_fpu_disabled
   3843
   3844	bra.l		_real_fline
   3845
   3846#########################################################################
   3847# XDEF ****************************************************************	#
   3848#	_dcalc_ea(): calc correct <ea> from <ea> stacked on exception	#
   3849#									#
   3850# XREF ****************************************************************	#
   3851#	inc_areg() - increment an address register			#
   3852#	dec_areg() - decrement an address register			#
   3853#									#
   3854# INPUT ***************************************************************	#
   3855#	d0 = number of bytes to adjust <ea> by				#
   3856#									#
   3857# OUTPUT **************************************************************	#
    3858#	a0 = corrected <ea>						#
   3859#									#
   3860# ALGORITHM ***********************************************************	#
   3861# "Dummy" CALCulate Effective Address:					#
   3862#	The stacked <ea> for FP unimplemented instructions and opclass	#
   3863#	two packed instructions is correct with the exception of...	#
   3864#									#
   3865#	1) -(An)   : The register is not updated regardless of size.	#
   3866#		     Also, for extended precision and packed, the	#
   3867#		     stacked <ea> value is 8 bytes too big		#
   3868#	2) (An)+   : The register is not updated.			#
   3869#	3) #<data> : The upper longword of the immediate operand is	#
    3870#		     stacked. b,w,l and s sizes are completely stacked;	#
   3871#		     d,x, and p are not.				#
   3872#									#
   3873#########################################################################
   3874
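#
# illustrative example (comments only): for an opclass 2 instruction
# with a packed -(%a3) source (e.g. "fadd.p -(%a3),%fp2"), the caller
# passes d0 = 0xc; dcea_pd below decrements a3 by 12 via dec_areg()
# and, since the size is extended/packed, returns (stacked <ea> - 8)
# and writes that corrected value back into the exception frame.
#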
   3875	global		_dcalc_ea
   3876_dcalc_ea:
   3877	mov.l		%d0, %a0		# move # bytes to %a0
   3878
   3879	mov.b		1+EXC_OPWORD(%a6), %d0	# fetch opcode word
   3880	mov.l		%d0, %d1		# make a copy
   3881
   3882	andi.w		&0x38, %d0		# extract mode field
   3883	andi.l		&0x7, %d1		# extract reg  field
   3884
   3885	cmpi.b		%d0,&0x18		# is mode (An)+ ?
   3886	beq.b		dcea_pi			# yes
   3887
   3888	cmpi.b		%d0,&0x20		# is mode -(An) ?
   3889	beq.b		dcea_pd			# yes
   3890
   3891	or.w		%d1,%d0			# concat mode,reg
   3892	cmpi.b		%d0,&0x3c		# is mode #<data>?
   3893
   3894	beq.b		dcea_imm		# yes
   3895
   3896	mov.l		EXC_EA(%a6),%a0		# return <ea>
   3897	rts
   3898
   3899# need to set immediate data flag here since we'll need to do
   3900# an imem_read to fetch this later.
   3901dcea_imm:
   3902	mov.b		&immed_flg,SPCOND_FLG(%a6)
   3903	lea		([USER_FPIAR,%a6],0x4),%a0 # no; return <ea>
   3904	rts
   3905
   3906# here, the <ea> is stacked correctly. however, we must update the
   3907# address register...
   3908dcea_pi:
   3909	mov.l		%a0,%d0			# pass amt to inc by
   3910	bsr.l		inc_areg		# inc addr register
   3911
   3912	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
   3913	rts
   3914
    3915# the <ea> is stacked correctly for all but extended and packed, for
    3916# which the <ea>s are 8 bytes too large.
   3917# it would make no sense to have a pre-decrement to a7 in supervisor
   3918# mode so we don't even worry about this tricky case here : )
   3919dcea_pd:
   3920	mov.l		%a0,%d0			# pass amt to dec by
   3921	bsr.l		dec_areg		# dec addr register
   3922
   3923	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
   3924
   3925	cmpi.b		%d0,&0xc		# is opsize ext or packed?
   3926	beq.b		dcea_pd2		# yes
   3927	rts
   3928dcea_pd2:
   3929	sub.l		&0x8,%a0		# correct <ea>
   3930	mov.l		%a0,EXC_EA(%a6)		# put correct <ea> on stack
   3931	rts
   3932
   3933#########################################################################
   3934# XDEF ****************************************************************	#
   3935#	_calc_ea_fout(): calculate correct stacked <ea> for extended	#
   3936#			 and packed data opclass 3 operations.		#
   3937#									#
   3938# XREF ****************************************************************	#
   3939#	None								#
   3940#									#
   3941# INPUT ***************************************************************	#
   3942#	None								#
   3943#									#
   3944# OUTPUT **************************************************************	#
   3945#	a0 = return correct effective address				#
   3946#									#
   3947# ALGORITHM ***********************************************************	#
   3948#	For opclass 3 extended and packed data operations, the <ea>	#
   3949# stacked for the exception is incorrect for -(an) and (an)+ addressing	#
    3950# modes. Also, while we're at it, the address register itself must get	#
   3951# updated.								#
   3952#	So, for -(an), we must subtract 8 off of the stacked <ea> value	#
   3953# and return that value as the correct <ea> and store that value in An.	#
   3954# For (an)+, the stacked <ea> is correct but we must adjust An by +12.	#
   3955#									#
   3956#########################################################################
   3957
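#
# illustrative example (comments only): for "fmove.x %fp0,-(%a3)" the
# stacked <ea> is the true destination + 8, so ceaf_pd below returns
# (<ea> - 8) and writes that same value into a3; for
# "fmove.x %fp0,(%a3)+" the stacked <ea> is already correct and a3 is
# simply advanced by 12.
#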
   3958# This calc_ea is currently used to retrieve the correct <ea>
   3959# for fmove outs of type extended and packed.
   3960	global		_calc_ea_fout
   3961_calc_ea_fout:
   3962	mov.b		1+EXC_OPWORD(%a6),%d0	# fetch opcode word
   3963	mov.l		%d0,%d1			# make a copy
   3964
   3965	andi.w		&0x38,%d0		# extract mode field
   3966	andi.l		&0x7,%d1		# extract reg  field
   3967
   3968	cmpi.b		%d0,&0x18		# is mode (An)+ ?
   3969	beq.b		ceaf_pi			# yes
   3970
   3971	cmpi.b		%d0,&0x20		# is mode -(An) ?
   3972	beq.w		ceaf_pd			# yes
   3973
   3974	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
   3975	rts
   3976
   3977# (An)+ : extended and packed fmove out
   3978#	: stacked <ea> is correct
   3979#	: "An" not updated
   3980ceaf_pi:
   3981	mov.w		(tbl_ceaf_pi.b,%pc,%d1.w*2),%d1
   3982	mov.l		EXC_EA(%a6),%a0
   3983	jmp		(tbl_ceaf_pi.b,%pc,%d1.w*1)
   3984
   3985	swbeg		&0x8
   3986tbl_ceaf_pi:
   3987	short		ceaf_pi0 - tbl_ceaf_pi
   3988	short		ceaf_pi1 - tbl_ceaf_pi
   3989	short		ceaf_pi2 - tbl_ceaf_pi
   3990	short		ceaf_pi3 - tbl_ceaf_pi
   3991	short		ceaf_pi4 - tbl_ceaf_pi
   3992	short		ceaf_pi5 - tbl_ceaf_pi
   3993	short		ceaf_pi6 - tbl_ceaf_pi
   3994	short		ceaf_pi7 - tbl_ceaf_pi
   3995
   3996ceaf_pi0:
   3997	addi.l		&0xc,EXC_DREGS+0x8(%a6)
   3998	rts
   3999ceaf_pi1:
   4000	addi.l		&0xc,EXC_DREGS+0xc(%a6)
   4001	rts
   4002ceaf_pi2:
   4003	add.l		&0xc,%a2
   4004	rts
   4005ceaf_pi3:
   4006	add.l		&0xc,%a3
   4007	rts
   4008ceaf_pi4:
   4009	add.l		&0xc,%a4
   4010	rts
   4011ceaf_pi5:
   4012	add.l		&0xc,%a5
   4013	rts
   4014ceaf_pi6:
   4015	addi.l		&0xc,EXC_A6(%a6)
   4016	rts
   4017ceaf_pi7:
   4018	mov.b		&mia7_flg,SPCOND_FLG(%a6)
   4019	addi.l		&0xc,EXC_A7(%a6)
   4020	rts
   4021
   4022# -(An) : extended and packed fmove out
   4023#	: stacked <ea> = actual <ea> + 8
   4024#	: "An" not updated
   4025ceaf_pd:
   4026	mov.w		(tbl_ceaf_pd.b,%pc,%d1.w*2),%d1
   4027	mov.l		EXC_EA(%a6),%a0
   4028	sub.l		&0x8,%a0
   4029	sub.l		&0x8,EXC_EA(%a6)
   4030	jmp		(tbl_ceaf_pd.b,%pc,%d1.w*1)
   4031
   4032	swbeg		&0x8
   4033tbl_ceaf_pd:
   4034	short		ceaf_pd0 - tbl_ceaf_pd
   4035	short		ceaf_pd1 - tbl_ceaf_pd
   4036	short		ceaf_pd2 - tbl_ceaf_pd
   4037	short		ceaf_pd3 - tbl_ceaf_pd
   4038	short		ceaf_pd4 - tbl_ceaf_pd
   4039	short		ceaf_pd5 - tbl_ceaf_pd
   4040	short		ceaf_pd6 - tbl_ceaf_pd
   4041	short		ceaf_pd7 - tbl_ceaf_pd
   4042
   4043ceaf_pd0:
   4044	mov.l		%a0,EXC_DREGS+0x8(%a6)
   4045	rts
   4046ceaf_pd1:
   4047	mov.l		%a0,EXC_DREGS+0xc(%a6)
   4048	rts
   4049ceaf_pd2:
   4050	mov.l		%a0,%a2
   4051	rts
   4052ceaf_pd3:
   4053	mov.l		%a0,%a3
   4054	rts
   4055ceaf_pd4:
   4056	mov.l		%a0,%a4
   4057	rts
   4058ceaf_pd5:
   4059	mov.l		%a0,%a5
   4060	rts
   4061ceaf_pd6:
   4062	mov.l		%a0,EXC_A6(%a6)
   4063	rts
   4064ceaf_pd7:
   4065	mov.l		%a0,EXC_A7(%a6)
   4066	mov.b		&mda7_flg,SPCOND_FLG(%a6)
   4067	rts
   4068
   4069#
   4070# This table holds the offsets of the emulation routines for each individual
   4071# math operation relative to the address of this table. Included are
   4072# routines like fadd/fmul/fabs. The transcendentals ARE NOT. This is because
    4073# this table is for the version of the 060FPSP without transcendentals.
   4074# The location within the table is determined by the extension bits of the
   4075# operation longword.
   4076#
   4077
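# example of how the table is consumed (this is the fetch/jsr pair used
# by finex_extract above, with d1 holding the 7-bit extension field,
# e.g. 0x22 for fadd):
#
#	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1	# d1 = fadd - tbl_unsupp
#	jsr		(tbl_unsupp.l,%pc,%d1.l*1)	# jump to the fadd routine
#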
   4078	swbeg		&109
   4079tbl_unsupp:
   4080	long		fin		- tbl_unsupp	# 00: fmove
   4081	long		fint		- tbl_unsupp	# 01: fint
   4082	long		tbl_unsupp	- tbl_unsupp	# 02: fsinh
   4083	long		fintrz		- tbl_unsupp	# 03: fintrz
   4084	long		fsqrt		- tbl_unsupp	# 04: fsqrt
   4085	long		tbl_unsupp	- tbl_unsupp
   4086	long		tbl_unsupp	- tbl_unsupp	# 06: flognp1
   4087	long		tbl_unsupp	- tbl_unsupp
   4088	long		tbl_unsupp	- tbl_unsupp	# 08: fetoxm1
   4089	long		tbl_unsupp	- tbl_unsupp	# 09: ftanh
   4090	long		tbl_unsupp	- tbl_unsupp	# 0a: fatan
   4091	long		tbl_unsupp	- tbl_unsupp
   4092	long		tbl_unsupp	- tbl_unsupp	# 0c: fasin
   4093	long		tbl_unsupp	- tbl_unsupp	# 0d: fatanh
   4094	long		tbl_unsupp	- tbl_unsupp	# 0e: fsin
   4095	long		tbl_unsupp	- tbl_unsupp	# 0f: ftan
   4096	long		tbl_unsupp	- tbl_unsupp	# 10: fetox
   4097	long		tbl_unsupp	- tbl_unsupp	# 11: ftwotox
   4098	long		tbl_unsupp	- tbl_unsupp	# 12: ftentox
   4099	long		tbl_unsupp	- tbl_unsupp
   4100	long		tbl_unsupp	- tbl_unsupp	# 14: flogn
   4101	long		tbl_unsupp	- tbl_unsupp	# 15: flog10
   4102	long		tbl_unsupp	- tbl_unsupp	# 16: flog2
   4103	long		tbl_unsupp	- tbl_unsupp
   4104	long		fabs		- tbl_unsupp	# 18: fabs
   4105	long		tbl_unsupp	- tbl_unsupp	# 19: fcosh
   4106	long		fneg		- tbl_unsupp	# 1a: fneg
   4107	long		tbl_unsupp	- tbl_unsupp
   4108	long		tbl_unsupp	- tbl_unsupp	# 1c: facos
   4109	long		tbl_unsupp	- tbl_unsupp	# 1d: fcos
   4110	long		tbl_unsupp	- tbl_unsupp	# 1e: fgetexp
   4111	long		tbl_unsupp	- tbl_unsupp	# 1f: fgetman
   4112	long		fdiv		- tbl_unsupp	# 20: fdiv
   4113	long		tbl_unsupp	- tbl_unsupp	# 21: fmod
   4114	long		fadd		- tbl_unsupp	# 22: fadd
   4115	long		fmul		- tbl_unsupp	# 23: fmul
   4116	long		fsgldiv		- tbl_unsupp	# 24: fsgldiv
   4117	long		tbl_unsupp	- tbl_unsupp	# 25: frem
   4118	long		tbl_unsupp	- tbl_unsupp	# 26: fscale
   4119	long		fsglmul		- tbl_unsupp	# 27: fsglmul
   4120	long		fsub		- tbl_unsupp	# 28: fsub
   4121	long		tbl_unsupp	- tbl_unsupp
   4122	long		tbl_unsupp	- tbl_unsupp
   4123	long		tbl_unsupp	- tbl_unsupp
   4124	long		tbl_unsupp	- tbl_unsupp
   4125	long		tbl_unsupp	- tbl_unsupp
   4126	long		tbl_unsupp	- tbl_unsupp
   4127	long		tbl_unsupp	- tbl_unsupp
   4128	long		tbl_unsupp	- tbl_unsupp	# 30: fsincos
   4129	long		tbl_unsupp	- tbl_unsupp	# 31: fsincos
   4130	long		tbl_unsupp	- tbl_unsupp	# 32: fsincos
   4131	long		tbl_unsupp	- tbl_unsupp	# 33: fsincos
   4132	long		tbl_unsupp	- tbl_unsupp	# 34: fsincos
   4133	long		tbl_unsupp	- tbl_unsupp	# 35: fsincos
   4134	long		tbl_unsupp	- tbl_unsupp	# 36: fsincos
   4135	long		tbl_unsupp	- tbl_unsupp	# 37: fsincos
   4136	long		fcmp		- tbl_unsupp	# 38: fcmp
   4137	long		tbl_unsupp	- tbl_unsupp
   4138	long		ftst		- tbl_unsupp	# 3a: ftst
   4139	long		tbl_unsupp	- tbl_unsupp
   4140	long		tbl_unsupp	- tbl_unsupp
   4141	long		tbl_unsupp	- tbl_unsupp
   4142	long		tbl_unsupp	- tbl_unsupp
   4143	long		tbl_unsupp	- tbl_unsupp
   4144	long		fsin		- tbl_unsupp	# 40: fsmove
   4145	long		fssqrt		- tbl_unsupp	# 41: fssqrt
   4146	long		tbl_unsupp	- tbl_unsupp
   4147	long		tbl_unsupp	- tbl_unsupp
   4148	long		fdin		- tbl_unsupp	# 44: fdmove
   4149	long		fdsqrt		- tbl_unsupp	# 45: fdsqrt
   4150	long		tbl_unsupp	- tbl_unsupp
   4151	long		tbl_unsupp	- tbl_unsupp
   4152	long		tbl_unsupp	- tbl_unsupp
   4153	long		tbl_unsupp	- tbl_unsupp
   4154	long		tbl_unsupp	- tbl_unsupp
   4155	long		tbl_unsupp	- tbl_unsupp
   4156	long		tbl_unsupp	- tbl_unsupp
   4157	long		tbl_unsupp	- tbl_unsupp
   4158	long		tbl_unsupp	- tbl_unsupp
   4159	long		tbl_unsupp	- tbl_unsupp
   4160	long		tbl_unsupp	- tbl_unsupp
   4161	long		tbl_unsupp	- tbl_unsupp
   4162	long		tbl_unsupp	- tbl_unsupp
   4163	long		tbl_unsupp	- tbl_unsupp
   4164	long		tbl_unsupp	- tbl_unsupp
   4165	long		tbl_unsupp	- tbl_unsupp
   4166	long		tbl_unsupp	- tbl_unsupp
   4167	long		tbl_unsupp	- tbl_unsupp
   4168	long		fsabs		- tbl_unsupp	# 58: fsabs
   4169	long		tbl_unsupp	- tbl_unsupp
   4170	long		fsneg		- tbl_unsupp	# 5a: fsneg
   4171	long		tbl_unsupp	- tbl_unsupp
   4172	long		fdabs		- tbl_unsupp	# 5c: fdabs
   4173	long		tbl_unsupp	- tbl_unsupp
   4174	long		fdneg		- tbl_unsupp	# 5e: fdneg
   4175	long		tbl_unsupp	- tbl_unsupp
   4176	long		fsdiv		- tbl_unsupp	# 60: fsdiv
   4177	long		tbl_unsupp	- tbl_unsupp
   4178	long		fsadd		- tbl_unsupp	# 62: fsadd
   4179	long		fsmul		- tbl_unsupp	# 63: fsmul
   4180	long		fddiv		- tbl_unsupp	# 64: fddiv
   4181	long		tbl_unsupp	- tbl_unsupp
   4182	long		fdadd		- tbl_unsupp	# 66: fdadd
   4183	long		fdmul		- tbl_unsupp	# 67: fdmul
   4184	long		fssub		- tbl_unsupp	# 68: fssub
   4185	long		tbl_unsupp	- tbl_unsupp
   4186	long		tbl_unsupp	- tbl_unsupp
   4187	long		tbl_unsupp	- tbl_unsupp
   4188	long		fdsub		- tbl_unsupp	# 6c: fdsub
   4189
   4190#################################################
   4191# Add this here so non-fp modules can compile.
   4192# (smovcr is called from fpsp_inex.)
   4193	global		smovcr
   4194smovcr:
   4195	bra.b		smovcr
   4196
   4197#########################################################################
   4198# XDEF ****************************************************************	#
   4199#	fmovm_dynamic(): emulate "fmovm" dynamic instruction		#
   4200#									#
   4201# XREF ****************************************************************	#
   4202#	fetch_dreg() - fetch data register				#
   4203#	{i,d,}mem_read() - fetch data from memory			#
   4204#	_mem_write() - write data to memory				#
   4205#	iea_iacc() - instruction memory access error occurred		#
   4206#	iea_dacc() - data memory access error occurred			#
   4207#	restore() - restore An index regs if access error occurred	#
   4208#									#
   4209# INPUT ***************************************************************	#
   4210#	None								#
   4211#									#
   4212# OUTPUT **************************************************************	#
   4213#	If instr is "fmovm Dn,-(A7)" from supervisor mode,		#
   4214#		d0 = size of dump					#
   4215#		d1 = Dn							#
   4216#	Else if instruction access error,				#
   4217#		d0 = FSLW						#
   4218#	Else if data access error,					#
   4219#		d0 = FSLW						#
   4220#		a0 = address of fault					#
   4221#	Else								#
   4222#		none.							#
   4223#									#
   4224# ALGORITHM ***********************************************************	#
   4225#	The effective address must be calculated since this is entered	#
   4226# from an "Unimplemented Effective Address" exception handler. So, we	#
   4227# have our own fcalc_ea() routine here. If an access error is flagged	#
   4228# by a _{i,d,}mem_read() call, we must exit through the special		#
   4229# handler.								#
   4230#	The data register is determined and its value loaded to get the	#
   4231# string of FP registers affected. This value is used as an index into	#
   4232# a lookup table such that we can determine the number of bytes		#
   4233# involved.								#
   4234#	If the instruction is "fmovm.x <ea>,Dn", a _mem_read() is used	#
   4235# to read in all FP values. Again, _mem_read() may fail and require a	#
   4236# special exit.								#
   4237#	If the instruction is "fmovm.x DN,<ea>", a _mem_write() is used	#
   4238# to write all FP values. _mem_write() may also fail.			#
   4239#	If the instruction is "fmovm.x DN,-(a7)" from supervisor mode,	#
   4240# then we return the size of the dump and the string to the caller	#
   4241# so that the move can occur outside of this routine. This special	#
   4242# case is required so that moves to the system stack are handled	#
   4243# correctly.								#
   4244#									#
   4245# DYNAMIC:								#
   4246#	fmovm.x	dn, <ea>						#
   4247#	fmovm.x	<ea>, dn						#
   4248#									#
   4249#	      <WORD 1>		      <WORD2>				#
   4250#	1111 0010 00 |<ea>|	11@& 1000 0$$$ 0000			#
   4251#									#
   4252#	& = (0): predecrement addressing mode				#
   4253#	    (1): postincrement or control addressing mode		#
   4254#	@ = (0): move listed regs from memory to the FPU		#
   4255#	    (1): move listed regs from the FPU to memory		#
   4256#	$$$    : index of data register holding reg select mask		#
   4257#									#
   4258# NOTES:								#
   4259#	If the data register holds a zero, then the			#
   4260#	instruction is a nop.						#
   4261#									#
   4262#########################################################################
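#
# Illustrative C sketch (not part of the 060FPSP package): one way WORD2
# above could be decoded, following the bit layout in the header. Names
# are hypothetical; the emulation below works on the exception stack
# frame instead.
#
#	#include <stdint.h>
#
#	struct fmovm_dyn {
#		int to_memory;	/* '@' bit: 1 = move FPU regs to memory      */
#		int predec;	/* '&' bit clear: predecrement addressing    */
#		int dreg;	/* '$$$': data register holding the reg mask */
#	};
#
#	static struct fmovm_dyn decode_word2(uint16_t w2)
#	{
#		struct fmovm_dyn d;
#		d.to_memory = (w2 >> 13) & 1;		/* '@' */
#		d.predec    = ((w2 >> 12) & 1) == 0;	/* '&' */
#		d.dreg      = (w2 >> 4) & 7;		/* '$$$' */
#		return d;
#	}
#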
   4263
   4264	global		fmovm_dynamic
   4265fmovm_dynamic:
   4266
   4267# extract the data register in which the bit string resides...
   4268	mov.b		1+EXC_EXTWORD(%a6),%d1	# fetch extword
   4269	andi.w		&0x70,%d1		# extract reg bits
   4270	lsr.b		&0x4,%d1		# shift into lo bits
   4271
   4272# fetch the bit string into d0...
   4273	bsr.l		fetch_dreg		# fetch reg string
   4274
   4275	andi.l		&0x000000ff,%d0		# keep only lo byte
   4276
   4277	mov.l		%d0,-(%sp)		# save strg
   4278	mov.b		(tbl_fmovm_size.w,%pc,%d0),%d0
   4279	mov.l		%d0,-(%sp)		# save size
   4280	bsr.l		fmovm_calc_ea		# calculate <ea>
   4281	mov.l		(%sp)+,%d0		# restore size
   4282	mov.l		(%sp)+,%d1		# restore strg
   4283
   4284# if the bit string is a zero, then the operation is a no-op
   4285# but, make sure that we've calculated ea and advanced the opword pointer
   4286	beq.w		fmovm_data_done
   4287
   4288# separate move ins from move outs...
   4289	btst		&0x5,EXC_EXTWORD(%a6)	# is it a move in or out?
    4290	beq.w		fmovm_data_in		# it's a move in
   4291
   4292#############
   4293# MOVE OUT: #
   4294#############
   4295fmovm_data_out:
   4296	btst		&0x4,EXC_EXTWORD(%a6)	# control or predecrement?
   4297	bne.w		fmovm_out_ctrl		# control
   4298
   4299############################
   4300fmovm_out_predec:
    4301# for predecrement mode, the bit string is reversed with respect to both
    4302# control operations and postincrement mode. (bit7 = FP7 ... bit0 = FP0)
    4303# here, we convert it so that it looks just like the others...
   4304	mov.b		(tbl_fmovm_convert.w,%pc,%d1.w*1),%d1
   4305
   4306	btst		&0x5,EXC_SR(%a6)	# user or supervisor mode?
   4307	beq.b		fmovm_out_ctrl		# user
   4308
   4309fmovm_out_predec_s:
   4310	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
   4311	bne.b		fmovm_out_ctrl
   4312
   4313# the operation was unfortunately an: fmovm.x dn,-(sp)
   4314# called from supervisor mode.
   4315# we're also passing "size" and "strg" back to the calling routine
   4316	rts
   4317
   4318############################
   4319fmovm_out_ctrl:
   4320	mov.l		%a0,%a1			# move <ea> to a1
   4321
   4322	sub.l		%d0,%sp			# subtract size of dump
   4323	lea		(%sp),%a0
   4324
   4325	tst.b		%d1			# should FP0 be moved?
   4326	bpl.b		fmovm_out_ctrl_fp1	# no
   4327
   4328	mov.l		0x0+EXC_FP0(%a6),(%a0)+	# yes
   4329	mov.l		0x4+EXC_FP0(%a6),(%a0)+
   4330	mov.l		0x8+EXC_FP0(%a6),(%a0)+
   4331
   4332fmovm_out_ctrl_fp1:
   4333	lsl.b		&0x1,%d1		# should FP1 be moved?
   4334	bpl.b		fmovm_out_ctrl_fp2	# no
   4335
   4336	mov.l		0x0+EXC_FP1(%a6),(%a0)+	# yes
   4337	mov.l		0x4+EXC_FP1(%a6),(%a0)+
   4338	mov.l		0x8+EXC_FP1(%a6),(%a0)+
   4339
   4340fmovm_out_ctrl_fp2:
   4341	lsl.b		&0x1,%d1		# should FP2 be moved?
   4342	bpl.b		fmovm_out_ctrl_fp3	# no
   4343
   4344	fmovm.x		&0x20,(%a0)		# yes
   4345	add.l		&0xc,%a0
   4346
   4347fmovm_out_ctrl_fp3:
   4348	lsl.b		&0x1,%d1		# should FP3 be moved?
   4349	bpl.b		fmovm_out_ctrl_fp4	# no
   4350
   4351	fmovm.x		&0x10,(%a0)		# yes
   4352	add.l		&0xc,%a0
   4353
   4354fmovm_out_ctrl_fp4:
   4355	lsl.b		&0x1,%d1		# should FP4 be moved?
   4356	bpl.b		fmovm_out_ctrl_fp5	# no
   4357
   4358	fmovm.x		&0x08,(%a0)		# yes
   4359	add.l		&0xc,%a0
   4360
   4361fmovm_out_ctrl_fp5:
   4362	lsl.b		&0x1,%d1		# should FP5 be moved?
   4363	bpl.b		fmovm_out_ctrl_fp6	# no
   4364
   4365	fmovm.x		&0x04,(%a0)		# yes
   4366	add.l		&0xc,%a0
   4367
   4368fmovm_out_ctrl_fp6:
   4369	lsl.b		&0x1,%d1		# should FP6 be moved?
   4370	bpl.b		fmovm_out_ctrl_fp7	# no
   4371
   4372	fmovm.x		&0x02,(%a0)		# yes
   4373	add.l		&0xc,%a0
   4374
   4375fmovm_out_ctrl_fp7:
   4376	lsl.b		&0x1,%d1		# should FP7 be moved?
   4377	bpl.b		fmovm_out_ctrl_done	# no
   4378
   4379	fmovm.x		&0x01,(%a0)		# yes
   4380	add.l		&0xc,%a0
   4381
   4382fmovm_out_ctrl_done:
   4383	mov.l		%a1,L_SCR1(%a6)
   4384
   4385	lea		(%sp),%a0		# pass: supervisor src
   4386	mov.l		%d0,-(%sp)		# save size
   4387	bsr.l		_dmem_write		# copy data to user mem
   4388
   4389	mov.l		(%sp)+,%d0
   4390	add.l		%d0,%sp			# clear fpreg data from stack
   4391
   4392	tst.l		%d1			# did dstore err?
   4393	bne.w		fmovm_out_err		# yes
   4394
   4395	rts
   4396
   4397############
   4398# MOVE IN: #
   4399############
   4400fmovm_data_in:
   4401	mov.l		%a0,L_SCR1(%a6)
   4402
   4403	sub.l		%d0,%sp			# make room for fpregs
   4404	lea		(%sp),%a1
   4405
   4406	mov.l		%d1,-(%sp)		# save bit string for later
   4407	mov.l		%d0,-(%sp)		# save # of bytes
   4408
   4409	bsr.l		_dmem_read		# copy data from user mem
   4410
   4411	mov.l		(%sp)+,%d0		# retrieve # of bytes
   4412
   4413	tst.l		%d1			# did dfetch fail?
   4414	bne.w		fmovm_in_err		# yes
   4415
   4416	mov.l		(%sp)+,%d1		# load bit string
   4417
   4418	lea		(%sp),%a0		# addr of stack
   4419
   4420	tst.b		%d1			# should FP0 be moved?
   4421	bpl.b		fmovm_data_in_fp1	# no
   4422
   4423	mov.l		(%a0)+,0x0+EXC_FP0(%a6)	# yes
   4424	mov.l		(%a0)+,0x4+EXC_FP0(%a6)
   4425	mov.l		(%a0)+,0x8+EXC_FP0(%a6)
   4426
   4427fmovm_data_in_fp1:
   4428	lsl.b		&0x1,%d1		# should FP1 be moved?
   4429	bpl.b		fmovm_data_in_fp2	# no
   4430
   4431	mov.l		(%a0)+,0x0+EXC_FP1(%a6)	# yes
   4432	mov.l		(%a0)+,0x4+EXC_FP1(%a6)
   4433	mov.l		(%a0)+,0x8+EXC_FP1(%a6)
   4434
   4435fmovm_data_in_fp2:
   4436	lsl.b		&0x1,%d1		# should FP2 be moved?
   4437	bpl.b		fmovm_data_in_fp3	# no
   4438
   4439	fmovm.x		(%a0)+,&0x20		# yes
   4440
   4441fmovm_data_in_fp3:
   4442	lsl.b		&0x1,%d1		# should FP3 be moved?
   4443	bpl.b		fmovm_data_in_fp4	# no
   4444
   4445	fmovm.x		(%a0)+,&0x10		# yes
   4446
   4447fmovm_data_in_fp4:
   4448	lsl.b		&0x1,%d1		# should FP4 be moved?
   4449	bpl.b		fmovm_data_in_fp5	# no
   4450
   4451	fmovm.x		(%a0)+,&0x08		# yes
   4452
   4453fmovm_data_in_fp5:
   4454	lsl.b		&0x1,%d1		# should FP5 be moved?
   4455	bpl.b		fmovm_data_in_fp6	# no
   4456
   4457	fmovm.x		(%a0)+,&0x04		# yes
   4458
   4459fmovm_data_in_fp6:
   4460	lsl.b		&0x1,%d1		# should FP6 be moved?
   4461	bpl.b		fmovm_data_in_fp7	# no
   4462
   4463	fmovm.x		(%a0)+,&0x02		# yes
   4464
   4465fmovm_data_in_fp7:
   4466	lsl.b		&0x1,%d1		# should FP7 be moved?
   4467	bpl.b		fmovm_data_in_done	# no
   4468
   4469	fmovm.x		(%a0)+,&0x01		# yes
   4470
   4471fmovm_data_in_done:
   4472	add.l		%d0,%sp			# remove fpregs from stack
   4473	rts
   4474
   4475#####################################
   4476
   4477fmovm_data_done:
   4478	rts
   4479
   4480##############################################################################
   4481
   4482#
   4483# table indexed by the operation's bit string that gives the number
   4484# of bytes that will be moved.
   4485#
   4486# number of bytes = (# of 1's in bit string) * 12(bytes/fpreg)
   4487#
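#
# Illustrative C sketch (not part of the 060FPSP package): the table below
# simply caches popcount(mask) * 12 for every possible 8-bit register mask.
# The function name is hypothetical.
#
#	#include <stdint.h>
#
#	static unsigned fmovm_size(uint8_t mask)
#	{
#		unsigned bytes = 0;
#		while (mask) {
#			bytes += 12;		/* 12 bytes per extended-precision reg */
#			mask &= mask - 1;	/* clear the lowest set bit */
#		}
#		return bytes;
#	}
#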
   4488tbl_fmovm_size:
   4489	byte	0x00,0x0c,0x0c,0x18,0x0c,0x18,0x18,0x24
   4490	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
   4491	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
   4492	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
   4493	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
   4494	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
   4495	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
   4496	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
   4497	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
   4498	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
   4499	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
   4500	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
   4501	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
   4502	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
   4503	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
   4504	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
   4505	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
   4506	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
   4507	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
   4508	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
   4509	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
   4510	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
   4511	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
   4512	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
   4513	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
   4514	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
   4515	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
   4516	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
   4517	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
   4518	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
   4519	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
   4520	byte	0x3c,0x48,0x48,0x54,0x48,0x54,0x54,0x60
   4521
   4522#
   4523# table to convert a pre-decrement bit string into a post-increment
   4524# or control bit string.
   4525# ex:	0x00	==>	0x00
   4526#	0x01	==>	0x80
   4527#	0x02	==>	0x40
   4528#		.
   4529#		.
   4530#	0xfd	==>	0xbf
   4531#	0xfe	==>	0x7f
   4532#	0xff	==>	0xff
   4533#
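#
# Illustrative C sketch (not part of the 060FPSP package): the table below
# is an 8-bit bit reversal, i.e. bit i of the input becomes bit (7 - i) of
# the output, which is why 0x01 -> 0x80 and 0xfe -> 0x7f in the examples
# above. The function name is hypothetical.
#
#	#include <stdint.h>
#
#	static uint8_t reverse8(uint8_t m)
#	{
#		uint8_t r = 0;
#		for (int i = 0; i < 8; i++)
#			if (m & (1 << i))
#				r |= 1 << (7 - i);
#		return r;
#	}
#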
   4534tbl_fmovm_convert:
   4535	byte	0x00,0x80,0x40,0xc0,0x20,0xa0,0x60,0xe0
   4536	byte	0x10,0x90,0x50,0xd0,0x30,0xb0,0x70,0xf0
   4537	byte	0x08,0x88,0x48,0xc8,0x28,0xa8,0x68,0xe8
   4538	byte	0x18,0x98,0x58,0xd8,0x38,0xb8,0x78,0xf8
   4539	byte	0x04,0x84,0x44,0xc4,0x24,0xa4,0x64,0xe4
   4540	byte	0x14,0x94,0x54,0xd4,0x34,0xb4,0x74,0xf4
   4541	byte	0x0c,0x8c,0x4c,0xcc,0x2c,0xac,0x6c,0xec
   4542	byte	0x1c,0x9c,0x5c,0xdc,0x3c,0xbc,0x7c,0xfc
   4543	byte	0x02,0x82,0x42,0xc2,0x22,0xa2,0x62,0xe2
   4544	byte	0x12,0x92,0x52,0xd2,0x32,0xb2,0x72,0xf2
   4545	byte	0x0a,0x8a,0x4a,0xca,0x2a,0xaa,0x6a,0xea
   4546	byte	0x1a,0x9a,0x5a,0xda,0x3a,0xba,0x7a,0xfa
   4547	byte	0x06,0x86,0x46,0xc6,0x26,0xa6,0x66,0xe6
   4548	byte	0x16,0x96,0x56,0xd6,0x36,0xb6,0x76,0xf6
   4549	byte	0x0e,0x8e,0x4e,0xce,0x2e,0xae,0x6e,0xee
   4550	byte	0x1e,0x9e,0x5e,0xde,0x3e,0xbe,0x7e,0xfe
   4551	byte	0x01,0x81,0x41,0xc1,0x21,0xa1,0x61,0xe1
   4552	byte	0x11,0x91,0x51,0xd1,0x31,0xb1,0x71,0xf1
   4553	byte	0x09,0x89,0x49,0xc9,0x29,0xa9,0x69,0xe9
   4554	byte	0x19,0x99,0x59,0xd9,0x39,0xb9,0x79,0xf9
   4555	byte	0x05,0x85,0x45,0xc5,0x25,0xa5,0x65,0xe5
   4556	byte	0x15,0x95,0x55,0xd5,0x35,0xb5,0x75,0xf5
   4557	byte	0x0d,0x8d,0x4d,0xcd,0x2d,0xad,0x6d,0xed
   4558	byte	0x1d,0x9d,0x5d,0xdd,0x3d,0xbd,0x7d,0xfd
   4559	byte	0x03,0x83,0x43,0xc3,0x23,0xa3,0x63,0xe3
   4560	byte	0x13,0x93,0x53,0xd3,0x33,0xb3,0x73,0xf3
   4561	byte	0x0b,0x8b,0x4b,0xcb,0x2b,0xab,0x6b,0xeb
   4562	byte	0x1b,0x9b,0x5b,0xdb,0x3b,0xbb,0x7b,0xfb
   4563	byte	0x07,0x87,0x47,0xc7,0x27,0xa7,0x67,0xe7
   4564	byte	0x17,0x97,0x57,0xd7,0x37,0xb7,0x77,0xf7
   4565	byte	0x0f,0x8f,0x4f,0xcf,0x2f,0xaf,0x6f,0xef
   4566	byte	0x1f,0x9f,0x5f,0xdf,0x3f,0xbf,0x7f,0xff
   4567
   4568	global		fmovm_calc_ea
   4569###############################################
   4570# _fmovm_calc_ea: calculate effective address #
   4571###############################################
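#
# Illustrative C sketch (not part of the 060FPSP package): the dispatch
# below uses the low six opword bits as a single {MODE,REG} index, i.e.
# mode*8 + reg, into a 64-entry table. The table of 16-bit PC-relative
# offsets is modeled here with plain function pointers; names are
# hypothetical.
#
#	#include <stdint.h>
#
#	typedef uint32_t (*ea_fn)(void);	/* returns the effective address */
#
#	static uint32_t calc_ea(uint16_t opword, ea_fn tbl[64])
#	{
#		unsigned mode = (opword >> 3) & 0x7;
#		unsigned reg  =  opword       & 0x7;
#		return tbl[(mode << 3) | reg]();	/* same as opword & 0x3f */
#	}
#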
   4572fmovm_calc_ea:
   4573	mov.l		%d0,%a0			# move # bytes to a0
   4574
   4575# currently, MODE and REG are taken from the EXC_OPWORD. this could be
   4576# easily changed if they were inputs passed in registers.
   4577	mov.w		EXC_OPWORD(%a6),%d0	# fetch opcode word
   4578	mov.w		%d0,%d1			# make a copy
   4579
   4580	andi.w		&0x3f,%d0		# extract mode field
   4581	andi.l		&0x7,%d1		# extract reg  field
   4582
   4583# jump to the corresponding function for each {MODE,REG} pair.
   4584	mov.w		(tbl_fea_mode.b,%pc,%d0.w*2),%d0 # fetch jmp distance
   4585	jmp		(tbl_fea_mode.b,%pc,%d0.w*1) # jmp to correct ea mode
   4586
   4587	swbeg		&64
   4588tbl_fea_mode:
   4589	short		tbl_fea_mode	-	tbl_fea_mode
   4590	short		tbl_fea_mode	-	tbl_fea_mode
   4591	short		tbl_fea_mode	-	tbl_fea_mode
   4592	short		tbl_fea_mode	-	tbl_fea_mode
   4593	short		tbl_fea_mode	-	tbl_fea_mode
   4594	short		tbl_fea_mode	-	tbl_fea_mode
   4595	short		tbl_fea_mode	-	tbl_fea_mode
   4596	short		tbl_fea_mode	-	tbl_fea_mode
   4597
   4598	short		tbl_fea_mode	-	tbl_fea_mode
   4599	short		tbl_fea_mode	-	tbl_fea_mode
   4600	short		tbl_fea_mode	-	tbl_fea_mode
   4601	short		tbl_fea_mode	-	tbl_fea_mode
   4602	short		tbl_fea_mode	-	tbl_fea_mode
   4603	short		tbl_fea_mode	-	tbl_fea_mode
   4604	short		tbl_fea_mode	-	tbl_fea_mode
   4605	short		tbl_fea_mode	-	tbl_fea_mode
   4606
   4607	short		faddr_ind_a0	-	tbl_fea_mode
   4608	short		faddr_ind_a1	-	tbl_fea_mode
   4609	short		faddr_ind_a2	-	tbl_fea_mode
   4610	short		faddr_ind_a3	-	tbl_fea_mode
   4611	short		faddr_ind_a4	-	tbl_fea_mode
   4612	short		faddr_ind_a5	-	tbl_fea_mode
   4613	short		faddr_ind_a6	-	tbl_fea_mode
   4614	short		faddr_ind_a7	-	tbl_fea_mode
   4615
   4616	short		faddr_ind_p_a0	-	tbl_fea_mode
   4617	short		faddr_ind_p_a1	-	tbl_fea_mode
   4618	short		faddr_ind_p_a2	-	tbl_fea_mode
   4619	short		faddr_ind_p_a3	-	tbl_fea_mode
   4620	short		faddr_ind_p_a4	-	tbl_fea_mode
   4621	short		faddr_ind_p_a5	-	tbl_fea_mode
   4622	short		faddr_ind_p_a6	-	tbl_fea_mode
   4623	short		faddr_ind_p_a7	-	tbl_fea_mode
   4624
   4625	short		faddr_ind_m_a0	-	tbl_fea_mode
   4626	short		faddr_ind_m_a1	-	tbl_fea_mode
   4627	short		faddr_ind_m_a2	-	tbl_fea_mode
   4628	short		faddr_ind_m_a3	-	tbl_fea_mode
   4629	short		faddr_ind_m_a4	-	tbl_fea_mode
   4630	short		faddr_ind_m_a5	-	tbl_fea_mode
   4631	short		faddr_ind_m_a6	-	tbl_fea_mode
   4632	short		faddr_ind_m_a7	-	tbl_fea_mode
   4633
   4634	short		faddr_ind_disp_a0	-	tbl_fea_mode
   4635	short		faddr_ind_disp_a1	-	tbl_fea_mode
   4636	short		faddr_ind_disp_a2	-	tbl_fea_mode
   4637	short		faddr_ind_disp_a3	-	tbl_fea_mode
   4638	short		faddr_ind_disp_a4	-	tbl_fea_mode
   4639	short		faddr_ind_disp_a5	-	tbl_fea_mode
   4640	short		faddr_ind_disp_a6	-	tbl_fea_mode
   4641	short		faddr_ind_disp_a7	-	tbl_fea_mode
   4642
   4643	short		faddr_ind_ext	-	tbl_fea_mode
   4644	short		faddr_ind_ext	-	tbl_fea_mode
   4645	short		faddr_ind_ext	-	tbl_fea_mode
   4646	short		faddr_ind_ext	-	tbl_fea_mode
   4647	short		faddr_ind_ext	-	tbl_fea_mode
   4648	short		faddr_ind_ext	-	tbl_fea_mode
   4649	short		faddr_ind_ext	-	tbl_fea_mode
   4650	short		faddr_ind_ext	-	tbl_fea_mode
   4651
   4652	short		fabs_short	-	tbl_fea_mode
   4653	short		fabs_long	-	tbl_fea_mode
   4654	short		fpc_ind		-	tbl_fea_mode
   4655	short		fpc_ind_ext	-	tbl_fea_mode
   4656	short		tbl_fea_mode	-	tbl_fea_mode
   4657	short		tbl_fea_mode	-	tbl_fea_mode
   4658	short		tbl_fea_mode	-	tbl_fea_mode
   4659	short		tbl_fea_mode	-	tbl_fea_mode
   4660
   4661###################################
   4662# Address register indirect: (An) #
   4663###################################
   4664faddr_ind_a0:
   4665	mov.l		EXC_DREGS+0x8(%a6),%a0	# Get current a0
   4666	rts
   4667
   4668faddr_ind_a1:
   4669	mov.l		EXC_DREGS+0xc(%a6),%a0	# Get current a1
   4670	rts
   4671
   4672faddr_ind_a2:
   4673	mov.l		%a2,%a0			# Get current a2
   4674	rts
   4675
   4676faddr_ind_a3:
   4677	mov.l		%a3,%a0			# Get current a3
   4678	rts
   4679
   4680faddr_ind_a4:
   4681	mov.l		%a4,%a0			# Get current a4
   4682	rts
   4683
   4684faddr_ind_a5:
   4685	mov.l		%a5,%a0			# Get current a5
   4686	rts
   4687
   4688faddr_ind_a6:
   4689	mov.l		(%a6),%a0		# Get current a6
   4690	rts
   4691
   4692faddr_ind_a7:
   4693	mov.l		EXC_A7(%a6),%a0		# Get current a7
   4694	rts
   4695
   4696#####################################################
   4697# Address register indirect w/ postincrement: (An)+ #
   4698#####################################################
   4699faddr_ind_p_a0:
   4700	mov.l		EXC_DREGS+0x8(%a6),%d0	# Get current a0
   4701	mov.l		%d0,%d1
   4702	add.l		%a0,%d1			# Increment
   4703	mov.l		%d1,EXC_DREGS+0x8(%a6)	# Save incr value
   4704	mov.l		%d0,%a0
   4705	rts
   4706
   4707faddr_ind_p_a1:
   4708	mov.l		EXC_DREGS+0xc(%a6),%d0	# Get current a1
   4709	mov.l		%d0,%d1
   4710	add.l		%a0,%d1			# Increment
   4711	mov.l		%d1,EXC_DREGS+0xc(%a6)	# Save incr value
   4712	mov.l		%d0,%a0
   4713	rts
   4714
   4715faddr_ind_p_a2:
   4716	mov.l		%a2,%d0			# Get current a2
   4717	mov.l		%d0,%d1
   4718	add.l		%a0,%d1			# Increment
   4719	mov.l		%d1,%a2			# Save incr value
   4720	mov.l		%d0,%a0
   4721	rts
   4722
   4723faddr_ind_p_a3:
   4724	mov.l		%a3,%d0			# Get current a3
   4725	mov.l		%d0,%d1
   4726	add.l		%a0,%d1			# Increment
   4727	mov.l		%d1,%a3			# Save incr value
   4728	mov.l		%d0,%a0
   4729	rts
   4730
   4731faddr_ind_p_a4:
   4732	mov.l		%a4,%d0			# Get current a4
   4733	mov.l		%d0,%d1
   4734	add.l		%a0,%d1			# Increment
   4735	mov.l		%d1,%a4			# Save incr value
   4736	mov.l		%d0,%a0
   4737	rts
   4738
   4739faddr_ind_p_a5:
   4740	mov.l		%a5,%d0			# Get current a5
   4741	mov.l		%d0,%d1
   4742	add.l		%a0,%d1			# Increment
   4743	mov.l		%d1,%a5			# Save incr value
   4744	mov.l		%d0,%a0
   4745	rts
   4746
   4747faddr_ind_p_a6:
   4748	mov.l		(%a6),%d0		# Get current a6
   4749	mov.l		%d0,%d1
   4750	add.l		%a0,%d1			# Increment
   4751	mov.l		%d1,(%a6)		# Save incr value
   4752	mov.l		%d0,%a0
   4753	rts
   4754
   4755faddr_ind_p_a7:
   4756	mov.b		&mia7_flg,SPCOND_FLG(%a6) # set "special case" flag
   4757
   4758	mov.l		EXC_A7(%a6),%d0		# Get current a7
   4759	mov.l		%d0,%d1
   4760	add.l		%a0,%d1			# Increment
   4761	mov.l		%d1,EXC_A7(%a6)		# Save incr value
   4762	mov.l		%d0,%a0
   4763	rts
   4764
   4765####################################################
   4766# Address register indirect w/ predecrement: -(An) #
   4767####################################################
   4768faddr_ind_m_a0:
   4769	mov.l		EXC_DREGS+0x8(%a6),%d0	# Get current a0
   4770	sub.l		%a0,%d0			# Decrement
   4771	mov.l		%d0,EXC_DREGS+0x8(%a6)	# Save decr value
   4772	mov.l		%d0,%a0
   4773	rts
   4774
   4775faddr_ind_m_a1:
   4776	mov.l		EXC_DREGS+0xc(%a6),%d0	# Get current a1
   4777	sub.l		%a0,%d0			# Decrement
   4778	mov.l		%d0,EXC_DREGS+0xc(%a6)	# Save decr value
   4779	mov.l		%d0,%a0
   4780	rts
   4781
   4782faddr_ind_m_a2:
   4783	mov.l		%a2,%d0			# Get current a2
   4784	sub.l		%a0,%d0			# Decrement
   4785	mov.l		%d0,%a2			# Save decr value
   4786	mov.l		%d0,%a0
   4787	rts
   4788
   4789faddr_ind_m_a3:
   4790	mov.l		%a3,%d0			# Get current a3
   4791	sub.l		%a0,%d0			# Decrement
   4792	mov.l		%d0,%a3			# Save decr value
   4793	mov.l		%d0,%a0
   4794	rts
   4795
   4796faddr_ind_m_a4:
   4797	mov.l		%a4,%d0			# Get current a4
   4798	sub.l		%a0,%d0			# Decrement
   4799	mov.l		%d0,%a4			# Save decr value
   4800	mov.l		%d0,%a0
   4801	rts
   4802
   4803faddr_ind_m_a5:
   4804	mov.l		%a5,%d0			# Get current a5
   4805	sub.l		%a0,%d0			# Decrement
   4806	mov.l		%d0,%a5			# Save decr value
   4807	mov.l		%d0,%a0
   4808	rts
   4809
   4810faddr_ind_m_a6:
   4811	mov.l		(%a6),%d0		# Get current a6
   4812	sub.l		%a0,%d0			# Decrement
   4813	mov.l		%d0,(%a6)		# Save decr value
   4814	mov.l		%d0,%a0
   4815	rts
   4816
   4817faddr_ind_m_a7:
   4818	mov.b		&mda7_flg,SPCOND_FLG(%a6) # set "special case" flag
   4819
   4820	mov.l		EXC_A7(%a6),%d0		# Get current a7
   4821	sub.l		%a0,%d0			# Decrement
   4822	mov.l		%d0,EXC_A7(%a6)		# Save decr value
   4823	mov.l		%d0,%a0
   4824	rts
   4825
   4826########################################################
   4827# Address register indirect w/ displacement: (d16, An) #
   4828########################################################
   4829faddr_ind_disp_a0:
   4830	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   4831	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
   4832	bsr.l		_imem_read_word
   4833
   4834	tst.l		%d1			# did ifetch fail?
   4835	bne.l		iea_iacc		# yes
   4836
   4837	mov.w		%d0,%a0			# sign extend displacement
   4838
   4839	add.l		EXC_DREGS+0x8(%a6),%a0	# a0 + d16
   4840	rts
   4841
   4842faddr_ind_disp_a1:
   4843	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   4844	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
   4845	bsr.l		_imem_read_word
   4846
   4847	tst.l		%d1			# did ifetch fail?
   4848	bne.l		iea_iacc		# yes
   4849
   4850	mov.w		%d0,%a0			# sign extend displacement
   4851
   4852	add.l		EXC_DREGS+0xc(%a6),%a0	# a1 + d16
   4853	rts
   4854
   4855faddr_ind_disp_a2:
   4856	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   4857	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
   4858	bsr.l		_imem_read_word
   4859
   4860	tst.l		%d1			# did ifetch fail?
   4861	bne.l		iea_iacc		# yes
   4862
   4863	mov.w		%d0,%a0			# sign extend displacement
   4864
   4865	add.l		%a2,%a0			# a2 + d16
   4866	rts
   4867
   4868faddr_ind_disp_a3:
   4869	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   4870	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
   4871	bsr.l		_imem_read_word
   4872
   4873	tst.l		%d1			# did ifetch fail?
   4874	bne.l		iea_iacc		# yes
   4875
   4876	mov.w		%d0,%a0			# sign extend displacement
   4877
   4878	add.l		%a3,%a0			# a3 + d16
   4879	rts
   4880
   4881faddr_ind_disp_a4:
   4882	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   4883	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
   4884	bsr.l		_imem_read_word
   4885
   4886	tst.l		%d1			# did ifetch fail?
   4887	bne.l		iea_iacc		# yes
   4888
   4889	mov.w		%d0,%a0			# sign extend displacement
   4890
   4891	add.l		%a4,%a0			# a4 + d16
   4892	rts
   4893
   4894faddr_ind_disp_a5:
   4895	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   4896	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
   4897	bsr.l		_imem_read_word
   4898
   4899	tst.l		%d1			# did ifetch fail?
   4900	bne.l		iea_iacc		# yes
   4901
   4902	mov.w		%d0,%a0			# sign extend displacement
   4903
   4904	add.l		%a5,%a0			# a5 + d16
   4905	rts
   4906
   4907faddr_ind_disp_a6:
   4908	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   4909	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
   4910	bsr.l		_imem_read_word
   4911
   4912	tst.l		%d1			# did ifetch fail?
   4913	bne.l		iea_iacc		# yes
   4914
   4915	mov.w		%d0,%a0			# sign extend displacement
   4916
   4917	add.l		(%a6),%a0		# a6 + d16
   4918	rts
   4919
   4920faddr_ind_disp_a7:
   4921	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   4922	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
   4923	bsr.l		_imem_read_word
   4924
   4925	tst.l		%d1			# did ifetch fail?
   4926	bne.l		iea_iacc		# yes
   4927
   4928	mov.w		%d0,%a0			# sign extend displacement
   4929
   4930	add.l		EXC_A7(%a6),%a0		# a7 + d16
   4931	rts
   4932
   4933########################################################################
   4934# Address register indirect w/ index(8-bit displacement): (d8, An, Xn) #
   4935#    "       "         "    w/   "  (base displacement): (bd, An, Xn)  #
   4936# Memory indirect postindexed: ([bd, An], Xn, od)		       #
   4937# Memory indirect preindexed: ([bd, An, Xn], od)		       #
   4938########################################################################
   4939faddr_ind_ext:
   4940	addq.l		&0x8,%d1
   4941	bsr.l		fetch_dreg		# fetch base areg
   4942	mov.l		%d0,-(%sp)
   4943
   4944	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   4945	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
   4946	bsr.l		_imem_read_word		# fetch extword in d0
   4947
   4948	tst.l		%d1			# did ifetch fail?
   4949	bne.l		iea_iacc		# yes
   4950
   4951	mov.l		(%sp)+,%a0
   4952
   4953	btst		&0x8,%d0
   4954	bne.w		fcalc_mem_ind
   4955
   4956	mov.l		%d0,L_SCR1(%a6)		# hold opword
   4957
   4958	mov.l		%d0,%d1
   4959	rol.w		&0x4,%d1
   4960	andi.w		&0xf,%d1		# extract index regno
   4961
   4962# count on fetch_dreg() not to alter a0...
   4963	bsr.l		fetch_dreg		# fetch index
   4964
   4965	mov.l		%d2,-(%sp)		# save d2
   4966	mov.l		L_SCR1(%a6),%d2		# fetch opword
   4967
   4968	btst		&0xb,%d2		# is it word or long?
   4969	bne.b		faii8_long
   4970	ext.l		%d0			# sign extend word index
   4971faii8_long:
   4972	mov.l		%d2,%d1
   4973	rol.w		&0x7,%d1
   4974	andi.l		&0x3,%d1		# extract scale value
   4975
   4976	lsl.l		%d1,%d0			# shift index by scale
   4977
   4978	extb.l		%d2			# sign extend displacement
   4979	add.l		%d2,%d0			# index + disp
   4980	add.l		%d0,%a0			# An + (index + disp)
   4981
   4982	mov.l		(%sp)+,%d2		# restore old d2
   4983	rts
   4984
   4985###########################
   4986# Absolute short: (XXX).W #
   4987###########################
   4988fabs_short:
   4989	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   4990	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
   4991	bsr.l		_imem_read_word		# fetch short address
   4992
   4993	tst.l		%d1			# did ifetch fail?
   4994	bne.l		iea_iacc		# yes
   4995
   4996	mov.w		%d0,%a0			# return <ea> in a0
   4997	rts
   4998
   4999##########################
   5000# Absolute long: (XXX).L #
   5001##########################
   5002fabs_long:
   5003	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   5004	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
   5005	bsr.l		_imem_read_long		# fetch long address
   5006
   5007	tst.l		%d1			# did ifetch fail?
   5008	bne.l		iea_iacc		# yes
   5009
   5010	mov.l		%d0,%a0			# return <ea> in a0
   5011	rts
   5012
   5013#######################################################
   5014# Program counter indirect w/ displacement: (d16, PC) #
   5015#######################################################
   5016fpc_ind:
   5017	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   5018	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
   5019	bsr.l		_imem_read_word		# fetch word displacement
   5020
   5021	tst.l		%d1			# did ifetch fail?
   5022	bne.l		iea_iacc		# yes
   5023
   5024	mov.w		%d0,%a0			# sign extend displacement
   5025
   5026	add.l		EXC_EXTWPTR(%a6),%a0	# pc + d16
   5027
   5028# _imem_read_word() increased the extwptr by 2. need to adjust here.
   5029	subq.l		&0x2,%a0		# adjust <ea>
   5030	rts
   5031
   5032##########################################################
   5033# PC indirect w/ index(8-bit displacement): (d8, PC, An) #
   5034# "     "     w/   "  (base displacement): (bd, PC, An)  #
   5035# PC memory indirect postindexed: ([bd, PC], Xn, od)     #
   5036# PC memory indirect preindexed: ([bd, PC, Xn], od)      #
   5037##########################################################
   5038fpc_ind_ext:
   5039	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   5040	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
   5041	bsr.l		_imem_read_word		# fetch ext word
   5042
   5043	tst.l		%d1			# did ifetch fail?
   5044	bne.l		iea_iacc		# yes
   5045
   5046	mov.l		EXC_EXTWPTR(%a6),%a0	# put base in a0
   5047	subq.l		&0x2,%a0		# adjust base
   5048
   5049	btst		&0x8,%d0		# is disp only 8 bits?
   5050	bne.w		fcalc_mem_ind		# calc memory indirect
   5051
   5052	mov.l		%d0,L_SCR1(%a6)		# store opword
   5053
   5054	mov.l		%d0,%d1			# make extword copy
   5055	rol.w		&0x4,%d1		# rotate reg num into place
   5056	andi.w		&0xf,%d1		# extract register number
   5057
   5058# count on fetch_dreg() not to alter a0...
   5059	bsr.l		fetch_dreg		# fetch index
   5060
   5061	mov.l		%d2,-(%sp)		# save d2
   5062	mov.l		L_SCR1(%a6),%d2		# fetch opword
   5063
   5064	btst		&0xb,%d2		# is index word or long?
   5065	bne.b		fpii8_long		# long
   5066	ext.l		%d0			# sign extend word index
   5067fpii8_long:
   5068	mov.l		%d2,%d1
   5069	rol.w		&0x7,%d1		# rotate scale value into place
   5070	andi.l		&0x3,%d1		# extract scale value
   5071
   5072	lsl.l		%d1,%d0			# shift index by scale
   5073
   5074	extb.l		%d2			# sign extend displacement
   5075	add.l		%d2,%d0			# disp + index
   5076	add.l		%d0,%a0			# An + (index + disp)
   5077
   5078	mov.l		(%sp)+,%d2		# restore temp register
   5079	rts
   5080
   5081# d2 = index
   5082# d3 = base
   5083# d4 = od
   5084# d5 = extword
   5085fcalc_mem_ind:
   5086	btst		&0x6,%d0		# is the index suppressed?
   5087	beq.b		fcalc_index
   5088
   5089	movm.l		&0x3c00,-(%sp)		# save d2-d5
   5090
   5091	mov.l		%d0,%d5			# put extword in d5
   5092	mov.l		%a0,%d3			# put base in d3
   5093
   5094	clr.l		%d2			# yes, so index = 0
   5095	bra.b		fbase_supp_ck
   5096
   5097# index:
   5098fcalc_index:
   5099	mov.l		%d0,L_SCR1(%a6)		# save d0 (opword)
   5100	bfextu		%d0{&16:&4},%d1		# fetch dreg index
   5101	bsr.l		fetch_dreg
   5102
   5103	movm.l		&0x3c00,-(%sp)		# save d2-d5
   5104	mov.l		%d0,%d2			# put index in d2
   5105	mov.l		L_SCR1(%a6),%d5
   5106	mov.l		%a0,%d3
   5107
   5108	btst		&0xb,%d5		# is index word or long?
   5109	bne.b		fno_ext
   5110	ext.l		%d2
   5111
   5112fno_ext:
   5113	bfextu		%d5{&21:&2},%d0
   5114	lsl.l		%d0,%d2
   5115
   5116# base address (passed as parameter in d3):
   5117# we clear the value here if it should actually be suppressed.
   5118fbase_supp_ck:
    5119	btst		&0x7,%d5		# is the base register suppressed?
   5120	beq.b		fno_base_sup
   5121	clr.l		%d3
   5122
   5123# base displacement:
   5124fno_base_sup:
   5125	bfextu		%d5{&26:&2},%d0		# get bd size
   5126#	beq.l		fmovm_error		# if (size == 0) it's reserved
   5127
   5128	cmpi.b		%d0,&0x2
   5129	blt.b		fno_bd
   5130	beq.b		fget_word_bd
   5131
   5132	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   5133	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
   5134	bsr.l		_imem_read_long
   5135
   5136	tst.l		%d1			# did ifetch fail?
   5137	bne.l		fcea_iacc		# yes
   5138
   5139	bra.b		fchk_ind
   5140
   5141fget_word_bd:
   5142	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   5143	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
   5144	bsr.l		_imem_read_word
   5145
   5146	tst.l		%d1			# did ifetch fail?
   5147	bne.l		fcea_iacc		# yes
   5148
   5149	ext.l		%d0			# sign extend bd
   5150
   5151fchk_ind:
   5152	add.l		%d0,%d3			# base += bd
   5153
   5154# outer displacement:
   5155fno_bd:
   5156	bfextu		%d5{&30:&2},%d0		# is od suppressed?
   5157	beq.w		faii_bd
   5158
   5159	cmpi.b		%d0,&0x2
   5160	blt.b		fnull_od
   5161	beq.b		fword_od
   5162
   5163	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   5164	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
   5165	bsr.l		_imem_read_long
   5166
   5167	tst.l		%d1			# did ifetch fail?
   5168	bne.l		fcea_iacc		# yes
   5169
   5170	bra.b		fadd_them
   5171
   5172fword_od:
   5173	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   5174	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
   5175	bsr.l		_imem_read_word
   5176
   5177	tst.l		%d1			# did ifetch fail?
   5178	bne.l		fcea_iacc		# yes
   5179
   5180	ext.l		%d0			# sign extend od
   5181	bra.b		fadd_them
   5182
   5183fnull_od:
   5184	clr.l		%d0
   5185
   5186fadd_them:
   5187	mov.l		%d0,%d4
   5188
   5189	btst		&0x2,%d5		# pre or post indexing?
   5190	beq.b		fpre_indexed
   5191
   5192	mov.l		%d3,%a0
   5193	bsr.l		_dmem_read_long
   5194
   5195	tst.l		%d1			# did dfetch fail?
   5196	bne.w		fcea_err		# yes
   5197
   5198	add.l		%d2,%d0			# <ea> += index
   5199	add.l		%d4,%d0			# <ea> += od
   5200	bra.b		fdone_ea
   5201
   5202fpre_indexed:
   5203	add.l		%d2,%d3			# preindexing
   5204	mov.l		%d3,%a0
   5205	bsr.l		_dmem_read_long
   5206
   5207	tst.l		%d1			# did dfetch fail?
   5208	bne.w		fcea_err		# yes
   5209
   5210	add.l		%d4,%d0			# ea += od
   5211	bra.b		fdone_ea
   5212
   5213faii_bd:
   5214	add.l		%d2,%d3			# ea = (base + bd) + index
   5215	mov.l		%d3,%d0
   5216fdone_ea:
   5217	mov.l		%d0,%a0
   5218
   5219	movm.l		(%sp)+,&0x003c		# restore d2-d5
   5220	rts
   5221
   5222#########################################################
   5223fcea_err:
   5224	mov.l		%d3,%a0
   5225
   5226	movm.l		(%sp)+,&0x003c		# restore d2-d5
   5227	mov.w		&0x0101,%d0
   5228	bra.l		iea_dacc
   5229
   5230fcea_iacc:
   5231	movm.l		(%sp)+,&0x003c		# restore d2-d5
   5232	bra.l		iea_iacc
   5233
   5234fmovm_out_err:
   5235	bsr.l		restore
   5236	mov.w		&0x00e1,%d0
   5237	bra.b		fmovm_err
   5238
   5239fmovm_in_err:
   5240	bsr.l		restore
   5241	mov.w		&0x0161,%d0
   5242
   5243fmovm_err:
   5244	mov.l		L_SCR1(%a6),%a0
   5245	bra.l		iea_dacc
   5246
   5247#########################################################################
   5248# XDEF ****************************************************************	#
   5249#	fmovm_ctrl(): emulate fmovm.l of control registers instr	#
   5250#									#
   5251# XREF ****************************************************************	#
   5252#	_imem_read_long() - read longword from memory			#
   5253#	iea_iacc() - _imem_read_long() failed; error recovery		#
   5254#									#
   5255# INPUT ***************************************************************	#
   5256#	None								#
   5257#									#
   5258# OUTPUT **************************************************************	#
   5259#	If _imem_read_long() doesn't fail:				#
   5260#		USER_FPCR(a6)  = new FPCR value				#
   5261#		USER_FPSR(a6)  = new FPSR value				#
   5262#		USER_FPIAR(a6) = new FPIAR value			#
   5263#									#
   5264# ALGORITHM ***********************************************************	#
   5265#	Decode the instruction type by looking at the extension word	#
   5266# in order to see how many control registers to fetch from memory.	#
   5267# Fetch them using _imem_read_long(). If this fetch fails, exit through	#
   5268# the special access error exit handler iea_iacc().			#
   5269#									#
   5270# Instruction word decoding:						#
   5271#									#
   5272#	fmovem.l #<data>, {FPIAR&|FPCR&|FPSR}				#
   5273#									#
   5274#		WORD1			WORD2				#
   5275#	1111 0010 00 111100	100$ $$00 0000 0000			#
   5276#									#
   5277#	$$$ (100): FPCR							#
   5278#	    (010): FPSR							#
   5279#	    (001): FPIAR						#
   5280#	    (000): FPIAR						#
   5281#									#
   5282#########################################################################
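#
# Illustrative C sketch (not part of the 060FPSP package): the high byte of
# the extension word is 100$ $$00, so the compares against 0x9c/0x98/0x94
# below pick out the multi-register combinations. The function name is
# hypothetical.
#
#	#include <stdint.h>
#
#	static unsigned ctrl_regs_selected(uint8_t ext_hi)
#	{
#		unsigned fpcr  = (ext_hi >> 4) & 1;	/* bit 12 of WORD2 */
#		unsigned fpsr  = (ext_hi >> 3) & 1;	/* bit 11 of WORD2 */
#		unsigned fpiar = (ext_hi >> 2) & 1;	/* bit 10 of WORD2 */
#		return (fpcr << 2) | (fpsr << 1) | fpiar;	/* 7 = all three */
#	}
#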
   5283
   5284	global		fmovm_ctrl
   5285fmovm_ctrl:
   5286	mov.b		EXC_EXTWORD(%a6),%d0	# fetch reg select bits
   5287	cmpi.b		%d0,&0x9c		# fpcr & fpsr & fpiar ?
   5288	beq.w		fctrl_in_7		# yes
   5289	cmpi.b		%d0,&0x98		# fpcr & fpsr ?
   5290	beq.w		fctrl_in_6		# yes
   5291	cmpi.b		%d0,&0x94		# fpcr & fpiar ?
   5292	beq.b		fctrl_in_5		# yes
   5293
   5294# fmovem.l #<data>, fpsr/fpiar
   5295fctrl_in_3:
   5296	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   5297	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
   5298	bsr.l		_imem_read_long		# fetch FPSR from mem
   5299
   5300	tst.l		%d1			# did ifetch fail?
   5301	bne.l		iea_iacc		# yes
   5302
   5303	mov.l		%d0,USER_FPSR(%a6)	# store new FPSR to stack
   5304	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   5305	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
   5306	bsr.l		_imem_read_long		# fetch FPIAR from mem
   5307
   5308	tst.l		%d1			# did ifetch fail?
   5309	bne.l		iea_iacc		# yes
   5310
   5311	mov.l		%d0,USER_FPIAR(%a6)	# store new FPIAR to stack
   5312	rts
   5313
   5314# fmovem.l #<data>, fpcr/fpiar
   5315fctrl_in_5:
   5316	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   5317	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
   5318	bsr.l		_imem_read_long		# fetch FPCR from mem
   5319
   5320	tst.l		%d1			# did ifetch fail?
   5321	bne.l		iea_iacc		# yes
   5322
   5323	mov.l		%d0,USER_FPCR(%a6)	# store new FPCR to stack
   5324	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   5325	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
   5326	bsr.l		_imem_read_long		# fetch FPIAR from mem
   5327
   5328	tst.l		%d1			# did ifetch fail?
   5329	bne.l		iea_iacc		# yes
   5330
   5331	mov.l		%d0,USER_FPIAR(%a6)	# store new FPIAR to stack
   5332	rts
   5333
   5334# fmovem.l #<data>, fpcr/fpsr
   5335fctrl_in_6:
   5336	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   5337	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
   5338	bsr.l		_imem_read_long		# fetch FPCR from mem
   5339
   5340	tst.l		%d1			# did ifetch fail?
   5341	bne.l		iea_iacc		# yes
   5342
   5343	mov.l		%d0,USER_FPCR(%a6)	# store new FPCR to mem
   5344	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   5345	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
   5346	bsr.l		_imem_read_long		# fetch FPSR from mem
   5347
   5348	tst.l		%d1			# did ifetch fail?
   5349	bne.l		iea_iacc		# yes
   5350
   5351	mov.l		%d0,USER_FPSR(%a6)	# store new FPSR to mem
   5352	rts
   5353
   5354# fmovem.l #<data>, fpcr/fpsr/fpiar
   5355fctrl_in_7:
   5356	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   5357	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
   5358	bsr.l		_imem_read_long		# fetch FPCR from mem
   5359
   5360	tst.l		%d1			# did ifetch fail?
   5361	bne.l		iea_iacc		# yes
   5362
   5363	mov.l		%d0,USER_FPCR(%a6)	# store new FPCR to mem
   5364	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   5365	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
   5366	bsr.l		_imem_read_long		# fetch FPSR from mem
   5367
   5368	tst.l		%d1			# did ifetch fail?
   5369	bne.l		iea_iacc		# yes
   5370
   5371	mov.l		%d0,USER_FPSR(%a6)	# store new FPSR to mem
   5372	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   5373	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
   5374	bsr.l		_imem_read_long		# fetch FPIAR from mem
   5375
   5376	tst.l		%d1			# did ifetch fail?
   5377	bne.l		iea_iacc		# yes
   5378
   5379	mov.l		%d0,USER_FPIAR(%a6)	# store new FPIAR to mem
   5380	rts
   5381
   5382##########################################################################
   5383
   5384#########################################################################
   5385# XDEF ****************************************************************	#
   5386#	addsub_scaler2(): scale inputs to fadd/fsub such that no	#
   5387#			  OVFL/UNFL exceptions will result		#
   5388#									#
   5389# XREF ****************************************************************	#
   5390#	norm() - normalize mantissa after adjusting exponent		#
   5391#									#
   5392# INPUT ***************************************************************	#
   5393#	FP_SRC(a6) = fp op1(src)					#
   5394#	FP_DST(a6) = fp op2(dst)					#
   5395#									#
   5396# OUTPUT **************************************************************	#
   5397#	FP_SRC(a6) = fp op1 scaled(src)					#
   5398#	FP_DST(a6) = fp op2 scaled(dst)					#
   5399#	d0         = scale amount					#
   5400#									#
   5401# ALGORITHM ***********************************************************	#
   5402#	If the DST exponent is > the SRC exponent, set the DST exponent	#
   5403# equal to 0x3fff and scale the SRC exponent by the value that the	#
   5404# DST exponent was scaled by. If the SRC exponent is greater or equal,	#
   5405# do the opposite. Return this scale factor in d0.			#
   5406#	If the two exponents differ by > the number of mantissa bits	#
   5407# plus two, then set the smallest exponent to a very small value as a	#
   5408# quick shortcut.							#
   5409#									#
   5410#########################################################################
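#
# Illustrative C sketch (not part of the 060FPSP package) of the scaling
# idea described above, using plain doubles and hypothetical names: pull
# the larger exponent out of both operands so the add/sub itself cannot
# overflow or underflow, and hand the scale back so the caller can undo it.
#
#	#include <math.h>
#
#	static double scaled_add(double x, double y, int *scale)
#	{
#		int ex, ey;
#		frexp(x, &ex);
#		frexp(y, &ey);
#		*scale = (ex > ey) ? ex : ey;	/* caller applies ldexp(res, *scale) */
#		return ldexp(x, -*scale) + ldexp(y, -*scale);
#	}
#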
   5411
   5412	global		addsub_scaler2
   5413addsub_scaler2:
   5414	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
   5415	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
   5416	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   5417	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
   5418	mov.w		SRC_EX(%a0),%d0
   5419	mov.w		DST_EX(%a1),%d1
   5420	mov.w		%d0,FP_SCR0_EX(%a6)
   5421	mov.w		%d1,FP_SCR1_EX(%a6)
   5422
   5423	andi.w		&0x7fff,%d0
   5424	andi.w		&0x7fff,%d1
   5425	mov.w		%d0,L_SCR1(%a6)		# store src exponent
   5426	mov.w		%d1,2+L_SCR1(%a6)	# store dst exponent
   5427
   5428	cmp.w		%d0, %d1		# is src exp >= dst exp?
   5429	bge.l		src_exp_ge2
   5430
   5431# dst exp is >  src exp; scale dst to exp = 0x3fff
   5432dst_exp_gt2:
   5433	bsr.l		scale_to_zero_dst
   5434	mov.l		%d0,-(%sp)		# save scale factor
   5435
    5436	cmpi.b		STAG(%a6),&DENORM	# is src denormalized?
   5437	bne.b		cmpexp12
   5438
   5439	lea		FP_SCR0(%a6),%a0
   5440	bsr.l		norm			# normalize the denorm; result is new exp
   5441	neg.w		%d0			# new exp = -(shft val)
    5442	mov.w		%d0,L_SCR1(%a6)		# insert new exp
   5443
   5444cmpexp12:
   5445	mov.w		2+L_SCR1(%a6),%d0
   5446	subi.w		&mantissalen+2,%d0	# subtract mantissalen+2 from larger exp
   5447
   5448	cmp.w		%d0,L_SCR1(%a6)		# is difference >= len(mantissa)+2?
   5449	bge.b		quick_scale12
   5450
   5451	mov.w		L_SCR1(%a6),%d0
   5452	add.w		0x2(%sp),%d0		# scale src exponent by scale factor
   5453	mov.w		FP_SCR0_EX(%a6),%d1
   5454	and.w		&0x8000,%d1
   5455	or.w		%d1,%d0			# concat {sgn,new exp}
    5456	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new src exponent
   5457
   5458	mov.l		(%sp)+,%d0		# return SCALE factor
   5459	rts
   5460
   5461quick_scale12:
   5462	andi.w		&0x8000,FP_SCR0_EX(%a6)	# zero src exponent
   5463	bset		&0x0,1+FP_SCR0_EX(%a6)	# set exp = 1
   5464
   5465	mov.l		(%sp)+,%d0		# return SCALE factor
   5466	rts
   5467
   5468# src exp is >= dst exp; scale src to exp = 0x3fff
   5469src_exp_ge2:
   5470	bsr.l		scale_to_zero_src
   5471	mov.l		%d0,-(%sp)		# save scale factor
   5472
   5473	cmpi.b		DTAG(%a6),&DENORM	# is dst denormalized?
   5474	bne.b		cmpexp22
   5475	lea		FP_SCR1(%a6),%a0
   5476	bsr.l		norm			# normalize the denorm; result is new exp
   5477	neg.w		%d0			# new exp = -(shft val)
    5478	mov.w		%d0,2+L_SCR1(%a6)	# insert new exp
   5479
   5480cmpexp22:
   5481	mov.w		L_SCR1(%a6),%d0
   5482	subi.w		&mantissalen+2,%d0	# subtract mantissalen+2 from larger exp
   5483
   5484	cmp.w		%d0,2+L_SCR1(%a6)	# is difference >= len(mantissa)+2?
   5485	bge.b		quick_scale22
   5486
   5487	mov.w		2+L_SCR1(%a6),%d0
   5488	add.w		0x2(%sp),%d0		# scale dst exponent by scale factor
   5489	mov.w		FP_SCR1_EX(%a6),%d1
   5490	andi.w		&0x8000,%d1
   5491	or.w		%d1,%d0			# concat {sgn,new exp}
   5492	mov.w		%d0,FP_SCR1_EX(%a6)	# insert new dst exponent
   5493
   5494	mov.l		(%sp)+,%d0		# return SCALE factor
   5495	rts
   5496
   5497quick_scale22:
   5498	andi.w		&0x8000,FP_SCR1_EX(%a6)	# zero dst exponent
   5499	bset		&0x0,1+FP_SCR1_EX(%a6)	# set exp = 1
   5500
   5501	mov.l		(%sp)+,%d0		# return SCALE factor
   5502	rts
   5503
   5504##########################################################################
   5505
   5506#########################################################################
   5507# XDEF ****************************************************************	#
   5508#	scale_to_zero_src(): scale the exponent of extended precision	#
   5509#			     value at FP_SCR0(a6).			#
   5510#									#
   5511# XREF ****************************************************************	#
   5512#	norm() - normalize the mantissa if the operand was a DENORM	#
   5513#									#
   5514# INPUT ***************************************************************	#
   5515#	FP_SCR0(a6) = extended precision operand to be scaled		#
   5516#									#
   5517# OUTPUT **************************************************************	#
   5518#	FP_SCR0(a6) = scaled extended precision operand			#
   5519#	d0	    = scale value					#
   5520#									#
   5521# ALGORITHM ***********************************************************	#
   5522#	Set the exponent of the input operand to 0x3fff. Save the value	#
   5523# of the difference between the original and new exponent. Then,	#
   5524# normalize the operand if it was a DENORM. Add this normalization	#
   5525# value to the previous value. Return the result.			#
   5526#									#
   5527#########################################################################
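#
# Illustrative C sketch (not part of the 060FPSP package): once a DENORM
# has been normalized, the returned scale value is just the distance from
# the operand's exponent to the bias. The names are hypothetical.
#
#	#define EXT_BIAS 0x3fff
#
#	static long scale_value(long exp)	/* exp of the normalized operand */
#	{
#		return EXT_BIAS - exp;		/* new_exp(=EXT_BIAS) = exp + scale */
#	}
#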
   5528
   5529	global		scale_to_zero_src
   5530scale_to_zero_src:
   5531	mov.w		FP_SCR0_EX(%a6),%d1	# extract operand's {sgn,exp}
   5532	mov.w		%d1,%d0			# make a copy
   5533
   5534	andi.l		&0x7fff,%d1		# extract operand's exponent
   5535
   5536	andi.w		&0x8000,%d0		# extract operand's sgn
   5537	or.w		&0x3fff,%d0		# insert new operand's exponent(=0)
   5538
   5539	mov.w		%d0,FP_SCR0_EX(%a6)	# insert biased exponent
   5540
    5541	cmpi.b		STAG(%a6),&DENORM	# is operand a DENORM?
   5542	beq.b		stzs_denorm		# normalize the DENORM
   5543
   5544stzs_norm:
   5545	mov.l		&0x3fff,%d0
   5546	sub.l		%d1,%d0			# scale = BIAS + (-exp)
   5547
   5548	rts
   5549
   5550stzs_denorm:
   5551	lea		FP_SCR0(%a6),%a0	# pass ptr to src op
   5552	bsr.l		norm			# normalize denorm
   5553	neg.l		%d0			# new exponent = -(shft val)
   5554	mov.l		%d0,%d1			# prepare for op_norm call
   5555	bra.b		stzs_norm		# finish scaling
   5556
   5557###
   5558
   5559#########################################################################
   5560# XDEF ****************************************************************	#
   5561#	scale_sqrt(): scale the input operand exponent so a subsequent	#
   5562#		      fsqrt operation won't take an exception.		#
   5563#									#
   5564# XREF ****************************************************************	#
   5565#	norm() - normalize the mantissa if the operand was a DENORM	#
   5566#									#
   5567# INPUT ***************************************************************	#
   5568#	FP_SCR0(a6) = extended precision operand to be scaled		#
   5569#									#
   5570# OUTPUT **************************************************************	#
   5571#	FP_SCR0(a6) = scaled extended precision operand			#
   5572#	d0	    = scale value					#
   5573#									#
   5574# ALGORITHM ***********************************************************	#
   5575#	If the input operand is a DENORM, normalize it.			#
   5576#	If the exponent of the input operand is even, set the exponent	#
   5577# to 0x3ffe and return a scale factor of "(exp-0x3ffe)/2". If the	#
    5578# exponent of the input operand is odd, set the exponent to 0x3fff and	#
   5579# return a scale factor of "(exp-0x3fff)/2".				#
   5580#									#
   5581#########################################################################
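#
# Illustrative C sketch (not part of the 060FPSP package): the even/odd
# split keeps the amount pulled out of the operand an even power of two,
# since sqrt(m * 2^(2k)) = sqrt(m) * 2^k; the halved scale factor is what
# the caller applies after the fsqrt. Names are hypothetical.
#
#	#define EXT_BIAS 0x3fff
#
#	static long sqrt_scale(long exp)	/* exp of the normalized operand */
#	{
#		long target = (exp & 1) ? EXT_BIAS : EXT_BIAS - 1;
#		return (target - exp) / 2;	/* target - exp is always even */
#	}
#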
   5582
   5583	global		scale_sqrt
   5584scale_sqrt:
    5585	cmpi.b		STAG(%a6),&DENORM	# is operand a DENORM?
   5586	beq.b		ss_denorm		# normalize the DENORM
   5587
   5588	mov.w		FP_SCR0_EX(%a6),%d1	# extract operand's {sgn,exp}
   5589	andi.l		&0x7fff,%d1		# extract operand's exponent
   5590
   5591	andi.w		&0x8000,FP_SCR0_EX(%a6)	# extract operand's sgn
   5592
   5593	btst		&0x0,%d1		# is exp even or odd?
   5594	beq.b		ss_norm_even
   5595
   5596	ori.w		&0x3fff,FP_SCR0_EX(%a6)	# insert new operand's exponent(=0)
   5597
   5598	mov.l		&0x3fff,%d0
   5599	sub.l		%d1,%d0			# scale = BIAS + (-exp)
   5600	asr.l		&0x1,%d0		# divide scale factor by 2
   5601	rts
   5602
   5603ss_norm_even:
   5604	ori.w		&0x3ffe,FP_SCR0_EX(%a6)	# insert new operand's exponent(=0)
   5605
   5606	mov.l		&0x3ffe,%d0
   5607	sub.l		%d1,%d0			# scale = BIAS + (-exp)
   5608	asr.l		&0x1,%d0		# divide scale factor by 2
   5609	rts
   5610
   5611ss_denorm:
   5612	lea		FP_SCR0(%a6),%a0	# pass ptr to src op
   5613	bsr.l		norm			# normalize denorm
   5614
   5615	btst		&0x0,%d0		# is exp even or odd?
   5616	beq.b		ss_denorm_even
   5617
   5618	ori.w		&0x3fff,FP_SCR0_EX(%a6)	# insert new operand's exponent(=0)
   5619
   5620	add.l		&0x3fff,%d0
   5621	asr.l		&0x1,%d0		# divide scale factor by 2
   5622	rts
   5623
   5624ss_denorm_even:
   5625	ori.w		&0x3ffe,FP_SCR0_EX(%a6)	# insert new operand's exponent(=0)
   5626
   5627	add.l		&0x3ffe,%d0
   5628	asr.l		&0x1,%d0		# divide scale factor by 2
   5629	rts
   5630
   5631###
   5632
   5633#########################################################################
   5634# XDEF ****************************************************************	#
   5635#	scale_to_zero_dst(): scale the exponent of extended precision	#
   5636#			     value at FP_SCR1(a6).			#
   5637#									#
   5638# XREF ****************************************************************	#
   5639#	norm() - normalize the mantissa if the operand was a DENORM	#
   5640#									#
   5641# INPUT ***************************************************************	#
   5642#	FP_SCR1(a6) = extended precision operand to be scaled		#
   5643#									#
   5644# OUTPUT **************************************************************	#
   5645#	FP_SCR1(a6) = scaled extended precision operand			#
   5646#	d0	    = scale value					#
   5647#									#
   5648# ALGORITHM ***********************************************************	#
   5649#	Set the exponent of the input operand to 0x3fff. Save the value	#
   5650# of the difference between the original and new exponent. Then,	#
   5651# normalize the operand if it was a DENORM. Add this normalization	#
   5652# value to the previous value. Return the result.			#
   5653#									#
   5654#########################################################################
   5655
   5656	global		scale_to_zero_dst
   5657scale_to_zero_dst:
   5658	mov.w		FP_SCR1_EX(%a6),%d1	# extract operand's {sgn,exp}
   5659	mov.w		%d1,%d0			# make a copy
   5660
   5661	andi.l		&0x7fff,%d1		# extract operand's exponent
   5662
   5663	andi.w		&0x8000,%d0		# extract operand's sgn
   5664	or.w		&0x3fff,%d0		# insert new operand's exponent(=0)
   5665
   5666	mov.w		%d0,FP_SCR1_EX(%a6)	# insert biased exponent
   5667
    5668	cmpi.b		DTAG(%a6),&DENORM	# is operand a DENORM?
   5669	beq.b		stzd_denorm		# normalize the DENORM
   5670
   5671stzd_norm:
   5672	mov.l		&0x3fff,%d0
   5673	sub.l		%d1,%d0			# scale = BIAS + (-exp)
   5674	rts
   5675
   5676stzd_denorm:
   5677	lea		FP_SCR1(%a6),%a0	# pass ptr to dst op
   5678	bsr.l		norm			# normalize denorm
   5679	neg.l		%d0			# new exponent = -(shft val)
   5680	mov.l		%d0,%d1			# prepare for op_norm call
   5681	bra.b		stzd_norm		# finish scaling
   5682
   5683##########################################################################
   5684
   5685#########################################################################
   5686# XDEF ****************************************************************	#
   5687#	res_qnan(): return default result w/ QNAN operand for dyadic	#
   5688#	res_snan(): return default result w/ SNAN operand for dyadic	#
   5689#	res_qnan_1op(): return dflt result w/ QNAN operand for monadic	#
   5690#	res_snan_1op(): return dflt result w/ SNAN operand for monadic	#
   5691#									#
   5692# XREF ****************************************************************	#
   5693#	None								#
   5694#									#
   5695# INPUT ***************************************************************	#
   5696#	FP_SRC(a6) = pointer to extended precision src operand		#
   5697#	FP_DST(a6) = pointer to extended precision dst operand		#
   5698#									#
   5699# OUTPUT **************************************************************	#
   5700#	fp0 = default result						#
   5701#									#
   5702# ALGORITHM ***********************************************************	#
   5703#	If either operand (but not both operands) of an operation is a	#
   5704# nonsignalling NAN, then that NAN is returned as the result. If both	#
   5705# operands are nonsignalling NANs, then the destination operand		#
   5706# nonsignalling NAN is returned as the result.				#
   5707#	If either operand to an operation is a signalling NAN (SNAN),	#
   5708# then, the SNAN bit is set in the FPSR EXC byte. If the SNAN trap	#
   5709# enable bit is set in the FPCR, then the trap is taken and the		#
   5710# destination is not modified. If the SNAN trap enable bit is not set,	#
   5711# then the SNAN is converted to a nonsignalling NAN (by setting the	#
   5712# SNAN bit in the operand to one), and the operation continues as	#
   5713# described in the preceding paragraph, for nonsignalling NANs.		#
   5714#	Make sure the appropriate FPSR bits are set before exiting.	#
   5715#									#
   5716#########################################################################
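#
# Illustrative C sketch (not part of the 060FPSP package) of the selection
# rule described above, using hypothetical names: a NAN destination always
# supplies the result; otherwise the source NAN is used, quieted if it was
# signalling. An SNAN on either side still sets the SNAN/AIOP bits in FPSR.
#
#	#include <stdint.h>
#
#	enum tag { TAG_NORM, TAG_QNAN, TAG_SNAN /* ... */ };
#
#	/* pick the mantissa that supplies the default NAN result and make
#	 * sure it is quiet (bit 62 is already set if it was a QNAN).      */
#	static uint64_t nan_result(enum tag dtag, uint64_t dst_mant,
#				   uint64_t src_mant)
#	{
#		uint64_t m = (dtag == TAG_QNAN || dtag == TAG_SNAN) ? dst_mant
#								    : src_mant;
#		return m | (1ULL << 62);
#	}
#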
   5717
   5718	global		res_qnan
   5719	global		res_snan
   5720res_qnan:
   5721res_snan:
   5722	cmp.b		DTAG(%a6), &SNAN	# is the dst an SNAN?
   5723	beq.b		dst_snan2
   5724	cmp.b		DTAG(%a6), &QNAN	# is the dst a  QNAN?
   5725	beq.b		dst_qnan2
   5726src_nan:
   5727	cmp.b		STAG(%a6), &QNAN
   5728	beq.b		src_qnan2
   5729	global		res_snan_1op
   5730res_snan_1op:
   5731src_snan2:
   5732	bset		&0x6, FP_SRC_HI(%a6)	# set SNAN bit
   5733	or.l		&nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6)
   5734	lea		FP_SRC(%a6), %a0
   5735	bra.b		nan_comp
   5736	global		res_qnan_1op
   5737res_qnan_1op:
   5738src_qnan2:
   5739	or.l		&nan_mask, USER_FPSR(%a6)
   5740	lea		FP_SRC(%a6), %a0
   5741	bra.b		nan_comp
   5742dst_snan2:
   5743	or.l		&nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6)
   5744	bset		&0x6, FP_DST_HI(%a6)	# set SNAN bit
   5745	lea		FP_DST(%a6), %a0
   5746	bra.b		nan_comp
   5747dst_qnan2:
   5748	lea		FP_DST(%a6), %a0
   5749	cmp.b		STAG(%a6), &SNAN
   5750	bne		nan_done
   5751	or.l		&aiop_mask+snan_mask, USER_FPSR(%a6)
   5752nan_done:
   5753	or.l		&nan_mask, USER_FPSR(%a6)
   5754nan_comp:
   5755	btst		&0x7, FTEMP_EX(%a0)	# is NAN neg?
   5756	beq.b		nan_not_neg
   5757	or.l		&neg_mask, USER_FPSR(%a6)
   5758nan_not_neg:
   5759	fmovm.x		(%a0), &0x80
   5760	rts
   5761
   5762#########################################################################
   5763# XDEF ****************************************************************	#
   5764#	res_operr(): return default result during operand error		#
   5765#									#
   5766# XREF ****************************************************************	#
   5767#	None								#
   5768#									#
   5769# INPUT ***************************************************************	#
   5770#	None								#
   5771#									#
   5772# OUTPUT **************************************************************	#
   5773#	fp0 = default operand error result				#
   5774#									#
   5775# ALGORITHM ***********************************************************	#
    5776#	A nonsignalling NAN is returned as the default result when	#
   5777# an operand error occurs for the following cases:			#
   5778#									#
   5779#	Multiply: (Infinity x Zero)					#
   5780#	Divide  : (Zero / Zero) || (Infinity / Infinity)		#
   5781#									#
   5782#########################################################################
   5783
   5784	global		res_operr
   5785res_operr:
   5786	or.l		&nan_mask+operr_mask+aiop_mask, USER_FPSR(%a6)
   5787	fmovm.x		nan_return(%pc), &0x80
   5788	rts
   5789
   5790nan_return:
   5791	long		0x7fff0000, 0xffffffff, 0xffffffff
   5792
   5793#########################################################################
   5794# XDEF ****************************************************************	#
   5795#	_denorm(): denormalize an intermediate result			#
   5796#									#
   5797# XREF ****************************************************************	#
   5798#	None								#
   5799#									#
   5800# INPUT *************************************************************** #
   5801#	a0 = points to the operand to be denormalized			#
   5802#		(in the internal extended format)			#
   5803#									#
   5804#	d0 = rounding precision						#
   5805#									#
   5806# OUTPUT **************************************************************	#
   5807#	a0 = pointer to the denormalized result				#
   5808#		(in the internal extended format)			#
   5809#									#
   5810#	d0 = guard,round,sticky						#
   5811#									#
   5812# ALGORITHM ***********************************************************	#
   5813#	According to the exponent underflow threshold for the given	#
    5814# precision, shift the mantissa bits to the right in order to raise the	#
   5815# exponent of the operand to the threshold value. While shifting the	#
   5816# mantissa bits right, maintain the value of the guard, round, and	#
   5817# sticky bits.								#
   5818# other notes:								#
   5819#	(1) _denorm() is called by the underflow routines		#
   5820#	(2) _denorm() does NOT affect the status register		#
   5821#									#
   5822#########################################################################
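#
# Illustrative C sketch (not part of the 060FPSP package): the shift amount
# is the gap between the operand's exponent and the precision's underflow
# threshold; once that gap exceeds 65 (64 mantissa bits plus the guard and
# round bits) only a sticky bit can survive. The name is hypothetical.
#
#	static int denorm_shift(int exp, int threshold)
#	{
#		int shift = threshold - exp;
#		if (shift > 65)
#			return -1;	/* mantissa becomes 0, result is sticky only */
#		return shift;		/* number of right shifts for dnrm_lp() */
#	}
#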
   5823
   5824#
   5825# table of exponent threshold values for each precision
   5826#
   5827tbl_thresh:
   5828	short		0x0
   5829	short		sgl_thresh
   5830	short		dbl_thresh
   5831
   5832	global		_denorm
   5833_denorm:
   5834#
   5835# Load the exponent threshold for the precision selected and check
   5836# to see if (threshold - exponent) is > 65 in which case we can
   5837# simply calculate the sticky bit and zero the mantissa. otherwise
   5838# we have to call the denormalization routine.
   5839#
   5840	lsr.b		&0x2, %d0		# shift prec to lo bits
   5841	mov.w		(tbl_thresh.b,%pc,%d0.w*2), %d1 # load prec threshold
   5842	mov.w		%d1, %d0		# copy d1 into d0
   5843	sub.w		FTEMP_EX(%a0), %d0	# diff = threshold - exp
   5844	cmpi.w		%d0, &66		# is diff > 65? (mant + g,r bits)
   5845	bpl.b		denorm_set_stky		# yes; just calc sticky
   5846
   5847	clr.l		%d0			# clear g,r,s
   5848	btst		&inex2_bit, FPSR_EXCEPT(%a6) # yes; was INEX2 set?
   5849	beq.b		denorm_call		# no; don't change anything
   5850	bset		&29, %d0		# yes; set sticky bit
   5851
   5852denorm_call:
   5853	bsr.l		dnrm_lp			# denormalize the number
   5854	rts
   5855
   5856#
    5857# all bits would have been shifted off during the denorm so simply
   5858# calculate if the sticky should be set and clear the entire mantissa.
   5859#
   5860denorm_set_stky:
   5861	mov.l		&0x20000000, %d0	# set sticky bit in return value
   5862	mov.w		%d1, FTEMP_EX(%a0)	# load exp with threshold
    5863	clr.l		FTEMP_HI(%a0)		# clear hi(mantissa)
    5864	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
   5865	rts
   5866
   5867#									#
    5868# dnrm_lp(): denormalize exponent/mantissa to specified threshold	#
   5869#									#
   5870# INPUT:								#
   5871#	%a0	   : points to the operand to be denormalized		#
   5872#	%d0{31:29} : initial guard,round,sticky				#
   5873#	%d1{15:0}  : denormalization threshold				#
   5874# OUTPUT:								#
   5875#	%a0	   : points to the denormalized operand			#
   5876#	%d0{31:29} : final guard,round,sticky				#
   5877#									#
   5878
   5879# *** Local Equates *** #
   5880set	GRS,		L_SCR2			# g,r,s temp storage
   5881set	FTEMP_LO2,	L_SCR1			# FTEMP_LO copy
   5882
   5883	global		dnrm_lp
   5884dnrm_lp:
   5885
   5886#
   5887# make a copy of FTEMP_LO and place the g,r,s bits directly after it
   5888# in memory so as to make the bitfield extraction for denormalization easier.
   5889#
   5890	mov.l		FTEMP_LO(%a0), FTEMP_LO2(%a6) # make FTEMP_LO copy
   5891	mov.l		%d0, GRS(%a6)		# place g,r,s after it
   5892
   5893#
   5894# check to see how much less than the underflow threshold the operand
   5895# exponent is.
   5896#
   5897	mov.l		%d1, %d0		# copy the denorm threshold
   5898	sub.w		FTEMP_EX(%a0), %d1	# d1 = threshold - uns exponent
   5899	ble.b		dnrm_no_lp		# d1 <= 0
   5900	cmpi.w		%d1, &0x20		# is ( 0 <= d1 < 32) ?
   5901	blt.b		case_1			# yes
   5902	cmpi.w		%d1, &0x40		# is (32 <= d1 < 64) ?
   5903	blt.b		case_2			# yes
   5904	bra.w		case_3			# (d1 >= 64)
   5905
   5906#
    5907# No denormalization necessary
   5908#
   5909dnrm_no_lp:
   5910	mov.l		GRS(%a6), %d0		# restore original g,r,s
   5911	rts
   5912
   5913#
   5914# case (0<d1<32)
   5915#
   5916# %d0 = denorm threshold
   5917# %d1 = "n" = amt to shift
   5918#
   5919#	---------------------------------------------------------
   5920#	|     FTEMP_HI	  |	FTEMP_LO     |grs000.........000|
   5921#	---------------------------------------------------------
   5922#	<-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
   5923#	\	   \		      \			 \
   5924#	 \	    \		       \		  \
   5925#	  \	     \			\		   \
   5926#	   \	      \			 \		    \
   5927#	    \	       \		  \		     \
   5928#	     \		\		   \		      \
   5929#	      \		 \		    \		       \
   5930#	       \	  \		     \			\
   5931#	<-(n)-><-(32 - n)-><------(32)-------><------(32)------->
   5932#	---------------------------------------------------------
   5933#	|0.....0| NEW_HI  |  NEW_FTEMP_LO     |grs		|
   5934#	---------------------------------------------------------
   5935#
   5936case_1:
   5937	mov.l		%d2, -(%sp)		# create temp storage
   5938
   5939	mov.w		%d0, FTEMP_EX(%a0)	# exponent = denorm threshold
   5940	mov.l		&32, %d0
   5941	sub.w		%d1, %d0		# %d0 = 32 - %d1
   5942
   5943	cmpi.w		%d1, &29		# is shft amt >= 29
   5944	blt.b		case1_extract		# no; no fix needed
   5945	mov.b		GRS(%a6), %d2
   5946	or.b		%d2, 3+FTEMP_LO2(%a6)
   5947
   5948case1_extract:
   5949	bfextu		FTEMP_HI(%a0){&0:%d0}, %d2 # %d2 = new FTEMP_HI
   5950	bfextu		FTEMP_HI(%a0){%d0:&32}, %d1 # %d1 = new FTEMP_LO
   5951	bfextu		FTEMP_LO2(%a6){%d0:&32}, %d0 # %d0 = new G,R,S
   5952
   5953	mov.l		%d2, FTEMP_HI(%a0)	# store new FTEMP_HI
   5954	mov.l		%d1, FTEMP_LO(%a0)	# store new FTEMP_LO
   5955
   5956	bftst		%d0{&2:&30}		# were bits shifted off?
   5957	beq.b		case1_sticky_clear	# no; go finish
   5958	bset		&rnd_stky_bit, %d0	# yes; set sticky bit
   5959
   5960case1_sticky_clear:
   5961	and.l		&0xe0000000, %d0	# clear all but G,R,S
   5962	mov.l		(%sp)+, %d2		# restore temp register
   5963	rts
   5964
   5965#
   5966# case (32<=d1<64)
   5967#
   5968# %d0 = denorm threshold
   5969# %d1 = "n" = amt to shift
   5970#
   5971#	---------------------------------------------------------
   5972#	|     FTEMP_HI	  |	FTEMP_LO     |grs000.........000|
   5973#	---------------------------------------------------------
   5974#	<-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
   5975#	\	   \		      \
   5976#	 \	    \		       \
   5977#	  \	     \			-------------------
   5978#	   \	      --------------------		   \
   5979#	    -------------------		  \		    \
   5980#			       \	   \		     \
   5981#				\	    \		      \
   5982#				 \	     \		       \
   5983#	<-------(32)------><-(n)-><-(32 - n)-><------(32)------->
   5984#	---------------------------------------------------------
   5985#	|0...............0|0....0| NEW_LO     |grs		|
   5986#	---------------------------------------------------------
   5987#
   5988case_2:
   5989	mov.l		%d2, -(%sp)		# create temp storage
   5990
   5991	mov.w		%d0, FTEMP_EX(%a0)	# exponent = denorm threshold
   5992	subi.w		&0x20, %d1		# %d1 now between 0 and 32
   5993	mov.l		&0x20, %d0
   5994	sub.w		%d1, %d0		# %d0 = 32 - %d1
   5995
   5996# subtle step here; or in the g,r,s at the bottom of FTEMP_LO to minimize
   5997# the number of bits to check for the sticky detect.
   5998# it only plays a role in shift amounts of 61-63.
   5999	mov.b		GRS(%a6), %d2
   6000	or.b		%d2, 3+FTEMP_LO2(%a6)
   6001
   6002	bfextu		FTEMP_HI(%a0){&0:%d0}, %d2 # %d2 = new FTEMP_LO
   6003	bfextu		FTEMP_HI(%a0){%d0:&32}, %d1 # %d1 = new G,R,S
   6004
   6005	bftst		%d1{&2:&30}		# were any bits shifted off?
   6006	bne.b		case2_set_sticky	# yes; set sticky bit
   6007	bftst		FTEMP_LO2(%a6){%d0:&31}	# were any bits shifted off?
   6008	bne.b		case2_set_sticky	# yes; set sticky bit
   6009
   6010	mov.l		%d1, %d0		# move new G,R,S to %d0
   6011	bra.b		case2_end
   6012
   6013case2_set_sticky:
   6014	mov.l		%d1, %d0		# move new G,R,S to %d0
   6015	bset		&rnd_stky_bit, %d0	# set sticky bit
   6016
   6017case2_end:
   6018	clr.l		FTEMP_HI(%a0)		# store FTEMP_HI = 0
   6019	mov.l		%d2, FTEMP_LO(%a0)	# store FTEMP_LO
   6020	and.l		&0xe0000000, %d0	# clear all but G,R,S
   6021
   6022	mov.l		(%sp)+,%d2		# restore temp register
   6023	rts
   6024
   6025#
   6026# case (d1>=64)
   6027#
   6028# %d0 = denorm threshold
   6029# %d1 = amt to shift
   6030#
   6031case_3:
   6032	mov.w		%d0, FTEMP_EX(%a0)	# insert denorm threshold
   6033
   6034	cmpi.w		%d1, &65		# is shift amt > 65?
   6035	blt.b		case3_64		# no; it's == 64
   6036	beq.b		case3_65		# no; it's == 65
   6037
   6038#
   6039# case (d1>65)
   6040#
   6041# Shift value is > 65 and out of range. All bits are shifted off.
   6042# Return a zero mantissa with the sticky bit set
   6043#
   6044	clr.l		FTEMP_HI(%a0)		# clear hi(mantissa)
   6045	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
   6046	mov.l		&0x20000000, %d0	# set sticky bit
   6047	rts
   6048
   6049#
   6050# case (d1 == 64)
   6051#
   6052#	---------------------------------------------------------
   6053#	|     FTEMP_HI	  |	FTEMP_LO     |grs000.........000|
   6054#	---------------------------------------------------------
   6055#	<-------(32)------>
   6056#	\		   \
   6057#	 \		    \
   6058#	  \		     \
   6059#	   \		      ------------------------------
   6060#	    -------------------------------		    \
   6061#					   \		     \
   6062#					    \		      \
   6063#					     \		       \
   6064#					      <-------(32)------>
   6065#	---------------------------------------------------------
   6066#	|0...............0|0................0|grs		|
   6067#	---------------------------------------------------------
   6068#
   6069case3_64:
   6070	mov.l		FTEMP_HI(%a0), %d0	# fetch hi(mantissa)
   6071	mov.l		%d0, %d1		# make a copy
   6072	and.l		&0xc0000000, %d0	# extract G,R
   6073	and.l		&0x3fffffff, %d1	# extract other bits
   6074
   6075	bra.b		case3_complete
   6076
   6077#
   6078# case (d1 == 65)
   6079#
   6080#	---------------------------------------------------------
   6081#	|     FTEMP_HI	  |	FTEMP_LO     |grs000.........000|
   6082#	---------------------------------------------------------
   6083#	<-------(32)------>
   6084#	\		   \
   6085#	 \		    \
   6086#	  \		     \
   6087#	   \		      ------------------------------
   6088#	    --------------------------------		    \
   6089#					    \		     \
   6090#					     \		      \
   6091#					      \		       \
   6092#					       <-------(31)----->
   6093#	---------------------------------------------------------
   6094#	|0...............0|0................0|0rs		|
   6095#	---------------------------------------------------------
   6096#
   6097case3_65:
    6098	mov.l		FTEMP_HI(%a0), %d0	# fetch hi(mantissa)
    6099	and.l		&0x80000000, %d0	# extract R bit
    6100	lsr.l		&0x1, %d0		# shift high bit into R bit
        	mov.l		FTEMP_HI(%a0), %d1	# refetch hi(mantissa)
    6101	and.l		&0x7fffffff, %d1	# extract other bits
   6102
   6103case3_complete:
   6104# last operation done was an "and" of the bits shifted off so the condition
   6105# codes are already set so branch accordingly.
   6106	bne.b		case3_set_sticky	# yes; go set new sticky
   6107	tst.l		FTEMP_LO(%a0)		# were any bits shifted off?
   6108	bne.b		case3_set_sticky	# yes; go set new sticky
   6109	tst.b		GRS(%a6)		# were any bits shifted off?
   6110	bne.b		case3_set_sticky	# yes; go set new sticky
   6111
   6112#
   6113# no bits were shifted off so don't set the sticky bit.
    6114# the guard/round bits already in d0 are returned unchanged and
    6115# the entire mantissa is zero.
   6116#
   6117	clr.l		FTEMP_HI(%a0)		# clear hi(mantissa)
   6118	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
   6119	rts
   6120
   6121#
   6122# some bits were shifted off so set the sticky bit.
   6123# the entire mantissa is zero.
   6124#
   6125case3_set_sticky:
   6126	bset		&rnd_stky_bit,%d0	# set new sticky bit
   6127	clr.l		FTEMP_HI(%a0)		# clear hi(mantissa)
   6128	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
   6129	rts
   6130
   6131#########################################################################
   6132# XDEF ****************************************************************	#
   6133#	_round(): round result according to precision/mode		#
   6134#									#
   6135# XREF ****************************************************************	#
   6136#	None								#
   6137#									#
   6138# INPUT ***************************************************************	#
   6139#	a0	  = ptr to input operand in internal extended format	#
   6140#	d1(hi)    = contains rounding precision:			#
   6141#			ext = $0000xxxx					#
   6142#			sgl = $0004xxxx					#
   6143#			dbl = $0008xxxx					#
   6144#	d1(lo)	  = contains rounding mode:				#
   6145#			RN  = $xxxx0000					#
   6146#			RZ  = $xxxx0001					#
   6147#			RM  = $xxxx0002					#
   6148#			RP  = $xxxx0003					#
   6149#	d0{31:29} = contains the g,r,s bits (extended)			#
   6150#									#
   6151# OUTPUT **************************************************************	#
   6152#	a0 = pointer to rounded result					#
   6153#									#
   6154# ALGORITHM ***********************************************************	#
   6155#	On return the value pointed to by a0 is correctly rounded,	#
   6156#	a0 is preserved and the g-r-s bits in d0 are cleared.		#
   6157#	The result is not typed - the tag field is invalid.  The	#
   6158#	result is still in the internal extended format.		#
   6159#									#
   6160#	The INEX bit of USER_FPSR will be set if the rounded result was	#
   6161#	inexact (i.e. if any of the g-r-s bits were set).		#
   6162#									#
   6163#########################################################################
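        #
        # Informally (not part of the original package), the mode handling
        # described above corresponds to the following C sketch; names are
        # illustrative, and the mantissa-overflow/exponent-increment case
        # handled by the roxr sequences below is deliberately omitted:
        #
        #	#include <stdint.h>
        #	#include <stdio.h>
        #
        #	/* mode: 0=RN 1=RZ 2=RM 3=RP; g,r,s are the extracted bits */
        #	static uint64_t round_man(uint64_t man, int neg, unsigned g,
        #				  unsigned r, unsigned s, int mode)
        #	{
        #		if ((g | r | s) == 0)	/* exact: nothing to do */
        #			return man;
        #		switch (mode) {
        #		case 0:			/* RN: add 1 if g; clear l on a tie */
        #			if (g) {
        #				man += 1;
        #				if ((r | s) == 0)
        #					man &= ~(uint64_t)1;
        #			}
        #			break;
        #		case 2: if (neg)  man += 1; break; /* RM */
        #		case 3: if (!neg) man += 1; break; /* RP */
        #		default: break;			   /* RZ: truncate */
        #		}
        #		return man;
        #	}
        #
        #	int main(void)
        #	{			/* a 3.5-style tie rounds to even: 4 */
        #		printf("%llu\n",
        #		       (unsigned long long)round_man(3, 0, 1, 0, 0, 0));
        #		return 0;
        #	}
        #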
   6164
   6165	global		_round
   6166_round:
   6167#
   6168# ext_grs() looks at the rounding precision and sets the appropriate
   6169# G,R,S bits.
   6170# If (G,R,S == 0) then result is exact and round is done, else set
   6171# the inex flag in status reg and continue.
   6172#
   6173	bsr.l		ext_grs			# extract G,R,S
   6174
   6175	tst.l		%d0			# are G,R,S zero?
   6176	beq.w		truncate		# yes; round is complete
   6177
   6178	or.w		&inx2a_mask, 2+USER_FPSR(%a6) # set inex2/ainex
   6179
   6180#
   6181# Use rounding mode as an index into a jump table for these modes.
   6182# All of the following assumes grs != 0.
   6183#
   6184	mov.w		(tbl_mode.b,%pc,%d1.w*2), %a1 # load jump offset
   6185	jmp		(tbl_mode.b,%pc,%a1)	# jmp to rnd mode handler
   6186
   6187tbl_mode:
   6188	short		rnd_near - tbl_mode
   6189	short		truncate - tbl_mode	# RZ always truncates
   6190	short		rnd_mnus - tbl_mode
   6191	short		rnd_plus - tbl_mode
   6192
   6193#################################################################
   6194#	ROUND PLUS INFINITY					#
   6195#								#
   6196#	If sign of fp number = 0 (positive), then add 1 to l.	#
   6197#################################################################
   6198rnd_plus:
   6199	tst.b		FTEMP_SGN(%a0)		# check for sign
    6200	bmi.w		truncate		# if negative then truncate
   6201
   6202	mov.l		&0xffffffff, %d0	# force g,r,s to be all f's
   6203	swap		%d1			# set up d1 for round prec.
   6204
   6205	cmpi.b		%d1, &s_mode		# is prec = sgl?
   6206	beq.w		add_sgl			# yes
   6207	bgt.w		add_dbl			# no; it's dbl
   6208	bra.w		add_ext			# no; it's ext
   6209
   6210#################################################################
   6211#	ROUND MINUS INFINITY					#
   6212#								#
   6213#	If sign of fp number = 1 (negative), then add 1 to l.	#
   6214#################################################################
   6215rnd_mnus:
   6216	tst.b		FTEMP_SGN(%a0)		# check for sign
    6217	bpl.w		truncate		# if positive then truncate
   6218
   6219	mov.l		&0xffffffff, %d0	# force g,r,s to be all f's
   6220	swap		%d1			# set up d1 for round prec.
   6221
   6222	cmpi.b		%d1, &s_mode		# is prec = sgl?
   6223	beq.w		add_sgl			# yes
   6224	bgt.w		add_dbl			# no; it's dbl
   6225	bra.w		add_ext			# no; it's ext
   6226
   6227#################################################################
   6228#	ROUND NEAREST						#
   6229#								#
   6230#	If (g=1), then add 1 to l and if (r=s=0), then clear l	#
   6231#	Note that this will round to even in case of a tie.	#
   6232#################################################################
   6233rnd_near:
   6234	asl.l		&0x1, %d0		# shift g-bit to c-bit
    6235	bcc.w		truncate		# if (g=0) then truncate
   6236
   6237	swap		%d1			# set up d1 for round prec.
   6238
   6239	cmpi.b		%d1, &s_mode		# is prec = sgl?
   6240	beq.w		add_sgl			# yes
   6241	bgt.w		add_dbl			# no; it's dbl
   6242	bra.w		add_ext			# no; it's ext
   6243
   6244# *** LOCAL EQUATES ***
   6245set	ad_1_sgl,	0x00000100	# constant to add 1 to l-bit in sgl prec
   6246set	ad_1_dbl,	0x00000800	# constant to add 1 to l-bit in dbl prec
   6247
   6248#########################
   6249#	ADD SINGLE	#
   6250#########################
   6251add_sgl:
   6252	add.l		&ad_1_sgl, FTEMP_HI(%a0)
   6253	bcc.b		scc_clr			# no mantissa overflow
   6254	roxr.w		FTEMP_HI(%a0)		# shift v-bit back in
   6255	roxr.w		FTEMP_HI+2(%a0)		# shift v-bit back in
   6256	add.w		&0x1, FTEMP_EX(%a0)	# and incr exponent
   6257scc_clr:
   6258	tst.l		%d0			# test for rs = 0
   6259	bne.b		sgl_done
   6260	and.w		&0xfe00, FTEMP_HI+2(%a0) # clear the l-bit
   6261sgl_done:
   6262	and.l		&0xffffff00, FTEMP_HI(%a0) # truncate bits beyond sgl limit
    6263	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
   6264	rts
   6265
   6266#########################
   6267#	ADD EXTENDED	#
   6268#########################
   6269add_ext:
   6270	addq.l		&1,FTEMP_LO(%a0)	# add 1 to l-bit
   6271	bcc.b		xcc_clr			# test for carry out
   6272	addq.l		&1,FTEMP_HI(%a0)	# propagate carry
   6273	bcc.b		xcc_clr
   6274	roxr.w		FTEMP_HI(%a0)		# mant is 0 so restore v-bit
   6275	roxr.w		FTEMP_HI+2(%a0)		# mant is 0 so restore v-bit
   6276	roxr.w		FTEMP_LO(%a0)
   6277	roxr.w		FTEMP_LO+2(%a0)
   6278	add.w		&0x1,FTEMP_EX(%a0)	# and inc exp
   6279xcc_clr:
   6280	tst.l		%d0			# test rs = 0
   6281	bne.b		add_ext_done
   6282	and.b		&0xfe,FTEMP_LO+3(%a0)	# clear the l bit
   6283add_ext_done:
   6284	rts
   6285
   6286#########################
   6287#	ADD DOUBLE	#
   6288#########################
   6289add_dbl:
   6290	add.l		&ad_1_dbl, FTEMP_LO(%a0) # add 1 to lsb
   6291	bcc.b		dcc_clr			# no carry
   6292	addq.l		&0x1, FTEMP_HI(%a0)	# propagate carry
   6293	bcc.b		dcc_clr			# no carry
   6294
   6295	roxr.w		FTEMP_HI(%a0)		# mant is 0 so restore v-bit
   6296	roxr.w		FTEMP_HI+2(%a0)		# mant is 0 so restore v-bit
   6297	roxr.w		FTEMP_LO(%a0)
   6298	roxr.w		FTEMP_LO+2(%a0)
   6299	addq.w		&0x1, FTEMP_EX(%a0)	# incr exponent
   6300dcc_clr:
   6301	tst.l		%d0			# test for rs = 0
   6302	bne.b		dbl_done
   6303	and.w		&0xf000, FTEMP_LO+2(%a0) # clear the l-bit
   6304
   6305dbl_done:
   6306	and.l		&0xfffff800,FTEMP_LO(%a0) # truncate bits beyond dbl limit
   6307	rts
   6308
   6309###########################
   6310# Truncate all other bits #
   6311###########################
   6312truncate:
   6313	swap		%d1			# select rnd prec
   6314
   6315	cmpi.b		%d1, &s_mode		# is prec sgl?
   6316	beq.w		sgl_done		# yes
   6317	bgt.b		dbl_done		# no; it's dbl
   6318	rts					# no; it's ext
   6319
   6320
   6321#
   6322# ext_grs(): extract guard, round and sticky bits according to
   6323#	     rounding precision.
   6324#
   6325# INPUT
   6326#	d0	   = extended precision g,r,s (in d0{31:29})
   6327#	d1	   = {PREC,ROUND}
   6328# OUTPUT
   6329#	d0{31:29}  = guard, round, sticky
   6330#
    6331# ext_grs() extracts the guard/round/sticky bits according to the
    6332# selected rounding precision. It is called by the round subroutine
    6333# only.  All registers except d0 are kept intact; d0 becomes the
    6334# updated guard,round,sticky in d0{31:29}.
    6335#
    6336# Notes: ext_grs() uses the rounding PREC and therefore has to swap d1
    6337#	 prior to use and restore d1 to its original value afterward. This
    6338#	 routine is tightly tied to the round routine and is not meant to
    6339#	 uphold standard subroutine calling practices.
   6340#
   6341
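        #
        # Informally (not part of the original package), for sgl (24 kept bits)
        # and dbl (53 kept bits) the extraction below amounts to this C sketch;
        # names are illustrative, and the extended case simply keeps the
        # caller's g,r,s as noted above:
        #
        #	#include <stdint.h>
        #	#include <stdio.h>
        #
        #	static unsigned grs_of(uint64_t man, unsigned kept, unsigned old_grs)
        #	{
        #		unsigned drop = 64 - kept;	/* bits below the dst lsb */
        #		unsigned g = (man >> (drop - 1)) & 1;
        #		unsigned r = (man >> (drop - 2)) & 1;
        #		unsigned s = ((man & ((1ULL << (drop - 2)) - 1)) != 0) ||
        #			     (old_grs != 0);
        #		return (g << 2) | (r << 1) | s;
        #	}
        #
        #	int main(void)
        #	{
        #		uint64_t man = 0xffffffffffffffffULL;
        #		printf("%u %u\n", grs_of(man, 24, 0),
        #		       grs_of(man, 53, 0));	/* prints: 7 7 */
        #		return 0;
        #	}
        #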
   6342ext_grs:
   6343	swap		%d1			# have d1.w point to round precision
   6344	tst.b		%d1			# is rnd prec = extended?
   6345	bne.b		ext_grs_not_ext		# no; go handle sgl or dbl
   6346
   6347#
    6348# %d0 actually already holds g,r,s since _round() had it before calling
   6349# this function. so, as long as we don't disturb it, we are "returning" it.
   6350#
   6351ext_grs_ext:
   6352	swap		%d1			# yes; return to correct positions
   6353	rts
   6354
   6355ext_grs_not_ext:
   6356	movm.l		&0x3000, -(%sp)		# make some temp registers {d2/d3}
   6357
   6358	cmpi.b		%d1, &s_mode		# is rnd prec = sgl?
   6359	bne.b		ext_grs_dbl		# no; go handle dbl
   6360
   6361#
   6362# sgl:
   6363#	96		64	  40	32		0
   6364#	-----------------------------------------------------
   6365#	| EXP	|XXXXXXX|	  |xx	|		|grs|
   6366#	-----------------------------------------------------
   6367#			<--(24)--->nn\			   /
   6368#				   ee ---------------------
   6369#				   ww		|
   6370#						v
   6371#				   gr	   new sticky
   6372#
   6373ext_grs_sgl:
   6374	bfextu		FTEMP_HI(%a0){&24:&2}, %d3 # sgl prec. g-r are 2 bits right
   6375	mov.l		&30, %d2		# of the sgl prec. limits
   6376	lsl.l		%d2, %d3		# shift g-r bits to MSB of d3
   6377	mov.l		FTEMP_HI(%a0), %d2	# get word 2 for s-bit test
   6378	and.l		&0x0000003f, %d2	# s bit is the or of all other
   6379	bne.b		ext_grs_st_stky		# bits to the right of g-r
   6380	tst.l		FTEMP_LO(%a0)		# test lower mantissa
   6381	bne.b		ext_grs_st_stky		# if any are set, set sticky
   6382	tst.l		%d0			# test original g,r,s
   6383	bne.b		ext_grs_st_stky		# if any are set, set sticky
   6384	bra.b		ext_grs_end_sd		# if words 3 and 4 are clr, exit
   6385
   6386#
   6387# dbl:
   6388#	96		64		32	 11	0
   6389#	-----------------------------------------------------
   6390#	| EXP	|XXXXXXX|		|	 |xx	|grs|
   6391#	-----------------------------------------------------
   6392#						  nn\	    /
   6393#						  ee -------
   6394#						  ww	|
   6395#							v
   6396#						  gr	new sticky
   6397#
   6398ext_grs_dbl:
   6399	bfextu		FTEMP_LO(%a0){&21:&2}, %d3 # dbl-prec. g-r are 2 bits right
   6400	mov.l		&30, %d2		# of the dbl prec. limits
   6401	lsl.l		%d2, %d3		# shift g-r bits to the MSB of d3
   6402	mov.l		FTEMP_LO(%a0), %d2	# get lower mantissa  for s-bit test
   6403	and.l		&0x000001ff, %d2	# s bit is the or-ing of all
   6404	bne.b		ext_grs_st_stky		# other bits to the right of g-r
    6405	tst.l		%d0			# test original g,r,s
   6406	bne.b		ext_grs_st_stky		# if any are set, set sticky
   6407	bra.b		ext_grs_end_sd		# if clear, exit
   6408
   6409ext_grs_st_stky:
   6410	bset		&rnd_stky_bit, %d3	# set sticky bit
   6411ext_grs_end_sd:
   6412	mov.l		%d3, %d0		# return grs to d0
   6413
   6414	movm.l		(%sp)+, &0xc		# restore scratch registers {d2/d3}
   6415
   6416	swap		%d1			# restore d1 to original
   6417	rts
   6418
   6419#########################################################################
   6420# norm(): normalize the mantissa of an extended precision input. the	#
   6421#	  input operand should not be normalized already.		#
   6422#									#
   6423# XDEF ****************************************************************	#
   6424#	norm()								#
   6425#									#
   6426# XREF **************************************************************** #
   6427#	none								#
   6428#									#
   6429# INPUT *************************************************************** #
    6430#	a0 = pointer to fp extended precision operand to normalize	#
   6431#									#
   6432# OUTPUT ************************************************************** #
   6433#	d0 = number of bit positions the mantissa was shifted		#
   6434#	a0 = the input operand's mantissa is normalized; the exponent	#
   6435#	     is unchanged.						#
   6436#									#
   6437#########################################################################
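        #
        # Informally (not part of the original package), norm() is the usual
        # "left-justify by the leading-zero count" step; a C sketch with
        # illustrative names (the real code gets the count from bfffo):
        #
        #	#include <stdint.h>
        #	#include <stdio.h>
        #
        #	static unsigned norm64(uint64_t *man)
        #	{
        #		unsigned shift = 0;
        #		if (*man == 0)		/* callers never pass zero here */
        #			return 64;
        #		while (!(*man & (1ULL << 63))) {
        #			*man <<= 1;
        #			shift++;
        #		}
        #		return shift;
        #	}
        #
        #	int main(void)
        #	{
        #		uint64_t m = 0x0000000123456789ULL;
        #		unsigned n = norm64(&m);
        #		printf("shifted %u, man=%016llx\n", n,
        #		       (unsigned long long)m);	/* shifted 31 */
        #		return 0;
        #	}
        #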
   6438	global		norm
   6439norm:
   6440	mov.l		%d2, -(%sp)		# create some temp regs
   6441	mov.l		%d3, -(%sp)
   6442
   6443	mov.l		FTEMP_HI(%a0), %d0	# load hi(mantissa)
   6444	mov.l		FTEMP_LO(%a0), %d1	# load lo(mantissa)
   6445
   6446	bfffo		%d0{&0:&32}, %d2	# how many places to shift?
   6447	beq.b		norm_lo			# hi(man) is all zeroes!
   6448
   6449norm_hi:
   6450	lsl.l		%d2, %d0		# left shift hi(man)
   6451	bfextu		%d1{&0:%d2}, %d3	# extract lo bits
   6452
   6453	or.l		%d3, %d0		# create hi(man)
   6454	lsl.l		%d2, %d1		# create lo(man)
   6455
   6456	mov.l		%d0, FTEMP_HI(%a0)	# store new hi(man)
   6457	mov.l		%d1, FTEMP_LO(%a0)	# store new lo(man)
   6458
   6459	mov.l		%d2, %d0		# return shift amount
   6460
   6461	mov.l		(%sp)+, %d3		# restore temp regs
   6462	mov.l		(%sp)+, %d2
   6463
   6464	rts
   6465
   6466norm_lo:
   6467	bfffo		%d1{&0:&32}, %d2	# how many places to shift?
   6468	lsl.l		%d2, %d1		# shift lo(man)
   6469	add.l		&32, %d2		# add 32 to shft amount
   6470
   6471	mov.l		%d1, FTEMP_HI(%a0)	# store hi(man)
   6472	clr.l		FTEMP_LO(%a0)		# lo(man) is now zero
   6473
   6474	mov.l		%d2, %d0		# return shift amount
   6475
   6476	mov.l		(%sp)+, %d3		# restore temp regs
   6477	mov.l		(%sp)+, %d2
   6478
   6479	rts
   6480
   6481#########################################################################
   6482# unnorm_fix(): - changes an UNNORM to one of NORM, DENORM, or ZERO	#
   6483#		- returns corresponding optype tag			#
   6484#									#
   6485# XDEF ****************************************************************	#
   6486#	unnorm_fix()							#
   6487#									#
   6488# XREF **************************************************************** #
   6489#	norm() - normalize the mantissa					#
   6490#									#
   6491# INPUT *************************************************************** #
   6492#	a0 = pointer to unnormalized extended precision number		#
   6493#									#
   6494# OUTPUT ************************************************************** #
   6495#	d0 = optype tag - is corrected to one of NORM, DENORM, or ZERO	#
   6496#	a0 = input operand has been converted to a norm, denorm, or	#
   6497#	     zero; both the exponent and mantissa are changed.		#
   6498#									#
   6499#########################################################################
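        #
        # Informally (not part of the original package), the conversion can be
        # pictured with this simplified C sketch over a 15-bit exponent and a
        # 64-bit mantissa; names are illustrative and boundary handling is
        # simplified relative to the code below:
        #
        #	#include <stdint.h>
        #	#include <stdio.h>
        #
        #	enum tag { TAG_NORM, TAG_DENORM, TAG_ZERO };
        #
        #	static enum tag unnorm_fix_c(unsigned *exp, uint64_t *man)
        #	{
        #		unsigned shift = 0;
        #		uint64_t t = *man;
        #		if (t == 0) {		/* whole mantissa clear: a zero */
        #			*exp = 0;
        #			return TAG_ZERO;
        #		}
        #		while (!(t & (1ULL << 63))) {	/* leading-zero count */
        #			t <<= 1;
        #			shift++;
        #		}
        #		if (shift < *exp) {	/* exponent survives full normalize */
        #			*exp -= shift;
        #			*man <<= shift;
        #			return TAG_NORM;
        #		}
        #		*man <<= *exp;		/* shift only until exp hits zero */
        #		*exp = 0;
        #		return TAG_DENORM;
        #	}
        #
        #	int main(void)
        #	{
        #		unsigned e = 3; uint64_t m = 0x0000ffff00000000ULL;
        #		int t = unnorm_fix_c(&e, &m);	/* -> TAG_DENORM */
        #		printf("tag=%d exp=%u man=%016llx\n", t, e,
        #		       (unsigned long long)m);
        #		return 0;
        #	}
        #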
   6500
   6501	global		unnorm_fix
   6502unnorm_fix:
   6503	bfffo		FTEMP_HI(%a0){&0:&32}, %d0 # how many shifts are needed?
   6504	bne.b		unnorm_shift		# hi(man) is not all zeroes
   6505
   6506#
   6507# hi(man) is all zeroes so see if any bits in lo(man) are set
   6508#
   6509unnorm_chk_lo:
   6510	bfffo		FTEMP_LO(%a0){&0:&32}, %d0 # is operand really a zero?
   6511	beq.w		unnorm_zero		# yes
   6512
   6513	add.w		&32, %d0		# no; fix shift distance
   6514
   6515#
   6516# d0 = # shifts needed for complete normalization
   6517#
   6518unnorm_shift:
   6519	clr.l		%d1			# clear top word
   6520	mov.w		FTEMP_EX(%a0), %d1	# extract exponent
   6521	and.w		&0x7fff, %d1		# strip off sgn
   6522
   6523	cmp.w		%d0, %d1		# will denorm push exp < 0?
   6524	bgt.b		unnorm_nrm_zero		# yes; denorm only until exp = 0
   6525
   6526#
   6527# exponent would not go < 0. Therefore, number stays normalized
   6528#
   6529	sub.w		%d0, %d1		# shift exponent value
   6530	mov.w		FTEMP_EX(%a0), %d0	# load old exponent
   6531	and.w		&0x8000, %d0		# save old sign
   6532	or.w		%d0, %d1		# {sgn,new exp}
   6533	mov.w		%d1, FTEMP_EX(%a0)	# insert new exponent
   6534
   6535	bsr.l		norm			# normalize UNNORM
   6536
   6537	mov.b		&NORM, %d0		# return new optype tag
   6538	rts
   6539
   6540#
   6541# exponent would go < 0, so only denormalize until exp = 0
   6542#
   6543unnorm_nrm_zero:
   6544	cmp.b		%d1, &32		# is exp <= 32?
   6545	bgt.b		unnorm_nrm_zero_lrg	# no; go handle large exponent
   6546
   6547	bfextu		FTEMP_HI(%a0){%d1:&32}, %d0 # extract new hi(man)
   6548	mov.l		%d0, FTEMP_HI(%a0)	# save new hi(man)
   6549
   6550	mov.l		FTEMP_LO(%a0), %d0	# fetch old lo(man)
   6551	lsl.l		%d1, %d0		# extract new lo(man)
   6552	mov.l		%d0, FTEMP_LO(%a0)	# save new lo(man)
   6553
   6554	and.w		&0x8000, FTEMP_EX(%a0)	# set exp = 0
   6555
   6556	mov.b		&DENORM, %d0		# return new optype tag
   6557	rts
   6558
   6559#
   6560# only mantissa bits set are in lo(man)
   6561#
   6562unnorm_nrm_zero_lrg:
   6563	sub.w		&32, %d1		# adjust shft amt by 32
   6564
   6565	mov.l		FTEMP_LO(%a0), %d0	# fetch old lo(man)
   6566	lsl.l		%d1, %d0		# left shift lo(man)
   6567
   6568	mov.l		%d0, FTEMP_HI(%a0)	# store new hi(man)
   6569	clr.l		FTEMP_LO(%a0)		# lo(man) = 0
   6570
   6571	and.w		&0x8000, FTEMP_EX(%a0)	# set exp = 0
   6572
   6573	mov.b		&DENORM, %d0		# return new optype tag
   6574	rts
   6575
   6576#
   6577# whole mantissa is zero so this UNNORM is actually a zero
   6578#
   6579unnorm_zero:
   6580	and.w		&0x8000, FTEMP_EX(%a0)	# force exponent to zero
   6581
   6582	mov.b		&ZERO, %d0		# fix optype tag
   6583	rts
   6584
   6585#########################################################################
   6586# XDEF ****************************************************************	#
   6587#	set_tag_x(): return the optype of the input ext fp number	#
   6588#									#
   6589# XREF ****************************************************************	#
   6590#	None								#
   6591#									#
   6592# INPUT ***************************************************************	#
   6593#	a0 = pointer to extended precision operand			#
   6594#									#
   6595# OUTPUT **************************************************************	#
   6596#	d0 = value of type tag						#
   6597#		one of: NORM, INF, QNAN, SNAN, DENORM, UNNORM, ZERO	#
   6598#									#
   6599# ALGORITHM ***********************************************************	#
   6600#	Simply test the exponent, j-bit, and mantissa values to		#
   6601# determine the type of operand.					#
   6602#	If it's an unnormalized zero, alter the operand and force it	#
   6603# to be a normal zero.							#
   6604#									#
   6605#########################################################################
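        #
        # Informally (not part of the original package), the classification
        # reads as the following C sketch over the 15-bit exponent and the
        # explicit-j-bit 64-bit mantissa; names are illustrative:
        #
        #	#include <stdint.h>
        #	#include <stdio.h>
        #
        #	enum tag { T_NORM, T_ZERO, T_INF, T_QNAN, T_SNAN, T_DENORM, T_UNNORM };
        #
        #	static enum tag set_tag_x_c(unsigned exp15, uint64_t man)
        #	{
        #		if (exp15 == 0x7fff) {		/* INF or NAN */
        #			if ((man & 0x7fffffffffffffffULL) == 0)
        #				return T_INF;	/* j-bit is a don't care */
        #			return (man & (1ULL << 62)) ? T_QNAN : T_SNAN;
        #		}
        #		if (man & (1ULL << 63))		/* j-bit set */
        #			return T_NORM;
        #		if (exp15 != 0)			/* j-bit clear, exp != 0 */
        #			return man ? T_UNNORM : T_ZERO;	/* unnorm zero -> zero */
        #		return man ? T_DENORM : T_ZERO;
        #	}
        #
        #	int main(void)
        #	{
        #		printf("%d %d\n", set_tag_x_c(0x7fff, 0),	/* T_INF */
        #		       set_tag_x_c(0, 1));			/* T_DENORM */
        #		return 0;
        #	}
        #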
   6606
   6607	global		set_tag_x
   6608set_tag_x:
   6609	mov.w		FTEMP_EX(%a0), %d0	# extract exponent
   6610	andi.w		&0x7fff, %d0		# strip off sign
   6611	cmpi.w		%d0, &0x7fff		# is (EXP == MAX)?
   6612	beq.b		inf_or_nan_x
   6613not_inf_or_nan_x:
   6614	btst		&0x7,FTEMP_HI(%a0)
   6615	beq.b		not_norm_x
   6616is_norm_x:
   6617	mov.b		&NORM, %d0
   6618	rts
   6619not_norm_x:
   6620	tst.w		%d0			# is exponent = 0?
   6621	bne.b		is_unnorm_x
   6622not_unnorm_x:
   6623	tst.l		FTEMP_HI(%a0)
   6624	bne.b		is_denorm_x
   6625	tst.l		FTEMP_LO(%a0)
   6626	bne.b		is_denorm_x
   6627is_zero_x:
   6628	mov.b		&ZERO, %d0
   6629	rts
   6630is_denorm_x:
   6631	mov.b		&DENORM, %d0
   6632	rts
    6633# we must now distinguish "unnormalized zeroes", which we
    6634# must convert to zero.
   6635is_unnorm_x:
   6636	tst.l		FTEMP_HI(%a0)
   6637	bne.b		is_unnorm_reg_x
   6638	tst.l		FTEMP_LO(%a0)
   6639	bne.b		is_unnorm_reg_x
   6640# it's an "unnormalized zero". let's convert it to an actual zero...
   6641	andi.w		&0x8000,FTEMP_EX(%a0)	# clear exponent
   6642	mov.b		&ZERO, %d0
   6643	rts
   6644is_unnorm_reg_x:
   6645	mov.b		&UNNORM, %d0
   6646	rts
   6647inf_or_nan_x:
   6648	tst.l		FTEMP_LO(%a0)
   6649	bne.b		is_nan_x
   6650	mov.l		FTEMP_HI(%a0), %d0
   6651	and.l		&0x7fffffff, %d0	# msb is a don't care!
   6652	bne.b		is_nan_x
   6653is_inf_x:
   6654	mov.b		&INF, %d0
   6655	rts
   6656is_nan_x:
   6657	btst		&0x6, FTEMP_HI(%a0)
   6658	beq.b		is_snan_x
   6659	mov.b		&QNAN, %d0
   6660	rts
   6661is_snan_x:
   6662	mov.b		&SNAN, %d0
   6663	rts
   6664
   6665#########################################################################
   6666# XDEF ****************************************************************	#
   6667#	set_tag_d(): return the optype of the input dbl fp number	#
   6668#									#
   6669# XREF ****************************************************************	#
   6670#	None								#
   6671#									#
   6672# INPUT ***************************************************************	#
   6673#	a0 = points to double precision operand				#
   6674#									#
   6675# OUTPUT **************************************************************	#
   6676#	d0 = value of type tag						#
   6677#		one of: NORM, INF, QNAN, SNAN, DENORM, ZERO		#
   6678#									#
   6679# ALGORITHM ***********************************************************	#
   6680#	Simply test the exponent, j-bit, and mantissa values to		#
   6681# determine the type of operand.					#
   6682#									#
   6683#########################################################################
   6684
   6685	global		set_tag_d
   6686set_tag_d:
   6687	mov.l		FTEMP(%a0), %d0
   6688	mov.l		%d0, %d1
   6689
   6690	andi.l		&0x7ff00000, %d0
   6691	beq.b		zero_or_denorm_d
   6692
   6693	cmpi.l		%d0, &0x7ff00000
   6694	beq.b		inf_or_nan_d
   6695
   6696is_norm_d:
   6697	mov.b		&NORM, %d0
   6698	rts
   6699zero_or_denorm_d:
   6700	and.l		&0x000fffff, %d1
   6701	bne		is_denorm_d
   6702	tst.l		4+FTEMP(%a0)
   6703	bne		is_denorm_d
   6704is_zero_d:
   6705	mov.b		&ZERO, %d0
   6706	rts
   6707is_denorm_d:
   6708	mov.b		&DENORM, %d0
   6709	rts
   6710inf_or_nan_d:
   6711	and.l		&0x000fffff, %d1
   6712	bne		is_nan_d
   6713	tst.l		4+FTEMP(%a0)
   6714	bne		is_nan_d
   6715is_inf_d:
   6716	mov.b		&INF, %d0
   6717	rts
   6718is_nan_d:
   6719	btst		&19, %d1
   6720	bne		is_qnan_d
   6721is_snan_d:
   6722	mov.b		&SNAN, %d0
   6723	rts
   6724is_qnan_d:
   6725	mov.b		&QNAN, %d0
   6726	rts
   6727
   6728#########################################################################
   6729# XDEF ****************************************************************	#
   6730#	set_tag_s(): return the optype of the input sgl fp number	#
   6731#									#
   6732# XREF ****************************************************************	#
   6733#	None								#
   6734#									#
   6735# INPUT ***************************************************************	#
   6736#	a0 = pointer to single precision operand			#
   6737#									#
   6738# OUTPUT **************************************************************	#
   6739#	d0 = value of type tag						#
   6740#		one of: NORM, INF, QNAN, SNAN, DENORM, ZERO		#
   6741#									#
   6742# ALGORITHM ***********************************************************	#
   6743#	Simply test the exponent, j-bit, and mantissa values to		#
   6744# determine the type of operand.					#
   6745#									#
   6746#########################################################################
   6747
   6748	global		set_tag_s
   6749set_tag_s:
   6750	mov.l		FTEMP(%a0), %d0
   6751	mov.l		%d0, %d1
   6752
   6753	andi.l		&0x7f800000, %d0
   6754	beq.b		zero_or_denorm_s
   6755
   6756	cmpi.l		%d0, &0x7f800000
   6757	beq.b		inf_or_nan_s
   6758
   6759is_norm_s:
   6760	mov.b		&NORM, %d0
   6761	rts
   6762zero_or_denorm_s:
   6763	and.l		&0x007fffff, %d1
   6764	bne		is_denorm_s
   6765is_zero_s:
   6766	mov.b		&ZERO, %d0
   6767	rts
   6768is_denorm_s:
   6769	mov.b		&DENORM, %d0
   6770	rts
   6771inf_or_nan_s:
   6772	and.l		&0x007fffff, %d1
   6773	bne		is_nan_s
   6774is_inf_s:
   6775	mov.b		&INF, %d0
   6776	rts
   6777is_nan_s:
   6778	btst		&22, %d1
   6779	bne		is_qnan_s
   6780is_snan_s:
   6781	mov.b		&SNAN, %d0
   6782	rts
   6783is_qnan_s:
   6784	mov.b		&QNAN, %d0
   6785	rts
   6786
   6787#########################################################################
   6788# XDEF ****************************************************************	#
   6789#	unf_res(): routine to produce default underflow result of a	#
   6790#		   scaled extended precision number; this is used by	#
   6791#		   fadd/fdiv/fmul/etc. emulation routines.		#
   6792#	unf_res4(): same as above but for fsglmul/fsgldiv which use	#
   6793#		    single round prec and extended prec mode.		#
   6794#									#
   6795# XREF ****************************************************************	#
   6796#	_denorm() - denormalize according to scale factor		#
   6797#	_round() - round denormalized number according to rnd prec	#
   6798#									#
   6799# INPUT ***************************************************************	#
    6800#	a0 = pointer to extended precision operand			#
   6801#	d0 = scale factor						#
   6802#	d1 = rounding precision/mode					#
   6803#									#
   6804# OUTPUT **************************************************************	#
   6805#	a0 = pointer to default underflow result in extended precision	#
   6806#	d0.b = result FPSR_cc which caller may or may not want to save	#
   6807#									#
   6808# ALGORITHM ***********************************************************	#
   6809#	Convert the input operand to "internal format" which means the	#
   6810# exponent is extended to 16 bits and the sign is stored in the unused	#
    6811# portion of the extended precision operand. Denormalize the number	#
   6812# according to the scale factor passed in d0. Then, round the		#
   6813# denormalized result.							#
   6814#	Set the FPSR_exc bits as appropriate but return the cc bits in	#
   6815# d0 in case the caller doesn't want to save them (as is the case for	#
   6816# fmove out).								#
   6817#	unf_res4() for fsglmul/fsgldiv forces the denorm to extended	#
   6818# precision and the rounding mode to single.				#
   6819#									#
   6820#########################################################################
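        #
        # Informally (not part of the original package), the overall effect for
        # round-to-nearest can be pictured with this approximate C sketch: drop
        # (scale factor + precision) low bits into g,r,s, then round; names are
        # illustrative and the exponent/ccode handling is omitted:
        #
        #	#include <stdint.h>
        #	#include <stdio.h>
        #
        #	static uint64_t unf_default(uint64_t man, unsigned scale,
        #				    unsigned kept, int *inexact)
        #	{
        #		unsigned g = 0, r = 0, s = 0, i;
        #		unsigned drop = (64 - kept) + scale;
        #		for (i = 0; i < drop; i++) {	/* peel bits into g,r,s */
        #			s |= r; r = g; g = (unsigned)(man & 1);
        #			man >>= 1;
        #		}
        #		*inexact = (g | r | s) != 0;
        #		if (g && ((r | s) || (man & 1)))  /* round to nearest even */
        #			man += 1;
        #		return man;	/* denormalized destination mantissa */
        #	}
        #
        #	int main(void)
        #	{
        #		int inex;
        #		uint64_t res = unf_default(0xffffffffffffffffULL, 3, 24, &inex);
        #		printf("%llx inexact=%d\n",
        #		       (unsigned long long)res, inex);	/* 200000 inexact=1 */
        #		return 0;
        #	}
        #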
   6821	global		unf_res
   6822unf_res:
   6823	mov.l		%d1, -(%sp)		# save rnd prec,mode on stack
   6824
   6825	btst		&0x7, FTEMP_EX(%a0)	# make "internal" format
   6826	sne		FTEMP_SGN(%a0)
   6827
   6828	mov.w		FTEMP_EX(%a0), %d1	# extract exponent
   6829	and.w		&0x7fff, %d1
   6830	sub.w		%d0, %d1
   6831	mov.w		%d1, FTEMP_EX(%a0)	# insert 16 bit exponent
   6832
   6833	mov.l		%a0, -(%sp)		# save operand ptr during calls
   6834
   6835	mov.l		0x4(%sp),%d0		# pass rnd prec.
   6836	andi.w		&0x00c0,%d0
   6837	lsr.w		&0x4,%d0
   6838	bsr.l		_denorm			# denorm result
   6839
   6840	mov.l		(%sp),%a0
   6841	mov.w		0x6(%sp),%d1		# load prec:mode into %d1
   6842	andi.w		&0xc0,%d1		# extract rnd prec
   6843	lsr.w		&0x4,%d1
   6844	swap		%d1
   6845	mov.w		0x6(%sp),%d1
   6846	andi.w		&0x30,%d1
   6847	lsr.w		&0x4,%d1
   6848	bsr.l		_round			# round the denorm
   6849
   6850	mov.l		(%sp)+, %a0
   6851
   6852# result is now rounded properly. convert back to normal format
   6853	bclr		&0x7, FTEMP_EX(%a0)	# clear sgn first; may have residue
   6854	tst.b		FTEMP_SGN(%a0)		# is "internal result" sign set?
   6855	beq.b		unf_res_chkifzero	# no; result is positive
   6856	bset		&0x7, FTEMP_EX(%a0)	# set result sgn
   6857	clr.b		FTEMP_SGN(%a0)		# clear temp sign
   6858
   6859# the number may have become zero after rounding. set ccodes accordingly.
   6860unf_res_chkifzero:
   6861	clr.l		%d0
   6862	tst.l		FTEMP_HI(%a0)		# is value now a zero?
   6863	bne.b		unf_res_cont		# no
   6864	tst.l		FTEMP_LO(%a0)
   6865	bne.b		unf_res_cont		# no
   6866#	bset		&z_bit, FPSR_CC(%a6)	# yes; set zero ccode bit
   6867	bset		&z_bit, %d0		# yes; set zero ccode bit
   6868
   6869unf_res_cont:
   6870
   6871#
   6872# can inex1 also be set along with unfl and inex2???
   6873#
   6874# we know that underflow has occurred. aunfl should be set if INEX2 is also set.
   6875#
   6876	btst		&inex2_bit, FPSR_EXCEPT(%a6) # is INEX2 set?
   6877	beq.b		unf_res_end		# no
   6878	bset		&aunfl_bit, FPSR_AEXCEPT(%a6) # yes; set aunfl
   6879
   6880unf_res_end:
   6881	add.l		&0x4, %sp		# clear stack
   6882	rts
   6883
   6884# unf_res() for fsglmul() and fsgldiv().
   6885	global		unf_res4
   6886unf_res4:
   6887	mov.l		%d1,-(%sp)		# save rnd prec,mode on stack
   6888
   6889	btst		&0x7,FTEMP_EX(%a0)	# make "internal" format
   6890	sne		FTEMP_SGN(%a0)
   6891
   6892	mov.w		FTEMP_EX(%a0),%d1	# extract exponent
   6893	and.w		&0x7fff,%d1
   6894	sub.w		%d0,%d1
   6895	mov.w		%d1,FTEMP_EX(%a0)	# insert 16 bit exponent
   6896
   6897	mov.l		%a0,-(%sp)		# save operand ptr during calls
   6898
   6899	clr.l		%d0			# force rnd prec = ext
   6900	bsr.l		_denorm			# denorm result
   6901
   6902	mov.l		(%sp),%a0
   6903	mov.w		&s_mode,%d1		# force rnd prec = sgl
   6904	swap		%d1
   6905	mov.w		0x6(%sp),%d1		# load rnd mode
   6906	andi.w		&0x30,%d1		# extract rnd prec
   6907	lsr.w		&0x4,%d1
   6908	bsr.l		_round			# round the denorm
   6909
   6910	mov.l		(%sp)+,%a0
   6911
   6912# result is now rounded properly. convert back to normal format
   6913	bclr		&0x7,FTEMP_EX(%a0)	# clear sgn first; may have residue
   6914	tst.b		FTEMP_SGN(%a0)		# is "internal result" sign set?
   6915	beq.b		unf_res4_chkifzero	# no; result is positive
   6916	bset		&0x7,FTEMP_EX(%a0)	# set result sgn
   6917	clr.b		FTEMP_SGN(%a0)		# clear temp sign
   6918
   6919# the number may have become zero after rounding. set ccodes accordingly.
   6920unf_res4_chkifzero:
   6921	clr.l		%d0
   6922	tst.l		FTEMP_HI(%a0)		# is value now a zero?
   6923	bne.b		unf_res4_cont		# no
   6924	tst.l		FTEMP_LO(%a0)
   6925	bne.b		unf_res4_cont		# no
   6926#	bset		&z_bit,FPSR_CC(%a6)	# yes; set zero ccode bit
   6927	bset		&z_bit,%d0		# yes; set zero ccode bit
   6928
   6929unf_res4_cont:
   6930
   6931#
   6932# can inex1 also be set along with unfl and inex2???
   6933#
   6934# we know that underflow has occurred. aunfl should be set if INEX2 is also set.
   6935#
   6936	btst		&inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
   6937	beq.b		unf_res4_end		# no
   6938	bset		&aunfl_bit,FPSR_AEXCEPT(%a6) # yes; set aunfl
   6939
   6940unf_res4_end:
   6941	add.l		&0x4,%sp		# clear stack
   6942	rts
   6943
   6944#########################################################################
   6945# XDEF ****************************************************************	#
   6946#	ovf_res(): routine to produce the default overflow result of	#
   6947#		   an overflowing number.				#
   6948#	ovf_res2(): same as above but the rnd mode/prec are passed	#
   6949#		    differently.					#
   6950#									#
   6951# XREF ****************************************************************	#
   6952#	none								#
   6953#									#
   6954# INPUT ***************************************************************	#
   6955#	d1.b	= '-1' => (-); '0' => (+)				#
   6956#   ovf_res():								#
   6957#	d0	= rnd mode/prec						#
   6958#   ovf_res2():								#
   6959#	hi(d0)	= rnd prec						#
   6960#	lo(d0)	= rnd mode						#
   6961#									#
   6962# OUTPUT **************************************************************	#
   6963#	a0	= points to extended precision result			#
   6964#	d0.b	= condition code bits					#
   6965#									#
   6966# ALGORITHM ***********************************************************	#
   6967#	The default overflow result can be determined by the sign of	#
   6968# the result and the rounding mode/prec in effect. These bits are	#
   6969# concatenated together to create an index into the default result	#
   6970# table. A pointer to the correct result is returned in a0. The		#
   6971# resulting condition codes are returned in d0 in case the caller	#
   6972# doesn't want FPSR_cc altered (as is the case for fmove out).		#
   6973#									#
   6974#########################################################################
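        #
        # Informally (not part of the original package), the table below encodes
        # the usual IEEE default-overflow rule, which in C reads roughly as
        # follows (names illustrative; MAX means the largest finite number of
        # the destination precision):
        #
        #	#include <stdio.h>
        #
        #	/* mode: 0=RN 1=RZ 2=RM 3=RP */
        #	static const char *ovf_default(int neg, int mode)
        #	{
        #		if (mode == 0)			/* RN: always an infinity */
        #			return neg ? "-INF" : "+INF";
        #		if (mode == 1)			/* RZ: always +/-MAX */
        #			return neg ? "-MAX" : "+MAX";
        #		if (mode == 2)			/* RM */
        #			return neg ? "-INF" : "+MAX";
        #		return neg ? "-MAX" : "+INF";	/* RP */
        #	}
        #
        #	int main(void)
        #	{
        #		printf("%s %s\n", ovf_default(0, 2),
        #		       ovf_default(1, 2));	/* prints: +MAX -INF */
        #		return 0;
        #	}
        #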
   6975
   6976	global		ovf_res
   6977ovf_res:
   6978	andi.w		&0x10,%d1		# keep result sign
   6979	lsr.b		&0x4,%d0		# shift prec/mode
   6980	or.b		%d0,%d1			# concat the two
   6981	mov.w		%d1,%d0			# make a copy
   6982	lsl.b		&0x1,%d1		# multiply d1 by 2
   6983	bra.b		ovf_res_load
   6984
   6985	global		ovf_res2
   6986ovf_res2:
   6987	and.w		&0x10, %d1		# keep result sign
   6988	or.b		%d0, %d1		# insert rnd mode
   6989	swap		%d0
   6990	or.b		%d0, %d1		# insert rnd prec
   6991	mov.w		%d1, %d0		# make a copy
   6992	lsl.b		&0x1, %d1		# shift left by 1
   6993
   6994#
    6995# use the rounding mode, precision, and result sign as an index into the
   6996# two tables below to fetch the default result and the result ccodes.
   6997#
   6998ovf_res_load:
   6999	mov.b		(tbl_ovfl_cc.b,%pc,%d0.w*1), %d0 # fetch result ccodes
   7000	lea		(tbl_ovfl_result.b,%pc,%d1.w*8), %a0 # return result ptr
   7001
   7002	rts
   7003
   7004tbl_ovfl_cc:
   7005	byte		0x2, 0x0, 0x0, 0x2
   7006	byte		0x2, 0x0, 0x0, 0x2
   7007	byte		0x2, 0x0, 0x0, 0x2
   7008	byte		0x0, 0x0, 0x0, 0x0
   7009	byte		0x2+0x8, 0x8, 0x2+0x8, 0x8
   7010	byte		0x2+0x8, 0x8, 0x2+0x8, 0x8
   7011	byte		0x2+0x8, 0x8, 0x2+0x8, 0x8
   7012
   7013tbl_ovfl_result:
   7014	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
   7015	long		0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RZ
   7016	long		0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RM
   7017	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
   7018
   7019	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
   7020	long		0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RZ
   7021	long		0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RM
   7022	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
   7023
   7024	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
   7025	long		0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RZ
   7026	long		0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RM
   7027	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
   7028
   7029	long		0x00000000,0x00000000,0x00000000,0x00000000
   7030	long		0x00000000,0x00000000,0x00000000,0x00000000
   7031	long		0x00000000,0x00000000,0x00000000,0x00000000
   7032	long		0x00000000,0x00000000,0x00000000,0x00000000
   7033
   7034	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
   7035	long		0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RZ
   7036	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
   7037	long		0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RP
   7038
   7039	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
   7040	long		0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RZ
   7041	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
   7042	long		0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RP
   7043
   7044	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
   7045	long		0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RZ
   7046	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
   7047	long		0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RP
   7048
   7049#########################################################################
   7050# XDEF ****************************************************************	#
   7051#	fout(): move from fp register to memory or data register	#
   7052#									#
   7053# XREF ****************************************************************	#
   7054#	_round() - needed to create EXOP for sgl/dbl precision		#
   7055#	norm() - needed to create EXOP for extended precision		#
   7056#	ovf_res() - create default overflow result for sgl/dbl precision#
   7057#	unf_res() - create default underflow result for sgl/dbl prec.	#
   7058#	dst_dbl() - create rounded dbl precision result.		#
   7059#	dst_sgl() - create rounded sgl precision result.		#
   7060#	fetch_dreg() - fetch dynamic k-factor reg for packed.		#
   7061#	bindec() - convert FP binary number to packed number.		#
   7062#	_mem_write() - write data to memory.				#
   7063#	_mem_write2() - write data to memory unless supv mode -(a7) exc.#
   7064#	_dmem_write_{byte,word,long}() - write data to memory.		#
   7065#	store_dreg_{b,w,l}() - store data to data register file.	#
   7066#	facc_out_{b,w,l,d,x}() - data access error occurred.		#
   7067#									#
   7068# INPUT ***************************************************************	#
   7069#	a0 = pointer to extended precision source operand		#
   7070#	d0 = round prec,mode						#
   7071#									#
   7072# OUTPUT **************************************************************	#
   7073#	fp0 : intermediate underflow or overflow result if		#
   7074#	      OVFL/UNFL occurred for a sgl or dbl operand		#
   7075#									#
   7076# ALGORITHM ***********************************************************	#
   7077#	This routine is accessed by many handlers that need to do an	#
   7078# opclass three move of an operand out to memory.			#
   7079#	Decode an fmove out (opclass 3) instruction to determine if	#
   7080# it's b,w,l,s,d,x, or p in size. b,w,l can be stored to either a data	#
   7081# register or memory. The algorithm uses a standard "fmove" to create	#
   7082# the rounded result. Also, since exceptions are disabled, this also	#
   7083# create the correct OPERR default result if appropriate.		#
   7084#	For sgl or dbl precision, overflow or underflow can occur. If	#
   7085# either occurs and is enabled, the EXOP.				#
   7086#	For extended precision, the stacked <ea> must be fixed along	#
   7087# w/ the address index register as appropriate w/ _calc_ea_fout(). If	#
   7088# the source is a denorm and if underflow is enabled, an EXOP must be	#
   7089# created.								#
   7090#	For packed, the k-factor must be fetched from the instruction	#
   7091# word or a data register. The <ea> must be fixed as w/ extended	#
   7092# precision. Then, bindec() is called to create the appropriate		#
   7093# packed result.							#
   7094#	If at any time an access error is flagged by one of the move-	#
   7095# to-memory routines, then a special exit must be made so that the	#
   7096# access error can be handled properly.					#
   7097#									#
   7098#########################################################################
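        #
        # Informally (not part of the original package), the dispatch below
        # amounts to a switch on the 3-bit destination-format field that the
        # bfextu pulls from the command word; a C sketch with illustrative
        # names (the field position follows from the bfextu offset/width):
        #
        #	#include <stdio.h>
        #
        #	static const char *fout_fmt(unsigned cmdreg)
        #	{
        #		static const char *fmt[8] = {
        #			"long", "sgl", "ext", "packed",
        #			"word", "dbl", "byte", "packed"
        #		};
        #		return fmt[(cmdreg >> 10) & 0x7]; /* dst format field */
        #	}
        #
        #	int main(void)
        #	{
        #		printf("%s\n", fout_fmt(1 << 10));	/* prints: sgl */
        #		return 0;
        #	}
        #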
   7099
   7100	global		fout
   7101fout:
   7102	bfextu		EXC_CMDREG(%a6){&3:&3},%d1 # extract dst fmt
   7103	mov.w		(tbl_fout.b,%pc,%d1.w*2),%a1 # use as index
   7104	jmp		(tbl_fout.b,%pc,%a1)	# jump to routine
   7105
   7106	swbeg		&0x8
   7107tbl_fout:
   7108	short		fout_long	-	tbl_fout
   7109	short		fout_sgl	-	tbl_fout
   7110	short		fout_ext	-	tbl_fout
   7111	short		fout_pack	-	tbl_fout
   7112	short		fout_word	-	tbl_fout
   7113	short		fout_dbl	-	tbl_fout
   7114	short		fout_byte	-	tbl_fout
   7115	short		fout_pack	-	tbl_fout
   7116
   7117#################################################################
   7118# fmove.b out ###################################################
   7119#################################################################
   7120
   7121# Only "Unimplemented Data Type" exceptions enter here. The operand
   7122# is either a DENORM or a NORM.
   7123fout_byte:
   7124	tst.b		STAG(%a6)		# is operand normalized?
   7125	bne.b		fout_byte_denorm	# no
   7126
   7127	fmovm.x		SRC(%a0),&0x80		# load value
   7128
   7129fout_byte_norm:
   7130	fmov.l		%d0,%fpcr		# insert rnd prec,mode
   7131
   7132	fmov.b		%fp0,%d0		# exec move out w/ correct rnd mode
   7133
   7134	fmov.l		&0x0,%fpcr		# clear FPCR
   7135	fmov.l		%fpsr,%d1		# fetch FPSR
   7136	or.w		%d1,2+USER_FPSR(%a6)	# save new exc,accrued bits
   7137
   7138	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
   7139	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
   7140	beq.b		fout_byte_dn		# must save to integer regfile
   7141
   7142	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
   7143	bsr.l		_dmem_write_byte	# write byte
   7144
   7145	tst.l		%d1			# did dstore fail?
   7146	bne.l		facc_out_b		# yes
   7147
   7148	rts
   7149
   7150fout_byte_dn:
   7151	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
   7152	andi.w		&0x7,%d1
   7153	bsr.l		store_dreg_b
   7154	rts
   7155
   7156fout_byte_denorm:
   7157	mov.l		SRC_EX(%a0),%d1
   7158	andi.l		&0x80000000,%d1		# keep DENORM sign
   7159	ori.l		&0x00800000,%d1		# make smallest sgl
   7160	fmov.s		%d1,%fp0
   7161	bra.b		fout_byte_norm
   7162
   7163#################################################################
   7164# fmove.w out ###################################################
   7165#################################################################
   7166
   7167# Only "Unimplemented Data Type" exceptions enter here. The operand
   7168# is either a DENORM or a NORM.
   7169fout_word:
   7170	tst.b		STAG(%a6)		# is operand normalized?
   7171	bne.b		fout_word_denorm	# no
   7172
   7173	fmovm.x		SRC(%a0),&0x80		# load value
   7174
   7175fout_word_norm:
   7176	fmov.l		%d0,%fpcr		# insert rnd prec:mode
   7177
   7178	fmov.w		%fp0,%d0		# exec move out w/ correct rnd mode
   7179
   7180	fmov.l		&0x0,%fpcr		# clear FPCR
   7181	fmov.l		%fpsr,%d1		# fetch FPSR
   7182	or.w		%d1,2+USER_FPSR(%a6)	# save new exc,accrued bits
   7183
   7184	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
   7185	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
   7186	beq.b		fout_word_dn		# must save to integer regfile
   7187
   7188	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
   7189	bsr.l		_dmem_write_word	# write word
   7190
   7191	tst.l		%d1			# did dstore fail?
   7192	bne.l		facc_out_w		# yes
   7193
   7194	rts
   7195
   7196fout_word_dn:
   7197	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
   7198	andi.w		&0x7,%d1
   7199	bsr.l		store_dreg_w
   7200	rts
   7201
   7202fout_word_denorm:
   7203	mov.l		SRC_EX(%a0),%d1
   7204	andi.l		&0x80000000,%d1		# keep DENORM sign
   7205	ori.l		&0x00800000,%d1		# make smallest sgl
   7206	fmov.s		%d1,%fp0
   7207	bra.b		fout_word_norm
   7208
   7209#################################################################
   7210# fmove.l out ###################################################
   7211#################################################################
   7212
   7213# Only "Unimplemented Data Type" exceptions enter here. The operand
   7214# is either a DENORM or a NORM.
   7215fout_long:
   7216	tst.b		STAG(%a6)		# is operand normalized?
   7217	bne.b		fout_long_denorm	# no
   7218
   7219	fmovm.x		SRC(%a0),&0x80		# load value
   7220
   7221fout_long_norm:
   7222	fmov.l		%d0,%fpcr		# insert rnd prec:mode
   7223
   7224	fmov.l		%fp0,%d0		# exec move out w/ correct rnd mode
   7225
   7226	fmov.l		&0x0,%fpcr		# clear FPCR
   7227	fmov.l		%fpsr,%d1		# fetch FPSR
   7228	or.w		%d1,2+USER_FPSR(%a6)	# save new exc,accrued bits
   7229
   7230fout_long_write:
   7231	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
   7232	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
   7233	beq.b		fout_long_dn		# must save to integer regfile
   7234
   7235	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
   7236	bsr.l		_dmem_write_long	# write long
   7237
   7238	tst.l		%d1			# did dstore fail?
   7239	bne.l		facc_out_l		# yes
   7240
   7241	rts
   7242
   7243fout_long_dn:
   7244	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
   7245	andi.w		&0x7,%d1
   7246	bsr.l		store_dreg_l
   7247	rts
   7248
   7249fout_long_denorm:
   7250	mov.l		SRC_EX(%a0),%d1
   7251	andi.l		&0x80000000,%d1		# keep DENORM sign
   7252	ori.l		&0x00800000,%d1		# make smallest sgl
   7253	fmov.s		%d1,%fp0
   7254	bra.b		fout_long_norm
   7255
   7256#################################################################
   7257# fmove.x out ###################################################
   7258#################################################################
   7259
   7260# Only "Unimplemented Data Type" exceptions enter here. The operand
   7261# is either a DENORM or a NORM.
   7262# The DENORM causes an Underflow exception.
   7263fout_ext:
   7264
   7265# we copy the extended precision result to FP_SCR0 so that the reserved
   7266# 16-bit field gets zeroed. we do this since we promise not to disturb
   7267# what's at SRC(a0).
   7268	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
   7269	clr.w		2+FP_SCR0_EX(%a6)	# clear reserved field
   7270	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
   7271	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   7272
   7273	fmovm.x		SRC(%a0),&0x80		# return result
   7274
   7275	bsr.l		_calc_ea_fout		# fix stacked <ea>
   7276
   7277	mov.l		%a0,%a1			# pass: dst addr
   7278	lea		FP_SCR0(%a6),%a0	# pass: src addr
   7279	mov.l		&0xc,%d0		# pass: opsize is 12 bytes
   7280
   7281# we must not yet write the extended precision data to the stack
   7282# in the pre-decrement case from supervisor mode or else we'll corrupt
   7283# the stack frame. so, leave it in FP_SRC for now and deal with it later...
   7284	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
   7285	beq.b		fout_ext_a7
   7286
   7287	bsr.l		_dmem_write		# write ext prec number to memory
   7288
   7289	tst.l		%d1			# did dstore fail?
   7290	bne.w		fout_ext_err		# yes
   7291
   7292	tst.b		STAG(%a6)		# is operand normalized?
   7293	bne.b		fout_ext_denorm		# no
   7294	rts
   7295
   7296# the number is a DENORM. must set the underflow exception bit
   7297fout_ext_denorm:
   7298	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set underflow exc bit
   7299
   7300	mov.b		FPCR_ENABLE(%a6),%d0
   7301	andi.b		&0x0a,%d0		# is UNFL or INEX enabled?
   7302	bne.b		fout_ext_exc		# yes
   7303	rts
   7304
   7305# we don't want to do the write if the exception occurred in supervisor mode
   7306# so _mem_write2() handles this for us.
   7307fout_ext_a7:
   7308	bsr.l		_mem_write2		# write ext prec number to memory
   7309
   7310	tst.l		%d1			# did dstore fail?
   7311	bne.w		fout_ext_err		# yes
   7312
   7313	tst.b		STAG(%a6)		# is operand normalized?
   7314	bne.b		fout_ext_denorm		# no
   7315	rts
   7316
   7317fout_ext_exc:
   7318	lea		FP_SCR0(%a6),%a0
   7319	bsr.l		norm			# normalize the mantissa
   7320	neg.w		%d0			# new exp = -(shft amt)
   7321	andi.w		&0x7fff,%d0
   7322	andi.w		&0x8000,FP_SCR0_EX(%a6)	# keep only old sign
   7323	or.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
   7324	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
   7325	rts
   7326
   7327fout_ext_err:
   7328	mov.l		EXC_A6(%a6),(%a6)	# fix stacked a6
   7329	bra.l		facc_out_x
   7330
   7331#########################################################################
   7332# fmove.s out ###########################################################
   7333#########################################################################
   7334fout_sgl:
   7335	andi.b		&0x30,%d0		# clear rnd prec
   7336	ori.b		&s_mode*0x10,%d0	# insert sgl prec
   7337	mov.l		%d0,L_SCR3(%a6)		# save rnd prec,mode on stack
   7338
   7339#
   7340# operand is a normalized number. first, we check to see if the move out
   7341# would cause either an underflow or overflow. these cases are handled
   7342# separately. otherwise, set the FPCR to the proper rounding mode and
   7343# execute the move.
   7344#
   7345	mov.w		SRC_EX(%a0),%d0		# extract exponent
   7346	andi.w		&0x7fff,%d0		# strip sign
   7347
   7348	cmpi.w		%d0,&SGL_HI		# will operand overflow?
   7349	bgt.w		fout_sgl_ovfl		# yes; go handle OVFL
   7350	beq.w		fout_sgl_may_ovfl	# maybe; go handle possible OVFL
   7351	cmpi.w		%d0,&SGL_LO		# will operand underflow?
   7352	blt.w		fout_sgl_unfl		# yes; go handle underflow
   7353
   7354#
   7355# NORMs(in range) can be stored out by a simple "fmov.s"
   7356# Unnormalized inputs can come through this point.
   7357#
   7358fout_sgl_exg:
   7359	fmovm.x		SRC(%a0),&0x80		# fetch fop from stack
   7360
   7361	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   7362	fmov.l		&0x0,%fpsr		# clear FPSR
   7363
   7364	fmov.s		%fp0,%d0		# store does convert and round
   7365
   7366	fmov.l		&0x0,%fpcr		# clear FPCR
   7367	fmov.l		%fpsr,%d1		# save FPSR
   7368
   7369	or.w		%d1,2+USER_FPSR(%a6)	# set possible inex2/ainex
   7370
   7371fout_sgl_exg_write:
   7372	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
   7373	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
   7374	beq.b		fout_sgl_exg_write_dn	# must save to integer regfile
   7375
   7376	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
   7377	bsr.l		_dmem_write_long	# write long
   7378
   7379	tst.l		%d1			# did dstore fail?
   7380	bne.l		facc_out_l		# yes
   7381
   7382	rts
   7383
   7384fout_sgl_exg_write_dn:
   7385	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
   7386	andi.w		&0x7,%d1
   7387	bsr.l		store_dreg_l
   7388	rts
   7389
   7390#
   7391# here, we know that the operand would UNFL if moved out to single prec,
   7392# so, denorm and round and then use generic store single routine to
   7393# write the value to memory.
   7394#
   7395fout_sgl_unfl:
   7396	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set UNFL
   7397
   7398	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
   7399	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
   7400	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   7401	mov.l		%a0,-(%sp)
   7402
   7403	clr.l		%d0			# pass: S.F. = 0
   7404
   7405	cmpi.b		STAG(%a6),&DENORM	# fetch src optype tag
   7406	bne.b		fout_sgl_unfl_cont	# let DENORMs fall through
   7407
   7408	lea		FP_SCR0(%a6),%a0
   7409	bsr.l		norm			# normalize the DENORM
   7410
   7411fout_sgl_unfl_cont:
   7412	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
   7413	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
   7414	bsr.l		unf_res			# calc default underflow result
   7415
   7416	lea		FP_SCR0(%a6),%a0	# pass: ptr to fop
   7417	bsr.l		dst_sgl			# convert to single prec
   7418
   7419	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
   7420	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
   7421	beq.b		fout_sgl_unfl_dn	# must save to integer regfile
   7422
   7423	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
   7424	bsr.l		_dmem_write_long	# write long
   7425
   7426	tst.l		%d1			# did dstore fail?
   7427	bne.l		facc_out_l		# yes
   7428
   7429	bra.b		fout_sgl_unfl_chkexc
   7430
   7431fout_sgl_unfl_dn:
   7432	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
   7433	andi.w		&0x7,%d1
   7434	bsr.l		store_dreg_l
   7435
   7436fout_sgl_unfl_chkexc:
   7437	mov.b		FPCR_ENABLE(%a6),%d1
   7438	andi.b		&0x0a,%d1		# is UNFL or INEX enabled?
   7439	bne.w		fout_sd_exc_unfl	# yes
   7440	addq.l		&0x4,%sp
   7441	rts
   7442
   7443#
   7444# it's definitely an overflow so call ovf_res to get the correct answer
   7445#
   7446fout_sgl_ovfl:
   7447	tst.b		3+SRC_HI(%a0)		# is result inexact?
   7448	bne.b		fout_sgl_ovfl_inex2
   7449	tst.l		SRC_LO(%a0)		# is result inexact?
   7450	bne.b		fout_sgl_ovfl_inex2
   7451	ori.w		&ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
   7452	bra.b		fout_sgl_ovfl_cont
   7453fout_sgl_ovfl_inex2:
   7454	ori.w		&ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2
   7455
   7456fout_sgl_ovfl_cont:
   7457	mov.l		%a0,-(%sp)
   7458
   7459# call ovf_res() w/ sgl prec and the correct rnd mode to create the default
   7460# overflow result. DON'T save the returned ccodes from ovf_res() since
   7461# fmove out doesn't alter them.
   7462	tst.b		SRC_EX(%a0)		# is operand negative?
   7463	smi		%d1			# set if so
   7464	mov.l		L_SCR3(%a6),%d0		# pass: sgl prec,rnd mode
   7465	bsr.l		ovf_res			# calc OVFL result
   7466	fmovm.x		(%a0),&0x80		# load default overflow result
   7467	fmov.s		%fp0,%d0		# store to single
   7468
   7469	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
   7470	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
   7471	beq.b		fout_sgl_ovfl_dn	# must save to integer regfile
   7472
   7473	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
   7474	bsr.l		_dmem_write_long	# write long
   7475
   7476	tst.l		%d1			# did dstore fail?
   7477	bne.l		facc_out_l		# yes
   7478
   7479	bra.b		fout_sgl_ovfl_chkexc
   7480
   7481fout_sgl_ovfl_dn:
   7482	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
   7483	andi.w		&0x7,%d1
   7484	bsr.l		store_dreg_l
   7485
   7486fout_sgl_ovfl_chkexc:
   7487	mov.b		FPCR_ENABLE(%a6),%d1
   7488	andi.b		&0x0a,%d1		# is UNFL or INEX enabled?
   7489	bne.w		fout_sd_exc_ovfl	# yes
   7490	addq.l		&0x4,%sp
   7491	rts
   7492
   7493#
   7494# move out MAY overflow:
   7495# (1) force the exp to 0x3fff
   7496# (2) do a move w/ appropriate rnd mode
   7497# (3) if exp still equals zero, then insert original exponent
   7498#	for the correct result.
   7499#     if exp now equals one, then it overflowed so call ovf_res.
   7500#
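# for example: with the exponent forced to 0x3fff the value lies in [1.0,2.0);
# rounding to single precision pushes it up to 2.0 only when the mantissa
# rounds up and carries out of its msb, e.g.
#
#	1.11111111111111111111111 1...b x 2^0  --rnd sgl-->  1.0 x 2^1
#
# in that case the original operand (exp = SGL_HI) would likewise round past
# the largest finite single, so it is sent to the overflow handler; otherwise
# the exponent is unchanged and the normal store path finishes the job.
#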
   7501fout_sgl_may_ovfl:
   7502	mov.w		SRC_EX(%a0),%d1		# fetch current sign
   7503	andi.w		&0x8000,%d1		# keep it,clear exp
   7504	ori.w		&0x3fff,%d1		# insert exp = 0
   7505	mov.w		%d1,FP_SCR0_EX(%a6)	# insert scaled exp
   7506	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
   7507	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)
   7508
   7509	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   7510
   7511	fmov.x		FP_SCR0(%a6),%fp0	# force fop to be rounded
   7512	fmov.l		&0x0,%fpcr		# clear FPCR
   7513
   7514	fabs.x		%fp0			# need absolute value
   7515	fcmp.b		%fp0,&0x2		# did exponent increase?
   7516	fblt.w		fout_sgl_exg		# no; go finish NORM
   7517	bra.w		fout_sgl_ovfl		# yes; go handle overflow
   7518
   7519################
   7520
   7521fout_sd_exc_unfl:
   7522	mov.l		(%sp)+,%a0
   7523
   7524	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
   7525	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
   7526	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   7527
   7528	cmpi.b		STAG(%a6),&DENORM	# was src a DENORM?
   7529	bne.b		fout_sd_exc_cont	# no
   7530
   7531	lea		FP_SCR0(%a6),%a0
   7532	bsr.l		norm
   7533	neg.l		%d0
   7534	andi.w		&0x7fff,%d0
   7535	bfins		%d0,FP_SCR0_EX(%a6){&1:&15}
   7536	bra.b		fout_sd_exc_cont
   7537
   7538fout_sd_exc:
   7539fout_sd_exc_ovfl:
   7540	mov.l		(%sp)+,%a0		# restore a0
   7541
   7542	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
   7543	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
   7544	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   7545
   7546fout_sd_exc_cont:
   7547	bclr		&0x7,FP_SCR0_EX(%a6)	# clear sign bit
   7548	sne.b		2+FP_SCR0_EX(%a6)	# set internal sign bit
   7549	lea		FP_SCR0(%a6),%a0	# pass: ptr to DENORM
   7550
    7551	mov.b		3+L_SCR3(%a6),%d1	# fetch rnd prec,mode byte
    7552	lsr.b		&0x4,%d1		# shift prec,mode into bits 3:0
    7553	andi.w		&0x0c,%d1		# keep only rnd prec
    7554	swap		%d1			# pass: rnd prec in hi word
    7555	mov.b		3+L_SCR3(%a6),%d1	# refetch rnd prec,mode byte
    7556	lsr.b		&0x4,%d1		# shift prec,mode into bits 3:0
    7557	andi.w		&0x03,%d1		# pass: rnd mode in lo word
   7558	clr.l		%d0			# pass: zero g,r,s
   7559	bsr.l		_round			# round the DENORM
   7560
   7561	tst.b		2+FP_SCR0_EX(%a6)	# is EXOP negative?
   7562	beq.b		fout_sd_exc_done	# no
   7563	bset		&0x7,FP_SCR0_EX(%a6)	# yes
   7564
   7565fout_sd_exc_done:
   7566	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
   7567	rts
   7568
   7569#################################################################
   7570# fmove.d out ###################################################
   7571#################################################################
   7572fout_dbl:
   7573	andi.b		&0x30,%d0		# clear rnd prec
   7574	ori.b		&d_mode*0x10,%d0	# insert dbl prec
   7575	mov.l		%d0,L_SCR3(%a6)		# save rnd prec,mode on stack
   7576
   7577#
   7578# operand is a normalized number. first, we check to see if the move out
   7579# would cause either an underflow or overflow. these cases are handled
   7580# separately. otherwise, set the FPCR to the proper rounding mode and
   7581# execute the move.
   7582#
   7583	mov.w		SRC_EX(%a0),%d0		# extract exponent
   7584	andi.w		&0x7fff,%d0		# strip sign
   7585
   7586	cmpi.w		%d0,&DBL_HI		# will operand overflow?
   7587	bgt.w		fout_dbl_ovfl		# yes; go handle OVFL
   7588	beq.w		fout_dbl_may_ovfl	# maybe; go handle possible OVFL
   7589	cmpi.w		%d0,&DBL_LO		# will operand underflow?
   7590	blt.w		fout_dbl_unfl		# yes; go handle underflow
   7591
   7592#
   7593# NORMs(in range) can be stored out by a simple "fmov.d"
   7594# Unnormalized inputs can come through this point.
   7595#
   7596fout_dbl_exg:
   7597	fmovm.x		SRC(%a0),&0x80		# fetch fop from stack
   7598
   7599	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   7600	fmov.l		&0x0,%fpsr		# clear FPSR
   7601
   7602	fmov.d		%fp0,L_SCR1(%a6)	# store does convert and round
   7603
   7604	fmov.l		&0x0,%fpcr		# clear FPCR
   7605	fmov.l		%fpsr,%d0		# save FPSR
   7606
   7607	or.w		%d0,2+USER_FPSR(%a6)	# set possible inex2/ainex
   7608
   7609	mov.l		EXC_EA(%a6),%a1		# pass: dst addr
   7610	lea		L_SCR1(%a6),%a0		# pass: src addr
   7611	movq.l		&0x8,%d0		# pass: opsize is 8 bytes
   7612	bsr.l		_dmem_write		# store dbl fop to memory
   7613
   7614	tst.l		%d1			# did dstore fail?
   7615	bne.l		facc_out_d		# yes
   7616
   7617	rts					# no; so we're finished
   7618
   7619#
   7620# here, we know that the operand would UNFL if moved out to double prec,
   7621# so, denorm and round and then use generic store double routine to
   7622# write the value to memory.
   7623#
   7624fout_dbl_unfl:
   7625	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set UNFL
   7626
   7627	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
   7628	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
   7629	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   7630	mov.l		%a0,-(%sp)
   7631
   7632	clr.l		%d0			# pass: S.F. = 0
   7633
   7634	cmpi.b		STAG(%a6),&DENORM	# fetch src optype tag
   7635	bne.b		fout_dbl_unfl_cont	# let DENORMs fall through
   7636
   7637	lea		FP_SCR0(%a6),%a0
   7638	bsr.l		norm			# normalize the DENORM
   7639
   7640fout_dbl_unfl_cont:
   7641	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
   7642	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
   7643	bsr.l		unf_res			# calc default underflow result
   7644
   7645	lea		FP_SCR0(%a6),%a0	# pass: ptr to fop
    7646	bsr.l		dst_dbl			# convert to double prec
   7647	mov.l		%d0,L_SCR1(%a6)
   7648	mov.l		%d1,L_SCR2(%a6)
   7649
   7650	mov.l		EXC_EA(%a6),%a1		# pass: dst addr
   7651	lea		L_SCR1(%a6),%a0		# pass: src addr
   7652	movq.l		&0x8,%d0		# pass: opsize is 8 bytes
   7653	bsr.l		_dmem_write		# store dbl fop to memory
   7654
   7655	tst.l		%d1			# did dstore fail?
   7656	bne.l		facc_out_d		# yes
   7657
   7658	mov.b		FPCR_ENABLE(%a6),%d1
   7659	andi.b		&0x0a,%d1		# is UNFL or INEX enabled?
   7660	bne.w		fout_sd_exc_unfl	# yes
   7661	addq.l		&0x4,%sp
   7662	rts
   7663
   7664#
   7665# it's definitely an overflow so call ovf_res to get the correct answer
   7666#
   7667fout_dbl_ovfl:
   7668	mov.w		2+SRC_LO(%a0),%d0
   7669	andi.w		&0x7ff,%d0
   7670	bne.b		fout_dbl_ovfl_inex2
   7671
   7672	ori.w		&ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
   7673	bra.b		fout_dbl_ovfl_cont
   7674fout_dbl_ovfl_inex2:
   7675	ori.w		&ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2
   7676
   7677fout_dbl_ovfl_cont:
   7678	mov.l		%a0,-(%sp)
   7679
   7680# call ovf_res() w/ dbl prec and the correct rnd mode to create the default
   7681# overflow result. DON'T save the returned ccodes from ovf_res() since
   7682# fmove out doesn't alter them.
   7683	tst.b		SRC_EX(%a0)		# is operand negative?
   7684	smi		%d1			# set if so
   7685	mov.l		L_SCR3(%a6),%d0		# pass: dbl prec,rnd mode
   7686	bsr.l		ovf_res			# calc OVFL result
   7687	fmovm.x		(%a0),&0x80		# load default overflow result
   7688	fmov.d		%fp0,L_SCR1(%a6)	# store to double
   7689
   7690	mov.l		EXC_EA(%a6),%a1		# pass: dst addr
   7691	lea		L_SCR1(%a6),%a0		# pass: src addr
   7692	movq.l		&0x8,%d0		# pass: opsize is 8 bytes
   7693	bsr.l		_dmem_write		# store dbl fop to memory
   7694
   7695	tst.l		%d1			# did dstore fail?
   7696	bne.l		facc_out_d		# yes
   7697
   7698	mov.b		FPCR_ENABLE(%a6),%d1
   7699	andi.b		&0x0a,%d1		# is UNFL or INEX enabled?
   7700	bne.w		fout_sd_exc_ovfl	# yes
   7701	addq.l		&0x4,%sp
   7702	rts
   7703
   7704#
   7705# move out MAY overflow:
   7706# (1) force the exp to 0x3fff
   7707# (2) do a move w/ appropriate rnd mode
   7708# (3) if exp still equals zero, then insert original exponent
   7709#	for the correct result.
   7710#     if exp now equals one, then it overflowed so call ovf_res.
   7711#
   7712fout_dbl_may_ovfl:
   7713	mov.w		SRC_EX(%a0),%d1		# fetch current sign
   7714	andi.w		&0x8000,%d1		# keep it,clear exp
   7715	ori.w		&0x3fff,%d1		# insert exp = 0
   7716	mov.w		%d1,FP_SCR0_EX(%a6)	# insert scaled exp
   7717	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
   7718	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)
   7719
   7720	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   7721
   7722	fmov.x		FP_SCR0(%a6),%fp0	# force fop to be rounded
   7723	fmov.l		&0x0,%fpcr		# clear FPCR
   7724
   7725	fabs.x		%fp0			# need absolute value
   7726	fcmp.b		%fp0,&0x2		# did exponent increase?
   7727	fblt.w		fout_dbl_exg		# no; go finish NORM
   7728	bra.w		fout_dbl_ovfl		# yes; go handle overflow
   7729
   7730#########################################################################
   7731# XDEF ****************************************************************	#
   7732#	dst_dbl(): create double precision value from extended prec.	#
   7733#									#
   7734# XREF ****************************************************************	#
   7735#	None								#
   7736#									#
   7737# INPUT ***************************************************************	#
   7738#	a0 = pointer to source operand in extended precision		#
   7739#									#
   7740# OUTPUT **************************************************************	#
   7741#	d0 = hi(double precision result)				#
   7742#	d1 = lo(double precision result)				#
   7743#									#
   7744# ALGORITHM ***********************************************************	#
   7745#									#
   7746#  Changes extended precision to double precision.			#
   7747#  Note: no attempt is made to round the extended value to double.	#
   7748#	dbl_sign = ext_sign						#
    7749	#	dbl_exp = ext_exp - $3fff(ext bias) + $3ff(dbl bias)		#
   7750#	get rid of ext integer bit					#
    7751	#	dbl_mant = ext_mant{62:11}					#
   7752#									#
   7753#		---------------   ---------------    ---------------	#
   7754#  extended ->  |s|    exp    |   |1| ms mant   |    | ls mant     |	#
   7755#		---------------   ---------------    ---------------	#
   7756#		 95	    64    63 62	      32      31     11	  0	#
   7757#				     |			     |		#
   7758#				     |			     |		#
   7759#				     |			     |		#
   7760#			             v			     v		#
   7761#			      ---------------   ---------------		#
   7762#  double   ->		      |s|exp| mant  |   |  mant       |		#
   7763#			      ---------------   ---------------		#
   7764#			      63     51   32   31	       0	#
   7765#									#
   7766#########################################################################
   7767
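# as a quick sanity check of the recipe above, converting +1.5 (extended:
# sign=0, exp=$3fff, mant=$c0000000,00000000) gives:
#
#	dbl_exp  = $3fff - $3fff + $3ff = $3ff
#	dbl_mant = mant{62:11} with the j-bit dropped = $8000000000000
#	result   = $3ff80000,00000000 = 1.5 in double precision
#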
   7768dst_dbl:
   7769	clr.l		%d0			# clear d0
   7770	mov.w		FTEMP_EX(%a0),%d0	# get exponent
   7771	subi.w		&EXT_BIAS,%d0		# subtract extended precision bias
   7772	addi.w		&DBL_BIAS,%d0		# add double precision bias
   7773	tst.b		FTEMP_HI(%a0)		# is number a denorm?
   7774	bmi.b		dst_get_dupper		# no
   7775	subq.w		&0x1,%d0		# yes; denorm bias = DBL_BIAS - 1
   7776dst_get_dupper:
   7777	swap		%d0			# d0 now in upper word
   7778	lsl.l		&0x4,%d0		# d0 in proper place for dbl prec exp
   7779	tst.b		FTEMP_EX(%a0)		# test sign
   7780	bpl.b		dst_get_dman		# if positive, go process mantissa
   7781	bset		&0x1f,%d0		# if negative, set sign
   7782dst_get_dman:
   7783	mov.l		FTEMP_HI(%a0),%d1	# get ms mantissa
   7784	bfextu		%d1{&1:&20},%d1		# get upper 20 bits of ms
   7785	or.l		%d1,%d0			# put these bits in ms word of double
   7786	mov.l		%d0,L_SCR1(%a6)		# put the new exp back on the stack
   7787	mov.l		FTEMP_HI(%a0),%d1	# get ms mantissa
   7788	mov.l		&21,%d0			# load shift count
   7789	lsl.l		%d0,%d1			# put lower 11 bits in upper bits
   7790	mov.l		%d1,L_SCR2(%a6)		# build lower lword in memory
   7791	mov.l		FTEMP_LO(%a0),%d1	# get ls mantissa
   7792	bfextu		%d1{&0:&21},%d0		# get ls 21 bits of double
   7793	mov.l		L_SCR2(%a6),%d1
   7794	or.l		%d0,%d1			# put them in double result
   7795	mov.l		L_SCR1(%a6),%d0
   7796	rts
   7797
   7798#########################################################################
   7799# XDEF ****************************************************************	#
   7800#	dst_sgl(): create single precision value from extended prec	#
   7801#									#
   7802# XREF ****************************************************************	#
   7803#									#
   7804# INPUT ***************************************************************	#
   7805#	a0 = pointer to source operand in extended precision		#
   7806#									#
   7807# OUTPUT **************************************************************	#
   7808#	d0 = single precision result					#
   7809#									#
   7810# ALGORITHM ***********************************************************	#
   7811#									#
   7812# Changes extended precision to single precision.			#
   7813#	sgl_sign = ext_sign						#
   7814#	sgl_exp = ext_exp - $3fff(ext bias) + $7f(sgl bias)		#
   7815#	get rid of ext integer bit					#
    7816	#	sgl_mant = ext_mant{62:40}					#
   7817#									#
   7818#		---------------   ---------------    ---------------	#
   7819#  extended ->  |s|    exp    |   |1| ms mant   |    | ls mant     |	#
   7820#		---------------   ---------------    ---------------	#
   7821#		 95	    64    63 62	   40 32      31     12	  0	#
   7822#				     |	   |				#
   7823#				     |	   |				#
   7824#				     |	   |				#
   7825#			             v     v				#
   7826#			      ---------------				#
   7827#  single   ->		      |s|exp| mant  |				#
   7828#			      ---------------				#
   7829#			      31     22     0				#
   7830#									#
   7831#########################################################################
   7832
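# as a quick sanity check of the recipe above, converting +1.5 (extended:
# sign=0, exp=$3fff, mant=$c0000000,00000000) gives:
#
#	sgl_exp  = $3fff - $3fff + $7f = $7f
#	sgl_mant = mant{62:40} with the j-bit dropped = $400000
#	result   = $3fc00000 = 1.5 in single precision
#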
   7833dst_sgl:
   7834	clr.l		%d0
   7835	mov.w		FTEMP_EX(%a0),%d0	# get exponent
   7836	subi.w		&EXT_BIAS,%d0		# subtract extended precision bias
   7837	addi.w		&SGL_BIAS,%d0		# add single precision bias
   7838	tst.b		FTEMP_HI(%a0)		# is number a denorm?
   7839	bmi.b		dst_get_supper		# no
   7840	subq.w		&0x1,%d0		# yes; denorm bias = SGL_BIAS - 1
   7841dst_get_supper:
   7842	swap		%d0			# put exp in upper word of d0
   7843	lsl.l		&0x7,%d0		# shift it into single exp bits
   7844	tst.b		FTEMP_EX(%a0)		# test sign
   7845	bpl.b		dst_get_sman		# if positive, continue
   7846	bset		&0x1f,%d0		# if negative, put in sign first
   7847dst_get_sman:
   7848	mov.l		FTEMP_HI(%a0),%d1	# get ms mantissa
   7849	andi.l		&0x7fffff00,%d1		# get upper 23 bits of ms
   7850	lsr.l		&0x8,%d1		# and put them flush right
   7851	or.l		%d1,%d0			# put these bits in ms word of single
   7852	rts
   7853
   7854##############################################################################
   7855fout_pack:
   7856	bsr.l		_calc_ea_fout		# fetch the <ea>
   7857	mov.l		%a0,-(%sp)
   7858
   7859	mov.b		STAG(%a6),%d0		# fetch input type
   7860	bne.w		fout_pack_not_norm	# input is not NORM
   7861
   7862fout_pack_norm:
   7863	btst		&0x4,EXC_CMDREG(%a6)	# static or dynamic?
   7864	beq.b		fout_pack_s		# static
   7865
   7866fout_pack_d:
   7867	mov.b		1+EXC_CMDREG(%a6),%d1	# fetch dynamic reg
   7868	lsr.b		&0x4,%d1
   7869	andi.w		&0x7,%d1
   7870
   7871	bsr.l		fetch_dreg		# fetch Dn w/ k-factor
   7872
   7873	bra.b		fout_pack_type
   7874fout_pack_s:
   7875	mov.b		1+EXC_CMDREG(%a6),%d0	# fetch static field
   7876
   7877fout_pack_type:
   7878	bfexts		%d0{&25:&7},%d0		# extract k-factor
   7879	mov.l	%d0,-(%sp)
   7880
   7881	lea		FP_SRC(%a6),%a0		# pass: ptr to input
   7882
   7883# bindec is currently scrambling FP_SRC for denorm inputs.
   7884# we'll have to change this, but for now, tough luck!!!
   7885	bsr.l		bindec			# convert xprec to packed
   7886
   7887#	andi.l		&0xcfff000f,FP_SCR0(%a6) # clear unused fields
   7888	andi.l		&0xcffff00f,FP_SCR0(%a6) # clear unused fields
   7889
   7890	mov.l	(%sp)+,%d0
   7891
   7892	tst.b		3+FP_SCR0_EX(%a6)
   7893	bne.b		fout_pack_set
   7894	tst.l		FP_SCR0_HI(%a6)
   7895	bne.b		fout_pack_set
   7896	tst.l		FP_SCR0_LO(%a6)
   7897	bne.b		fout_pack_set
   7898
   7899# add the extra condition that only if the k-factor was zero, too, should
   7900# we zero the exponent
   7901	tst.l		%d0
   7902	bne.b		fout_pack_set
   7903# "mantissa" is all zero which means that the answer is zero. but, the '040
   7904# algorithm allows the exponent to be non-zero. the 881/2 do not. Therefore,
   7905# if the mantissa is zero, I will zero the exponent, too.
    7906	# the question now is whether the exponent's sign bit is allowed to be non-zero
   7907# for a zero, also...
   7908	andi.w		&0xf000,FP_SCR0(%a6)
   7909
   7910fout_pack_set:
   7911
   7912	lea		FP_SCR0(%a6),%a0	# pass: src addr
   7913
   7914fout_pack_write:
   7915	mov.l		(%sp)+,%a1		# pass: dst addr
   7916	mov.l		&0xc,%d0		# pass: opsize is 12 bytes
   7917
   7918	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
   7919	beq.b		fout_pack_a7
   7920
   7921	bsr.l		_dmem_write		# write ext prec number to memory
   7922
   7923	tst.l		%d1			# did dstore fail?
   7924	bne.w		fout_ext_err		# yes
   7925
   7926	rts
   7927
   7928# we don't want to do the write if the exception occurred in supervisor mode
   7929# so _mem_write2() handles this for us.
   7930fout_pack_a7:
   7931	bsr.l		_mem_write2		# write ext prec number to memory
   7932
   7933	tst.l		%d1			# did dstore fail?
   7934	bne.w		fout_ext_err		# yes
   7935
   7936	rts
   7937
   7938fout_pack_not_norm:
   7939	cmpi.b		%d0,&DENORM		# is it a DENORM?
   7940	beq.w		fout_pack_norm		# yes
   7941	lea		FP_SRC(%a6),%a0
   7942	clr.w		2+FP_SRC_EX(%a6)
   7943	cmpi.b		%d0,&SNAN		# is it an SNAN?
   7944	beq.b		fout_pack_snan		# yes
   7945	bra.b		fout_pack_write		# no
   7946
   7947fout_pack_snan:
   7948	ori.w		&snaniop2_mask,FPSR_EXCEPT(%a6) # set SNAN/AIOP
   7949	bset		&0x6,FP_SRC_HI(%a6)	# set snan bit
   7950	bra.b		fout_pack_write
   7951
   7952#########################################################################
   7953# XDEF ****************************************************************	#
   7954#	fmul(): emulates the fmul instruction				#
   7955#	fsmul(): emulates the fsmul instruction				#
   7956#	fdmul(): emulates the fdmul instruction				#
   7957#									#
   7958# XREF ****************************************************************	#
   7959#	scale_to_zero_src() - scale src exponent to zero		#
   7960#	scale_to_zero_dst() - scale dst exponent to zero		#
   7961#	unf_res() - return default underflow result			#
   7962#	ovf_res() - return default overflow result			#
   7963#	res_qnan() - return QNAN result					#
   7964#	res_snan() - return SNAN result					#
   7965#									#
   7966# INPUT ***************************************************************	#
   7967#	a0 = pointer to extended precision source operand		#
   7968#	a1 = pointer to extended precision destination operand		#
   7969#	d0  rnd prec,mode						#
   7970#									#
   7971# OUTPUT **************************************************************	#
   7972#	fp0 = result							#
   7973#	fp1 = EXOP (if exception occurred)				#
   7974#									#
   7975# ALGORITHM ***********************************************************	#
   7976#	Handle NANs, infinities, and zeroes as special cases. Divide	#
   7977# norms/denorms into ext/sgl/dbl precision.				#
   7978#	For norms/denorms, scale the exponents such that a multiply	#
   7979# instruction won't cause an exception. Use the regular fmul to		#
   7980# compute a result. Check if the regular operands would have taken	#
   7981# an exception. If so, return the default overflow/underflow result	#
   7982# and return the EXOP if exceptions are enabled. Else, scale the	#
   7983# result operand to the proper exponent.				#
   7984#									#
   7985#########################################################################
   7986
   7987	align		0x10
   7988tbl_fmul_ovfl:
   7989	long		0x3fff - 0x7ffe		# ext_max
   7990	long		0x3fff - 0x407e		# sgl_max
   7991	long		0x3fff - 0x43fe		# dbl_max
   7992tbl_fmul_unfl:
   7993	long		0x3fff + 0x0001		# ext_unfl
   7994	long		0x3fff - 0x3f80		# sgl_unfl
   7995	long		0x3fff - 0x3c00		# dbl_unfl
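
# a rough sketch of how these tables are used: after both operands are scaled
# so that their exponents are 0x3fff, the true biased exponent of the product
# is approximately (e_src + e_dst - 0x3fff), i.e. 0x3fff - SCALE_FACTOR, give
# or take one for the carry out of the mantissa product. for instance, in
# extended precision with e_src = e_dst = 0x6000:
#
#	SCALE_FACTOR = (0x3fff-0x6000) + (0x3fff-0x6000) = -0x4002
#	-0x4002 < (0x3fff - 0x7ffe)  =>  blt taken  =>  definite overflow
#	(the true exponent would be 0x8001, past the largest finite 0x7ffe)
#
# equality with a table entry is the borderline "may" case, which is resolved
# after the multiply by checking whether the rounded result reached 2.0.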
   7996
   7997	global		fsmul
   7998fsmul:
   7999	andi.b		&0x30,%d0		# clear rnd prec
   8000	ori.b		&s_mode*0x10,%d0	# insert sgl prec
   8001	bra.b		fmul
   8002
   8003	global		fdmul
   8004fdmul:
   8005	andi.b		&0x30,%d0
   8006	ori.b		&d_mode*0x10,%d0	# insert dbl prec
   8007
   8008	global		fmul
   8009fmul:
   8010	mov.l		%d0,L_SCR3(%a6)		# store rnd info
   8011
   8012	clr.w		%d1
   8013	mov.b		DTAG(%a6),%d1
   8014	lsl.b		&0x3,%d1
   8015	or.b		STAG(%a6),%d1		# combine src tags
   8016	bne.w		fmul_not_norm		# optimize on non-norm input
   8017
   8018fmul_norm:
   8019	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
   8020	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
   8021	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
   8022
   8023	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
   8024	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
   8025	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   8026
   8027	bsr.l		scale_to_zero_src	# scale src exponent
   8028	mov.l		%d0,-(%sp)		# save scale factor 1
   8029
   8030	bsr.l		scale_to_zero_dst	# scale dst exponent
   8031
   8032	add.l		%d0,(%sp)		# SCALE_FACTOR = scale1 + scale2
   8033
   8034	mov.w		2+L_SCR3(%a6),%d1	# fetch precision
   8035	lsr.b		&0x6,%d1		# shift to lo bits
   8036	mov.l		(%sp)+,%d0		# load S.F.
   8037	cmp.l		%d0,(tbl_fmul_ovfl.w,%pc,%d1.w*4) # would result ovfl?
   8038	beq.w		fmul_may_ovfl		# result may rnd to overflow
   8039	blt.w		fmul_ovfl		# result will overflow
   8040
   8041	cmp.l		%d0,(tbl_fmul_unfl.w,%pc,%d1.w*4) # would result unfl?
   8042	beq.w		fmul_may_unfl		# result may rnd to no unfl
   8043	bgt.w		fmul_unfl		# result will underflow
   8044
   8045#
   8046# NORMAL:
   8047# - the result of the multiply operation will neither overflow nor underflow.
   8048# - do the multiply to the proper precision and rounding mode.
   8049# - scale the result exponent using the scale factor. if both operands were
   8050# normalized then we really don't need to go through this scaling. but for now,
   8051# this will do.
   8052#
   8053fmul_normal:
   8054	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand
   8055
   8056	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   8057	fmov.l		&0x0,%fpsr		# clear FPSR
   8058
   8059	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
   8060
   8061	fmov.l		%fpsr,%d1		# save status
   8062	fmov.l		&0x0,%fpcr		# clear FPCR
   8063
   8064	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   8065
   8066fmul_normal_exit:
   8067	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
   8068	mov.l		%d2,-(%sp)		# save d2
   8069	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
   8070	mov.l		%d1,%d2			# make a copy
   8071	andi.l		&0x7fff,%d1		# strip sign
   8072	andi.w		&0x8000,%d2		# keep old sign
   8073	sub.l		%d0,%d1			# add scale factor
   8074	or.w		%d2,%d1			# concat old sign,new exp
   8075	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
   8076	mov.l		(%sp)+,%d2		# restore d2
   8077	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
   8078	rts
   8079
   8080#
   8081# OVERFLOW:
   8082# - the result of the multiply operation is an overflow.
   8083# - do the multiply to the proper precision and rounding mode in order to
   8084# set the inexact bits.
   8085# - calculate the default result and return it in fp0.
   8086# - if overflow or inexact is enabled, we need a multiply result rounded to
   8087# extended precision. if the original operation was extended, then we have this
   8088# result. if the original operation was single or double, we have to do another
   8089# multiply using extended precision and the correct rounding mode. the result
   8090# of this operation then has its exponent scaled by -0x6000 to create the
   8091# exceptional operand.
   8092#
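# (sketch of the exponent arithmetic used below: the EXOP is the extended
#  precision result with its true exponent pulled back into range by
#  subtracting 0x6000. e.g. if un-scaling the stored exponent gives 0x8001,
#  the EXOP is delivered in fp1 with exponent 0x8001 - 0x6000 = 0x2001.)
#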
   8093fmul_ovfl:
   8094	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand
   8095
   8096	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   8097	fmov.l		&0x0,%fpsr		# clear FPSR
   8098
   8099	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
   8100
   8101	fmov.l		%fpsr,%d1		# save status
   8102	fmov.l		&0x0,%fpcr		# clear FPCR
   8103
   8104	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   8105
   8106# save setting this until now because this is where fmul_may_ovfl may jump in
   8107fmul_ovfl_tst:
   8108	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
   8109
   8110	mov.b		FPCR_ENABLE(%a6),%d1
   8111	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
   8112	bne.b		fmul_ovfl_ena		# yes
   8113
   8114# calculate the default result
   8115fmul_ovfl_dis:
   8116	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
   8117	sne		%d1			# set sign param accordingly
   8118	mov.l		L_SCR3(%a6),%d0		# pass rnd prec,mode
   8119	bsr.l		ovf_res			# calculate default result
   8120	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
   8121	fmovm.x		(%a0),&0x80		# return default result in fp0
   8122	rts
   8123
   8124#
   8125# OVFL is enabled; Create EXOP:
   8126# - if precision is extended, then we have the EXOP. simply bias the exponent
   8127# with an extra -0x6000. if the precision is single or double, we need to
   8128# calculate a result rounded to extended precision.
   8129#
   8130fmul_ovfl_ena:
   8131	mov.l		L_SCR3(%a6),%d1
   8132	andi.b		&0xc0,%d1		# test the rnd prec
   8133	bne.b		fmul_ovfl_ena_sd	# it's sgl or dbl
   8134
   8135fmul_ovfl_ena_cont:
   8136	fmovm.x		&0x80,FP_SCR0(%a6)	# move result to stack
   8137
   8138	mov.l		%d2,-(%sp)		# save d2
   8139	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
   8140	mov.w		%d1,%d2			# make a copy
   8141	andi.l		&0x7fff,%d1		# strip sign
   8142	sub.l		%d0,%d1			# add scale factor
   8143	subi.l		&0x6000,%d1		# subtract bias
   8144	andi.w		&0x7fff,%d1		# clear sign bit
   8145	andi.w		&0x8000,%d2		# keep old sign
   8146	or.w		%d2,%d1			# concat old sign,new exp
   8147	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
   8148	mov.l		(%sp)+,%d2		# restore d2
   8149	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
   8150	bra.b		fmul_ovfl_dis
   8151
   8152fmul_ovfl_ena_sd:
   8153	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand
   8154
   8155	mov.l		L_SCR3(%a6),%d1
   8156	andi.b		&0x30,%d1		# keep rnd mode only
   8157	fmov.l		%d1,%fpcr		# set FPCR
   8158
   8159	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
   8160
   8161	fmov.l		&0x0,%fpcr		# clear FPCR
   8162	bra.b		fmul_ovfl_ena_cont
   8163
   8164#
   8165# may OVERFLOW:
   8166# - the result of the multiply operation MAY overflow.
   8167# - do the multiply to the proper precision and rounding mode in order to
   8168# set the inexact bits.
   8169# - calculate the default result and return it in fp0.
   8170#
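# (why the compare against 2.0 below works: both operands were scaled to an
#  exponent of 0x3fff, so their mantissas lie in [1,2) and the product lies
#  in [1,4); e.g. 1.5 x 1.5 = 2.25. if |result| >= 2.0 the exponent was bumped
#  by one and the borderline case really did overflow; otherwise it did not.)
#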
   8171fmul_may_ovfl:
   8172	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
   8173
   8174	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   8175	fmov.l		&0x0,%fpsr		# clear FPSR
   8176
   8177	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
   8178
   8179	fmov.l		%fpsr,%d1		# save status
   8180	fmov.l		&0x0,%fpcr		# clear FPCR
   8181
   8182	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   8183
   8184	fabs.x		%fp0,%fp1		# make a copy of result
   8185	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
   8186	fbge.w		fmul_ovfl_tst		# yes; overflow has occurred
   8187
   8188# no, it didn't overflow; we have correct result
   8189	bra.w		fmul_normal_exit
   8190
   8191#
   8192# UNDERFLOW:
   8193# - the result of the multiply operation is an underflow.
   8194# - do the multiply to the proper precision and rounding mode in order to
   8195# set the inexact bits.
   8196# - calculate the default result and return it in fp0.
   8197# - if overflow or inexact is enabled, we need a multiply result rounded to
   8198# extended precision. if the original operation was extended, then we have this
   8199# result. if the original operation was single or double, we have to do another
   8200# multiply using extended precision and the correct rounding mode. the result
   8201# of this operation then has its exponent scaled by -0x6000 to create the
   8202# exceptional operand.
   8203#
   8204fmul_unfl:
   8205	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
   8206
   8207# for fun, let's use only extended precision, round to zero. then, let
   8208# the unf_res() routine figure out all the rest.
    8209	# (will we get the correct answer?)
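# (the intent: by truncating the intermediate (RZ), the value handed to
#  unf_res() has not already been rounded up, so the denormalize-and-round
#  done there in the user's precision and mode should produce the expected
#  default underflow result.)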
   8210	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand
   8211
   8212	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
   8213	fmov.l		&0x0,%fpsr		# clear FPSR
   8214
   8215	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
   8216
   8217	fmov.l		%fpsr,%d1		# save status
   8218	fmov.l		&0x0,%fpcr		# clear FPCR
   8219
   8220	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   8221
   8222	mov.b		FPCR_ENABLE(%a6),%d1
   8223	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
   8224	bne.b		fmul_unfl_ena		# yes
   8225
   8226fmul_unfl_dis:
   8227	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
   8228
   8229	lea		FP_SCR0(%a6),%a0	# pass: result addr
   8230	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
   8231	bsr.l		unf_res			# calculate default result
    8232	or.b		%d0,FPSR_CC(%a6)	# unf_res may have set 'Z'
   8233	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
   8234	rts
   8235
   8236#
   8237# UNFL is enabled.
   8238#
   8239fmul_unfl_ena:
   8240	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op
   8241
   8242	mov.l		L_SCR3(%a6),%d1
   8243	andi.b		&0xc0,%d1		# is precision extended?
   8244	bne.b		fmul_unfl_ena_sd	# no, sgl or dbl
   8245
   8246# if the rnd mode is anything but RZ, then we have to re-do the above
   8247# multiplication because we used RZ for all.
   8248	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   8249
   8250fmul_unfl_ena_cont:
   8251	fmov.l		&0x0,%fpsr		# clear FPSR
   8252
   8253	fmul.x		FP_SCR0(%a6),%fp1	# execute multiply
   8254
   8255	fmov.l		&0x0,%fpcr		# clear FPCR
   8256
   8257	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
   8258	mov.l		%d2,-(%sp)		# save d2
   8259	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
   8260	mov.l		%d1,%d2			# make a copy
   8261	andi.l		&0x7fff,%d1		# strip sign
   8262	andi.w		&0x8000,%d2		# keep old sign
   8263	sub.l		%d0,%d1			# add scale factor
   8264	addi.l		&0x6000,%d1		# add bias
   8265	andi.w		&0x7fff,%d1
   8266	or.w		%d2,%d1			# concat old sign,new exp
   8267	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
   8268	mov.l		(%sp)+,%d2		# restore d2
   8269	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
   8270	bra.w		fmul_unfl_dis
   8271
   8272fmul_unfl_ena_sd:
   8273	mov.l		L_SCR3(%a6),%d1
   8274	andi.b		&0x30,%d1		# use only rnd mode
   8275	fmov.l		%d1,%fpcr		# set FPCR
   8276
   8277	bra.b		fmul_unfl_ena_cont
   8278
   8279# MAY UNDERFLOW:
   8280# -use the correct rounding mode and precision. this code favors operations
   8281# that do not underflow.
   8282fmul_may_unfl:
   8283	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand
   8284
   8285	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   8286	fmov.l		&0x0,%fpsr		# clear FPSR
   8287
   8288	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
   8289
   8290	fmov.l		%fpsr,%d1		# save status
   8291	fmov.l		&0x0,%fpcr		# clear FPCR
   8292
   8293	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   8294
   8295	fabs.x		%fp0,%fp1		# make a copy of result
   8296	fcmp.b		%fp1,&0x2		# is |result| > 2.b?
   8297	fbgt.w		fmul_normal_exit	# no; no underflow occurred
   8298	fblt.w		fmul_unfl		# yes; underflow occurred
   8299
   8300#
   8301# we still don't know if underflow occurred. result is ~ equal to 2. but,
   8302# we don't know if the result was an underflow that rounded up to a 2 or
   8303# a normalized number that rounded down to a 2. so, redo the entire operation
   8304# using RZ as the rounding mode to see what the pre-rounded result is.
   8305# this case should be relatively rare.
   8306#
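# (e.g. a true product of 1.fff...f x 2^0 rounds up to 2.0 under RN but stays
#  below 2.0 under RZ, so the borderline underflow is detected; a true product
#  of exactly 2.0 stays at 2.0 under RZ and takes the normal exit.)
#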
   8307	fmovm.x		FP_SCR1(%a6),&0x40	# load dst operand
   8308
   8309	mov.l		L_SCR3(%a6),%d1
   8310	andi.b		&0xc0,%d1		# keep rnd prec
   8311	ori.b		&rz_mode*0x10,%d1	# insert RZ
   8312
   8313	fmov.l		%d1,%fpcr		# set FPCR
   8314	fmov.l		&0x0,%fpsr		# clear FPSR
   8315
   8316	fmul.x		FP_SCR0(%a6),%fp1	# execute multiply
   8317
   8318	fmov.l		&0x0,%fpcr		# clear FPCR
   8319	fabs.x		%fp1			# make absolute value
   8320	fcmp.b		%fp1,&0x2		# is |result| < 2.b?
   8321	fbge.w		fmul_normal_exit	# no; no underflow occurred
   8322	bra.w		fmul_unfl		# yes, underflow occurred
   8323
   8324################################################################################
   8325
   8326#
   8327# Multiply: inputs are not both normalized; what are they?
   8328#
   8329fmul_not_norm:
   8330	mov.w		(tbl_fmul_op.b,%pc,%d1.w*2),%d1
   8331	jmp		(tbl_fmul_op.b,%pc,%d1.w)
   8332
   8333	swbeg		&48
   8334tbl_fmul_op:
   8335	short		fmul_norm	- tbl_fmul_op # NORM x NORM
   8336	short		fmul_zero	- tbl_fmul_op # NORM x ZERO
   8337	short		fmul_inf_src	- tbl_fmul_op # NORM x INF
   8338	short		fmul_res_qnan	- tbl_fmul_op # NORM x QNAN
   8339	short		fmul_norm	- tbl_fmul_op # NORM x DENORM
   8340	short		fmul_res_snan	- tbl_fmul_op # NORM x SNAN
   8341	short		tbl_fmul_op	- tbl_fmul_op #
   8342	short		tbl_fmul_op	- tbl_fmul_op #
   8343
   8344	short		fmul_zero	- tbl_fmul_op # ZERO x NORM
   8345	short		fmul_zero	- tbl_fmul_op # ZERO x ZERO
   8346	short		fmul_res_operr	- tbl_fmul_op # ZERO x INF
   8347	short		fmul_res_qnan	- tbl_fmul_op # ZERO x QNAN
   8348	short		fmul_zero	- tbl_fmul_op # ZERO x DENORM
   8349	short		fmul_res_snan	- tbl_fmul_op # ZERO x SNAN
   8350	short		tbl_fmul_op	- tbl_fmul_op #
   8351	short		tbl_fmul_op	- tbl_fmul_op #
   8352
   8353	short		fmul_inf_dst	- tbl_fmul_op # INF x NORM
   8354	short		fmul_res_operr	- tbl_fmul_op # INF x ZERO
   8355	short		fmul_inf_dst	- tbl_fmul_op # INF x INF
   8356	short		fmul_res_qnan	- tbl_fmul_op # INF x QNAN
   8357	short		fmul_inf_dst	- tbl_fmul_op # INF x DENORM
   8358	short		fmul_res_snan	- tbl_fmul_op # INF x SNAN
   8359	short		tbl_fmul_op	- tbl_fmul_op #
   8360	short		tbl_fmul_op	- tbl_fmul_op #
   8361
   8362	short		fmul_res_qnan	- tbl_fmul_op # QNAN x NORM
   8363	short		fmul_res_qnan	- tbl_fmul_op # QNAN x ZERO
   8364	short		fmul_res_qnan	- tbl_fmul_op # QNAN x INF
   8365	short		fmul_res_qnan	- tbl_fmul_op # QNAN x QNAN
   8366	short		fmul_res_qnan	- tbl_fmul_op # QNAN x DENORM
   8367	short		fmul_res_snan	- tbl_fmul_op # QNAN x SNAN
   8368	short		tbl_fmul_op	- tbl_fmul_op #
   8369	short		tbl_fmul_op	- tbl_fmul_op #
   8370
    8371	short		fmul_norm	- tbl_fmul_op # DENORM x NORM
    8372	short		fmul_zero	- tbl_fmul_op # DENORM x ZERO
    8373	short		fmul_inf_src	- tbl_fmul_op # DENORM x INF
    8374	short		fmul_res_qnan	- tbl_fmul_op # DENORM x QNAN
    8375	short		fmul_norm	- tbl_fmul_op # DENORM x DENORM
    8376	short		fmul_res_snan	- tbl_fmul_op # DENORM x SNAN
   8377	short		tbl_fmul_op	- tbl_fmul_op #
   8378	short		tbl_fmul_op	- tbl_fmul_op #
   8379
   8380	short		fmul_res_snan	- tbl_fmul_op # SNAN x NORM
   8381	short		fmul_res_snan	- tbl_fmul_op # SNAN x ZERO
   8382	short		fmul_res_snan	- tbl_fmul_op # SNAN x INF
   8383	short		fmul_res_snan	- tbl_fmul_op # SNAN x QNAN
   8384	short		fmul_res_snan	- tbl_fmul_op # SNAN x DENORM
   8385	short		fmul_res_snan	- tbl_fmul_op # SNAN x SNAN
   8386	short		tbl_fmul_op	- tbl_fmul_op #
   8387	short		tbl_fmul_op	- tbl_fmul_op #
   8388
   8389fmul_res_operr:
   8390	bra.l		res_operr
   8391fmul_res_snan:
   8392	bra.l		res_snan
   8393fmul_res_qnan:
   8394	bra.l		res_qnan
   8395
   8396#
   8397# Multiply: (Zero x Zero) || (Zero x norm) || (Zero x denorm)
   8398#
   8399	global		fmul_zero		# global for fsglmul
   8400fmul_zero:
   8401	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
   8402	mov.b		DST_EX(%a1),%d1
   8403	eor.b		%d0,%d1
   8404	bpl.b		fmul_zero_p		# result ZERO is pos.
   8405fmul_zero_n:
   8406	fmov.s		&0x80000000,%fp0	# load -ZERO
   8407	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/N
   8408	rts
   8409fmul_zero_p:
   8410	fmov.s		&0x00000000,%fp0	# load +ZERO
   8411	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
   8412	rts
   8413
   8414#
   8415# Multiply: (inf x inf) || (inf x norm) || (inf x denorm)
   8416#
   8417# Note: The j-bit for an infinity is a don't-care. However, to be
   8418# strictly compatible w/ the 68881/882, we make sure to return an
   8419# INF w/ the j-bit set if the input INF j-bit was set. Destination
   8420# INFs take priority.
   8421#
   8422	global		fmul_inf_dst		# global for fsglmul
   8423fmul_inf_dst:
   8424	fmovm.x		DST(%a1),&0x80		# return INF result in fp0
   8425	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
   8426	mov.b		DST_EX(%a1),%d1
   8427	eor.b		%d0,%d1
   8428	bpl.b		fmul_inf_dst_p		# result INF is pos.
   8429fmul_inf_dst_n:
   8430	fabs.x		%fp0			# clear result sign
   8431	fneg.x		%fp0			# set result sign
   8432	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N
   8433	rts
   8434fmul_inf_dst_p:
   8435	fabs.x		%fp0			# clear result sign
   8436	mov.b		&inf_bmask,FPSR_CC(%a6)	# set INF
   8437	rts
   8438
   8439	global		fmul_inf_src		# global for fsglmul
   8440fmul_inf_src:
   8441	fmovm.x		SRC(%a0),&0x80		# return INF result in fp0
   8442	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
   8443	mov.b		DST_EX(%a1),%d1
   8444	eor.b		%d0,%d1
   8445	bpl.b		fmul_inf_dst_p		# result INF is pos.
   8446	bra.b		fmul_inf_dst_n
   8447
   8448#########################################################################
   8449# XDEF ****************************************************************	#
   8450#	fin(): emulates the fmove instruction				#
   8451#	fsin(): emulates the fsmove instruction				#
   8452#	fdin(): emulates the fdmove instruction				#
   8453#									#
   8454# XREF ****************************************************************	#
   8455#	norm() - normalize mantissa for EXOP on denorm			#
   8456#	scale_to_zero_src() - scale src exponent to zero		#
   8457#	ovf_res() - return default overflow result			#
   8458#	unf_res() - return default underflow result			#
   8459#	res_qnan_1op() - return QNAN result				#
   8460#	res_snan_1op() - return SNAN result				#
   8461#									#
   8462# INPUT ***************************************************************	#
   8463#	a0 = pointer to extended precision source operand		#
   8464#	d0 = round prec/mode						#
   8465#									#
   8466# OUTPUT **************************************************************	#
   8467#	fp0 = result							#
   8468#	fp1 = EXOP (if exception occurred)				#
   8469#									#
   8470# ALGORITHM ***********************************************************	#
   8471#	Handle NANs, infinities, and zeroes as special cases. Divide	#
   8472# norms into extended, single, and double precision.			#
   8473#	Norms can be emulated w/ a regular fmove instruction. For	#
   8474# sgl/dbl, must scale exponent and perform an "fmove". Check to see	#
   8475# if the result would have overflowed/underflowed. If so, use unf_res()	#
   8476# or ovf_res() to return the default result. Also return EXOP if	#
   8477# exception is enabled. If no exception, return the default result.	#
   8478#	Unnorms don't pass through here.				#
   8479#									#
   8480#########################################################################
   8481
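# (a note on the sgl/dbl bounds used below: scale_to_zero_src() returns in d0
#  the amount by which the exponent was scaled down, i.e. 0x3fff - e, where e
#  is the operand's normalized extended biased exponent. the smallest normal
#  single, 2^-126, has e = 0x3f81, so the test "d0 >= 0x3fff - 0x3f80" catches
#  every operand smaller than that, i.e. everything that underflows when
#  rounded to single precision.)
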
   8482	global		fsin
   8483fsin:
   8484	andi.b		&0x30,%d0		# clear rnd prec
   8485	ori.b		&s_mode*0x10,%d0	# insert sgl precision
   8486	bra.b		fin
   8487
   8488	global		fdin
   8489fdin:
   8490	andi.b		&0x30,%d0		# clear rnd prec
   8491	ori.b		&d_mode*0x10,%d0	# insert dbl precision
   8492
   8493	global		fin
   8494fin:
   8495	mov.l		%d0,L_SCR3(%a6)		# store rnd info
   8496
   8497	mov.b		STAG(%a6),%d1		# fetch src optype tag
   8498	bne.w		fin_not_norm		# optimize on non-norm input
   8499
   8500#
   8501# FP MOVE IN: NORMs and DENORMs ONLY!
   8502#
   8503fin_norm:
   8504	andi.b		&0xc0,%d0		# is precision extended?
   8505	bne.w		fin_not_ext		# no, so go handle dbl or sgl
   8506
   8507#
   8508# precision selected is extended. so...we cannot get an underflow
   8509# or overflow because of rounding to the correct precision. so...
   8510# skip the scaling and unscaling...
   8511#
   8512	tst.b		SRC_EX(%a0)		# is the operand negative?
   8513	bpl.b		fin_norm_done		# no
   8514	bset		&neg_bit,FPSR_CC(%a6)	# yes, so set 'N' ccode bit
   8515fin_norm_done:
   8516	fmovm.x		SRC(%a0),&0x80		# return result in fp0
   8517	rts
   8518
   8519#
   8520# for an extended precision DENORM, the UNFL exception bit is set
   8521# the accrued bit is NOT set in this instance(no inexactness!)
   8522#
   8523fin_denorm:
   8524	andi.b		&0xc0,%d0		# is precision extended?
   8525	bne.w		fin_not_ext		# no, so go handle dbl or sgl
   8526
   8527	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
   8528	tst.b		SRC_EX(%a0)		# is the operand negative?
   8529	bpl.b		fin_denorm_done		# no
   8530	bset		&neg_bit,FPSR_CC(%a6)	# yes, so set 'N' ccode bit
   8531fin_denorm_done:
   8532	fmovm.x		SRC(%a0),&0x80		# return result in fp0
   8533	btst		&unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
   8534	bne.b		fin_denorm_unfl_ena	# yes
   8535	rts
   8536
   8537#
   8538# the input is an extended DENORM and underflow is enabled in the FPCR.
   8539# normalize the mantissa and add the bias of 0x6000 to the resulting negative
   8540# exponent and insert back into the operand.
   8541#
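# (e.g. if norm() has to shift the mantissa up by 3 bits, the EXOP built
#  below gets exponent -3 + 0x6000 = 0x5ffd, with the original sign.)
#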
   8542fin_denorm_unfl_ena:
   8543	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
   8544	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
   8545	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   8546	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
   8547	bsr.l		norm			# normalize result
   8548	neg.w		%d0			# new exponent = -(shft val)
   8549	addi.w		&0x6000,%d0		# add new bias to exponent
   8550	mov.w		FP_SCR0_EX(%a6),%d1	# fetch old sign,exp
   8551	andi.w		&0x8000,%d1		# keep old sign
   8552	andi.w		&0x7fff,%d0		# clear sign position
    8553	or.w		%d1,%d0			# concat new exp,old sign
   8554	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
   8555	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
   8556	rts
   8557
   8558#
   8559# operand is to be rounded to single or double precision
   8560#
   8561fin_not_ext:
   8562	cmpi.b		%d0,&s_mode*0x10	# separate sgl/dbl prec
   8563	bne.b		fin_dbl
   8564
   8565#
   8566# operand is to be rounded to single precision
   8567#
   8568fin_sgl:
   8569	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
   8570	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
   8571	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   8572	bsr.l		scale_to_zero_src	# calculate scale factor
   8573
   8574	cmpi.l		%d0,&0x3fff-0x3f80	# will move in underflow?
   8575	bge.w		fin_sd_unfl		# yes; go handle underflow
   8576	cmpi.l		%d0,&0x3fff-0x407e	# will move in overflow?
   8577	beq.w		fin_sd_may_ovfl		# maybe; go check
   8578	blt.w		fin_sd_ovfl		# yes; go handle overflow
   8579
   8580#
   8581# operand will NOT overflow or underflow when moved into the fp reg file
   8582#
   8583fin_sd_normal:
   8584	fmov.l		&0x0,%fpsr		# clear FPSR
   8585	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   8586
   8587	fmov.x		FP_SCR0(%a6),%fp0	# perform move
   8588
   8589	fmov.l		%fpsr,%d1		# save FPSR
   8590	fmov.l		&0x0,%fpcr		# clear FPCR
   8591
   8592	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   8593
   8594fin_sd_normal_exit:
   8595	mov.l		%d2,-(%sp)		# save d2
   8596	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
   8597	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
   8598	mov.w		%d1,%d2			# make a copy
   8599	andi.l		&0x7fff,%d1		# strip sign
   8600	sub.l		%d0,%d1			# add scale factor
   8601	andi.w		&0x8000,%d2		# keep old sign
   8602	or.w		%d1,%d2			# concat old sign,new exponent
   8603	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
   8604	mov.l		(%sp)+,%d2		# restore d2
   8605	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
   8606	rts
   8607
   8608#
   8609# operand is to be rounded to double precision
   8610#
   8611fin_dbl:
   8612	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
   8613	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
   8614	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   8615	bsr.l		scale_to_zero_src	# calculate scale factor
   8616
   8617	cmpi.l		%d0,&0x3fff-0x3c00	# will move in underflow?
   8618	bge.w		fin_sd_unfl		# yes; go handle underflow
   8619	cmpi.l		%d0,&0x3fff-0x43fe	# will move in overflow?
   8620	beq.w		fin_sd_may_ovfl		# maybe; go check
   8621	blt.w		fin_sd_ovfl		# yes; go handle overflow
    8622	bra.w		fin_sd_normal		# no; go handle normalized op
   8623
   8624#
   8625# operand WILL underflow when moved in to the fp register file
   8626#
   8627fin_sd_unfl:
   8628	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
   8629
   8630	tst.b		FP_SCR0_EX(%a6)		# is operand negative?
   8631	bpl.b		fin_sd_unfl_tst
   8632	bset		&neg_bit,FPSR_CC(%a6)	# set 'N' ccode bit
   8633
   8634# if underflow or inexact is enabled, then go calculate the EXOP first.
   8635fin_sd_unfl_tst:
   8636	mov.b		FPCR_ENABLE(%a6),%d1
   8637	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
   8638	bne.b		fin_sd_unfl_ena		# yes
   8639
   8640fin_sd_unfl_dis:
   8641	lea		FP_SCR0(%a6),%a0	# pass: result addr
   8642	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
   8643	bsr.l		unf_res			# calculate default result
   8644	or.b		%d0,FPSR_CC(%a6)	# unf_res may have set 'Z'
   8645	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
   8646	rts
   8647
   8648#
   8649# operand will underflow AND underflow or inexact is enabled.
   8650# Therefore, we must return the result rounded to extended precision.
   8651#
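# (this is the mirror image of the overflow case: the EXOP exponent is the
#  true exponent plus 0x6000, pushing it back up into extended range, where
#  fin_sd_ovfl_ena below subtracts 0x6000 instead.)
#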
   8652fin_sd_unfl_ena:
   8653	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
   8654	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
   8655	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent
   8656
   8657	mov.l		%d2,-(%sp)		# save d2
   8658	mov.w		%d1,%d2			# make a copy
   8659	andi.l		&0x7fff,%d1		# strip sign
   8660	sub.l		%d0,%d1			# subtract scale factor
   8661	andi.w		&0x8000,%d2		# extract old sign
   8662	addi.l		&0x6000,%d1		# add new bias
   8663	andi.w		&0x7fff,%d1
   8664	or.w		%d1,%d2			# concat old sign,new exp
   8665	mov.w		%d2,FP_SCR1_EX(%a6)	# insert new exponent
   8666	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
   8667	mov.l		(%sp)+,%d2		# restore d2
   8668	bra.b		fin_sd_unfl_dis
   8669
   8670#
   8671# operand WILL overflow.
   8672#
   8673fin_sd_ovfl:
   8674	fmov.l		&0x0,%fpsr		# clear FPSR
   8675	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   8676
   8677	fmov.x		FP_SCR0(%a6),%fp0	# perform move
   8678
   8679	fmov.l		&0x0,%fpcr		# clear FPCR
   8680	fmov.l		%fpsr,%d1		# save FPSR
   8681
   8682	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   8683
   8684fin_sd_ovfl_tst:
   8685	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
   8686
   8687	mov.b		FPCR_ENABLE(%a6),%d1
   8688	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
   8689	bne.b		fin_sd_ovfl_ena		# yes
   8690
   8691#
   8692# OVFL is not enabled; therefore, we must create the default result by
   8693# calling ovf_res().
   8694#
   8695fin_sd_ovfl_dis:
   8696	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
   8697	sne		%d1			# set sign param accordingly
   8698	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
   8699	bsr.l		ovf_res			# calculate default result
   8700	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
   8701	fmovm.x		(%a0),&0x80		# return default result in fp0
   8702	rts
   8703
   8704#
   8705# OVFL is enabled.
   8706# the INEX2 bit has already been updated by the round to the correct precision.
   8707# now, round to extended(and don't alter the FPSR).
   8708#
   8709fin_sd_ovfl_ena:
   8710	mov.l		%d2,-(%sp)		# save d2
   8711	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
   8712	mov.l		%d1,%d2			# make a copy
   8713	andi.l		&0x7fff,%d1		# strip sign
   8714	andi.w		&0x8000,%d2		# keep old sign
   8715	sub.l		%d0,%d1			# add scale factor
   8716	sub.l		&0x6000,%d1		# subtract bias
   8717	andi.w		&0x7fff,%d1
   8718	or.w		%d2,%d1
   8719	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
   8720	mov.l		(%sp)+,%d2		# restore d2
   8721	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
   8722	bra.b		fin_sd_ovfl_dis
   8723
   8724#
   8725# the move in MAY overflow. so...
   8726#
   8727fin_sd_may_ovfl:
   8728	fmov.l		&0x0,%fpsr		# clear FPSR
   8729	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   8730
   8731	fmov.x		FP_SCR0(%a6),%fp0	# perform the move
   8732
   8733	fmov.l		%fpsr,%d1		# save status
   8734	fmov.l		&0x0,%fpcr		# clear FPCR
   8735
   8736	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   8737
   8738	fabs.x		%fp0,%fp1		# make a copy of result
   8739	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
   8740	fbge.w		fin_sd_ovfl_tst		# yes; overflow has occurred
   8741
   8742# no, it didn't overflow; we have correct result
   8743	bra.w		fin_sd_normal_exit
   8744
   8745##########################################################################
   8746
   8747#
   8748# operand is not a NORM: check its optype and branch accordingly
   8749#
   8750fin_not_norm:
   8751	cmpi.b		%d1,&DENORM		# weed out DENORM
   8752	beq.w		fin_denorm
   8753	cmpi.b		%d1,&SNAN		# weed out SNANs
   8754	beq.l		res_snan_1op
   8755	cmpi.b		%d1,&QNAN		# weed out QNANs
   8756	beq.l		res_qnan_1op
   8757
   8758#
   8759# do the fmove in; at this point, only possible ops are ZERO and INF.
   8760# use fmov to determine ccodes.
   8761# prec:mode should be zero at this point but it won't affect answer anyways.
   8762#
   8763	fmov.x		SRC(%a0),%fp0		# do fmove in
   8764	fmov.l		%fpsr,%d0		# no exceptions possible
   8765	rol.l		&0x8,%d0		# put ccodes in lo byte
   8766	mov.b		%d0,FPSR_CC(%a6)	# insert correct ccodes
   8767	rts
   8768
   8769#########################################################################
   8770# XDEF ****************************************************************	#
   8771#	fdiv(): emulates the fdiv instruction				#
   8772#	fsdiv(): emulates the fsdiv instruction				#
   8773#	fddiv(): emulates the fddiv instruction				#
   8774#									#
   8775# XREF ****************************************************************	#
   8776#	scale_to_zero_src() - scale src exponent to zero		#
   8777#	scale_to_zero_dst() - scale dst exponent to zero		#
   8778#	unf_res() - return default underflow result			#
   8779#	ovf_res() - return default overflow result			#
   8780#	res_qnan() - return QNAN result					#
   8781#	res_snan() - return SNAN result					#
   8782#									#
   8783# INPUT ***************************************************************	#
   8784#	a0 = pointer to extended precision source operand		#
   8785#	a1 = pointer to extended precision destination operand		#
   8786#	d0  rnd prec,mode						#
   8787#									#
   8788# OUTPUT **************************************************************	#
   8789#	fp0 = result							#
   8790#	fp1 = EXOP (if exception occurred)				#
   8791#									#
   8792# ALGORITHM ***********************************************************	#
   8793#	Handle NANs, infinities, and zeroes as special cases. Divide	#
   8794# norms/denorms into ext/sgl/dbl precision.				#
   8795#	For norms/denorms, scale the exponents such that a divide	#
   8796# instruction won't cause an exception. Use the regular fdiv to		#
   8797# compute a result. Check if the regular operands would have taken	#
   8798# an exception. If so, return the default overflow/underflow result	#
   8799# and return the EXOP if exceptions are enabled. Else, scale the	#
   8800# result operand to the proper exponent.				#
   8801#									#
   8802#########################################################################
   8803
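# a rough sketch of the scale-factor arithmetic for a quotient: with both
# operands scaled to an exponent of 0x3fff, the true biased exponent of
# dst/src is about (e_dst - e_src + 0x3fff) = 0x3fff - SCALE_FACTOR, where
# SCALE_FACTOR = (0x3fff - e_dst) - (0x3fff - e_src). e.g. 2.0 / 0.5:
#
#	e_dst = 0x4000, e_src = 0x3ffe
#	SCALE_FACTOR = -1 - 1 = -2
#	result exponent = 0x3fff - (-2) = 0x4001  (4.0, as expected)
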
   8804	align		0x10
   8805tbl_fdiv_unfl:
   8806	long		0x3fff - 0x0000		# ext_unfl
   8807	long		0x3fff - 0x3f81		# sgl_unfl
   8808	long		0x3fff - 0x3c01		# dbl_unfl
   8809
   8810tbl_fdiv_ovfl:
   8811	long		0x3fff - 0x7ffe		# ext overflow exponent
   8812	long		0x3fff - 0x407e		# sgl overflow exponent
   8813	long		0x3fff - 0x43fe		# dbl overflow exponent
   8814
   8815	global		fsdiv
   8816fsdiv:
   8817	andi.b		&0x30,%d0		# clear rnd prec
   8818	ori.b		&s_mode*0x10,%d0	# insert sgl prec
   8819	bra.b		fdiv
   8820
   8821	global		fddiv
   8822fddiv:
   8823	andi.b		&0x30,%d0		# clear rnd prec
   8824	ori.b		&d_mode*0x10,%d0	# insert dbl prec
   8825
   8826	global		fdiv
   8827fdiv:
   8828	mov.l		%d0,L_SCR3(%a6)		# store rnd info
   8829
   8830	clr.w		%d1
   8831	mov.b		DTAG(%a6),%d1
   8832	lsl.b		&0x3,%d1
   8833	or.b		STAG(%a6),%d1		# combine src tags
   8834
   8835	bne.w		fdiv_not_norm		# optimize on non-norm input
   8836
   8837#
   8838# DIVIDE: NORMs and DENORMs ONLY!
   8839#
   8840fdiv_norm:
   8841	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
   8842	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
   8843	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
   8844
   8845	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
   8846	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
   8847	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   8848
   8849	bsr.l		scale_to_zero_src	# scale src exponent
   8850	mov.l		%d0,-(%sp)		# save scale factor 1
   8851
   8852	bsr.l		scale_to_zero_dst	# scale dst exponent
   8853
   8854	neg.l		(%sp)			# SCALE FACTOR = scale2 - scale1
   8855	add.l		%d0,(%sp)
   8856
   8857	mov.w		2+L_SCR3(%a6),%d1	# fetch precision
   8858	lsr.b		&0x6,%d1		# shift to lo bits
   8859	mov.l		(%sp)+,%d0		# load S.F.
   8860	cmp.l		%d0,(tbl_fdiv_ovfl.b,%pc,%d1.w*4) # will result overflow?
   8861	ble.w		fdiv_may_ovfl		# result may overflow; go check
   8862
   8863	cmp.l		%d0,(tbl_fdiv_unfl.w,%pc,%d1.w*4) # will result underflow?
   8864	beq.w		fdiv_may_unfl		# maybe
   8865	bgt.w		fdiv_unfl		# yes; go handle underflow
   8866
   8867fdiv_normal:
   8868	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
   8869
   8870	fmov.l		L_SCR3(%a6),%fpcr	# save FPCR
   8871	fmov.l		&0x0,%fpsr		# clear FPSR
   8872
   8873	fdiv.x		FP_SCR0(%a6),%fp0	# perform divide
   8874
   8875	fmov.l		%fpsr,%d1		# save FPSR
   8876	fmov.l		&0x0,%fpcr		# clear FPCR
   8877
   8878	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   8879
   8880fdiv_normal_exit:
   8881	fmovm.x		&0x80,FP_SCR0(%a6)	# store result on stack
   8882	mov.l		%d2,-(%sp)		# store d2
   8883	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
   8884	mov.l		%d1,%d2			# make a copy
   8885	andi.l		&0x7fff,%d1		# strip sign
   8886	andi.w		&0x8000,%d2		# keep old sign
   8887	sub.l		%d0,%d1			# add scale factor
   8888	or.w		%d2,%d1			# concat old sign,new exp
   8889	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
   8890	mov.l		(%sp)+,%d2		# restore d2
   8891	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
   8892	rts
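       #
       # The exponent fix-up above, as a small C sketch (illustration only;
       # not assembled); this path is only taken when the rebased exponent is
       # known to be in range.
       #
       #	static unsigned short rebias_se(unsigned short se, int scale)
       #	{
       #		unsigned exp  = (se & 0x7fff) - scale;	/* strip sign, undo scaling */
       #		unsigned sign = se & 0x8000;		/* keep old sign            */
       #		return (unsigned short)(sign | exp);	/* concat old sign, new exp */
       #	}
       #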
   8893
   8894tbl_fdiv_ovfl2:
   8895	long		0x7fff
   8896	long		0x407f
   8897	long		0x43ff
   8898
   8899fdiv_no_ovfl:
   8900	mov.l		(%sp)+,%d0		# restore scale factor
   8901	bra.b		fdiv_normal_exit
   8902
   8903fdiv_may_ovfl:
   8904	mov.l		%d0,-(%sp)		# save scale factor
   8905
   8906	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
   8907
   8908	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   8909	fmov.l		&0x0,%fpsr		# clear FPSR
   8910
   8911	fdiv.x		FP_SCR0(%a6),%fp0	# execute divide
   8912
   8913	fmov.l		%fpsr,%d0
   8914	fmov.l		&0x0,%fpcr
   8915
   8916	or.l		%d0,USER_FPSR(%a6)	# save INEX,N
   8917
   8918	fmovm.x		&0x01,-(%sp)		# save result to stack
   8919	mov.w		(%sp),%d0		# fetch new exponent
   8920	add.l		&0xc,%sp		# clear result from stack
   8921	andi.l		&0x7fff,%d0		# strip sign
   8922	sub.l		(%sp),%d0		# add scale factor
   8923	cmp.l		%d0,(tbl_fdiv_ovfl2.b,%pc,%d1.w*4)
   8924	blt.b		fdiv_no_ovfl
   8925	mov.l		(%sp)+,%d0
   8926
   8927fdiv_ovfl_tst:
   8928	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
   8929
   8930	mov.b		FPCR_ENABLE(%a6),%d1
   8931	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
   8932	bne.b		fdiv_ovfl_ena		# yes
   8933
   8934fdiv_ovfl_dis:
   8935	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
   8936	sne		%d1			# set sign param accordingly
   8937	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
   8938	bsr.l		ovf_res			# calculate default result
   8939	or.b		%d0,FPSR_CC(%a6)	# set INF if applicable
   8940	fmovm.x		(%a0),&0x80		# return default result in fp0
   8941	rts
   8942
   8943fdiv_ovfl_ena:
   8944	mov.l		L_SCR3(%a6),%d1
   8945	andi.b		&0xc0,%d1		# is precision extended?
   8946	bne.b		fdiv_ovfl_ena_sd	# no, do sgl or dbl
   8947
   8948fdiv_ovfl_ena_cont:
   8949	fmovm.x		&0x80,FP_SCR0(%a6)	# move result to stack
   8950
   8951	mov.l		%d2,-(%sp)		# save d2
   8952	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
   8953	mov.w		%d1,%d2			# make a copy
   8954	andi.l		&0x7fff,%d1		# strip sign
   8955	sub.l		%d0,%d1			# add scale factor
   8956	subi.l		&0x6000,%d1		# subtract bias
   8957	andi.w		&0x7fff,%d1		# clear sign bit
   8958	andi.w		&0x8000,%d2		# keep old sign
   8959	or.w		%d2,%d1			# concat old sign,new exp
   8960	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
   8961	mov.l		(%sp)+,%d2		# restore d2
   8962	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
   8963	bra.b		fdiv_ovfl_dis
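       #
       # When OVFL/INEX traps are enabled, the EXOP returned in fp1 carries an
       # exponent pulled back into extended range by a fixed bias adjustment of
       # 0x6000 (the underflow paths below add 0x6000 instead of subtracting
       # it). A minimal C sketch of the computation above (illustration only;
       # not assembled):
       #
       #	static unsigned short exop_se_ovfl(unsigned short se, int scale)
       #	{
       #		unsigned exp  = ((se & 0x7fff) - scale - 0x6000) & 0x7fff;
       #		unsigned sign = se & 0x8000;
       #		return (unsigned short)(sign | exp);
       #	}
       #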
   8964
   8965fdiv_ovfl_ena_sd:
   8966	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand
   8967
   8968	mov.l		L_SCR3(%a6),%d1
   8969	andi.b		&0x30,%d1		# keep rnd mode
   8970	fmov.l		%d1,%fpcr		# set FPCR
   8971
   8972	fdiv.x		FP_SCR0(%a6),%fp0	# execute divide
   8973
   8974	fmov.l		&0x0,%fpcr		# clear FPCR
   8975	bra.b		fdiv_ovfl_ena_cont
   8976
   8977fdiv_unfl:
   8978	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
   8979
   8980	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
   8981
   8982	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
   8983	fmov.l		&0x0,%fpsr		# clear FPSR
   8984
   8985	fdiv.x		FP_SCR0(%a6),%fp0	# execute divide
   8986
   8987	fmov.l		%fpsr,%d1		# save status
   8988	fmov.l		&0x0,%fpcr		# clear FPCR
   8989
   8990	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   8991
   8992	mov.b		FPCR_ENABLE(%a6),%d1
   8993	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
   8994	bne.b		fdiv_unfl_ena		# yes
   8995
   8996fdiv_unfl_dis:
   8997	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
   8998
   8999	lea		FP_SCR0(%a6),%a0	# pass: result addr
   9000	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
   9001	bsr.l		unf_res			# calculate default result
   9002	or.b		%d0,FPSR_CC(%a6)	# 'Z' may have been set
   9003	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
   9004	rts
   9005
   9006#
   9007# UNFL is enabled.
   9008#
   9009fdiv_unfl_ena:
   9010	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op
   9011
   9012	mov.l		L_SCR3(%a6),%d1
   9013	andi.b		&0xc0,%d1		# is precision extended?
   9014	bne.b		fdiv_unfl_ena_sd	# no, sgl or dbl
   9015
   9016	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   9017
   9018fdiv_unfl_ena_cont:
   9019	fmov.l		&0x0,%fpsr		# clear FPSR
   9020
   9021	fdiv.x		FP_SCR0(%a6),%fp1	# execute divide
   9022
   9023	fmov.l		&0x0,%fpcr		# clear FPCR
   9024
   9025	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
   9026	mov.l		%d2,-(%sp)		# save d2
   9027	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
   9028	mov.l		%d1,%d2			# make a copy
   9029	andi.l		&0x7fff,%d1		# strip sign
   9030	andi.w		&0x8000,%d2		# keep old sign
   9031	sub.l		%d0,%d1			# add scale factor
   9032	addi.l		&0x6000,%d1		# add bias
   9033	andi.w		&0x7fff,%d1
   9034	or.w		%d2,%d1			# concat old sign,new exp
   9035	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exp
   9036	mov.l		(%sp)+,%d2		# restore d2
   9037	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
   9038	bra.w		fdiv_unfl_dis
   9039
   9040fdiv_unfl_ena_sd:
   9041	mov.l		L_SCR3(%a6),%d1
   9042	andi.b		&0x30,%d1		# use only rnd mode
   9043	fmov.l		%d1,%fpcr		# set FPCR
   9044
   9045	bra.b		fdiv_unfl_ena_cont
   9046
   9047#
   9048# the divide operation MAY underflow:
   9049#
   9050fdiv_may_unfl:
   9051	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
   9052
   9053	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   9054	fmov.l		&0x0,%fpsr		# clear FPSR
   9055
   9056	fdiv.x		FP_SCR0(%a6),%fp0	# execute divide
   9057
   9058	fmov.l		%fpsr,%d1		# save status
   9059	fmov.l		&0x0,%fpcr		# clear FPCR
   9060
   9061	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   9062
   9063	fabs.x		%fp0,%fp1		# make a copy of result
   9064	fcmp.b		%fp1,&0x1		# is |result| > 1.b?
   9065	fbgt.w		fdiv_normal_exit	# yes; no underflow occurred
   9066	fblt.w		fdiv_unfl		# no; underflow occurred
   9067
   9068#
   9069# we still don't know if underflow occurred. result is ~ equal to 1. but,
   9070# we don't know if the result was an underflow that rounded up to a 1
   9071# or a normalized number that rounded down to a 1. so, redo the entire
   9072# operation using RZ as the rounding mode to see what the pre-rounded
   9073# result is. this case should be relatively rare.
   9074#
   9075	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1
   9076
   9077	mov.l		L_SCR3(%a6),%d1
   9078	andi.b		&0xc0,%d1		# keep rnd prec
   9079	ori.b		&rz_mode*0x10,%d1	# insert RZ
   9080
   9081	fmov.l		%d1,%fpcr		# set FPCR
   9082	fmov.l		&0x0,%fpsr		# clear FPSR
   9083
   9084	fdiv.x		FP_SCR0(%a6),%fp1	# execute divide
   9085
   9086	fmov.l		&0x0,%fpcr		# clear FPCR
   9087	fabs.x		%fp1			# make absolute value
   9088	fcmp.b		%fp1,&0x1		# is |result| < 1.b?
   9089	fbge.w		fdiv_normal_exit	# no; no underflow occurred
   9090	bra.w		fdiv_unfl		# yes; underflow occurred
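       #
       # Why the RZ retry above settles the question: with the user's rounding
       # mode, a pre-rounded quotient just under 1.0 (an underflow once the
       # scale factor is reapplied) can round up to exactly 1.0 and become
       # indistinguishable from a quotient just over 1.0 that rounded down to
       # 1.0. Round-to-zero never increases the magnitude, so the RZ result is
       # below 1.0 exactly when the pre-rounded quotient was.
       #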
   9091
   9092############################################################################
   9093
   9094#
   9095# Divide: inputs are not both normalized; what are they?
   9096#
   9097fdiv_not_norm:
   9098	mov.w		(tbl_fdiv_op.b,%pc,%d1.w*2),%d1
   9099	jmp		(tbl_fdiv_op.b,%pc,%d1.w*1)
   9100
   9101	swbeg		&48
   9102tbl_fdiv_op:
   9103	short		fdiv_norm	- tbl_fdiv_op # NORM / NORM
   9104	short		fdiv_inf_load	- tbl_fdiv_op # NORM / ZERO
   9105	short		fdiv_zero_load	- tbl_fdiv_op # NORM / INF
   9106	short		fdiv_res_qnan	- tbl_fdiv_op # NORM / QNAN
   9107	short		fdiv_norm	- tbl_fdiv_op # NORM / DENORM
   9108	short		fdiv_res_snan	- tbl_fdiv_op # NORM / SNAN
   9109	short		tbl_fdiv_op	- tbl_fdiv_op #
   9110	short		tbl_fdiv_op	- tbl_fdiv_op #
   9111
   9112	short		fdiv_zero_load	- tbl_fdiv_op # ZERO / NORM
   9113	short		fdiv_res_operr	- tbl_fdiv_op # ZERO / ZERO
   9114	short		fdiv_zero_load	- tbl_fdiv_op # ZERO / INF
   9115	short		fdiv_res_qnan	- tbl_fdiv_op # ZERO / QNAN
   9116	short		fdiv_zero_load	- tbl_fdiv_op # ZERO / DENORM
   9117	short		fdiv_res_snan	- tbl_fdiv_op # ZERO / SNAN
   9118	short		tbl_fdiv_op	- tbl_fdiv_op #
   9119	short		tbl_fdiv_op	- tbl_fdiv_op #
   9120
   9121	short		fdiv_inf_dst	- tbl_fdiv_op # INF / NORM
   9122	short		fdiv_inf_dst	- tbl_fdiv_op # INF / ZERO
   9123	short		fdiv_res_operr	- tbl_fdiv_op # INF / INF
   9124	short		fdiv_res_qnan	- tbl_fdiv_op # INF / QNAN
   9125	short		fdiv_inf_dst	- tbl_fdiv_op # INF / DENORM
   9126	short		fdiv_res_snan	- tbl_fdiv_op # INF / SNAN
   9127	short		tbl_fdiv_op	- tbl_fdiv_op #
   9128	short		tbl_fdiv_op	- tbl_fdiv_op #
   9129
   9130	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / NORM
   9131	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / ZERO
   9132	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / INF
   9133	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / QNAN
   9134	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / DENORM
   9135	short		fdiv_res_snan	- tbl_fdiv_op # QNAN / SNAN
   9136	short		tbl_fdiv_op	- tbl_fdiv_op #
   9137	short		tbl_fdiv_op	- tbl_fdiv_op #
   9138
   9139	short		fdiv_norm	- tbl_fdiv_op # DENORM / NORM
   9140	short		fdiv_inf_load	- tbl_fdiv_op # DENORM / ZERO
   9141	short		fdiv_zero_load	- tbl_fdiv_op # DENORM / INF
   9142	short		fdiv_res_qnan	- tbl_fdiv_op # DENORM / QNAN
   9143	short		fdiv_norm	- tbl_fdiv_op # DENORM / DENORM
   9144	short		fdiv_res_snan	- tbl_fdiv_op # DENORM / SNAN
   9145	short		tbl_fdiv_op	- tbl_fdiv_op #
   9146	short		tbl_fdiv_op	- tbl_fdiv_op #
   9147
   9148	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / NORM
   9149	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / ZERO
   9150	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / INF
   9151	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / QNAN
   9152	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / DENORM
   9153	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / SNAN
   9154	short		tbl_fdiv_op	- tbl_fdiv_op #
   9155	short		tbl_fdiv_op	- tbl_fdiv_op #
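       #
       # How the dispatch above indexes this table, as a C sketch (illustration
       # only; not assembled). This assumes the operand tags are the small
       # integers NORM=0, ZERO=1, INF=2, QNAN=3, DENORM=4, SNAN=5 implied by
       # the row/column comments; each entry is a 16-bit offset that the jmp
       # adds back to the table base.
       #
       #	static int fdiv_handler_offset(unsigned dtag, unsigned stag,
       #				       const short tbl[48])
       #	{
       #		unsigned idx = (dtag << 3) | stag;	/* 8 columns per dst tag */
       #		return tbl[idx];			/* self-relative offset  */
       #	}
       #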
   9156
   9157fdiv_res_qnan:
   9158	bra.l		res_qnan
   9159fdiv_res_snan:
   9160	bra.l		res_snan
   9161fdiv_res_operr:
   9162	bra.l		res_operr
   9163
   9164	global		fdiv_zero_load		# global for fsgldiv
   9165fdiv_zero_load:
   9166	mov.b		SRC_EX(%a0),%d0		# result sign is exclusive
   9167	mov.b		DST_EX(%a1),%d1		# or of input signs.
   9168	eor.b		%d0,%d1
   9169	bpl.b		fdiv_zero_load_p	# result is positive
   9170	fmov.s		&0x80000000,%fp0	# load a -ZERO
   9171	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6)	# set Z/N
   9172	rts
   9173fdiv_zero_load_p:
   9174	fmov.s		&0x00000000,%fp0	# load a +ZERO
   9175	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
   9176	rts
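       #
       # The sign rule used here and in fdiv_inf_load below, as a one-line C
       # sketch (illustration only; not assembled): the result is negative iff
       # the two input sign bits differ.
       #
       #	static int result_is_negative(unsigned char src_ex_hi, unsigned char dst_ex_hi)
       #	{
       #		return ((src_ex_hi ^ dst_ex_hi) & 0x80) != 0;
       #	}
       #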
   9177
   9178#
   9179# The destination was In Range and the source was a ZERO. The result,
   9180# therefore, is an INF w/ the proper sign.
   9181# So, determine the sign and return a new INF (w/ the j-bit cleared).
   9182#
   9183	global		fdiv_inf_load		# global for fsgldiv
   9184fdiv_inf_load:
   9185	ori.w		&dz_mask+adz_mask,2+USER_FPSR(%a6) # no; set DZ/ADZ
   9186	mov.b		SRC_EX(%a0),%d0		# load both signs
   9187	mov.b		DST_EX(%a1),%d1
   9188	eor.b		%d0,%d1
   9189	bpl.b		fdiv_inf_load_p		# result is positive
   9190	fmov.s		&0xff800000,%fp0	# make result -INF
   9191	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N
   9192	rts
   9193fdiv_inf_load_p:
   9194	fmov.s		&0x7f800000,%fp0	# make result +INF
   9195	mov.b		&inf_bmask,FPSR_CC(%a6)	# set INF
   9196	rts
   9197
   9198#
   9199# The destination was an INF w/ an In Range or ZERO source, the result is
   9200# an INF w/ the proper sign.
   9201# The 68881/882 returns the destination INF w/ the new sign (if the j-bit of the
   9202# dst INF is set, then the j-bit of the result INF is also set).
   9203#
   9204	global		fdiv_inf_dst		# global for fsgldiv
   9205fdiv_inf_dst:
   9206	mov.b		DST_EX(%a1),%d0		# load both signs
   9207	mov.b		SRC_EX(%a0),%d1
   9208	eor.b		%d0,%d1
   9209	bpl.b		fdiv_inf_dst_p		# result is positive
   9210
   9211	fmovm.x		DST(%a1),&0x80		# return result in fp0
   9212	fabs.x		%fp0			# clear sign bit
   9213	fneg.x		%fp0			# set sign bit
   9214	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/NEG
   9215	rts
   9216
   9217fdiv_inf_dst_p:
   9218	fmovm.x		DST(%a1),&0x80		# return result in fp0
   9219	fabs.x		%fp0			# return positive INF
   9220	mov.b		&inf_bmask,FPSR_CC(%a6) # set INF
   9221	rts
   9222
   9223#########################################################################
   9224# XDEF ****************************************************************	#
   9225#	fneg(): emulates the fneg instruction				#
   9226#	fsneg(): emulates the fsneg instruction				#
   9227#	fdneg(): emulates the fdneg instruction				#
   9228#									#
   9229# XREF ****************************************************************	#
   9230#	norm() - normalize a denorm to provide EXOP			#
   9231#	scale_to_zero_src() - scale sgl/dbl source exponent		#
   9232#	ovf_res() - return default overflow result			#
   9233#	unf_res() - return default underflow result			#
   9234#	res_qnan_1op() - return QNAN result				#
   9235#	res_snan_1op() - return SNAN result				#
   9236#									#
   9237# INPUT ***************************************************************	#
   9238#	a0 = pointer to extended precision source operand		#
   9239#	d0 = rnd prec,mode						#
   9240#									#
   9241# OUTPUT **************************************************************	#
   9242#	fp0 = result							#
   9243#	fp1 = EXOP (if exception occurred)				#
   9244#									#
   9245# ALGORITHM ***********************************************************	#
   9246#	Handle NANs, zeroes, and infinities as special cases. Separate	#
   9247# norms/denorms into ext/sgl/dbl precisions. Extended precision can be	#
   9248# emulated by simply flipping the sign bit. Sgl/dbl operands must be scaled	#
   9249# and an actual fneg performed to see if overflow/underflow would have	#
   9250# occurred. If so, return default underflow/overflow result. Else,	#
   9251# scale the result exponent and return result. FPSR gets set based on	#
   9252# the result value.							#
   9253#									#
   9254#########################################################################
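       #
       # For the extended-precision case below, negation reduces to toggling
       # bit 15 of the sign/exponent word; a minimal C sketch (illustration
       # only; not assembled):
       #
       #	static unsigned short fneg_ext_se(unsigned short se)
       #	{
       #		return se ^ 0x8000;	/* flip only the sign bit */
       #	}
       #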
   9255
   9256	global		fsneg
   9257fsneg:
   9258	andi.b		&0x30,%d0		# clear rnd prec
   9259	ori.b		&s_mode*0x10,%d0	# insert sgl precision
   9260	bra.b		fneg
   9261
   9262	global		fdneg
   9263fdneg:
   9264	andi.b		&0x30,%d0		# clear rnd prec
   9265	ori.b		&d_mode*0x10,%d0	# insert dbl prec
   9266
   9267	global		fneg
   9268fneg:
   9269	mov.l		%d0,L_SCR3(%a6)		# store rnd info
   9270	mov.b		STAG(%a6),%d1
   9271	bne.w		fneg_not_norm		# optimize on non-norm input
   9272
   9273#
   9274# NEGATE SIGN : norms and denorms ONLY!
   9275#
   9276fneg_norm:
   9277	andi.b		&0xc0,%d0		# is precision extended?
   9278	bne.w		fneg_not_ext		# no; go handle sgl or dbl
   9279
   9280#
   9281# precision selected is extended. so...we can not get an underflow
   9282# or overflow because of rounding to the correct precision. so...
   9283# skip the scaling and unscaling...
   9284#
   9285	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
   9286	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   9287	mov.w		SRC_EX(%a0),%d0
   9288	eori.w		&0x8000,%d0		# negate sign
   9289	bpl.b		fneg_norm_load		# sign is positive
   9290	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
   9291fneg_norm_load:
   9292	mov.w		%d0,FP_SCR0_EX(%a6)
   9293	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
   9294	rts
   9295
   9296#
   9297# for an extended precision DENORM, the UNFL exception bit is set
   9298# the accrued bit is NOT set in this instance(no inexactness!)
   9299#
   9300fneg_denorm:
   9301	andi.b		&0xc0,%d0		# is precision extended?
   9302	bne.b		fneg_not_ext		# no; go handle sgl or dbl
   9303
   9304	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
   9305
   9306	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
   9307	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   9308	mov.w		SRC_EX(%a0),%d0
   9309	eori.w		&0x8000,%d0		# negate sign
   9310	bpl.b		fneg_denorm_done	# sign is positive
   9311	mov.b		&neg_bmask,FPSR_CC(%a6)	# sign is negative; set 'N' ccode bit
   9312fneg_denorm_done:
   9313	mov.w		%d0,FP_SCR0_EX(%a6)
   9314	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
   9315
   9316	btst		&unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
   9317	bne.b		fneg_ext_unfl_ena	# yes
   9318	rts
   9319
   9320#
   9321# the input is an extended DENORM and underflow is enabled in the FPCR.
   9322# normalize the mantissa and add the bias of 0x6000 to the resulting negative
   9323# exponent and insert back into the operand.
   9324#
   9325fneg_ext_unfl_ena:
   9326	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
   9327	bsr.l		norm			# normalize result
   9328	neg.w		%d0			# new exponent = -(shft val)
   9329	addi.w		&0x6000,%d0		# add new bias to exponent
   9330	mov.w		FP_SCR0_EX(%a6),%d1	# fetch old sign,exp
   9331	andi.w		&0x8000,%d1		# keep old sign
   9332	andi.w		&0x7fff,%d0		# clear sign position
   9333	or.w		%d1,%d0			# concat old sign, new exponent
   9334	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
   9335	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
   9336	rts
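       #
       # The EXOP exponent computed above, as a C sketch (illustration only;
       # not assembled). "shift" is the normalization count returned by norm();
       # the normalized exponent is -shift, and the 0x6000 bias adjustment
       # brings it back into extended range for delivery in fp1.
       #
       #	static unsigned short exop_se_denorm(unsigned short old_se, int shift)
       #	{
       #		unsigned exp  = (0x6000 - shift) & 0x7fff;
       #		unsigned sign = old_se & 0x8000;
       #		return (unsigned short)(sign | exp);
       #	}
       #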
   9337
   9338#
   9339# operand is either single or double
   9340#
   9341fneg_not_ext:
   9342	cmpi.b		%d0,&s_mode*0x10	# separate sgl/dbl prec
   9343	bne.b		fneg_dbl
   9344
   9345#
   9346# operand is to be rounded to single precision
   9347#
   9348fneg_sgl:
   9349	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
   9350	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
   9351	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   9352	bsr.l		scale_to_zero_src	# calculate scale factor
   9353
   9354	cmpi.l		%d0,&0x3fff-0x3f80	# will move in underflow?
   9355	bge.w		fneg_sd_unfl		# yes; go handle underflow
   9356	cmpi.l		%d0,&0x3fff-0x407e	# will move in overflow?
   9357	beq.w		fneg_sd_may_ovfl	# maybe; go check
   9358	blt.w		fneg_sd_ovfl		# yes; go handle overflow
   9359
   9360#
   9361# operand will NOT overflow or underflow when moved in to the fp reg file
   9362#
   9363fneg_sd_normal:
   9364	fmov.l		&0x0,%fpsr		# clear FPSR
   9365	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   9366
   9367	fneg.x		FP_SCR0(%a6),%fp0	# perform negation
   9368
   9369	fmov.l		%fpsr,%d1		# save FPSR
   9370	fmov.l		&0x0,%fpcr		# clear FPCR
   9371
   9372	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   9373
   9374fneg_sd_normal_exit:
   9375	mov.l		%d2,-(%sp)		# save d2
   9376	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
   9377	mov.w		FP_SCR0_EX(%a6),%d1	# load sgn,exp
   9378	mov.w		%d1,%d2			# make a copy
   9379	andi.l		&0x7fff,%d1		# strip sign
   9380	sub.l		%d0,%d1			# add scale factor
   9381	andi.w		&0x8000,%d2		# keep old sign
   9382	or.w		%d1,%d2			# concat old sign,new exp
   9383	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
   9384	mov.l		(%sp)+,%d2		# restore d2
   9385	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
   9386	rts
   9387
   9388#
   9389# operand is to be rounded to double precision
   9390#
   9391fneg_dbl:
   9392	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
   9393	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
   9394	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   9395	bsr.l		scale_to_zero_src	# calculate scale factor
   9396
   9397	cmpi.l		%d0,&0x3fff-0x3c00	# will move in underflow?
   9398	bge.b		fneg_sd_unfl		# yes; go handle underflow
   9399	cmpi.l		%d0,&0x3fff-0x43fe	# will move in overflow?
   9400	beq.w		fneg_sd_may_ovfl	# maybe; go check
   9401	blt.w		fneg_sd_ovfl		# yes; go handle overflow
   9402	bra.w		fneg_sd_normal		# no; go handle normalized op
   9403
   9404#
   9405# operand WILL underflow when moved in to the fp register file
   9406#
   9407fneg_sd_unfl:
   9408	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
   9409
   9410	eori.b		&0x80,FP_SCR0_EX(%a6)	# negate sign
   9411	bpl.b		fneg_sd_unfl_tst
   9412	bset		&neg_bit,FPSR_CC(%a6)	# set 'N' ccode bit
   9413
   9414# if underflow or inexact is enabled, go calculate EXOP first.
   9415fneg_sd_unfl_tst:
   9416	mov.b		FPCR_ENABLE(%a6),%d1
   9417	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
   9418	bne.b		fneg_sd_unfl_ena	# yes
   9419
   9420fneg_sd_unfl_dis:
   9421	lea		FP_SCR0(%a6),%a0	# pass: result addr
   9422	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
   9423	bsr.l		unf_res			# calculate default result
   9424	or.b		%d0,FPSR_CC(%a6)	# unf_res may have set 'Z'
   9425	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
   9426	rts
   9427
   9428#
   9429# operand will underflow AND underflow is enabled.
   9430# Therefore, we must return the result rounded to extended precision.
   9431#
   9432fneg_sd_unfl_ena:
   9433	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
   9434	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
   9435	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent
   9436
   9437	mov.l		%d2,-(%sp)		# save d2
   9438	mov.l		%d1,%d2			# make a copy
   9439	andi.l		&0x7fff,%d1		# strip sign
   9440	andi.w		&0x8000,%d2		# keep old sign
   9441	sub.l		%d0,%d1			# subtract scale factor
   9442	addi.l		&0x6000,%d1		# add new bias
   9443	andi.w		&0x7fff,%d1
   9444	or.w		%d2,%d1			# concat new sign,new exp
   9445	mov.w		%d1,FP_SCR1_EX(%a6)	# insert new exp
   9446	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
   9447	mov.l		(%sp)+,%d2		# restore d2
   9448	bra.b		fneg_sd_unfl_dis
   9449
   9450#
   9451# operand WILL overflow.
   9452#
   9453fneg_sd_ovfl:
   9454	fmov.l		&0x0,%fpsr		# clear FPSR
   9455	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   9456
   9457	fneg.x		FP_SCR0(%a6),%fp0	# perform negation
   9458
   9459	fmov.l		&0x0,%fpcr		# clear FPCR
   9460	fmov.l		%fpsr,%d1		# save FPSR
   9461
   9462	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   9463
   9464fneg_sd_ovfl_tst:
   9465	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
   9466
   9467	mov.b		FPCR_ENABLE(%a6),%d1
   9468	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
   9469	bne.b		fneg_sd_ovfl_ena	# yes
   9470
   9471#
   9472# OVFL is not enabled; therefore, we must create the default result by
   9473# calling ovf_res().
   9474#
   9475fneg_sd_ovfl_dis:
   9476	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
   9477	sne		%d1			# set sign param accordingly
   9478	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
   9479	bsr.l		ovf_res			# calculate default result
   9480	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
   9481	fmovm.x		(%a0),&0x80		# return default result in fp0
   9482	rts
   9483
   9484#
   9485# OVFL is enabled.
   9486# the INEX2 bit has already been updated by the round to the correct precision.
   9487# now, round to extended(and don't alter the FPSR).
   9488#
   9489fneg_sd_ovfl_ena:
   9490	mov.l		%d2,-(%sp)		# save d2
   9491	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
   9492	mov.l		%d1,%d2			# make a copy
   9493	andi.l		&0x7fff,%d1		# strip sign
   9494	andi.w		&0x8000,%d2		# keep old sign
   9495	sub.l		%d0,%d1			# add scale factor
   9496	subi.l		&0x6000,%d1		# subtract bias
   9497	andi.w		&0x7fff,%d1
   9498	or.w		%d2,%d1			# concat sign,exp
   9499	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
   9500	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
   9501	mov.l		(%sp)+,%d2		# restore d2
   9502	bra.b		fneg_sd_ovfl_dis
   9503
   9504#
   9505# the move in MAY overflow. so...
   9506#
   9507fneg_sd_may_ovfl:
   9508	fmov.l		&0x0,%fpsr		# clear FPSR
   9509	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   9510
   9511	fneg.x		FP_SCR0(%a6),%fp0	# perform negation
   9512
   9513	fmov.l		%fpsr,%d1		# save status
   9514	fmov.l		&0x0,%fpcr		# clear FPCR
   9515
   9516	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   9517
   9518	fabs.x		%fp0,%fp1		# make a copy of result
   9519	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
   9520	fbge.w		fneg_sd_ovfl_tst	# yes; overflow has occurred
   9521
   9522# no, it didn't overflow; we have correct result
   9523	bra.w		fneg_sd_normal_exit
   9524
   9525##########################################################################
   9526
   9527#
   9528# input is not normalized; what is it?
   9529#
   9530fneg_not_norm:
   9531	cmpi.b		%d1,&DENORM		# weed out DENORM
   9532	beq.w		fneg_denorm
   9533	cmpi.b		%d1,&SNAN		# weed out SNAN
   9534	beq.l		res_snan_1op
   9535	cmpi.b		%d1,&QNAN		# weed out QNAN
   9536	beq.l		res_qnan_1op
   9537
   9538#
   9539# do the fneg; at this point, only possible ops are ZERO and INF.
   9540# use fneg to determine ccodes.
   9541# prec:mode should be zero at this point but it won't affect answer anyways.
   9542#
   9543	fneg.x		SRC_EX(%a0),%fp0	# do fneg
   9544	fmov.l		%fpsr,%d0
   9545	rol.l		&0x8,%d0		# put ccodes in lo byte
   9546	mov.b		%d0,FPSR_CC(%a6)	# insert correct ccodes
   9547	rts
   9548
   9549#########################################################################
   9550# XDEF ****************************************************************	#
   9551#	ftst(): emulates the ftst instruction				#
   9552#									#
   9553# XREF ****************************************************************	#
   9554#	res_{s,q}nan_1op() - set NAN result for monadic instruction	#
   9555#									#
   9556# INPUT ***************************************************************	#
   9557#	a0 = pointer to extended precision source operand		#
   9558#									#
   9559# OUTPUT **************************************************************	#
   9560#	none								#
   9561#									#
   9562# ALGORITHM ***********************************************************	#
   9563#	Check the source operand tag (STAG) and set the FPSR condition	#
   9564# codes according to the operand type and sign.				#
   9565#									#
   9566#########################################################################
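       #
       # A compact C sketch of the condition-code mapping implemented below
       # (illustration only; not assembled), assuming the usual FPSR ccode
       # byte layout (N=0x08, Z=0x04, I=0x02); NANs are handed off to
       # res_{s,q}nan_1op() instead.
       #
       #	static unsigned char ftst_cc(int is_zero, int is_inf, int is_neg)
       #	{
       #		unsigned char cc = 0;
       #		if (is_zero) cc |= 0x04;	/* 'Z' */
       #		if (is_inf)  cc |= 0x02;	/* 'I' */
       #		if (is_neg)  cc |= 0x08;	/* 'N' */
       #		return cc;
       #	}
       #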
   9567
   9568	global		ftst
   9569ftst:
   9570	mov.b		STAG(%a6),%d1
   9571	bne.b		ftst_not_norm		# optimize on non-norm input
   9572
   9573#
   9574# Norm:
   9575#
   9576ftst_norm:
   9577	tst.b		SRC_EX(%a0)		# is operand negative?
   9578	bmi.b		ftst_norm_m		# yes
   9579	rts
   9580ftst_norm_m:
   9581	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
   9582	rts
   9583
   9584#
   9585# input is not normalized; what is it?
   9586#
   9587ftst_not_norm:
   9588	cmpi.b		%d1,&ZERO		# weed out ZERO
   9589	beq.b		ftst_zero
   9590	cmpi.b		%d1,&INF		# weed out INF
   9591	beq.b		ftst_inf
   9592	cmpi.b		%d1,&SNAN		# weed out SNAN
   9593	beq.l		res_snan_1op
   9594	cmpi.b		%d1,&QNAN		# weed out QNAN
   9595	beq.l		res_qnan_1op
   9596
   9597#
   9598# Denorm:
   9599#
   9600ftst_denorm:
   9601	tst.b		SRC_EX(%a0)		# is operand negative?
   9602	bmi.b		ftst_denorm_m		# yes
   9603	rts
   9604ftst_denorm_m:
   9605	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
   9606	rts
   9607
   9608#
   9609# Infinity:
   9610#
   9611ftst_inf:
   9612	tst.b		SRC_EX(%a0)		# is operand negative?
   9613	bmi.b		ftst_inf_m		# yes
   9614ftst_inf_p:
   9615	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
   9616	rts
   9617ftst_inf_m:
   9618	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'I','N' ccode bits
   9619	rts
   9620
   9621#
   9622# Zero:
   9623#
   9624ftst_zero:
   9625	tst.b		SRC_EX(%a0)		# is operand negative?
   9626	bmi.b		ftst_zero_m		# yes
   9627ftst_zero_p:
   9628	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
   9629	rts
   9630ftst_zero_m:
   9631	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6)	# set 'Z','N' ccode bits
   9632	rts
   9633
   9634#########################################################################
   9635# XDEF ****************************************************************	#
   9636#	fint(): emulates the fint instruction				#
   9637#									#
   9638# XREF ****************************************************************	#
   9639#	res_{s,q}nan_1op() - set NAN result for monadic operation	#
   9640#									#
   9641# INPUT ***************************************************************	#
   9642#	a0 = pointer to extended precision source operand		#
   9643#	d0 = round precision/mode					#
   9644#									#
   9645# OUTPUT **************************************************************	#
   9646#	fp0 = result							#
   9647#									#
   9648# ALGORITHM ***********************************************************	#
   9649#	Separate according to operand type. Unnorms don't pass through	#
   9650# here. For norms, load the rounding mode/prec, execute a "fint", then	#
   9651# store the resulting FPSR bits.					#
   9652#	For denorms, force the j-bit to a one and do the same as for	#
   9653# norms. Denorms are so low that the answer will either be a zero or a	#
   9654# one.									#
   9655#	For zeroes/infs/NANs, return the same while setting the FPSR	#
   9656# as appropriate.							#
   9657#									#
   9658#########################################################################
   9659
   9660	global		fint
   9661fint:
   9662	mov.b		STAG(%a6),%d1
   9663	bne.b		fint_not_norm		# optimize on non-norm input
   9664
   9665#
   9666# Norm:
   9667#
   9668fint_norm:
   9669	andi.b		&0x30,%d0		# set prec = ext
   9670
   9671	fmov.l		%d0,%fpcr		# set FPCR
   9672	fmov.l		&0x0,%fpsr		# clear FPSR
   9673
   9674	fint.x		SRC(%a0),%fp0		# execute fint
   9675
   9676	fmov.l		&0x0,%fpcr		# clear FPCR
   9677	fmov.l		%fpsr,%d0		# save FPSR
   9678	or.l		%d0,USER_FPSR(%a6)	# set exception bits
   9679
   9680	rts
   9681
   9682#
   9683# input is not normalized; what is it?
   9684#
   9685fint_not_norm:
   9686	cmpi.b		%d1,&ZERO		# weed out ZERO
   9687	beq.b		fint_zero
   9688	cmpi.b		%d1,&INF		# weed out INF
   9689	beq.b		fint_inf
   9690	cmpi.b		%d1,&DENORM		# weed out DENORM
   9691	beq.b		fint_denorm
   9692	cmpi.b		%d1,&SNAN		# weed out SNAN
   9693	beq.l		res_snan_1op
   9694	bra.l		res_qnan_1op		# weed out QNAN
   9695
   9696#
   9697# Denorm:
   9698#
   9699# for DENORMs, the result will be either (+/-)ZERO or (+/-)1.
   9700# also, the INEX2 and AINEX exception bits will be set.
   9701# so, we could either set these manually or force the DENORM
   9702# to a very small NORM and ship it to the NORM routine.
   9703# I do the latter.
   9704#
   9705fint_denorm:
   9706	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp
   9707	mov.b		&0x80,FP_SCR0_HI(%a6)	# force DENORM ==> small NORM
   9708	lea		FP_SCR0(%a6),%a0
   9709	bra.b		fint_norm
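       #
       # Note on the trick above (hedged): only the sign/exponent word of the
       # DENORM is copied, and the top mantissa byte is then forced to 0x80, so
       # the operand handed to fint_norm is a valid norm on the order of
       # 2^-16383 with the original sign; whatever sits in the lower mantissa
       # bytes of FP_SCR0 is irrelevant, since fint of such a tiny value is
       # (+/-)0 or (+/-)1 for every rounding mode and raises INEX2/AINEX either
       # way. fintrz_denorm below relies on the same idea.
       #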
   9710
   9711#
   9712# Zero:
   9713#
   9714fint_zero:
   9715	tst.b		SRC_EX(%a0)		# is ZERO negative?
   9716	bmi.b		fint_zero_m		# yes
   9717fint_zero_p:
   9718	fmov.s		&0x00000000,%fp0	# return +ZERO in fp0
   9719	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
   9720	rts
   9721fint_zero_m:
   9722	fmov.s		&0x80000000,%fp0	# return -ZERO in fp0
   9723	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
   9724	rts
   9725
   9726#
   9727# Infinity:
   9728#
   9729fint_inf:
   9730	fmovm.x		SRC(%a0),&0x80		# return result in fp0
   9731	tst.b		SRC_EX(%a0)		# is INF negative?
   9732	bmi.b		fint_inf_m		# yes
   9733fint_inf_p:
   9734	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
   9735	rts
   9736fint_inf_m:
   9737	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
   9738	rts
   9739
   9740#########################################################################
   9741# XDEF ****************************************************************	#
   9742#	fintrz(): emulates the fintrz instruction			#
   9743#									#
   9744# XREF ****************************************************************	#
   9745#	res_{s,q}nan_1op() - set NAN result for monadic operation	#
   9746#									#
   9747# INPUT ***************************************************************	#
   9748#	a0 = pointer to extended precision source operand		#
   9749#	d0 = round precision/mode					#
   9750#									#
   9751# OUTPUT **************************************************************	#
   9752#	fp0 = result							#
   9753#									#
   9754# ALGORITHM ***********************************************************	#
   9755#	Separate according to operand type. Unnorms don't pass through	#
   9756# here. For norms, load the rounding mode/prec, execute a "fintrz",	#
   9757# then store the resulting FPSR bits.					#
   9758#	For denorms, force the j-bit to a one and do the same as for	#
   9759# norms. Denorms are so low that the answer will either be a zero or a	#
   9760# one.									#
   9761#	For zeroes/infs/NANs, return the same while setting the FPSR	#
   9762# as appropriate.							#
   9763#									#
   9764#########################################################################
   9765
   9766	global		fintrz
   9767fintrz:
   9768	mov.b		STAG(%a6),%d1
   9769	bne.b		fintrz_not_norm		# optimize on non-norm input
   9770
   9771#
   9772# Norm:
   9773#
   9774fintrz_norm:
   9775	fmov.l		&0x0,%fpsr		# clear FPSR
   9776
   9777	fintrz.x	SRC(%a0),%fp0		# execute fintrz
   9778
   9779	fmov.l		%fpsr,%d0		# save FPSR
   9780	or.l		%d0,USER_FPSR(%a6)	# set exception bits
   9781
   9782	rts
   9783
   9784#
   9785# input is not normalized; what is it?
   9786#
   9787fintrz_not_norm:
   9788	cmpi.b		%d1,&ZERO		# weed out ZERO
   9789	beq.b		fintrz_zero
   9790	cmpi.b		%d1,&INF		# weed out INF
   9791	beq.b		fintrz_inf
   9792	cmpi.b		%d1,&DENORM		# weed out DENORM
   9793	beq.b		fintrz_denorm
   9794	cmpi.b		%d1,&SNAN		# weed out SNAN
   9795	beq.l		res_snan_1op
   9796	bra.l		res_qnan_1op		# weed out QNAN
   9797
   9798#
   9799# Denorm:
   9800#
   9801# for DENORMs, the result will be (+/-)ZERO.
   9802# also, the INEX2 and AINEX exception bits will be set.
   9803# so, we could either set these manually or force the DENORM
   9804# to a very small NORM and ship it to the NORM routine.
   9805# I do the latter.
   9806#
   9807fintrz_denorm:
   9808	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp
   9809	mov.b		&0x80,FP_SCR0_HI(%a6)	# force DENORM ==> small NORM
   9810	lea		FP_SCR0(%a6),%a0
   9811	bra.b		fintrz_norm
   9812
   9813#
   9814# Zero:
   9815#
   9816fintrz_zero:
   9817	tst.b		SRC_EX(%a0)		# is ZERO negative?
   9818	bmi.b		fintrz_zero_m		# yes
   9819fintrz_zero_p:
   9820	fmov.s		&0x00000000,%fp0	# return +ZERO in fp0
   9821	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
   9822	rts
   9823fintrz_zero_m:
   9824	fmov.s		&0x80000000,%fp0	# return -ZERO in fp0
   9825	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
   9826	rts
   9827
   9828#
   9829# Infinity:
   9830#
   9831fintrz_inf:
   9832	fmovm.x		SRC(%a0),&0x80		# return result in fp0
   9833	tst.b		SRC_EX(%a0)		# is INF negative?
   9834	bmi.b		fintrz_inf_m		# yes
   9835fintrz_inf_p:
   9836	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
   9837	rts
   9838fintrz_inf_m:
   9839	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
   9840	rts
   9841
   9842#########################################################################
   9843# XDEF ****************************************************************	#
   9844#	fabs():  emulates the fabs instruction				#
   9845#	fsabs(): emulates the fsabs instruction				#
   9846#	fdabs(): emulates the fdabs instruction				#
   9847#									#
   9848# XREF **************************************************************** #
   9849#	norm() - normalize denorm mantissa to provide EXOP		#
   9850#	scale_to_zero_src() - make exponent = 0; get scale factor	#
   9851#	unf_res() - calculate underflow result				#
   9852#	ovf_res() - calculate overflow result				#
   9853#	res_{s,q}nan_1op() - set NAN result for monadic operation	#
   9854#									#
   9855# INPUT *************************************************************** #
   9856#	a0 = pointer to extended precision source operand		#
   9857#	d0 = rnd precision/mode						#
   9858#									#
   9859# OUTPUT ************************************************************** #
   9860#	fp0 = result							#
   9861#	fp1 = EXOP (if exception occurred)				#
   9862#									#
   9863# ALGORITHM ***********************************************************	#
   9864#	Handle NANs, infinities, and zeroes as special cases. Divide	#
   9865# norms into extended, single, and double precision.			#
   9866#	Simply clear sign for extended precision norm. Ext prec denorm	#
   9867# gets an EXOP created for it since it's an underflow.			#
   9868#	Double and single precision can overflow and underflow. First,	#
   9869# scale the operand such that the exponent is zero. Perform an "fabs"	#
   9870# using the correct rnd mode/prec. Check to see if the original		#
   9871# exponent would take an exception. If so, use unf_res() or ovf_res()	#
   9872# to calculate the default result. Also, create the EXOP for the	#
   9873# exceptional case. If no exception should occur, insert the correct	#
   9874# result exponent and return.						#
   9875#	Unnorms don't pass through here.				#
   9876#									#
   9877#########################################################################
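       #
       # A rough C sketch of the sgl/dbl "move in" screening used by fabs (the
       # fneg paths above use the same constants) once the exponent has been
       # scaled to zero (illustration only; not assembled). The constants
       # mirror the cmpi values below; an exact hit on the overflow constant
       # goes to the "may overflow" re-check.
       #
       #	static int move_in_class(int scale, int dbl)
       #	{
       #		int unfl = dbl ? 0x3fff - 0x3c00 : 0x3fff - 0x3f80;
       #		int ovfl = dbl ? 0x3fff - 0x43fe : 0x3fff - 0x407e;
       #
       #		if (scale >= unfl) return  1;	/* will underflow            */
       #		if (scale == ovfl) return -2;	/* may overflow; check later */
       #		if (scale <  ovfl) return -1;	/* will overflow             */
       #		return 0;			/* normal                    */
       #	}
       #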
   9878
   9879	global		fsabs
   9880fsabs:
   9881	andi.b		&0x30,%d0		# clear rnd prec
   9882	ori.b		&s_mode*0x10,%d0	# insert sgl precision
   9883	bra.b		fabs
   9884
   9885	global		fdabs
   9886fdabs:
   9887	andi.b		&0x30,%d0		# clear rnd prec
   9888	ori.b		&d_mode*0x10,%d0	# insert dbl precision
   9889
   9890	global		fabs
   9891fabs:
   9892	mov.l		%d0,L_SCR3(%a6)		# store rnd info
   9893	mov.b		STAG(%a6),%d1
   9894	bne.w		fabs_not_norm		# optimize on non-norm input
   9895
   9896#
   9897# ABSOLUTE VALUE: norms and denorms ONLY!
   9898#
   9899fabs_norm:
   9900	andi.b		&0xc0,%d0		# is precision extended?
   9901	bne.b		fabs_not_ext		# no; go handle sgl or dbl
   9902
   9903#
   9904# precision selected is extended. so...we can not get an underflow
   9905# or overflow because of rounding to the correct precision. so...
   9906# skip the scaling and unscaling...
   9907#
   9908	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
   9909	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   9910	mov.w		SRC_EX(%a0),%d1
   9911	bclr		&15,%d1			# force absolute value
   9912	mov.w		%d1,FP_SCR0_EX(%a6)	# insert exponent
   9913	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
   9914	rts
   9915
   9916#
   9917# for an extended precision DENORM, the UNFL exception bit is set
   9918# the accrued bit is NOT set in this instance(no inexactness!)
   9919#
   9920fabs_denorm:
   9921	andi.b		&0xc0,%d0		# is precision extended?
   9922	bne.b		fabs_not_ext		# no
   9923
   9924	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
   9925
   9926	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
   9927	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   9928	mov.w		SRC_EX(%a0),%d0
   9929	bclr		&15,%d0			# clear sign
   9930	mov.w		%d0,FP_SCR0_EX(%a6)	# insert exponent
   9931
   9932	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
   9933
   9934	btst		&unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
   9935	bne.b		fabs_ext_unfl_ena
   9936	rts
   9937
   9938#
   9939# the input is an extended DENORM and underflow is enabled in the FPCR.
   9940# normalize the mantissa and add the bias of 0x6000 to the resulting negative
   9941# exponent and insert back into the operand.
   9942#
   9943fabs_ext_unfl_ena:
   9944	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
   9945	bsr.l		norm			# normalize result
   9946	neg.w		%d0			# new exponent = -(shft val)
   9947	addi.w		&0x6000,%d0		# add new bias to exponent
   9948	mov.w		FP_SCR0_EX(%a6),%d1	# fetch old sign,exp
   9949	andi.w		&0x8000,%d1		# keep old sign
   9950	andi.w		&0x7fff,%d0		# clear sign position
   9951	or.w		%d1,%d0			# concat old sign, new exponent
   9952	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
   9953	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
   9954	rts
   9955
   9956#
   9957# operand is either single or double
   9958#
   9959fabs_not_ext:
   9960	cmpi.b		%d0,&s_mode*0x10	# separate sgl/dbl prec
   9961	bne.b		fabs_dbl
   9962
   9963#
   9964# operand is to be rounded to single precision
   9965#
   9966fabs_sgl:
   9967	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
   9968	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
   9969	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
   9970	bsr.l		scale_to_zero_src	# calculate scale factor
   9971
   9972	cmpi.l		%d0,&0x3fff-0x3f80	# will move in underflow?
   9973	bge.w		fabs_sd_unfl		# yes; go handle underflow
   9974	cmpi.l		%d0,&0x3fff-0x407e	# will move in overflow?
   9975	beq.w		fabs_sd_may_ovfl	# maybe; go check
   9976	blt.w		fabs_sd_ovfl		# yes; go handle overflow
   9977
   9978#
   9979# operand will NOT overflow or underflow when moved in to the fp reg file
   9980#
   9981fabs_sd_normal:
   9982	fmov.l		&0x0,%fpsr		# clear FPSR
   9983	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
   9984
   9985	fabs.x		FP_SCR0(%a6),%fp0	# perform absolute
   9986
   9987	fmov.l		%fpsr,%d1		# save FPSR
   9988	fmov.l		&0x0,%fpcr		# clear FPCR
   9989
   9990	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
   9991
   9992fabs_sd_normal_exit:
   9993	mov.l		%d2,-(%sp)		# save d2
   9994	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
   9995	mov.w		FP_SCR0_EX(%a6),%d1	# load sgn,exp
   9996	mov.l		%d1,%d2			# make a copy
   9997	andi.l		&0x7fff,%d1		# strip sign
   9998	sub.l		%d0,%d1			# add scale factor
   9999	andi.w		&0x8000,%d2		# keep old sign
  10000	or.w		%d1,%d2			# concat old sign,new exp
  10001	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
  10002	mov.l		(%sp)+,%d2		# restore d2
  10003	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
  10004	rts
  10005
  10006#
  10007# operand is to be rounded to double precision
  10008#
  10009fabs_dbl:
  10010	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
  10011	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
  10012	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
  10013	bsr.l		scale_to_zero_src	# calculate scale factor
  10014
  10015	cmpi.l		%d0,&0x3fff-0x3c00	# will move in underflow?
  10016	bge.b		fabs_sd_unfl		# yes; go handle underflow
  10017	cmpi.l		%d0,&0x3fff-0x43fe	# will move in overflow?
  10018	beq.w		fabs_sd_may_ovfl	# maybe; go check
  10019	blt.w		fabs_sd_ovfl		# yes; go handle overflow
  10020	bra.w		fabs_sd_normal		# no; go handle normalized op
  10021
  10022#
  10023# operand WILL underflow when moved in to the fp register file
  10024#
  10025fabs_sd_unfl:
  10026	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
  10027
  10028	bclr		&0x7,FP_SCR0_EX(%a6)	# force absolute value
  10029
  10030# if underflow or inexact is enabled, go calculate EXOP first.
  10031	mov.b		FPCR_ENABLE(%a6),%d1
  10032	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
  10033	bne.b		fabs_sd_unfl_ena	# yes
  10034
  10035fabs_sd_unfl_dis:
  10036	lea		FP_SCR0(%a6),%a0	# pass: result addr
  10037	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
  10038	bsr.l		unf_res			# calculate default result
  10039	or.b		%d0,FPSR_CC(%a6)	# set possible 'Z' ccode
  10040	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
  10041	rts
  10042
  10043#
  10044# operand will underflow AND underflow is enabled.
  10045# Therefore, we must return the result rounded to extended precision.
  10046#
  10047fabs_sd_unfl_ena:
  10048	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
  10049	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
  10050	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent
  10051
  10052	mov.l		%d2,-(%sp)		# save d2
  10053	mov.l		%d1,%d2			# make a copy
  10054	andi.l		&0x7fff,%d1		# strip sign
  10055	andi.w		&0x8000,%d2		# keep old sign
  10056	sub.l		%d0,%d1			# subtract scale factor
  10057	addi.l		&0x6000,%d1		# add new bias
  10058	andi.w		&0x7fff,%d1
  10059	or.w		%d2,%d1			# concat new sign,new exp
  10060	mov.w		%d1,FP_SCR1_EX(%a6)	# insert new exp
  10061	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
  10062	mov.l		(%sp)+,%d2		# restore d2
  10063	bra.b		fabs_sd_unfl_dis
  10064
  10065#
  10066# operand WILL overflow.
  10067#
  10068fabs_sd_ovfl:
  10069	fmov.l		&0x0,%fpsr		# clear FPSR
  10070	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
  10071
  10072	fabs.x		FP_SCR0(%a6),%fp0	# perform absolute
  10073
  10074	fmov.l		&0x0,%fpcr		# clear FPCR
  10075	fmov.l		%fpsr,%d1		# save FPSR
  10076
  10077	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
  10078
  10079fabs_sd_ovfl_tst:
  10080	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
  10081
  10082	mov.b		FPCR_ENABLE(%a6),%d1
  10083	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
  10084	bne.b		fabs_sd_ovfl_ena	# yes
  10085
  10086#
  10087# OVFL is not enabled; therefore, we must create the default result by
  10088# calling ovf_res().
  10089#
  10090fabs_sd_ovfl_dis:
  10091	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
  10092	sne		%d1			# set sign param accordingly
  10093	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
  10094	bsr.l		ovf_res			# calculate default result
  10095	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
  10096	fmovm.x		(%a0),&0x80		# return default result in fp0
  10097	rts
  10098
  10099#
  10100# OVFL is enabled.
  10101# the INEX2 bit has already been updated by the round to the correct precision.
  10102# now, round to extended(and don't alter the FPSR).
  10103#
  10104fabs_sd_ovfl_ena:
  10105	mov.l		%d2,-(%sp)		# save d2
  10106	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
  10107	mov.l		%d1,%d2			# make a copy
  10108	andi.l		&0x7fff,%d1		# strip sign
  10109	andi.w		&0x8000,%d2		# keep old sign
  10110	sub.l		%d0,%d1			# add scale factor
  10111	subi.l		&0x6000,%d1		# subtract bias
  10112	andi.w		&0x7fff,%d1
  10113	or.w		%d2,%d1			# concat sign,exp
  10114	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
  10115	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
  10116	mov.l		(%sp)+,%d2		# restore d2
  10117	bra.b		fabs_sd_ovfl_dis
  10118
  10119#
  10120# the move in MAY overflow. so...
  10121#
  10122fabs_sd_may_ovfl:
  10123	fmov.l		&0x0,%fpsr		# clear FPSR
  10124	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
  10125
  10126	fabs.x		FP_SCR0(%a6),%fp0	# perform absolute
  10127
  10128	fmov.l		%fpsr,%d1		# save status
  10129	fmov.l		&0x0,%fpcr		# clear FPCR
  10130
  10131	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
  10132
  10133	fabs.x		%fp0,%fp1		# make a copy of result
  10134	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
  10135	fbge.w		fabs_sd_ovfl_tst	# yes; overflow has occurred
  10136
  10137# no, it didn't overflow; we have correct result
  10138	bra.w		fabs_sd_normal_exit
  10139
  10140##########################################################################
  10141
  10142#
  10143# input is not normalized; what is it?
  10144#
  10145fabs_not_norm:
  10146	cmpi.b		%d1,&DENORM		# weed out DENORM
  10147	beq.w		fabs_denorm
  10148	cmpi.b		%d1,&SNAN		# weed out SNAN
  10149	beq.l		res_snan_1op
  10150	cmpi.b		%d1,&QNAN		# weed out QNAN
  10151	beq.l		res_qnan_1op
  10152
  10153	fabs.x		SRC(%a0),%fp0		# force absolute value
  10154
  10155	cmpi.b		%d1,&INF		# weed out INF
  10156	beq.b		fabs_inf
  10157fabs_zero:
  10158	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
  10159	rts
  10160fabs_inf:
  10161	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
  10162	rts
  10163
  10164#########################################################################
  10165# XDEF ****************************************************************	#
  10166#	fcmp(): fp compare op routine					#
  10167#									#
  10168# XREF ****************************************************************	#
  10169#	res_qnan() - return QNAN result					#
  10170#	res_snan() - return SNAN result					#
  10171#									#
  10172# INPUT ***************************************************************	#
  10173#	a0 = pointer to extended precision source operand		#
  10174#	a1 = pointer to extended precision destination operand		#
  10175#	d0 = round prec/mode						#
  10176#									#
  10177# OUTPUT ************************************************************** #
  10178#	None								#
  10179#									#
  10180# ALGORITHM ***********************************************************	#
  10181#	Handle NANs and denorms as special cases. For everything else,	#
  10182# just use the actual fcmp instruction to produce the correct condition	#
  10183# codes.								#
  10184#									#
  10185#########################################################################
  10186
  10187	global		fcmp
  10188fcmp:
  10189	clr.w		%d1
  10190	mov.b		DTAG(%a6),%d1
  10191	lsl.b		&0x3,%d1
  10192	or.b		STAG(%a6),%d1
  10193	bne.b		fcmp_not_norm		# optimize on non-norm input
  10194
  10195#
  10196# COMPARE FP OPs : NORMs, ZEROs, INFs, and "corrected" DENORMs
  10197#
  10198fcmp_norm:
  10199	fmovm.x		DST(%a1),&0x80		# load dst op
  10200
  10201	fcmp.x		%fp0,SRC(%a0)		# do compare
  10202
  10203	fmov.l		%fpsr,%d0		# save FPSR
  10204	rol.l		&0x8,%d0		# extract ccode bits
  10205	mov.b		%d0,FPSR_CC(%a6)	# set ccode bits(no exc bits are set)
  10206
  10207	rts
  10208
  10209#
  10210# fcmp: inputs are not both normalized; what are they?
  10211#
  10212fcmp_not_norm:
  10213	mov.w		(tbl_fcmp_op.b,%pc,%d1.w*2),%d1
  10214	jmp		(tbl_fcmp_op.b,%pc,%d1.w*1)
  10215
  10216	swbeg		&48
  10217tbl_fcmp_op:
  10218	short		fcmp_norm	- tbl_fcmp_op # NORM - NORM
  10219	short		fcmp_norm	- tbl_fcmp_op # NORM - ZERO
  10220	short		fcmp_norm	- tbl_fcmp_op # NORM - INF
  10221	short		fcmp_res_qnan	- tbl_fcmp_op # NORM - QNAN
  10222	short		fcmp_nrm_dnrm	- tbl_fcmp_op # NORM - DENORM
  10223	short		fcmp_res_snan	- tbl_fcmp_op # NORM - SNAN
  10224	short		tbl_fcmp_op	- tbl_fcmp_op #
  10225	short		tbl_fcmp_op	- tbl_fcmp_op #
  10226
  10227	short		fcmp_norm	- tbl_fcmp_op # ZERO - NORM
  10228	short		fcmp_norm	- tbl_fcmp_op # ZERO - ZERO
  10229	short		fcmp_norm	- tbl_fcmp_op # ZERO - INF
  10230	short		fcmp_res_qnan	- tbl_fcmp_op # ZERO - QNAN
  10231	short		fcmp_dnrm_s	- tbl_fcmp_op # ZERO - DENORM
  10232	short		fcmp_res_snan	- tbl_fcmp_op # ZERO - SNAN
  10233	short		tbl_fcmp_op	- tbl_fcmp_op #
  10234	short		tbl_fcmp_op	- tbl_fcmp_op #
  10235
  10236	short		fcmp_norm	- tbl_fcmp_op # INF - NORM
  10237	short		fcmp_norm	- tbl_fcmp_op # INF - ZERO
  10238	short		fcmp_norm	- tbl_fcmp_op # INF - INF
  10239	short		fcmp_res_qnan	- tbl_fcmp_op # INF - QNAN
  10240	short		fcmp_dnrm_s	- tbl_fcmp_op # INF - DENORM
  10241	short		fcmp_res_snan	- tbl_fcmp_op # INF - SNAN
  10242	short		tbl_fcmp_op	- tbl_fcmp_op #
  10243	short		tbl_fcmp_op	- tbl_fcmp_op #
  10244
  10245	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - NORM
  10246	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - ZERO
  10247	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - INF
  10248	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - QNAN
  10249	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - DENORM
  10250	short		fcmp_res_snan	- tbl_fcmp_op # QNAN - SNAN
  10251	short		tbl_fcmp_op	- tbl_fcmp_op #
  10252	short		tbl_fcmp_op	- tbl_fcmp_op #
  10253
  10254	short		fcmp_dnrm_nrm	- tbl_fcmp_op # DENORM - NORM
  10255	short		fcmp_dnrm_d	- tbl_fcmp_op # DENORM - ZERO
  10256	short		fcmp_dnrm_d	- tbl_fcmp_op # DENORM - INF
  10257	short		fcmp_res_qnan	- tbl_fcmp_op # DENORM - QNAN
  10258	short		fcmp_dnrm_sd	- tbl_fcmp_op # DENORM - DENORM
  10259	short		fcmp_res_snan	- tbl_fcmp_op # DENORM - SNAN
  10260	short		tbl_fcmp_op	- tbl_fcmp_op #
  10261	short		tbl_fcmp_op	- tbl_fcmp_op #
  10262
  10263	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - NORM
  10264	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - ZERO
  10265	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - INF
  10266	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - QNAN
  10267	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - DENORM
  10268	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - SNAN
  10269	short		tbl_fcmp_op	- tbl_fcmp_op #
  10270	short		tbl_fcmp_op	- tbl_fcmp_op #
  10271
  10272# unlike all other functions for QNAN and SNAN, fcmp does NOT set the
  10273# 'N' bit for a negative QNAN or SNAN input so we must squelch it here.
  10274fcmp_res_qnan:
  10275	bsr.l		res_qnan
  10276	andi.b		&0xf7,FPSR_CC(%a6)
  10277	rts
  10278fcmp_res_snan:
  10279	bsr.l		res_snan
  10280	andi.b		&0xf7,FPSR_CC(%a6)
  10281	rts
  10282
  10283#
  10284# DENORMs are a little more difficult.
  10285# If you have 2 DENORMs, then you can just force the j-bit to a one
  10286# and use the fcmp_norm routine.
  10287# If you have a DENORM and an INF or ZERO, just force the DENORM's j-bit to a one
  10288# and use the fcmp_norm routine.
  10289# If you have a DENORM and a NORM with opposite signs, then use fcmp_norm, also.
  10290# But with a DENORM and a NORM of the same sign, the neg bit is set if the
  10291# (1) signs are (+) and the DENORM is the dst or
  10292# (2) signs are (-) and the DENORM is the src
  10293#
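       #
       # The same-sign rule above as a small C sketch (illustration only; not
       # assembled). A DENORM always has the smaller magnitude, so once the
       # signs are known to match, the 'N' bit depends only on which operand
       # is the DENORM:
       #
       #	static int fcmp_n_bit(int denorm_is_dst, int both_negative)
       #	{
       #		if (!both_negative)
       #			return denorm_is_dst;	/* (+): dst < src iff dst is the DENORM */
       #		return !denorm_is_dst;		/* (-): dst < src iff src is the DENORM */
       #	}
       #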
  10294
  10295fcmp_dnrm_s:
  10296	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
  10297	mov.l		SRC_HI(%a0),%d0
  10298	bset		&31,%d0			# DENORM src; make into small norm
  10299	mov.l		%d0,FP_SCR0_HI(%a6)
  10300	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
  10301	lea		FP_SCR0(%a6),%a0
  10302	bra.w		fcmp_norm
  10303
  10304fcmp_dnrm_d:
  10305	mov.l		DST_EX(%a1),FP_SCR0_EX(%a6)
  10306	mov.l		DST_HI(%a1),%d0
  10307	bset		&31,%d0			# DENORM dst; make into small norm
  10308	mov.l		%d0,FP_SCR0_HI(%a6)
  10309	mov.l		DST_LO(%a1),FP_SCR0_LO(%a6)
  10310	lea		FP_SCR0(%a6),%a1
  10311	bra.w		fcmp_norm
  10312
  10313fcmp_dnrm_sd:
  10314	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
  10315	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
  10316	mov.l		DST_HI(%a1),%d0
  10317	bset		&31,%d0			# DENORM dst; make into small norm
  10318	mov.l		%d0,FP_SCR1_HI(%a6)
  10319	mov.l		SRC_HI(%a0),%d0
  10320	bset		&31,%d0			# DENORM src; make into small norm
  10321	mov.l		%d0,FP_SCR0_HI(%a6)
  10322	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
  10323	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
  10324	lea		FP_SCR1(%a6),%a1
  10325	lea		FP_SCR0(%a6),%a0
  10326	bra.w		fcmp_norm
  10327
  10328fcmp_nrm_dnrm:
  10329	mov.b		SRC_EX(%a0),%d0		# determine if like signs
  10330	mov.b		DST_EX(%a1),%d1
  10331	eor.b		%d0,%d1
  10332	bmi.w		fcmp_dnrm_s
  10333
  10334# signs are the same, so we must determine the answer ourselves.
  10335	tst.b		%d0			# is src op negative?
  10336	bmi.b		fcmp_nrm_dnrm_m		# yes
  10337	rts
  10338fcmp_nrm_dnrm_m:
  10339	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
  10340	rts
  10341
  10342fcmp_dnrm_nrm:
  10343	mov.b		SRC_EX(%a0),%d0		# determine if like signs
  10344	mov.b		DST_EX(%a1),%d1
  10345	eor.b		%d0,%d1
  10346	bmi.w		fcmp_dnrm_d
  10347
  10348# signs are the same, so we must determine the answer ourselves.
  10349	tst.b		%d0			# is src op negative?
  10350	bpl.b		fcmp_dnrm_nrm_m		# no
  10351	rts
  10352fcmp_dnrm_nrm_m:
  10353	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
  10354	rts
  10355
  10356#########################################################################
  10357# XDEF ****************************************************************	#
  10358#	fsglmul(): emulates the fsglmul instruction			#
  10359#									#
  10360# XREF ****************************************************************	#
  10361#	scale_to_zero_src() - scale src exponent to zero		#
  10362#	scale_to_zero_dst() - scale dst exponent to zero		#
  10363#	unf_res4() - return default underflow result for sglop		#
  10364#	ovf_res() - return default overflow result			#
  10365#	res_qnan() - return QNAN result					#
  10366#	res_snan() - return SNAN result					#
  10367#									#
  10368# INPUT ***************************************************************	#
  10369#	a0 = pointer to extended precision source operand		#
  10370#	a1 = pointer to extended precision destination operand		#
  10371#	d0  rnd prec,mode						#
  10372#									#
  10373# OUTPUT **************************************************************	#
  10374#	fp0 = result							#
  10375#	fp1 = EXOP (if exception occurred)				#
  10376#									#
  10377# ALGORITHM ***********************************************************	#
  10378#	Handle NANs, infinities, and zeroes as special cases. Divide	#
  10379# norms/denorms into ext/sgl/dbl precision.				#
  10380#	For norms/denorms, scale the exponents such that a multiply	#
  10381# instruction won't cause an exception. Use the regular fsglmul to	#
  10382# compute a result. Check if the regular operands would have taken	#
  10383# an exception. If so, return the default overflow/underflow result	#
  10384# and return the EXOP if exceptions are enabled. Else, scale the	#
  10385# result operand to the proper exponent.				#
  10386#									#
  10387#########################################################################
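# As a hedged C sketch (illustrative only, not part of the build) of the
# scale-and-fix-up scheme used below: both operands are rescaled to a true
# exponent of zero (biased 0x3fff), the hardware multiply runs on the scaled
# values, and the true exponent is recovered by subtracting the saved scale
# factor, mirroring what fsglmul_normal_exit does:
#
#	#include <stdint.h>
#
#	uint16_t fixup_sgn_exp(uint16_t scaled_sgn_exp, int32_t scale_factor)
#	{
#		uint16_t sign = scaled_sgn_exp & 0x8000;	/* keep old sign */
#		int32_t  exp  = scaled_sgn_exp & 0x7fff;	/* strip sign */
#		exp -= scale_factor;				/* undo the scaling */
#		return (uint16_t)(sign | (exp & 0x7fff));	/* concat sign,exp */
#	}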
  10388
  10389	global		fsglmul
  10390fsglmul:
  10391	mov.l		%d0,L_SCR3(%a6)		# store rnd info
  10392
  10393	clr.w		%d1
  10394	mov.b		DTAG(%a6),%d1
  10395	lsl.b		&0x3,%d1
  10396	or.b		STAG(%a6),%d1
  10397
  10398	bne.w		fsglmul_not_norm	# optimize on non-norm input
  10399
  10400fsglmul_norm:
  10401	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
  10402	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
  10403	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
  10404
  10405	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
  10406	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
  10407	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
  10408
  10409	bsr.l		scale_to_zero_src	# scale exponent
  10410	mov.l		%d0,-(%sp)		# save scale factor 1
  10411
  10412	bsr.l		scale_to_zero_dst	# scale dst exponent
  10413
  10414	add.l		(%sp)+,%d0		# SCALE_FACTOR = scale1 + scale2
  10415
  10416	cmpi.l		%d0,&0x3fff-0x7ffe	# would result ovfl?
  10417	beq.w		fsglmul_may_ovfl	# result may rnd to overflow
  10418	blt.w		fsglmul_ovfl		# result will overflow
  10419
  10420	cmpi.l		%d0,&0x3fff+0x0001	# would result unfl?
  10421	beq.w		fsglmul_may_unfl	# result may rnd to no unfl
  10422	bgt.w		fsglmul_unfl		# result will underflow
  10423
  10424fsglmul_normal:
  10425	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
  10426
  10427	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
  10428	fmov.l		&0x0,%fpsr		# clear FPSR
  10429
  10430	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply
  10431
  10432	fmov.l		%fpsr,%d1		# save status
  10433	fmov.l		&0x0,%fpcr		# clear FPCR
  10434
  10435	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
  10436
  10437fsglmul_normal_exit:
  10438	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
  10439	mov.l		%d2,-(%sp)		# save d2
  10440	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
  10441	mov.l		%d1,%d2			# make a copy
  10442	andi.l		&0x7fff,%d1		# strip sign
  10443	andi.w		&0x8000,%d2		# keep old sign
  10444	sub.l		%d0,%d1			# add scale factor
  10445	or.w		%d2,%d1			# concat old sign,new exp
  10446	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
  10447	mov.l		(%sp)+,%d2		# restore d2
  10448	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
  10449	rts
  10450
  10451fsglmul_ovfl:
  10452	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
  10453
  10454	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
  10455	fmov.l		&0x0,%fpsr		# clear FPSR
  10456
  10457	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply
  10458
  10459	fmov.l		%fpsr,%d1		# save status
  10460	fmov.l		&0x0,%fpcr		# clear FPCR
  10461
  10462	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
  10463
  10464fsglmul_ovfl_tst:
  10465
  10466# we wait until now to set this because fsglmul_may_ovfl may jump in here
  10467	or.l		&ovfl_inx_mask, USER_FPSR(%a6) # set ovfl/aovfl/ainex
  10468
  10469	mov.b		FPCR_ENABLE(%a6),%d1
  10470	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
  10471	bne.b		fsglmul_ovfl_ena	# yes
  10472
  10473fsglmul_ovfl_dis:
  10474	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
  10475	sne		%d1			# set sign param accordingly
  10476	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
  10477	andi.b		&0x30,%d0		# force prec = ext
  10478	bsr.l		ovf_res			# calculate default result
  10479	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
  10480	fmovm.x		(%a0),&0x80		# return default result in fp0
  10481	rts
  10482
  10483fsglmul_ovfl_ena:
  10484	fmovm.x		&0x80,FP_SCR0(%a6)	# move result to stack
  10485
  10486	mov.l		%d2,-(%sp)		# save d2
  10487	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
  10488	mov.l		%d1,%d2			# make a copy
  10489	andi.l		&0x7fff,%d1		# strip sign
  10490	sub.l		%d0,%d1			# add scale factor
  10491	subi.l		&0x6000,%d1		# subtract bias
  10492	andi.w		&0x7fff,%d1
  10493	andi.w		&0x8000,%d2		# keep old sign
  10494	or.w		%d2,%d1			# concat old sign,new exp
  10495	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
  10496	mov.l		(%sp)+,%d2		# restore d2
  10497	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
  10498	bra.b		fsglmul_ovfl_dis
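# A hedged note on the 0x6000 adjustments (here and in the underflow paths
# below): when the trap is enabled, the EXOP handed back in fp1 carries an
# exponent biased back into range, -0x6000 for overflow and +0x6000 for
# underflow, mirroring what the code does. Illustrative C sketch only (names
# assumed, not real helpers):
#
#	#include <stdint.h>
#
#	uint16_t exop_sgn_exp(uint16_t sign_bit, int32_t new_exp, int overflow)
#	{
#		int32_t adj = overflow ? new_exp - 0x6000 : new_exp + 0x6000;
#		return (uint16_t)(sign_bit | (adj & 0x7fff));
#	}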
  10499
  10500fsglmul_may_ovfl:
  10501	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
  10502
  10503	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
  10504	fmov.l		&0x0,%fpsr		# clear FPSR
  10505
  10506	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply
  10507
  10508	fmov.l		%fpsr,%d1		# save status
  10509	fmov.l		&0x0,%fpcr		# clear FPCR
  10510
  10511	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
  10512
  10513	fabs.x		%fp0,%fp1		# make a copy of result
  10514	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
  10515	fbge.w		fsglmul_ovfl_tst	# yes; overflow has occurred
  10516
  10517# no, it didn't overflow; we have correct result
  10518	bra.w		fsglmul_normal_exit
  10519
  10520fsglmul_unfl:
  10521	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
  10522
  10523	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
  10524
  10525	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
  10526	fmov.l		&0x0,%fpsr		# clear FPSR
  10527
  10528	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply
  10529
  10530	fmov.l		%fpsr,%d1		# save status
  10531	fmov.l		&0x0,%fpcr		# clear FPCR
  10532
  10533	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
  10534
  10535	mov.b		FPCR_ENABLE(%a6),%d1
  10536	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
  10537	bne.b		fsglmul_unfl_ena	# yes
  10538
  10539fsglmul_unfl_dis:
  10540	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
  10541
  10542	lea		FP_SCR0(%a6),%a0	# pass: result addr
  10543	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
  10544	bsr.l		unf_res4		# calculate default result
  10545	or.b		%d0,FPSR_CC(%a6)	# 'Z' bit may have been set
  10546	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
  10547	rts
  10548
  10549#
  10550# UNFL is enabled.
  10551#
  10552fsglmul_unfl_ena:
  10553	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op
  10554
  10555	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
  10556	fmov.l		&0x0,%fpsr		# clear FPSR
  10557
  10558	fsglmul.x	FP_SCR0(%a6),%fp1	# execute sgl multiply
  10559
  10560	fmov.l		&0x0,%fpcr		# clear FPCR
  10561
  10562	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
  10563	mov.l		%d2,-(%sp)		# save d2
  10564	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
  10565	mov.l		%d1,%d2			# make a copy
  10566	andi.l		&0x7fff,%d1		# strip sign
  10567	andi.w		&0x8000,%d2		# keep old sign
  10568	sub.l		%d0,%d1			# add scale factor
  10569	addi.l		&0x6000,%d1		# add bias
  10570	andi.w		&0x7fff,%d1
  10571	or.w		%d2,%d1			# concat old sign,new exp
  10572	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
  10573	mov.l		(%sp)+,%d2		# restore d2
  10574	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
  10575	bra.w		fsglmul_unfl_dis
  10576
  10577fsglmul_may_unfl:
  10578	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
  10579
  10580	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
  10581	fmov.l		&0x0,%fpsr		# clear FPSR
  10582
  10583	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply
  10584
  10585	fmov.l		%fpsr,%d1		# save status
  10586	fmov.l		&0x0,%fpcr		# clear FPCR
  10587
  10588	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
  10589
  10590	fabs.x		%fp0,%fp1		# make a copy of result
  10591	fcmp.b		%fp1,&0x2		# is |result| > 2.b?
  10592	fbgt.w		fsglmul_normal_exit	# no; no underflow occurred
  10593	fblt.w		fsglmul_unfl		# yes; underflow occurred
  10594
  10595#
  10596# we still don't know if underflow occurred. result is ~ equal to 2. but,
  10597# we don't know if the result was an underflow that rounded up to a 2 or
  10598# a normalized number that rounded down to a 2. so, redo the entire operation
  10599# using RZ as the rounding mode to see what the pre-rounded result is.
  10600# this case should be relatively rare.
  10601#
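# Hedged aside: round-to-zero never increases the magnitude, so the RZ result
# is a lower bound on the infinitely precise product; comparing it against the
# threshold settles the question. Roughly, in C (illustrative only):
#
#	#include <math.h>
#	/* underflow iff the pre-rounded magnitude was below the threshold (2.0 here) */
#	int rounded_up_from_below(double rz_result, double threshold)
#	{
#		return fabs(rz_result) < threshold;
#	}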
  10602	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1
  10603
  10604	mov.l		L_SCR3(%a6),%d1
  10605	andi.b		&0xc0,%d1		# keep rnd prec
  10606	ori.b		&rz_mode*0x10,%d1	# insert RZ
  10607
  10608	fmov.l		%d1,%fpcr		# set FPCR
  10609	fmov.l		&0x0,%fpsr		# clear FPSR
  10610
  10611	fsglmul.x	FP_SCR0(%a6),%fp1	# execute sgl multiply
  10612
  10613	fmov.l		&0x0,%fpcr		# clear FPCR
  10614	fabs.x		%fp1			# make absolute value
  10615	fcmp.b		%fp1,&0x2		# is |result| < 2.b?
  10616	fbge.w		fsglmul_normal_exit	# no; no underflow occurred
  10617	bra.w		fsglmul_unfl		# yes, underflow occurred
  10618
  10619##############################################################################
  10620
  10621#
  10622# Single Precision Multiply: inputs are not both normalized; what are they?
  10623#
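# A hedged C rendering of the dispatch idiom used here and for the other
# operand-class tables below (illustrative only; the helper name is assumed):
#
#	/* index = (DTAG << 3) | STAG; the table holds 16-bit offsets relative
#	   to the table base, one row of 8 entries per dst class */
#	const void *tbl_target(const short *tbl, int dtag, int stag)
#	{
#		return (const char *)tbl + tbl[(dtag << 3) | stag];
#	}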
  10624fsglmul_not_norm:
  10625	mov.w		(tbl_fsglmul_op.b,%pc,%d1.w*2),%d1
  10626	jmp		(tbl_fsglmul_op.b,%pc,%d1.w*1)
  10627
  10628	swbeg		&48
  10629tbl_fsglmul_op:
  10630	short		fsglmul_norm		- tbl_fsglmul_op # NORM x NORM
  10631	short		fsglmul_zero		- tbl_fsglmul_op # NORM x ZERO
  10632	short		fsglmul_inf_src		- tbl_fsglmul_op # NORM x INF
  10633	short		fsglmul_res_qnan	- tbl_fsglmul_op # NORM x QNAN
  10634	short		fsglmul_norm		- tbl_fsglmul_op # NORM x DENORM
  10635	short		fsglmul_res_snan	- tbl_fsglmul_op # NORM x SNAN
  10636	short		tbl_fsglmul_op		- tbl_fsglmul_op #
  10637	short		tbl_fsglmul_op		- tbl_fsglmul_op #
  10638
  10639	short		fsglmul_zero		- tbl_fsglmul_op # ZERO x NORM
  10640	short		fsglmul_zero		- tbl_fsglmul_op # ZERO x ZERO
  10641	short		fsglmul_res_operr	- tbl_fsglmul_op # ZERO x INF
  10642	short		fsglmul_res_qnan	- tbl_fsglmul_op # ZERO x QNAN
  10643	short		fsglmul_zero		- tbl_fsglmul_op # ZERO x DENORM
  10644	short		fsglmul_res_snan	- tbl_fsglmul_op # ZERO x SNAN
  10645	short		tbl_fsglmul_op		- tbl_fsglmul_op #
  10646	short		tbl_fsglmul_op		- tbl_fsglmul_op #
  10647
  10648	short		fsglmul_inf_dst		- tbl_fsglmul_op # INF x NORM
  10649	short		fsglmul_res_operr	- tbl_fsglmul_op # INF x ZERO
  10650	short		fsglmul_inf_dst		- tbl_fsglmul_op # INF x INF
  10651	short		fsglmul_res_qnan	- tbl_fsglmul_op # INF x QNAN
  10652	short		fsglmul_inf_dst		- tbl_fsglmul_op # INF x DENORM
  10653	short		fsglmul_res_snan	- tbl_fsglmul_op # INF x SNAN
  10654	short		tbl_fsglmul_op		- tbl_fsglmul_op #
  10655	short		tbl_fsglmul_op		- tbl_fsglmul_op #
  10656
  10657	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x NORM
  10658	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x ZERO
  10659	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x INF
  10660	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x QNAN
  10661	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x DENORM
  10662	short		fsglmul_res_snan	- tbl_fsglmul_op # QNAN x SNAN
  10663	short		tbl_fsglmul_op		- tbl_fsglmul_op #
  10664	short		tbl_fsglmul_op		- tbl_fsglmul_op #
  10665
  10666	short		fsglmul_norm		- tbl_fsglmul_op # DENORM x NORM
  10667	short		fsglmul_zero		- tbl_fsglmul_op # DENORM x ZERO
  10668	short		fsglmul_inf_src		- tbl_fsglmul_op # DENORM x INF
  10669	short		fsglmul_res_qnan	- tbl_fsglmul_op # DENORM x QNAN
  10670	short		fsglmul_norm		- tbl_fsglmul_op # DENORM x DENORM
  10671	short		fsglmul_res_snan	- tbl_fsglmul_op # DENORM x SNAN
  10672	short		tbl_fsglmul_op		- tbl_fsglmul_op #
  10673	short		tbl_fsglmul_op		- tbl_fsglmul_op #
  10674
  10675	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x NORM
  10676	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x ZERO
  10677	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x INF
  10678	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x QNAN
  10679	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x DENORM
  10680	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x SNAN
  10681	short		tbl_fsglmul_op		- tbl_fsglmul_op #
  10682	short		tbl_fsglmul_op		- tbl_fsglmul_op #
  10683
  10684fsglmul_res_operr:
  10685	bra.l		res_operr
  10686fsglmul_res_snan:
  10687	bra.l		res_snan
  10688fsglmul_res_qnan:
  10689	bra.l		res_qnan
  10690fsglmul_zero:
  10691	bra.l		fmul_zero
  10692fsglmul_inf_src:
  10693	bra.l		fmul_inf_src
  10694fsglmul_inf_dst:
  10695	bra.l		fmul_inf_dst
  10696
  10697#########################################################################
  10698# XDEF ****************************************************************	#
  10699#	fsgldiv(): emulates the fsgldiv instruction			#
  10700#									#
  10701# XREF ****************************************************************	#
  10702#	scale_to_zero_src() - scale src exponent to zero		#
  10703#	scale_to_zero_dst() - scale dst exponent to zero		#
  10704#	unf_res4() - return default underflow result for sglop		#
  10705#	ovf_res() - return default overflow result			#
  10706#	res_qnan() - return QNAN result					#
  10707#	res_snan() - return SNAN result					#
  10708#									#
  10709# INPUT ***************************************************************	#
  10710#	a0 = pointer to extended precision source operand		#
  10711#	a1 = pointer to extended precision destination operand		#
  10712#	d0  rnd prec,mode						#
  10713#									#
  10714# OUTPUT **************************************************************	#
  10715#	fp0 = result							#
  10716#	fp1 = EXOP (if exception occurred)				#
  10717#									#
  10718# ALGORITHM ***********************************************************	#
  10719#	Handle NANs, infinities, and zeroes as special cases. Divide	#
  10720# norms/denorms into ext/sgl/dbl precision.				#
  10721#	For norms/denorms, scale the exponents such that a divide	#
  10722# instruction won't cause an exception. Use the regular fsgldiv to	#
  10723# compute a result. Check if the regular operands would have taken	#
  10724# an exception. If so, return the default overflow/underflow result	#
  10725# and return the EXOP if exceptions are enabled. Else, scale the	#
  10726# result operand to the proper exponent.				#
  10727#									#
  10728#########################################################################
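# As a hedged C sketch (illustrative only, not part of the build) of the divide
# bookkeeping below: the scale factor is the dst scale minus the src scale, and
# the overflow/underflow screens compare it against fixed thresholds (helper
# names assumed):
#
#	#include <stdint.h>
#
#	int32_t div_scale_factor(int32_t scale_src, int32_t scale_dst)
#	{
#		return scale_dst - scale_src;	/* neg.l (%sp); add.l %d0,(%sp) */
#	}
#
#	int div_ovfl_possible(int32_t sf) { return sf <= 0x3fff - 0x7ffe; }
#	int div_unfl_possible(int32_t sf) { return sf >= 0x3fff; }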
  10729
  10730	global		fsgldiv
  10731fsgldiv:
  10732	mov.l		%d0,L_SCR3(%a6)		# store rnd info
  10733
  10734	clr.w		%d1
  10735	mov.b		DTAG(%a6),%d1
  10736	lsl.b		&0x3,%d1
  10737	or.b		STAG(%a6),%d1		# combine src tags
  10738
  10739	bne.w		fsgldiv_not_norm	# optimize on non-norm input
  10740
  10741#
  10742# DIVIDE: NORMs and DENORMs ONLY!
  10743#
  10744fsgldiv_norm:
  10745	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
  10746	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
  10747	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
  10748
  10749	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
  10750	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
  10751	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
  10752
  10753	bsr.l		scale_to_zero_src	# calculate scale factor 1
  10754	mov.l		%d0,-(%sp)		# save scale factor 1
  10755
  10756	bsr.l		scale_to_zero_dst	# calculate scale factor 2
  10757
  10758	neg.l		(%sp)			# S.F. = scale2 - scale1
  10759	add.l		%d0,(%sp)
  10760
  10761	mov.w		2+L_SCR3(%a6),%d1	# fetch precision,mode
  10762	lsr.b		&0x6,%d1
  10763	mov.l		(%sp)+,%d0
  10764	cmpi.l		%d0,&0x3fff-0x7ffe
  10765	ble.w		fsgldiv_may_ovfl
  10766
  10767	cmpi.l		%d0,&0x3fff-0x0000	# will result underflow?
  10768	beq.w		fsgldiv_may_unfl	# maybe
  10769	bgt.w		fsgldiv_unfl		# yes; go handle underflow
  10770
  10771fsgldiv_normal:
  10772	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
  10773
  10774	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
  10775	fmov.l		&0x0,%fpsr		# clear FPSR
  10776
  10777	fsgldiv.x	FP_SCR0(%a6),%fp0	# perform sgl divide
  10778
  10779	fmov.l		%fpsr,%d1		# save FPSR
  10780	fmov.l		&0x0,%fpcr		# clear FPCR
  10781
  10782	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
  10783
  10784fsgldiv_normal_exit:
  10785	fmovm.x		&0x80,FP_SCR0(%a6)	# store result on stack
  10786	mov.l		%d2,-(%sp)		# save d2
  10787	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
  10788	mov.l		%d1,%d2			# make a copy
  10789	andi.l		&0x7fff,%d1		# strip sign
  10790	andi.w		&0x8000,%d2		# keep old sign
  10791	sub.l		%d0,%d1			# add scale factor
  10792	or.w		%d2,%d1			# concat old sign,new exp
  10793	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
  10794	mov.l		(%sp)+,%d2		# restore d2
  10795	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
  10796	rts
  10797
  10798fsgldiv_may_ovfl:
  10799	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
  10800
  10801	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
  10802	fmov.l		&0x0,%fpsr		# clear FPSR
  10803
  10804	fsgldiv.x	FP_SCR0(%a6),%fp0	# execute divide
  10805
  10806	fmov.l		%fpsr,%d1
  10807	fmov.l		&0x0,%fpcr
  10808
  10809	or.l		%d1,USER_FPSR(%a6)	# save INEX,N
  10810
  10811	fmovm.x		&0x01,-(%sp)		# save result to stack
  10812	mov.w		(%sp),%d1		# fetch new exponent
  10813	add.l		&0xc,%sp		# clear result
  10814	andi.l		&0x7fff,%d1		# strip sign
  10815	sub.l		%d0,%d1			# add scale factor
  10816	cmp.l		%d1,&0x7fff		# did divide overflow?
  10817	blt.b		fsgldiv_normal_exit
  10818
  10819fsgldiv_ovfl_tst:
  10820	or.w		&ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
  10821
  10822	mov.b		FPCR_ENABLE(%a6),%d1
  10823	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
  10824	bne.b		fsgldiv_ovfl_ena	# yes
  10825
  10826fsgldiv_ovfl_dis:
  10827	btst		&neg_bit,FPSR_CC(%a6)	# is result negative
  10828	sne		%d1			# set sign param accordingly
  10829	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
  10830	andi.b		&0x30,%d0		# kill precision
  10831	bsr.l		ovf_res			# calculate default result
  10832	or.b		%d0,FPSR_CC(%a6)	# set INF if applicable
  10833	fmovm.x		(%a0),&0x80		# return default result in fp0
  10834	rts
  10835
  10836fsgldiv_ovfl_ena:
  10837	fmovm.x		&0x80,FP_SCR0(%a6)	# move result to stack
  10838
  10839	mov.l		%d2,-(%sp)		# save d2
  10840	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
  10841	mov.l		%d1,%d2			# make a copy
  10842	andi.l		&0x7fff,%d1		# strip sign
  10843	andi.w		&0x8000,%d2		# keep old sign
  10844	sub.l		%d0,%d1			# add scale factor
  10845	subi.l		&0x6000,%d1		# subtract new bias
  10846	andi.w		&0x7fff,%d1		# clear ms bit
  10847	or.w		%d2,%d1			# concat old sign,new exp
  10848	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
  10849	mov.l		(%sp)+,%d2		# restore d2
  10850	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
  10851	bra.b		fsgldiv_ovfl_dis
  10852
  10853fsgldiv_unfl:
  10854	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
  10855
  10856	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
  10857
  10858	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
  10859	fmov.l		&0x0,%fpsr		# clear FPSR
  10860
  10861	fsgldiv.x	FP_SCR0(%a6),%fp0	# execute sgl divide
  10862
  10863	fmov.l		%fpsr,%d1		# save status
  10864	fmov.l		&0x0,%fpcr		# clear FPCR
  10865
  10866	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
  10867
  10868	mov.b		FPCR_ENABLE(%a6),%d1
  10869	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
  10870	bne.b		fsgldiv_unfl_ena	# yes
  10871
  10872fsgldiv_unfl_dis:
  10873	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
  10874
  10875	lea		FP_SCR0(%a6),%a0	# pass: result addr
  10876	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
  10877	bsr.l		unf_res4		# calculate default result
  10878	or.b		%d0,FPSR_CC(%a6)	# 'Z' bit may have been set
  10879	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
  10880	rts
  10881
  10882#
  10883# UNFL is enabled.
  10884#
  10885fsgldiv_unfl_ena:
  10886	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op
  10887
  10888	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
  10889	fmov.l		&0x0,%fpsr		# clear FPSR
  10890
  10891	fsgldiv.x	FP_SCR0(%a6),%fp1	# execute sgl divide
  10892
  10893	fmov.l		&0x0,%fpcr		# clear FPCR
  10894
  10895	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
  10896	mov.l		%d2,-(%sp)		# save d2
  10897	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
  10898	mov.l		%d1,%d2			# make a copy
  10899	andi.l		&0x7fff,%d1		# strip sign
  10900	andi.w		&0x8000,%d2		# keep old sign
  10901	sub.l		%d0,%d1			# add scale factor
  10902	addi.l		&0x6000,%d1		# add bias
  10903	andi.w		&0x7fff,%d1		# clear top bit
  10904	or.w		%d2,%d1			# concat old sign, new exp
  10905	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
  10906	mov.l		(%sp)+,%d2		# restore d2
  10907	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
  10908	bra.b		fsgldiv_unfl_dis
  10909
  10910#
  10911# the divide operation MAY underflow:
  10912#
  10913fsgldiv_may_unfl:
  10914	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
  10915
  10916	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
  10917	fmov.l		&0x0,%fpsr		# clear FPSR
  10918
  10919	fsgldiv.x	FP_SCR0(%a6),%fp0	# execute sgl divide
  10920
  10921	fmov.l		%fpsr,%d1		# save status
  10922	fmov.l		&0x0,%fpcr		# clear FPCR
  10923
  10924	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
  10925
  10926	fabs.x		%fp0,%fp1		# make a copy of result
  10927	fcmp.b		%fp1,&0x1		# is |result| > 1.b?
  10928	fbgt.w		fsgldiv_normal_exit	# no; no underflow occurred
  10929	fblt.w		fsgldiv_unfl		# yes; underflow occurred
  10930
  10931#
  10932# we still don't know if underflow occurred. result is ~ equal to 1. but,
  10933# we don't know if the result was an underflow that rounded up to a 1
  10934# or a normalized number that rounded down to a 1. so, redo the entire
  10935# operation using RZ as the rounding mode to see what the pre-rounded
  10936# result is. this case should be relatively rare.
  10937#
  10938	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into %fp1
  10939
  10940	clr.l		%d1			# clear scratch register
  10941	ori.b		&rz_mode*0x10,%d1	# force RZ rnd mode
  10942
  10943	fmov.l		%d1,%fpcr		# set FPCR
  10944	fmov.l		&0x0,%fpsr		# clear FPSR
  10945
  10946	fsgldiv.x	FP_SCR0(%a6),%fp1	# execute sgl divide
  10947
  10948	fmov.l		&0x0,%fpcr		# clear FPCR
  10949	fabs.x		%fp1			# make absolute value
  10950	fcmp.b		%fp1,&0x1		# is |result| < 1.b?
  10951	fbge.w		fsgldiv_normal_exit	# no; no underflow occurred
  10952	bra.w		fsgldiv_unfl		# yes; underflow occurred
  10953
  10954############################################################################
  10955
  10956#
  10957# Divide: inputs are not both normalized; what are they?
  10958#
  10959fsgldiv_not_norm:
  10960	mov.w		(tbl_fsgldiv_op.b,%pc,%d1.w*2),%d1
  10961	jmp		(tbl_fsgldiv_op.b,%pc,%d1.w*1)
  10962
  10963	swbeg		&48
  10964tbl_fsgldiv_op:
  10965	short		fsgldiv_norm		- tbl_fsgldiv_op # NORM / NORM
  10966	short		fsgldiv_inf_load	- tbl_fsgldiv_op # NORM / ZERO
  10967	short		fsgldiv_zero_load	- tbl_fsgldiv_op # NORM / INF
  10968	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # NORM / QNAN
  10969	short		fsgldiv_norm		- tbl_fsgldiv_op # NORM / DENORM
  10970	short		fsgldiv_res_snan	- tbl_fsgldiv_op # NORM / SNAN
  10971	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
  10972	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
  10973
  10974	short		fsgldiv_zero_load	- tbl_fsgldiv_op # ZERO / NORM
  10975	short		fsgldiv_res_operr	- tbl_fsgldiv_op # ZERO / ZERO
  10976	short		fsgldiv_zero_load	- tbl_fsgldiv_op # ZERO / INF
  10977	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # ZERO / QNAN
  10978	short		fsgldiv_zero_load	- tbl_fsgldiv_op # ZERO / DENORM
  10979	short		fsgldiv_res_snan	- tbl_fsgldiv_op # ZERO / SNAN
  10980	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
  10981	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
  10982
  10983	short		fsgldiv_inf_dst		- tbl_fsgldiv_op # INF / NORM
  10984	short		fsgldiv_inf_dst		- tbl_fsgldiv_op # INF / ZERO
  10985	short		fsgldiv_res_operr	- tbl_fsgldiv_op # INF / INF
  10986	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # INF / QNAN
  10987	short		fsgldiv_inf_dst		- tbl_fsgldiv_op # INF / DENORM
  10988	short		fsgldiv_res_snan	- tbl_fsgldiv_op # INF / SNAN
  10989	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
  10990	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
  10991
  10992	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / NORM
  10993	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / ZERO
  10994	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / INF
  10995	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / QNAN
  10996	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / DENORM
  10997	short		fsgldiv_res_snan	- tbl_fsgldiv_op # QNAN / SNAN
  10998	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
  10999	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
  11000
  11001	short		fsgldiv_norm		- tbl_fsgldiv_op # DENORM / NORM
  11002	short		fsgldiv_inf_load	- tbl_fsgldiv_op # DENORM / ZERO
  11003	short		fsgldiv_zero_load	- tbl_fsgldiv_op # DENORM / INF
  11004	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # DENORM / QNAN
  11005	short		fsgldiv_norm		- tbl_fsgldiv_op # DENORM / DENORM
  11006	short		fsgldiv_res_snan	- tbl_fsgldiv_op # DENORM / SNAN
  11007	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
  11008	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
  11009
  11010	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / NORM
  11011	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / ZERO
  11012	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / INF
  11013	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / QNAN
  11014	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / DENORM
  11015	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / SNAN
  11016	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
  11017	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
  11018
  11019fsgldiv_res_qnan:
  11020	bra.l		res_qnan
  11021fsgldiv_res_snan:
  11022	bra.l		res_snan
  11023fsgldiv_res_operr:
  11024	bra.l		res_operr
  11025fsgldiv_inf_load:
  11026	bra.l		fdiv_inf_load
  11027fsgldiv_zero_load:
  11028	bra.l		fdiv_zero_load
  11029fsgldiv_inf_dst:
  11030	bra.l		fdiv_inf_dst
  11031
  11032#########################################################################
  11033# XDEF ****************************************************************	#
  11034#	fadd(): emulates the fadd instruction				#
  11035#	fsadd(): emulates the fadd instruction				#
  11036#	fdadd(): emulates the fdadd instruction				#
  11037#									#
  11038# XREF ****************************************************************	#
  11039#	addsub_scaler2() - scale the operands so they won't take exc	#
  11040#	ovf_res() - return default overflow result			#
  11041#	unf_res() - return default underflow result			#
  11042#	res_qnan() - set QNAN result					#
  11043#	res_snan() - set SNAN result					#
  11044#	res_operr() - set OPERR result					#
  11045#	scale_to_zero_src() - set src operand exponent equal to zero	#
  11046#	scale_to_zero_dst() - set dst operand exponent equal to zero	#
  11047#									#
  11048# INPUT ***************************************************************	#
  11049#	a0 = pointer to extended precision source operand		#
  11050#	a1 = pointer to extended precision destination operand		#
  11051#									#
  11052# OUTPUT **************************************************************	#
  11053#	fp0 = result							#
  11054#	fp1 = EXOP (if exception occurred)				#
  11055#									#
  11056# ALGORITHM ***********************************************************	#
  11057#	Handle NANs, infinities, and zeroes as special cases. Divide	#
  11058# norms into extended, single, and double precision.			#
  11059#	Do addition after scaling exponents such that exception won't	#
  11060# occur. Then, check result exponent to see if exception would have	#
  11061# occurred. If so, return default result and maybe EXOP. Else, insert	#
  11062# the correct result exponent and return. Set FPSR bits as appropriate.	#
  11063#									#
  11064#########################################################################
  11065
  11066	global		fsadd
  11067fsadd:
  11068	andi.b		&0x30,%d0		# clear rnd prec
  11069	ori.b		&s_mode*0x10,%d0	# insert sgl prec
  11070	bra.b		fadd
  11071
  11072	global		fdadd
  11073fdadd:
  11074	andi.b		&0x30,%d0		# clear rnd prec
  11075	ori.b		&d_mode*0x10,%d0	# insert dbl prec
  11076
  11077	global		fadd
  11078fadd:
  11079	mov.l		%d0,L_SCR3(%a6)		# store rnd info
  11080
  11081	clr.w		%d1
  11082	mov.b		DTAG(%a6),%d1
  11083	lsl.b		&0x3,%d1
  11084	or.b		STAG(%a6),%d1		# combine src tags
  11085
  11086	bne.w		fadd_not_norm		# optimize on non-norm input
  11087
  11088#
  11089# ADD: norms and denorms
  11090#
  11091fadd_norm:
  11092	bsr.l		addsub_scaler2		# scale exponents
  11093
  11094fadd_zero_entry:
  11095	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
  11096
  11097	fmov.l		&0x0,%fpsr		# clear FPSR
  11098	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
  11099
  11100	fadd.x		FP_SCR0(%a6),%fp0	# execute add
  11101
  11102	fmov.l		&0x0,%fpcr		# clear FPCR
  11103	fmov.l		%fpsr,%d1		# fetch INEX2,N,Z
  11104
  11105	or.l		%d1,USER_FPSR(%a6)	# save exc and ccode bits
  11106
  11107	fbeq.w		fadd_zero_exit		# if result is zero, end now
  11108
  11109	mov.l		%d2,-(%sp)		# save d2
  11110
  11111	fmovm.x		&0x01,-(%sp)		# save result to stack
  11112
  11113	mov.w		2+L_SCR3(%a6),%d1
  11114	lsr.b		&0x6,%d1
  11115
  11116	mov.w		(%sp),%d2		# fetch new sign, exp
  11117	andi.l		&0x7fff,%d2		# strip sign
  11118	sub.l		%d0,%d2			# add scale factor
  11119
  11120	cmp.l		%d2,(tbl_fadd_ovfl.b,%pc,%d1.w*4) # is it an overflow?
  11121	bge.b		fadd_ovfl		# yes
  11122
  11123	cmp.l		%d2,(tbl_fadd_unfl.b,%pc,%d1.w*4) # is it an underflow?
  11124	blt.w		fadd_unfl		# yes
  11125	beq.w		fadd_may_unfl		# maybe; go find out
  11126
  11127fadd_normal:
  11128	mov.w		(%sp),%d1
  11129	andi.w		&0x8000,%d1		# keep sign
  11130	or.w		%d2,%d1			# concat sign,new exp
  11131	mov.w		%d1,(%sp)		# insert new exponent
  11132
  11133	fmovm.x		(%sp)+,&0x80		# return result in fp0
  11134
  11135	mov.l		(%sp)+,%d2		# restore d2
  11136	rts
  11137
  11138fadd_zero_exit:
  11139#	fmov.s		&0x00000000,%fp0	# return zero in fp0
  11140	rts
  11141
  11142tbl_fadd_ovfl:
  11143	long		0x7fff			# ext ovfl
  11144	long		0x407f			# sgl ovfl
  11145	long		0x43ff			# dbl ovfl
  11146
  11147tbl_fadd_unfl:
  11148	long	        0x0000			# ext unfl
  11149	long		0x3f81			# sgl unfl
  11150	long		0x3c01			# dbl unfl
  11151
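# A brief worked example of where those bounds come from (extended-format
# biased exponents, bias 0x3fff):
#	sgl: max = 0x3fff + 127  = 0x407e, so exp >= 0x407f overflows
#	     min = 0x3fff - 126  = 0x3f81, so exp <  0x3f81 underflows
#	dbl: max = 0x3fff + 1023 = 0x43fe, so exp >= 0x43ff overflows
#	     min = 0x3fff - 1022 = 0x3c01, so exp <  0x3c01 underflows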
  11152fadd_ovfl:
  11153	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
  11154
  11155	mov.b		FPCR_ENABLE(%a6),%d1
  11156	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
  11157	bne.b		fadd_ovfl_ena		# yes
  11158
  11159	add.l		&0xc,%sp
  11160fadd_ovfl_dis:
  11161	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
  11162	sne		%d1			# set sign param accordingly
  11163	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
  11164	bsr.l		ovf_res			# calculate default result
  11165	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
  11166	fmovm.x		(%a0),&0x80		# return default result in fp0
  11167	mov.l		(%sp)+,%d2		# restore d2
  11168	rts
  11169
  11170fadd_ovfl_ena:
  11171	mov.b		L_SCR3(%a6),%d1
  11172	andi.b		&0xc0,%d1		# is precision extended?
  11173	bne.b		fadd_ovfl_ena_sd	# no; prec = sgl or dbl
  11174
  11175fadd_ovfl_ena_cont:
  11176	mov.w		(%sp),%d1
  11177	andi.w		&0x8000,%d1		# keep sign
  11178	subi.l		&0x6000,%d2		# add extra bias
  11179	andi.w		&0x7fff,%d2
  11180	or.w		%d2,%d1			# concat sign,new exp
  11181	mov.w		%d1,(%sp)		# insert new exponent
  11182
  11183	fmovm.x		(%sp)+,&0x40		# return EXOP in fp1
  11184	bra.b		fadd_ovfl_dis
  11185
  11186fadd_ovfl_ena_sd:
  11187	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
  11188
  11189	mov.l		L_SCR3(%a6),%d1
  11190	andi.b		&0x30,%d1		# keep rnd mode
  11191	fmov.l		%d1,%fpcr		# set FPCR
  11192
  11193	fadd.x		FP_SCR0(%a6),%fp0	# execute add
  11194
  11195	fmov.l		&0x0,%fpcr		# clear FPCR
  11196
  11197	add.l		&0xc,%sp
  11198	fmovm.x		&0x01,-(%sp)
  11199	bra.b		fadd_ovfl_ena_cont
  11200
  11201fadd_unfl:
  11202	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
  11203
  11204	add.l		&0xc,%sp
  11205
  11206	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
  11207
  11208	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
  11209	fmov.l		&0x0,%fpsr		# clear FPSR
  11210
  11211	fadd.x		FP_SCR0(%a6),%fp0	# execute add
  11212
  11213	fmov.l		&0x0,%fpcr		# clear FPCR
  11214	fmov.l		%fpsr,%d1		# save status
  11215
  11216	or.l		%d1,USER_FPSR(%a6)	# save INEX,N
  11217
  11218	mov.b		FPCR_ENABLE(%a6),%d1
  11219	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
  11220	bne.b		fadd_unfl_ena		# yes
  11221
  11222fadd_unfl_dis:
  11223	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
  11224
  11225	lea		FP_SCR0(%a6),%a0	# pass: result addr
  11226	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
  11227	bsr.l		unf_res			# calculate default result
  11228	or.b		%d0,FPSR_CC(%a6)	# 'Z' bit may have been set
  11229	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
  11230	mov.l		(%sp)+,%d2		# restore d2
  11231	rts
  11232
  11233fadd_unfl_ena:
  11234	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op
  11235
  11236	mov.l		L_SCR3(%a6),%d1
  11237	andi.b		&0xc0,%d1		# is precision extended?
  11238	bne.b		fadd_unfl_ena_sd	# no; sgl or dbl
  11239
  11240	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
  11241
  11242fadd_unfl_ena_cont:
  11243	fmov.l		&0x0,%fpsr		# clear FPSR
  11244
  11245	fadd.x		FP_SCR0(%a6),%fp1	# execute add
  11246
  11247	fmov.l		&0x0,%fpcr		# clear FPCR
  11248
  11249	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
  11250	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
  11251	mov.l		%d1,%d2			# make a copy
  11252	andi.l		&0x7fff,%d1		# strip sign
  11253	andi.w		&0x8000,%d2		# keep old sign
  11254	sub.l		%d0,%d1			# add scale factor
  11255	addi.l		&0x6000,%d1		# add new bias
  11256	andi.w		&0x7fff,%d1		# clear top bit
  11257	or.w		%d2,%d1			# concat sign,new exp
  11258	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
  11259	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
  11260	bra.w		fadd_unfl_dis
  11261
  11262fadd_unfl_ena_sd:
  11263	mov.l		L_SCR3(%a6),%d1
  11264	andi.b		&0x30,%d1		# use only rnd mode
  11265	fmov.l		%d1,%fpcr		# set FPCR
  11266
  11267	bra.b		fadd_unfl_ena_cont
  11268
  11269#
  11270# result is equal to the smallest normalized number in the selected precision
  11271# if the precision is extended, this result could not have come from an
  11272# underflow that rounded up.
  11273#
  11274fadd_may_unfl:
  11275	mov.l		L_SCR3(%a6),%d1
  11276	andi.b		&0xc0,%d1
  11277	beq.w		fadd_normal		# yes; no underflow occurred
  11278
  11279	mov.l		0x4(%sp),%d1		# extract hi(man)
  11280	cmpi.l		%d1,&0x80000000		# is hi(man) = 0x80000000?
  11281	bne.w		fadd_normal		# no; no underflow occurred
  11282
  11283	tst.l		0x8(%sp)		# is lo(man) = 0x0?
  11284	bne.w		fadd_normal		# no; no underflow occurred
  11285
  11286	btst		&inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
  11287	beq.w		fadd_normal		# no; no underflow occurred
  11288
  11289#
  11290# ok, so now the result has an exponent equal to the smallest normalized
  11291# exponent for the selected precision. also, the mantissa is equal to
  11292# 0x8000000000000000 and this mantissa is the result of rounding non-zero
  11293# g,r,s.
  11294# now, we must determine whether the pre-rounded result was an underflow
  11295# rounded "up" or a normalized number rounded "down".
  11296# so, we do this by re-executing the add using RZ as the rounding mode and
  11297# seeing if the new result is smaller than or equal to the current result.
  11298#
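# Roughly, in C (illustrative only, name assumed): since RZ never rounds the
# magnitude up, the original result can only exceed the RZ result if it was
# rounded up from below the smallest normal, i.e. from an underflowed value:
#
#	#include <math.h>
#	int was_underflow(long double res, long double res_rz)
#	{
#		return fabsl(res) > fabsl(res_rz);
#	}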
  11299	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1
  11300
  11301	mov.l		L_SCR3(%a6),%d1
  11302	andi.b		&0xc0,%d1		# keep rnd prec
  11303	ori.b		&rz_mode*0x10,%d1	# insert rnd mode
  11304	fmov.l		%d1,%fpcr		# set FPCR
  11305	fmov.l		&0x0,%fpsr		# clear FPSR
  11306
  11307	fadd.x		FP_SCR0(%a6),%fp1	# execute add
  11308
  11309	fmov.l		&0x0,%fpcr		# clear FPCR
  11310
  11311	fabs.x		%fp0			# compare absolute values
  11312	fabs.x		%fp1
  11313	fcmp.x		%fp0,%fp1		# is first result > second?
  11314
  11315	fbgt.w		fadd_unfl		# yes; it's an underflow
  11316	bra.w		fadd_normal		# no; it's not an underflow
  11317
  11318##########################################################################
  11319
  11320#
  11321# Add: inputs are not both normalized; what are they?
  11322#
  11323fadd_not_norm:
  11324	mov.w		(tbl_fadd_op.b,%pc,%d1.w*2),%d1
  11325	jmp		(tbl_fadd_op.b,%pc,%d1.w*1)
  11326
  11327	swbeg		&48
  11328tbl_fadd_op:
  11329	short		fadd_norm	- tbl_fadd_op # NORM + NORM
  11330	short		fadd_zero_src	- tbl_fadd_op # NORM + ZERO
  11331	short		fadd_inf_src	- tbl_fadd_op # NORM + INF
  11332	short		fadd_res_qnan	- tbl_fadd_op # NORM + QNAN
  11333	short		fadd_norm	- tbl_fadd_op # NORM + DENORM
  11334	short		fadd_res_snan	- tbl_fadd_op # NORM + SNAN
  11335	short		tbl_fadd_op	- tbl_fadd_op #
  11336	short		tbl_fadd_op	- tbl_fadd_op #
  11337
  11338	short		fadd_zero_dst	- tbl_fadd_op # ZERO + NORM
  11339	short		fadd_zero_2	- tbl_fadd_op # ZERO + ZERO
  11340	short		fadd_inf_src	- tbl_fadd_op # ZERO + INF
  11341	short		fadd_res_qnan	- tbl_fadd_op # ZERO + QNAN
  11342	short		fadd_zero_dst	- tbl_fadd_op # ZERO + DENORM
  11343	short		fadd_res_snan	- tbl_fadd_op # ZERO + SNAN
  11344	short		tbl_fadd_op	- tbl_fadd_op #
  11345	short		tbl_fadd_op	- tbl_fadd_op #
  11346
  11347	short		fadd_inf_dst	- tbl_fadd_op # INF + NORM
  11348	short		fadd_inf_dst	- tbl_fadd_op # INF + ZERO
  11349	short		fadd_inf_2	- tbl_fadd_op # INF + INF
  11350	short		fadd_res_qnan	- tbl_fadd_op # INF + QNAN
  11351	short		fadd_inf_dst	- tbl_fadd_op # INF + DENORM
  11352	short		fadd_res_snan	- tbl_fadd_op # INF + SNAN
  11353	short		tbl_fadd_op	- tbl_fadd_op #
  11354	short		tbl_fadd_op	- tbl_fadd_op #
  11355
  11356	short		fadd_res_qnan	- tbl_fadd_op # QNAN + NORM
  11357	short		fadd_res_qnan	- tbl_fadd_op # QNAN + ZERO
  11358	short		fadd_res_qnan	- tbl_fadd_op # QNAN + INF
  11359	short		fadd_res_qnan	- tbl_fadd_op # QNAN + QNAN
  11360	short		fadd_res_qnan	- tbl_fadd_op # QNAN + DENORM
  11361	short		fadd_res_snan	- tbl_fadd_op # QNAN + SNAN
  11362	short		tbl_fadd_op	- tbl_fadd_op #
  11363	short		tbl_fadd_op	- tbl_fadd_op #
  11364
  11365	short		fadd_norm	- tbl_fadd_op # DENORM + NORM
  11366	short		fadd_zero_src	- tbl_fadd_op # DENORM + ZERO
  11367	short		fadd_inf_src	- tbl_fadd_op # DENORM + INF
  11368	short		fadd_res_qnan	- tbl_fadd_op # DENORM + QNAN
  11369	short		fadd_norm	- tbl_fadd_op # DENORM + DENORM
  11370	short		fadd_res_snan	- tbl_fadd_op # DENORM + SNAN
  11371	short		tbl_fadd_op	- tbl_fadd_op #
  11372	short		tbl_fadd_op	- tbl_fadd_op #
  11373
  11374	short		fadd_res_snan	- tbl_fadd_op # SNAN + NORM
  11375	short		fadd_res_snan	- tbl_fadd_op # SNAN + ZERO
  11376	short		fadd_res_snan	- tbl_fadd_op # SNAN + INF
  11377	short		fadd_res_snan	- tbl_fadd_op # SNAN + QNAN
  11378	short		fadd_res_snan	- tbl_fadd_op # SNAN + DENORM
  11379	short		fadd_res_snan	- tbl_fadd_op # SNAN + SNAN
  11380	short		tbl_fadd_op	- tbl_fadd_op #
  11381	short		tbl_fadd_op	- tbl_fadd_op #
  11382
  11383fadd_res_qnan:
  11384	bra.l		res_qnan
  11385fadd_res_snan:
  11386	bra.l		res_snan
  11387
  11388#
  11389# both operands are ZEROes
  11390#
  11391fadd_zero_2:
  11392	mov.b		SRC_EX(%a0),%d0		# are the signs opposite
  11393	mov.b		DST_EX(%a1),%d1
  11394	eor.b		%d0,%d1
  11395	bmi.w		fadd_zero_2_chk_rm	# weed out (-ZERO)+(+ZERO)
  11396
  11397# the signs are the same. so determine whether they are positive or negative
  11398# and return the appropriately signed zero.
  11399	tst.b		%d0			# are ZEROes positive or negative?
  11400	bmi.b		fadd_zero_rm		# negative
  11401	fmov.s		&0x00000000,%fp0	# return +ZERO
  11402	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
  11403	rts
  11404
  11405#
  11406# the ZEROes have opposite signs:
  11407# - Therefore, we return +ZERO if the rounding mode is RN, RZ, or RP.
  11408# - -ZERO is returned in the case of RM.
  11409#
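# Summarizing the zero-plus-zero cases handled here (a worked table, following
# the IEEE 754 rules the code implements):
#	(+0)+(+0) = +0      (-0)+(-0) = -0
#	(+0)+(-0) = +0 in RN/RZ/RP, but -0 in RM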
  11410fadd_zero_2_chk_rm:
  11411	mov.b		3+L_SCR3(%a6),%d1
  11412	andi.b		&0x30,%d1		# extract rnd mode
  11413	cmpi.b		%d1,&rm_mode*0x10	# is rnd mode == RM?
  11414	beq.b		fadd_zero_rm		# yes
  11415	fmov.s		&0x00000000,%fp0	# return +ZERO
  11416	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
  11417	rts
  11418
  11419fadd_zero_rm:
  11420	fmov.s		&0x80000000,%fp0	# return -ZERO
  11421	mov.b		&neg_bmask+z_bmask,FPSR_CC(%a6) # set NEG/Z
  11422	rts
  11423
  11424#
  11425# one operand is a ZERO and the other is a DENORM or NORM. scale
  11426# the DENORM or NORM and jump to the regular fadd routine.
  11427#
  11428fadd_zero_dst:
  11429	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
  11430	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
  11431	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
  11432	bsr.l		scale_to_zero_src	# scale the operand
  11433	clr.w		FP_SCR1_EX(%a6)
  11434	clr.l		FP_SCR1_HI(%a6)
  11435	clr.l		FP_SCR1_LO(%a6)
  11436	bra.w		fadd_zero_entry		# go execute fadd
  11437
  11438fadd_zero_src:
  11439	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
  11440	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
  11441	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
  11442	bsr.l		scale_to_zero_dst	# scale the operand
  11443	clr.w		FP_SCR0_EX(%a6)
  11444	clr.l		FP_SCR0_HI(%a6)
  11445	clr.l		FP_SCR0_LO(%a6)
  11446	bra.w		fadd_zero_entry		# go execute fadd
  11447
  11448#
  11449# both operands are INFs. an OPERR will result if the INFs have
  11450# different signs. else, an INF of the same sign is returned
  11451#
  11452fadd_inf_2:
  11453	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
  11454	mov.b		DST_EX(%a1),%d1
  11455	eor.b		%d1,%d0
  11456	bmi.l		res_operr		# weed out (-INF)+(+INF)
  11457
  11458# ok, so it's not an OPERR. but, we do have to remember to return the
  11459# src INF since that's where the 881/882 gets the j-bit from...
  11460
  11461#
  11462# operands are INF and one of {ZERO, INF, DENORM, NORM}
  11463#
  11464fadd_inf_src:
  11465	fmovm.x		SRC(%a0),&0x80		# return src INF
  11466	tst.b		SRC_EX(%a0)		# is INF positive?
  11467	bpl.b		fadd_inf_done		# yes; we're done
  11468	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
  11469	rts
  11470
  11471#
  11472# operands are INF and one of {ZERO, INF, DENORM, NORM}
  11473#
  11474fadd_inf_dst:
  11475	fmovm.x		DST(%a1),&0x80		# return dst INF
  11476	tst.b		DST_EX(%a1)		# is INF positive?
  11477	bpl.b		fadd_inf_done		# yes; we're done
  11478	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
  11479	rts
  11480
  11481fadd_inf_done:
  11482	mov.b		&inf_bmask,FPSR_CC(%a6) # set INF
  11483	rts
  11484
  11485#########################################################################
  11486# XDEF ****************************************************************	#
  11487#	fsub(): emulates the fsub instruction				#
  11488#	fssub(): emulates the fssub instruction				#
  11489#	fdsub(): emulates the fdsub instruction				#
  11490#									#
  11491# XREF ****************************************************************	#
  11492#	addsub_scaler2() - scale the operands so they won't take exc	#
  11493#	ovf_res() - return default overflow result			#
  11494#	unf_res() - return default underflow result			#
  11495#	res_qnan() - set QNAN result					#
  11496#	res_snan() - set SNAN result					#
  11497#	res_operr() - set OPERR result					#
  11498#	scale_to_zero_src() - set src operand exponent equal to zero	#
  11499#	scale_to_zero_dst() - set dst operand exponent equal to zero	#
  11500#									#
  11501# INPUT ***************************************************************	#
  11502#	a0 = pointer to extended precision source operand		#
  11503#	a1 = pointer to extended precision destination operand		#
  11504#									#
  11505# OUTPUT **************************************************************	#
  11506#	fp0 = result							#
  11507#	fp1 = EXOP (if exception occurred)				#
  11508#									#
  11509# ALGORITHM ***********************************************************	#
  11510#	Handle NANs, infinities, and zeroes as special cases. Divide	#
  11511# norms into extended, single, and double precision.			#
  11512#	Do subtraction after scaling exponents such that exception won't#
  11513# occur. Then, check result exponent to see if exception would have	#
  11514# occurred. If so, return default result and maybe EXOP. Else, insert	#
  11515# the correct result exponent and return. Set FPSR bits as appropriate.	#
  11516#									#
  11517#########################################################################
  11518
  11519	global		fssub
  11520fssub:
  11521	andi.b		&0x30,%d0		# clear rnd prec
  11522	ori.b		&s_mode*0x10,%d0	# insert sgl prec
  11523	bra.b		fsub
  11524
  11525	global		fdsub
  11526fdsub:
  11527	andi.b		&0x30,%d0		# clear rnd prec
  11528	ori.b		&d_mode*0x10,%d0	# insert dbl prec
  11529
  11530	global		fsub
  11531fsub:
  11532	mov.l		%d0,L_SCR3(%a6)		# store rnd info
  11533
  11534	clr.w		%d1
  11535	mov.b		DTAG(%a6),%d1
  11536	lsl.b		&0x3,%d1
  11537	or.b		STAG(%a6),%d1		# combine src tags
  11538
  11539	bne.w		fsub_not_norm		# optimize on non-norm input
  11540
  11541#
  11542# SUB: norms and denorms
  11543#
  11544fsub_norm:
  11545	bsr.l		addsub_scaler2		# scale exponents
  11546
  11547fsub_zero_entry:
  11548	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
  11549
  11550	fmov.l		&0x0,%fpsr		# clear FPSR
  11551	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
  11552
  11553	fsub.x		FP_SCR0(%a6),%fp0	# execute subtract
  11554
  11555	fmov.l		&0x0,%fpcr		# clear FPCR
  11556	fmov.l		%fpsr,%d1		# fetch INEX2, N, Z
  11557
  11558	or.l		%d1,USER_FPSR(%a6)	# save exc and ccode bits
  11559
  11560	fbeq.w		fsub_zero_exit		# if result zero, end now
  11561
  11562	mov.l		%d2,-(%sp)		# save d2
  11563
  11564	fmovm.x		&0x01,-(%sp)		# save result to stack
  11565
  11566	mov.w		2+L_SCR3(%a6),%d1
  11567	lsr.b		&0x6,%d1
  11568
  11569	mov.w		(%sp),%d2		# fetch new exponent
  11570	andi.l		&0x7fff,%d2		# strip sign
  11571	sub.l		%d0,%d2			# add scale factor
  11572
  11573	cmp.l		%d2,(tbl_fsub_ovfl.b,%pc,%d1.w*4) # is it an overflow?
  11574	bge.b		fsub_ovfl		# yes
  11575
  11576	cmp.l		%d2,(tbl_fsub_unfl.b,%pc,%d1.w*4) # is it an underflow?
  11577	blt.w		fsub_unfl		# yes
  11578	beq.w		fsub_may_unfl		# maybe; go find out
  11579
  11580fsub_normal:
  11581	mov.w		(%sp),%d1
  11582	andi.w		&0x8000,%d1		# keep sign
  11583	or.w		%d2,%d1			# concat sign,new exp
  11584	mov.w		%d1,(%sp)		# insert new exponent
  11585
  11586	fmovm.x		(%sp)+,&0x80		# return result in fp0
  11587
  11588	mov.l		(%sp)+,%d2		# restore d2
  11589	rts
  11590
  11591fsub_zero_exit:
  11592#	fmov.s		&0x00000000,%fp0	# return zero in fp0
  11593	rts
  11594
  11595tbl_fsub_ovfl:
  11596	long		0x7fff			# ext ovfl
  11597	long		0x407f			# sgl ovfl
  11598	long		0x43ff			# dbl ovfl
  11599
  11600tbl_fsub_unfl:
  11601	long	        0x0000			# ext unfl
  11602	long		0x3f81			# sgl unfl
  11603	long		0x3c01			# dbl unfl
  11604
  11605fsub_ovfl:
  11606	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
  11607
  11608	mov.b		FPCR_ENABLE(%a6),%d1
  11609	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
  11610	bne.b		fsub_ovfl_ena		# yes
  11611
  11612	add.l		&0xc,%sp
  11613fsub_ovfl_dis:
  11614	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
  11615	sne		%d1			# set sign param accordingly
  11616	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
  11617	bsr.l		ovf_res			# calculate default result
  11618	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
  11619	fmovm.x		(%a0),&0x80		# return default result in fp0
  11620	mov.l		(%sp)+,%d2		# restore d2
  11621	rts
  11622
  11623fsub_ovfl_ena:
  11624	mov.b		L_SCR3(%a6),%d1
  11625	andi.b		&0xc0,%d1		# is precision extended?
  11626	bne.b		fsub_ovfl_ena_sd	# no
  11627
  11628fsub_ovfl_ena_cont:
  11629	mov.w		(%sp),%d1		# fetch {sgn,exp}
  11630	andi.w		&0x8000,%d1		# keep sign
  11631	subi.l		&0x6000,%d2		# subtract new bias
  11632	andi.w		&0x7fff,%d2		# clear top bit
  11633	or.w		%d2,%d1			# concat sign,exp
  11634	mov.w		%d1,(%sp)		# insert new exponent
  11635
  11636	fmovm.x		(%sp)+,&0x40		# return EXOP in fp1
  11637	bra.b		fsub_ovfl_dis
  11638
  11639fsub_ovfl_ena_sd:
  11640	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
  11641
  11642	mov.l		L_SCR3(%a6),%d1
  11643	andi.b		&0x30,%d1		# clear rnd prec
  11644	fmov.l		%d1,%fpcr		# set FPCR
  11645
  11646	fsub.x		FP_SCR0(%a6),%fp0	# execute subtract
  11647
  11648	fmov.l		&0x0,%fpcr		# clear FPCR
  11649
  11650	add.l		&0xc,%sp
  11651	fmovm.x		&0x01,-(%sp)
  11652	bra.b		fsub_ovfl_ena_cont
  11653
  11654fsub_unfl:
  11655	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
  11656
  11657	add.l		&0xc,%sp
  11658
  11659	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
  11660
  11661	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
  11662	fmov.l		&0x0,%fpsr		# clear FPSR
  11663
  11664	fsub.x		FP_SCR0(%a6),%fp0	# execute subtract
  11665
  11666	fmov.l		&0x0,%fpcr		# clear FPCR
  11667	fmov.l		%fpsr,%d1		# save status
  11668
  11669	or.l		%d1,USER_FPSR(%a6)
  11670
  11671	mov.b		FPCR_ENABLE(%a6),%d1
  11672	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
  11673	bne.b		fsub_unfl_ena		# yes
  11674
  11675fsub_unfl_dis:
  11676	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
  11677
  11678	lea		FP_SCR0(%a6),%a0	# pass: result addr
  11679	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
  11680	bsr.l		unf_res			# calculate default result
  11681	or.b		%d0,FPSR_CC(%a6)	# 'Z' may have been set
  11682	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
  11683	mov.l		(%sp)+,%d2		# restore d2
  11684	rts
  11685
  11686fsub_unfl_ena:
  11687	fmovm.x		FP_SCR1(%a6),&0x40
  11688
  11689	mov.l		L_SCR3(%a6),%d1
  11690	andi.b		&0xc0,%d1		# is precision extended?
  11691	bne.b		fsub_unfl_ena_sd	# no
  11692
  11693	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
  11694
  11695fsub_unfl_ena_cont:
  11696	fmov.l		&0x0,%fpsr		# clear FPSR
  11697
  11698	fsub.x		FP_SCR0(%a6),%fp1	# execute subtract
  11699
  11700	fmov.l		&0x0,%fpcr		# clear FPCR
  11701
  11702	fmovm.x		&0x40,FP_SCR0(%a6)	# store result to stack
  11703	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
  11704	mov.l		%d1,%d2			# make a copy
  11705	andi.l		&0x7fff,%d1		# strip sign
  11706	andi.w		&0x8000,%d2		# keep old sign
  11707	sub.l		%d0,%d1			# add scale factor
  11708	addi.l		&0x6000,%d1		# add new bias
  11709	andi.w		&0x7fff,%d1		# clear top bit
  11710	or.w		%d2,%d1			# concat sgn,exp
  11711	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
  11712	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
  11713	bra.w		fsub_unfl_dis
  11714
  11715fsub_unfl_ena_sd:
  11716	mov.l		L_SCR3(%a6),%d1
  11717	andi.b		&0x30,%d1		# clear rnd prec
  11718	fmov.l		%d1,%fpcr		# set FPCR
  11719
  11720	bra.b		fsub_unfl_ena_cont
  11721
  11722#
  11723# result is equal to the smallest normalized number in the selected precision
  11724# if the precision is extended, this result could not have come from an
  11725# underflow that rounded up.
  11726#
  11727fsub_may_unfl:
  11728	mov.l		L_SCR3(%a6),%d1
  11729	andi.b		&0xc0,%d1		# fetch rnd prec
  11730	beq.w		fsub_normal		# yes; no underflow occurred
  11731
  11732	mov.l		0x4(%sp),%d1
  11733	cmpi.l		%d1,&0x80000000		# is hi(man) = 0x80000000?
  11734	bne.w		fsub_normal		# no; no underflow occurred
  11735
  11736	tst.l		0x8(%sp)		# is lo(man) = 0x0?
  11737	bne.w		fsub_normal		# no; no underflow occurred
  11738
  11739	btst		&inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
  11740	beq.w		fsub_normal		# no; no underflow occurred
  11741
  11742#
  11743# ok, so now the result has an exponent equal to the smallest normalized
  11744# exponent for the selected precision. also, the mantissa is equal to
  11745# 0x8000000000000000 and this mantissa is the result of rounding non-zero
  11746# g,r,s.
  11747# now, we must determine whether the pre-rounded result was an underflow
  11748# rounded "up" or a normalized number rounded "down".
  11749# so, we do this by re-executing the subtract using RZ as the rounding mode
  11750# and seeing if the new result is smaller than or equal to the current result.
  11751#
  11752	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1
  11753
  11754	mov.l		L_SCR3(%a6),%d1
  11755	andi.b		&0xc0,%d1		# keep rnd prec
  11756	ori.b		&rz_mode*0x10,%d1	# insert rnd mode
  11757	fmov.l		%d1,%fpcr		# set FPCR
  11758	fmov.l		&0x0,%fpsr		# clear FPSR
  11759
  11760	fsub.x		FP_SCR0(%a6),%fp1	# execute subtract
  11761
  11762	fmov.l		&0x0,%fpcr		# clear FPCR
  11763
  11764	fabs.x		%fp0			# compare absolute values
  11765	fabs.x		%fp1
  11766	fcmp.x		%fp0,%fp1		# is first result > second?
  11767
  11768	fbgt.w		fsub_unfl		# yes; it's an underflow
  11769	bra.w		fsub_normal		# no; it's not an underflow
  11770
  11771##########################################################################
  11772
  11773#
  11774# Sub: inputs are not both normalized; what are they?
  11775#
  11776fsub_not_norm:
  11777	mov.w		(tbl_fsub_op.b,%pc,%d1.w*2),%d1
  11778	jmp		(tbl_fsub_op.b,%pc,%d1.w*1)
  11779
  11780	swbeg		&48
  11781tbl_fsub_op:
  11782	short		fsub_norm	- tbl_fsub_op # NORM - NORM
  11783	short		fsub_zero_src	- tbl_fsub_op # NORM - ZERO
  11784	short		fsub_inf_src	- tbl_fsub_op # NORM - INF
  11785	short		fsub_res_qnan	- tbl_fsub_op # NORM - QNAN
  11786	short		fsub_norm	- tbl_fsub_op # NORM - DENORM
  11787	short		fsub_res_snan	- tbl_fsub_op # NORM - SNAN
  11788	short		tbl_fsub_op	- tbl_fsub_op #
  11789	short		tbl_fsub_op	- tbl_fsub_op #
  11790
  11791	short		fsub_zero_dst	- tbl_fsub_op # ZERO - NORM
  11792	short		fsub_zero_2	- tbl_fsub_op # ZERO - ZERO
  11793	short		fsub_inf_src	- tbl_fsub_op # ZERO - INF
  11794	short		fsub_res_qnan	- tbl_fsub_op # ZERO - QNAN
  11795	short		fsub_zero_dst	- tbl_fsub_op # ZERO - DENORM
  11796	short		fsub_res_snan	- tbl_fsub_op # ZERO - SNAN
  11797	short		tbl_fsub_op	- tbl_fsub_op #
  11798	short		tbl_fsub_op	- tbl_fsub_op #
  11799
  11800	short		fsub_inf_dst	- tbl_fsub_op # INF - NORM
  11801	short		fsub_inf_dst	- tbl_fsub_op # INF - ZERO
  11802	short		fsub_inf_2	- tbl_fsub_op # INF - INF
  11803	short		fsub_res_qnan	- tbl_fsub_op # INF - QNAN
  11804	short		fsub_inf_dst	- tbl_fsub_op # INF - DENORM
  11805	short		fsub_res_snan	- tbl_fsub_op # INF - SNAN
  11806	short		tbl_fsub_op	- tbl_fsub_op #
  11807	short		tbl_fsub_op	- tbl_fsub_op #
  11808
  11809	short		fsub_res_qnan	- tbl_fsub_op # QNAN - NORM
  11810	short		fsub_res_qnan	- tbl_fsub_op # QNAN - ZERO
  11811	short		fsub_res_qnan	- tbl_fsub_op # QNAN - INF
  11812	short		fsub_res_qnan	- tbl_fsub_op # QNAN - QNAN
  11813	short		fsub_res_qnan	- tbl_fsub_op # QNAN - DENORM
  11814	short		fsub_res_snan	- tbl_fsub_op # QNAN - SNAN
  11815	short		tbl_fsub_op	- tbl_fsub_op #
  11816	short		tbl_fsub_op	- tbl_fsub_op #
  11817
  11818	short		fsub_norm	- tbl_fsub_op # DENORM - NORM
  11819	short		fsub_zero_src	- tbl_fsub_op # DENORM - ZERO
  11820	short		fsub_inf_src	- tbl_fsub_op # DENORM - INF
  11821	short		fsub_res_qnan	- tbl_fsub_op # DENORM - QNAN
  11822	short		fsub_norm	- tbl_fsub_op # DENORM - DENORM
  11823	short		fsub_res_snan	- tbl_fsub_op # DENORM - SNAN
  11824	short		tbl_fsub_op	- tbl_fsub_op #
  11825	short		tbl_fsub_op	- tbl_fsub_op #
  11826
  11827	short		fsub_res_snan	- tbl_fsub_op # SNAN - NORM
  11828	short		fsub_res_snan	- tbl_fsub_op # SNAN - ZERO
  11829	short		fsub_res_snan	- tbl_fsub_op # SNAN - INF
  11830	short		fsub_res_snan	- tbl_fsub_op # SNAN - QNAN
  11831	short		fsub_res_snan	- tbl_fsub_op # SNAN - DENORM
  11832	short		fsub_res_snan	- tbl_fsub_op # SNAN - SNAN
  11833	short		tbl_fsub_op	- tbl_fsub_op #
  11834	short		tbl_fsub_op	- tbl_fsub_op #
  11835
  11836fsub_res_qnan:
  11837	bra.l		res_qnan
  11838fsub_res_snan:
  11839	bra.l		res_snan
  11840
  11841#
  11842# both operands are ZEROes
  11843#
  11844fsub_zero_2:
  11845	mov.b		SRC_EX(%a0),%d0
  11846	mov.b		DST_EX(%a1),%d1
  11847	eor.b		%d1,%d0
  11848	bpl.b		fsub_zero_2_chk_rm
  11849
  11850# the signs are opposite, so, return a ZERO w/ the sign of the dst ZERO
  11851	tst.b		%d0			# is dst negative?
  11852	bmi.b		fsub_zero_2_rm		# yes
  11853	fmov.s		&0x00000000,%fp0	# no; return +ZERO
  11854	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
  11855	rts
  11856
  11857#
  11858# the ZEROes have the same signs:
  11859# - Therefore, we return +ZERO if the rounding mode is RN,RZ, or RP
  11860# - -ZERO is returned in the case of RM.
  11861#
  11862fsub_zero_2_chk_rm:
  11863	mov.b		3+L_SCR3(%a6),%d1
  11864	andi.b		&0x30,%d1		# extract rnd mode
  11865	cmpi.b		%d1,&rm_mode*0x10	# is rnd mode = RM?
  11866	beq.b		fsub_zero_2_rm		# yes
  11867	fmov.s		&0x00000000,%fp0	# no; return +ZERO
  11868	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
  11869	rts
  11870
  11871fsub_zero_2_rm:
  11872	fmov.s		&0x80000000,%fp0	# return -ZERO
  11873	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6)	# set Z/NEG
  11874	rts
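#
# Illustrative sketch (not part of the FPSP): the zero-minus-zero cases above
# follow the usual IEEE-754 sign rules. Roughly, in C:
#
#	#include <stdbool.h>
#
#	/* true if the zero result of (dst - src) should be negative, given
#	 * the operand signs and whether the rounding mode is round-to-minus */
#	static bool zero_diff_is_negative(bool dst_neg, bool src_neg, bool rnd_minus)
#	{
#		if (dst_neg != src_neg)		/* e.g. (-0) - (+0)             */
#			return dst_neg;		/* result takes the dst sign    */
#		return rnd_minus;		/* exact zero: -0 only under RM */
#	}
#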
  11875
  11876#
  11877# one operand is a ZERO and the other is a DENORM or a NORM.
  11878# scale the DENORM or NORM and jump to the regular fsub routine.
  11879#
  11880fsub_zero_dst:
  11881	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
  11882	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
  11883	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
  11884	bsr.l		scale_to_zero_src	# scale the operand
  11885	clr.w		FP_SCR1_EX(%a6)
  11886	clr.l		FP_SCR1_HI(%a6)
  11887	clr.l		FP_SCR1_LO(%a6)
  11888	bra.w		fsub_zero_entry		# go execute fsub
  11889
  11890fsub_zero_src:
  11891	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
  11892	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
  11893	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
  11894	bsr.l		scale_to_zero_dst	# scale the operand
  11895	clr.w		FP_SCR0_EX(%a6)
  11896	clr.l		FP_SCR0_HI(%a6)
  11897	clr.l		FP_SCR0_LO(%a6)
  11898	bra.w		fsub_zero_entry		# go execute fsub
  11899
  11900#
  11901# both operands are INFs. an OPERR will result if the INFs have the
  11902# same signs. else, the result is an INF w/ the sign of the dst INF.
  11903#
  11904fsub_inf_2:
  11905	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
  11906	mov.b		DST_EX(%a1),%d1
  11907	eor.b		%d1,%d0
  11908	bpl.l		res_operr		# weed out (+INF)-(+INF), (-INF)-(-INF)
  11909
  11910# ok, so it's not an OPERR. but we do have to remember to return
  11911# the src INF since that's where the 881/882 gets the j-bit.
  11912
  11913fsub_inf_src:
  11914	fmovm.x		SRC(%a0),&0x80		# return src INF
  11915	fneg.x		%fp0			# invert sign
  11916	fbge.w		fsub_inf_done		# sign is now positive
  11917	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
  11918	rts
  11919
  11920fsub_inf_dst:
  11921	fmovm.x		DST(%a1),&0x80		# return dst INF
  11922	tst.b		DST_EX(%a1)		# is INF negative?
  11923	bpl.b		fsub_inf_done		# no
  11924	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
  11925	rts
  11926
  11927fsub_inf_done:
  11928	mov.b		&inf_bmask,FPSR_CC(%a6)	# set INF
  11929	rts
  11930
  11931#########################################################################
  11932# XDEF ****************************************************************	#
  11933#	fsqrt(): emulates the fsqrt instruction				#
  11934#	fssqrt(): emulates the fssqrt instruction			#
  11935#	fdsqrt(): emulates the fdsqrt instruction			#
  11936#									#
  11937# XREF ****************************************************************	#
  11938#	scale_sqrt() - scale the source operand				#
  11939#	unf_res() - return default underflow result			#
  11940#	ovf_res() - return default overflow result			#
  11941#	res_qnan_1op() - return QNAN result				#
  11942#	res_snan_1op() - return SNAN result				#
  11943#									#
  11944# INPUT ***************************************************************	#
  11945#	a0 = pointer to extended precision source operand		#
  11946#	d0 = rnd prec,mode						#
  11947#									#
  11948# OUTPUT **************************************************************	#
  11949#	fp0 = result							#
  11950#	fp1 = EXOP (if exception occurred)				#
  11951#									#
  11952# ALGORITHM ***********************************************************	#
  11953#	Handle NANs, infinities, and zeroes as special cases. Divide	#
  11954# norms/denorms into ext/sgl/dbl precision.				#
  11955#	For norms/denorms, scale the exponents such that a sqrt		#
  11956# instruction won't cause an exception. Use the regular fsqrt to	#
  11957# compute a result. Check if the original, unscaled operand would have	#
  11958# taken an exception. If so, return the default overflow/underflow result	#
  11959# and return the EXOP if exceptions are enabled. Else, scale the	#
  11960# result operand to the proper exponent.				#
  11961#									#
  11962#########################################################################
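#
# Illustrative sketch (not part of the FPSP): the scaling strategy described
# above, in rough C. scale_to_safe_range() stands in for scale_sqrt(); its
# name, the struct, and the assumption that it returns an even scale are all
# illustrative, not the FPSP's actual interfaces.
#
#	#include <math.h>
#
#	struct scaled { double frac; int scale; };	/* value = frac * 2^scale */
#
#	/* hypothetical helper: pull the exponent toward zero so the hardware
#	 * sqrt cannot overflow/underflow (scale is kept even)               */
#	struct scaled scale_to_safe_range(double x);
#
#	static double emulated_sqrt(double x)
#	{
#		struct scaled s = scale_to_safe_range(x);
#		double r = sqrt(s.frac);	/* exception-free hardware sqrt */
#		return ldexp(r, s.scale / 2);	/* undo the scaling; this is where
#						 * over/underflow gets checked    */
#	}
#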
  11963
  11964	global		fssqrt
  11965fssqrt:
  11966	andi.b		&0x30,%d0		# clear rnd prec
  11967	ori.b		&s_mode*0x10,%d0	# insert sgl precision
  11968	bra.b		fsqrt
  11969
  11970	global		fdsqrt
  11971fdsqrt:
  11972	andi.b		&0x30,%d0		# clear rnd prec
  11973	ori.b		&d_mode*0x10,%d0	# insert dbl precision
  11974
  11975	global		fsqrt
  11976fsqrt:
  11977	mov.l		%d0,L_SCR3(%a6)		# store rnd info
  11978	clr.w		%d1
  11979	mov.b		STAG(%a6),%d1
  11980	bne.w		fsqrt_not_norm		# optimize on non-norm input
  11981
  11982#
  11983# SQUARE ROOT: norms and denorms ONLY!
  11984#
  11985fsqrt_norm:
  11986	tst.b		SRC_EX(%a0)		# is operand negative?
  11987	bmi.l		res_operr		# yes
  11988
  11989	andi.b		&0xc0,%d0		# is precision extended?
  11990	bne.b		fsqrt_not_ext		# no; go handle sgl or dbl
  11991
  11992	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
  11993	fmov.l		&0x0,%fpsr		# clear FPSR
  11994
  11995	fsqrt.x		(%a0),%fp0		# execute square root
  11996
  11997	fmov.l		%fpsr,%d1
  11998	or.l		%d1,USER_FPSR(%a6)	# set N,INEX
  11999
  12000	rts
  12001
  12002fsqrt_denorm:
  12003	tst.b		SRC_EX(%a0)		# is operand negative?
  12004	bmi.l		res_operr		# yes
  12005
  12006	andi.b		&0xc0,%d0		# is precision extended?
  12007	bne.b		fsqrt_not_ext		# no; go handle sgl or dbl
  12008
  12009	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
  12010	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
  12011	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
  12012
  12013	bsr.l		scale_sqrt		# calculate scale factor
  12014
  12015	bra.w		fsqrt_sd_normal
  12016
  12017#
  12018# operand is either single or double
  12019#
  12020fsqrt_not_ext:
  12021	cmpi.b		%d0,&s_mode*0x10	# separate sgl/dbl prec
  12022	bne.w		fsqrt_dbl
  12023
  12024#
  12025# operand is to be rounded to single precision
  12026#
  12027fsqrt_sgl:
  12028	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
  12029	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
  12030	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
  12031
  12032	bsr.l		scale_sqrt		# calculate scale factor
  12033
  12034	cmpi.l		%d0,&0x3fff-0x3f81	# will move in underflow?
  12035	beq.w		fsqrt_sd_may_unfl
  12036	bgt.w		fsqrt_sd_unfl		# yes; go handle underflow
  12037	cmpi.l		%d0,&0x3fff-0x407f	# will move in overflow?
  12038	beq.w		fsqrt_sd_may_ovfl	# maybe; go check
  12039	blt.w		fsqrt_sd_ovfl		# yes; go handle overflow
  12040
  12041#
  12042# operand will NOT overflow or underflow when moved in to the fp reg file
  12043#
  12044fsqrt_sd_normal:
  12045	fmov.l		&0x0,%fpsr		# clear FPSR
  12046	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
  12047
  12048	fsqrt.x		FP_SCR0(%a6),%fp0	# perform square root
  12049
  12050	fmov.l		%fpsr,%d1		# save FPSR
  12051	fmov.l		&0x0,%fpcr		# clear FPCR
  12052
  12053	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
  12054
  12055fsqrt_sd_normal_exit:
  12056	mov.l		%d2,-(%sp)		# save d2
  12057	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
  12058	mov.w		FP_SCR0_EX(%a6),%d1	# load sgn,exp
  12059	mov.l		%d1,%d2			# make a copy
  12060	andi.l		&0x7fff,%d1		# strip sign
  12061	sub.l		%d0,%d1			# subtract scale factor
  12062	andi.w		&0x8000,%d2		# keep old sign
  12063	or.w		%d1,%d2			# concat old sign,new exp
  12064	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
  12065	mov.l		(%sp)+,%d2		# restore d2
  12066	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
  12067	rts
  12068
  12069#
  12070# operand is to be rounded to double precision
  12071#
  12072fsqrt_dbl:
  12073	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
  12074	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
  12075	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
  12076
  12077	bsr.l		scale_sqrt		# calculate scale factor
  12078
  12079	cmpi.l		%d0,&0x3fff-0x3c01	# will move in underflow?
  12080	beq.w		fsqrt_sd_may_unfl
  12081	bgt.b		fsqrt_sd_unfl		# yes; go handle underflow
  12082	cmpi.l		%d0,&0x3fff-0x43ff	# will move in overflow?
  12083	beq.w		fsqrt_sd_may_ovfl	# maybe; go check
  12084	blt.w		fsqrt_sd_ovfl		# yes; go handle overflow
  12085	bra.w		fsqrt_sd_normal		# no; go handle normalized op
  12086
  12087# we're on the line here and the distinguishing characteristic is whether
  12088# the exponent is 0x3fff or 0x3ffe. if it's 0x3fff, then it's a safe number;
  12089# otherwise fall through to underflow.
  12090fsqrt_sd_may_unfl:
  12091	btst		&0x0,1+FP_SCR0_EX(%a6)	# is exponent 0x3fff?
  12092	bne.w		fsqrt_sd_normal		# yes, so no underflow
  12093
  12094#
  12095# operand WILL underflow when moved in to the fp register file
  12096#
  12097fsqrt_sd_unfl:
  12098	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
  12099
  12100	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
  12101	fmov.l		&0x0,%fpsr		# clear FPSR
  12102
  12103	fsqrt.x		FP_SCR0(%a6),%fp0	# execute square root
  12104
  12105	fmov.l		%fpsr,%d1		# save status
  12106	fmov.l		&0x0,%fpcr		# clear FPCR
  12107
  12108	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
  12109
  12110# if underflow or inexact is enabled, go calculate EXOP first.
  12111	mov.b		FPCR_ENABLE(%a6),%d1
  12112	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
  12113	bne.b		fsqrt_sd_unfl_ena	# yes
  12114
  12115fsqrt_sd_unfl_dis:
  12116	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
  12117
  12118	lea		FP_SCR0(%a6),%a0	# pass: result addr
  12119	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
  12120	bsr.l		unf_res			# calculate default result
  12121	or.b		%d0,FPSR_CC(%a6)	# set possible 'Z' ccode
  12122	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
  12123	rts
  12124
  12125#
  12126# operand will underflow AND underflow is enabled.
  12127# Therefore, we must return the result rounded to extended precision.
  12128#
  12129fsqrt_sd_unfl_ena:
  12130	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
  12131	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
  12132	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent
  12133
  12134	mov.l		%d2,-(%sp)		# save d2
  12135	mov.l		%d1,%d2			# make a copy
  12136	andi.l		&0x7fff,%d1		# strip sign
  12137	andi.w		&0x8000,%d2		# keep old sign
  12138	sub.l		%d0,%d1			# subtract scale factor
  12139	addi.l		&0x6000,%d1		# add new bias
  12140	andi.w		&0x7fff,%d1
  12141	or.w		%d2,%d1			# concat new sign,new exp
  12142	mov.w		%d1,FP_SCR1_EX(%a6)	# insert new exp
  12143	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
  12144	mov.l		(%sp)+,%d2		# restore d2
  12145	bra.b		fsqrt_sd_unfl_dis
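#
# Illustrative sketch (not part of the FPSP): building the EXOP above amounts
# to re-biasing the 15-bit extended exponent by +0x6000 (the overflow path
# below uses -0x6000) while preserving the sign bit. Roughly, in C:
#
#	#include <stdint.h>
#
#	/* sign_exp is the 16-bit {sign,exponent} word of the extended value;
#	 * scale is the factor applied by scale_sqrt(); bias is +0x6000 for
#	 * underflow and -0x6000 for overflow                                 */
#	static uint16_t exop_sign_exp(uint16_t sign_exp, int32_t scale, int32_t bias)
#	{
#		uint32_t exp  = sign_exp & 0x7fff;	/* strip sign    */
#		uint16_t sign = sign_exp & 0x8000;	/* keep old sign */
#		exp = (uint32_t)((int32_t)exp - scale + bias) & 0x7fff;
#		return sign | (uint16_t)exp;
#	}
#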
  12146
  12147#
  12148# operand WILL overflow.
  12149#
  12150fsqrt_sd_ovfl:
  12151	fmov.l		&0x0,%fpsr		# clear FPSR
  12152	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
  12153
  12154	fsqrt.x		FP_SCR0(%a6),%fp0	# perform square root
  12155
  12156	fmov.l		&0x0,%fpcr		# clear FPCR
  12157	fmov.l		%fpsr,%d1		# save FPSR
  12158
  12159	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
  12160
  12161fsqrt_sd_ovfl_tst:
  12162	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
  12163
  12164	mov.b		FPCR_ENABLE(%a6),%d1
  12165	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
  12166	bne.b		fsqrt_sd_ovfl_ena	# yes
  12167
  12168#
  12169# OVFL is not enabled; therefore, we must create the default result by
  12170# calling ovf_res().
  12171#
  12172fsqrt_sd_ovfl_dis:
  12173	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
  12174	sne		%d1			# set sign param accordingly
  12175	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
  12176	bsr.l		ovf_res			# calculate default result
  12177	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
  12178	fmovm.x		(%a0),&0x80		# return default result in fp0
  12179	rts
  12180
  12181#
  12182# OVFL is enabled.
  12183# the INEX2 bit has already been updated by the round to the correct precision.
  12184# now, round to extended (and don't alter the FPSR).
  12185#
  12186fsqrt_sd_ovfl_ena:
  12187	mov.l		%d2,-(%sp)		# save d2
  12188	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
  12189	mov.l		%d1,%d2			# make a copy
  12190	andi.l		&0x7fff,%d1		# strip sign
  12191	andi.w		&0x8000,%d2		# keep old sign
  12192	sub.l		%d0,%d1			# subtract scale factor
  12193	subi.l		&0x6000,%d1		# subtract bias
  12194	andi.w		&0x7fff,%d1
  12195	or.w		%d2,%d1			# concat sign,exp
  12196	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
  12197	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
  12198	mov.l		(%sp)+,%d2		# restore d2
  12199	bra.b		fsqrt_sd_ovfl_dis
  12200
  12201#
  12202# the move in MAY overflow. so...
  12203#
  12204fsqrt_sd_may_ovfl:
  12205	btst		&0x0,1+FP_SCR0_EX(%a6)	# is exponent 0x3fff?
  12206	bne.w		fsqrt_sd_ovfl		# yes, so overflow
  12207
  12208	fmov.l		&0x0,%fpsr		# clear FPSR
  12209	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
  12210
  12211	fsqrt.x		FP_SCR0(%a6),%fp0	# perform square root
  12212
  12213	fmov.l		%fpsr,%d1		# save status
  12214	fmov.l		&0x0,%fpcr		# clear FPCR
  12215
  12216	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
  12217
  12218	fmov.x		%fp0,%fp1		# make a copy of result
  12219	fcmp.b		%fp1,&0x1		# is |result| >= 1.b?
  12220	fbge.w		fsqrt_sd_ovfl_tst	# yes; overflow has occurred
  12221
  12222# no, it didn't overflow; we have correct result
  12223	bra.w		fsqrt_sd_normal_exit
  12224
  12225##########################################################################
  12226
  12227#
  12228# input is not normalized; what is it?
  12229#
  12230fsqrt_not_norm:
  12231	cmpi.b		%d1,&DENORM		# weed out DENORM
  12232	beq.w		fsqrt_denorm
  12233	cmpi.b		%d1,&ZERO		# weed out ZERO
  12234	beq.b		fsqrt_zero
  12235	cmpi.b		%d1,&INF		# weed out INF
  12236	beq.b		fsqrt_inf
  12237	cmpi.b		%d1,&SNAN		# weed out SNAN
  12238	beq.l		res_snan_1op
  12239	bra.l		res_qnan_1op
  12240
  12241#
  12242#	fsqrt(+0) = +0
  12243#	fsqrt(-0) = -0
  12244#	fsqrt(+INF) = +INF
  12245#	fsqrt(-INF) = OPERR
  12246#
  12247fsqrt_zero:
  12248	tst.b		SRC_EX(%a0)		# is ZERO positive or negative?
  12249	bmi.b		fsqrt_zero_m		# negative
  12250fsqrt_zero_p:
  12251	fmov.s		&0x00000000,%fp0	# return +ZERO
  12252	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
  12253	rts
  12254fsqrt_zero_m:
  12255	fmov.s		&0x80000000,%fp0	# return -ZERO
  12256	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6)	# set 'Z','N' ccode bits
  12257	rts
  12258
  12259fsqrt_inf:
  12260	tst.b		SRC_EX(%a0)		# is INF positive or negative?
  12261	bmi.l		res_operr		# negative
  12262fsqrt_inf_p:
  12263	fmovm.x		SRC(%a0),&0x80		# return +INF in fp0
  12264	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
  12265	rts
  12266
  12267#########################################################################
  12268# XDEF ****************************************************************	#
  12269#	fetch_dreg(): fetch register according to index in d1		#
  12270#									#
  12271# XREF ****************************************************************	#
  12272#	None								#
  12273#									#
  12274# INPUT ***************************************************************	#
  12275#	d1 = index of register to fetch from				#
  12276#									#
  12277# OUTPUT **************************************************************	#
  12278#	d0 = value of register fetched					#
  12279#									#
  12280# ALGORITHM ***********************************************************	#
  12281#	According to the index value in d1 which can range from zero	#
  12282# to fifteen, load the corresponding register file value (where		#
  12283# address register indexes start at 8). D0/D1/A0/A1/A6/A7 are on the	#
  12284# stack. The rest should still be in their original places.		#
  12285#									#
  12286#########################################################################
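#
# Illustrative sketch (not part of the FPSP): fetch_dreg() treats the integer
# register file as split between an exception-frame save area (d0/d1, a0/a1,
# a6/a7) and registers that are still live in hardware. A rough C model; the
# struct and its field names are illustrative, not the real frame layout:
#
#	#include <stdint.h>
#
#	struct exc_frame {
#		uint32_t dregs[2];	/* saved d0/d1            */
#		uint32_t aregs[2];	/* saved a0/a1            */
#		uint32_t a6, a7;	/* saved frame/stack ptrs */
#	};
#
#	/* index 0-7 = d0-d7, 8-15 = a0-a7 */
#	static uint32_t fetch_reg_model(const struct exc_frame *f,
#					const uint32_t live[16], int index)
#	{
#		switch (index) {
#		case 0: case 1: return f->dregs[index];
#		case 8: case 9: return f->aregs[index - 8];
#		case 14:        return f->a6;
#		case 15:        return f->a7;
#		default:        return live[index];	/* d2-d7, a2-a5 */
#		}
#	}
#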
  12287
  12288# this routine leaves d1 intact for subsequent store_dreg calls.
  12289	global		fetch_dreg
  12290fetch_dreg:
  12291	mov.w		(tbl_fdreg.b,%pc,%d1.w*2),%d0
  12292	jmp		(tbl_fdreg.b,%pc,%d0.w*1)
  12293
  12294tbl_fdreg:
  12295	short		fdreg0 - tbl_fdreg
  12296	short		fdreg1 - tbl_fdreg
  12297	short		fdreg2 - tbl_fdreg
  12298	short		fdreg3 - tbl_fdreg
  12299	short		fdreg4 - tbl_fdreg
  12300	short		fdreg5 - tbl_fdreg
  12301	short		fdreg6 - tbl_fdreg
  12302	short		fdreg7 - tbl_fdreg
  12303	short		fdreg8 - tbl_fdreg
  12304	short		fdreg9 - tbl_fdreg
  12305	short		fdrega - tbl_fdreg
  12306	short		fdregb - tbl_fdreg
  12307	short		fdregc - tbl_fdreg
  12308	short		fdregd - tbl_fdreg
  12309	short		fdrege - tbl_fdreg
  12310	short		fdregf - tbl_fdreg
  12311
  12312fdreg0:
  12313	mov.l		EXC_DREGS+0x0(%a6),%d0
  12314	rts
  12315fdreg1:
  12316	mov.l		EXC_DREGS+0x4(%a6),%d0
  12317	rts
  12318fdreg2:
  12319	mov.l		%d2,%d0
  12320	rts
  12321fdreg3:
  12322	mov.l		%d3,%d0
  12323	rts
  12324fdreg4:
  12325	mov.l		%d4,%d0
  12326	rts
  12327fdreg5:
  12328	mov.l		%d5,%d0
  12329	rts
  12330fdreg6:
  12331	mov.l		%d6,%d0
  12332	rts
  12333fdreg7:
  12334	mov.l		%d7,%d0
  12335	rts
  12336fdreg8:
  12337	mov.l		EXC_DREGS+0x8(%a6),%d0
  12338	rts
  12339fdreg9:
  12340	mov.l		EXC_DREGS+0xc(%a6),%d0
  12341	rts
  12342fdrega:
  12343	mov.l		%a2,%d0
  12344	rts
  12345fdregb:
  12346	mov.l		%a3,%d0
  12347	rts
  12348fdregc:
  12349	mov.l		%a4,%d0
  12350	rts
  12351fdregd:
  12352	mov.l		%a5,%d0
  12353	rts
  12354fdrege:
  12355	mov.l		(%a6),%d0
  12356	rts
  12357fdregf:
  12358	mov.l		EXC_A7(%a6),%d0
  12359	rts
  12360
  12361#########################################################################
  12362# XDEF ****************************************************************	#
  12363#	store_dreg_l(): store longword to data register specified by d1	#
  12364#									#
  12365# XREF ****************************************************************	#
  12366#	None								#
  12367#									#
  12368# INPUT ***************************************************************	#
  12369#	d0 = longword value to store					#
  12370#	d1 = index of data register to store to				#
  12371#									#
  12372# OUTPUT **************************************************************	#
  12373#	(data register is updated)					#
  12374#									#
  12375# ALGORITHM ***********************************************************	#
  12376#	According to the index value in d1, store the longword value	#
  12377# in d0 to the corresponding data register. D0/D1 are on the stack	#
  12378# while the rest are in their initial places.				#
  12379#									#
  12380#########################################################################
  12381
  12382	global		store_dreg_l
  12383store_dreg_l:
  12384	mov.w		(tbl_sdregl.b,%pc,%d1.w*2),%d1
  12385	jmp		(tbl_sdregl.b,%pc,%d1.w*1)
  12386
  12387tbl_sdregl:
  12388	short		sdregl0 - tbl_sdregl
  12389	short		sdregl1 - tbl_sdregl
  12390	short		sdregl2 - tbl_sdregl
  12391	short		sdregl3 - tbl_sdregl
  12392	short		sdregl4 - tbl_sdregl
  12393	short		sdregl5 - tbl_sdregl
  12394	short		sdregl6 - tbl_sdregl
  12395	short		sdregl7 - tbl_sdregl
  12396
  12397sdregl0:
  12398	mov.l		%d0,EXC_DREGS+0x0(%a6)
  12399	rts
  12400sdregl1:
  12401	mov.l		%d0,EXC_DREGS+0x4(%a6)
  12402	rts
  12403sdregl2:
  12404	mov.l		%d0,%d2
  12405	rts
  12406sdregl3:
  12407	mov.l		%d0,%d3
  12408	rts
  12409sdregl4:
  12410	mov.l		%d0,%d4
  12411	rts
  12412sdregl5:
  12413	mov.l		%d0,%d5
  12414	rts
  12415sdregl6:
  12416	mov.l		%d0,%d6
  12417	rts
  12418sdregl7:
  12419	mov.l		%d0,%d7
  12420	rts
  12421
  12422#########################################################################
  12423# XDEF ****************************************************************	#
  12424#	store_dreg_w(): store word to data register specified by d1	#
  12425#									#
  12426# XREF ****************************************************************	#
  12427#	None								#
  12428#									#
  12429# INPUT ***************************************************************	#
  12430#	d0 = word value to store					#
  12431#	d1 = index of data register to store to				#
  12432#									#
  12433# OUTPUT **************************************************************	#
  12434#	(data register is updated)					#
  12435#									#
  12436# ALGORITHM ***********************************************************	#
  12437#	According to the index value in d1, store the word value	#
  12438# in d0 to the corresponding data register. D0/D1 are on the stack	#
  12439# while the rest are in their initial places.				#
  12440#									#
  12441#########################################################################
  12442
  12443	global		store_dreg_w
  12444store_dreg_w:
  12445	mov.w		(tbl_sdregw.b,%pc,%d1.w*2),%d1
  12446	jmp		(tbl_sdregw.b,%pc,%d1.w*1)
  12447
  12448tbl_sdregw:
  12449	short		sdregw0 - tbl_sdregw
  12450	short		sdregw1 - tbl_sdregw
  12451	short		sdregw2 - tbl_sdregw
  12452	short		sdregw3 - tbl_sdregw
  12453	short		sdregw4 - tbl_sdregw
  12454	short		sdregw5 - tbl_sdregw
  12455	short		sdregw6 - tbl_sdregw
  12456	short		sdregw7 - tbl_sdregw
  12457
  12458sdregw0:
  12459	mov.w		%d0,2+EXC_DREGS+0x0(%a6)
  12460	rts
  12461sdregw1:
  12462	mov.w		%d0,2+EXC_DREGS+0x4(%a6)
  12463	rts
  12464sdregw2:
  12465	mov.w		%d0,%d2
  12466	rts
  12467sdregw3:
  12468	mov.w		%d0,%d3
  12469	rts
  12470sdregw4:
  12471	mov.w		%d0,%d4
  12472	rts
  12473sdregw5:
  12474	mov.w		%d0,%d5
  12475	rts
  12476sdregw6:
  12477	mov.w		%d0,%d6
  12478	rts
  12479sdregw7:
  12480	mov.w		%d0,%d7
  12481	rts
  12482
  12483#########################################################################
  12484# XDEF ****************************************************************	#
  12485#	store_dreg_b(): store byte to data register specified by d1	#
  12486#									#
  12487# XREF ****************************************************************	#
  12488#	None								#
  12489#									#
  12490# INPUT ***************************************************************	#
  12491#	d0 = byte value to store					#
  12492#	d1 = index of data register to store to				#
  12493#									#
  12494# OUTPUT **************************************************************	#
  12495#	(data register is updated)					#
  12496#									#
  12497# ALGORITHM ***********************************************************	#
  12498#	According to the index value in d1, store the byte value	#
  12499# in d0 to the corresponding data register. D0/D1 are on the stack	#
  12500# while the rest are in their initial places.				#
  12501#									#
  12502#########################################################################
  12503
  12504	global		store_dreg_b
  12505store_dreg_b:
  12506	mov.w		(tbl_sdregb.b,%pc,%d1.w*2),%d1
  12507	jmp		(tbl_sdregb.b,%pc,%d1.w*1)
  12508
  12509tbl_sdregb:
  12510	short		sdregb0 - tbl_sdregb
  12511	short		sdregb1 - tbl_sdregb
  12512	short		sdregb2 - tbl_sdregb
  12513	short		sdregb3 - tbl_sdregb
  12514	short		sdregb4 - tbl_sdregb
  12515	short		sdregb5 - tbl_sdregb
  12516	short		sdregb6 - tbl_sdregb
  12517	short		sdregb7 - tbl_sdregb
  12518
  12519sdregb0:
  12520	mov.b		%d0,3+EXC_DREGS+0x0(%a6)
  12521	rts
  12522sdregb1:
  12523	mov.b		%d0,3+EXC_DREGS+0x4(%a6)
  12524	rts
  12525sdregb2:
  12526	mov.b		%d0,%d2
  12527	rts
  12528sdregb3:
  12529	mov.b		%d0,%d3
  12530	rts
  12531sdregb4:
  12532	mov.b		%d0,%d4
  12533	rts
  12534sdregb5:
  12535	mov.b		%d0,%d5
  12536	rts
  12537sdregb6:
  12538	mov.b		%d0,%d6
  12539	rts
  12540sdregb7:
  12541	mov.b		%d0,%d7
  12542	rts
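#
# Illustrative sketch (not part of the FPSP): the word and byte stores above
# write at offsets 2+ and 3+ into the saved 32-bit slots because the 68k is
# big-endian -- the least-significant word/byte of a longword lives at the
# highest addresses -- so only the low part of the saved register changes.
# Roughly, in C:
#
#	#include <stdint.h>
#
#	/* same effect as "mov.b %d0,3+EXC_DREGS+..." on a big-endian slot */
#	static void store_low_byte(uint32_t *slot, uint8_t val)
#	{
#		*slot = (*slot & 0xffffff00u) | val;
#	}
#
#	/* same effect as "mov.w %d0,2+EXC_DREGS+..." */
#	static void store_low_word(uint32_t *slot, uint16_t val)
#	{
#		*slot = (*slot & 0xffff0000u) | val;
#	}
#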
  12543
  12544#########################################################################
  12545# XDEF ****************************************************************	#
  12546#	inc_areg(): increment an address register by the value in d0	#
  12547#									#
  12548# XREF ****************************************************************	#
  12549#	None								#
  12550#									#
  12551# INPUT ***************************************************************	#
  12552#	d0 = amount to increment by					#
  12553#	d1 = index of address register to increment			#
  12554#									#
  12555# OUTPUT **************************************************************	#
  12556#	(address register is updated)					#
  12557#									#
  12558# ALGORITHM ***********************************************************	#
  12559#	Typically used for an instruction w/ a post-increment <ea>,	#
  12560# this routine adds the increment value in d0 to the address register	#
  12561# specified by d1. A0/A1/A6/A7 reside on the stack. The rest reside	#
  12562# in their original places.						#
  12563#	For a7, if the increment amount is one, then we have to		#
  12564# increment by two. For any a7 update, set the mia7_flag so that if	#
  12565# an access error exception occurs later in emulation, this address	#
  12566# register update can be undone.					#
  12567#									#
  12568#########################################################################
  12569
  12570	global		inc_areg
  12571inc_areg:
  12572	mov.w		(tbl_iareg.b,%pc,%d1.w*2),%d1
  12573	jmp		(tbl_iareg.b,%pc,%d1.w*1)
  12574
  12575tbl_iareg:
  12576	short		iareg0 - tbl_iareg
  12577	short		iareg1 - tbl_iareg
  12578	short		iareg2 - tbl_iareg
  12579	short		iareg3 - tbl_iareg
  12580	short		iareg4 - tbl_iareg
  12581	short		iareg5 - tbl_iareg
  12582	short		iareg6 - tbl_iareg
  12583	short		iareg7 - tbl_iareg
  12584
  12585iareg0:	add.l		%d0,EXC_DREGS+0x8(%a6)
  12586	rts
  12587iareg1:	add.l		%d0,EXC_DREGS+0xc(%a6)
  12588	rts
  12589iareg2:	add.l		%d0,%a2
  12590	rts
  12591iareg3:	add.l		%d0,%a3
  12592	rts
  12593iareg4:	add.l		%d0,%a4
  12594	rts
  12595iareg5:	add.l		%d0,%a5
  12596	rts
  12597iareg6:	add.l		%d0,(%a6)
  12598	rts
  12599iareg7:	mov.b		&mia7_flg,SPCOND_FLG(%a6)
  12600	cmpi.b		%d0,&0x1
  12601	beq.b		iareg7b
  12602	add.l		%d0,EXC_A7(%a6)
  12603	rts
  12604iareg7b:
  12605	addq.l		&0x2,EXC_A7(%a6)
  12606	rts
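#
# Illustrative sketch (not part of the FPSP): the a7 case above reflects the
# architectural rule that a byte-sized (a7)+ still moves the stack pointer by
# two so it stays word aligned. Roughly, in C:
#
#	#include <stdint.h>
#
#	static uint32_t post_increment(uint32_t areg, uint32_t size, int is_a7)
#	{
#		if (is_a7 && size == 1)
#			return areg + 2;	/* keep the stack pointer aligned */
#		return areg + size;
#	}
#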
  12607
  12608#########################################################################
  12609# XDEF ****************************************************************	#
  12610#	dec_areg(): decrement an address register by the value in d0	#
  12611#									#
  12612# XREF ****************************************************************	#
  12613#	None								#
  12614#									#
  12615# INPUT ***************************************************************	#
  12616#	d0 = amount to decrement by					#
  12617#	d1 = index of address register to decrement			#
  12618#									#
  12619# OUTPUT **************************************************************	#
  12620#	(address register is updated)					#
  12621#									#
  12622# ALGORITHM ***********************************************************	#
  12623#	Typically used for an instruction w/ a pre-decrement <ea>,	#
  12624# this routine subtracts the decrement value in d0 from the address register	#
  12625# specified by d1. A0/A1/A6/A7 reside on the stack. The rest reside	#
  12626# in their original places.						#
  12627#	For a7, if the decrement amount is one, then we have to		#
  12628# decrement by two. For any a7 update, set the mda7_flag so that if	#
  12629# an access error exception occurs later in emulation, this address	#
  12630# register update can be undone.					#
  12631#									#
  12632#########################################################################
  12633
  12634	global		dec_areg
  12635dec_areg:
  12636	mov.w		(tbl_dareg.b,%pc,%d1.w*2),%d1
  12637	jmp		(tbl_dareg.b,%pc,%d1.w*1)
  12638
  12639tbl_dareg:
  12640	short		dareg0 - tbl_dareg
  12641	short		dareg1 - tbl_dareg
  12642	short		dareg2 - tbl_dareg
  12643	short		dareg3 - tbl_dareg
  12644	short		dareg4 - tbl_dareg
  12645	short		dareg5 - tbl_dareg
  12646	short		dareg6 - tbl_dareg
  12647	short		dareg7 - tbl_dareg
  12648
  12649dareg0:	sub.l		%d0,EXC_DREGS+0x8(%a6)
  12650	rts
  12651dareg1:	sub.l		%d0,EXC_DREGS+0xc(%a6)
  12652	rts
  12653dareg2:	sub.l		%d0,%a2
  12654	rts
  12655dareg3:	sub.l		%d0,%a3
  12656	rts
  12657dareg4:	sub.l		%d0,%a4
  12658	rts
  12659dareg5:	sub.l		%d0,%a5
  12660	rts
  12661dareg6:	sub.l		%d0,(%a6)
  12662	rts
  12663dareg7:	mov.b		&mda7_flg,SPCOND_FLG(%a6)
  12664	cmpi.b		%d0,&0x1
  12665	beq.b		dareg7b
  12666	sub.l		%d0,EXC_A7(%a6)
  12667	rts
  12668dareg7b:
  12669	subq.l		&0x2,EXC_A7(%a6)
  12670	rts
  12671
  12672##############################################################################
  12673
  12674#########################################################################
  12675# XDEF ****************************************************************	#
  12676#	load_fpn1(): load FP register value into FP_SRC(a6).		#
  12677#									#
  12678# XREF ****************************************************************	#
  12679#	None								#
  12680#									#
  12681# INPUT ***************************************************************	#
  12682#	d0 = index of FP register to load				#
  12683#									#
  12684# OUTPUT **************************************************************	#
  12685#	FP_SRC(a6) = value loaded from FP register file			#
  12686#									#
  12687# ALGORITHM ***********************************************************	#
  12688#	Using the index in d0, load FP_SRC(a6) with a number from the	#
  12689# FP register file.							#
  12690#									#
  12691#########################################################################
  12692
  12693	global		load_fpn1
  12694load_fpn1:
  12695	mov.w		(tbl_load_fpn1.b,%pc,%d0.w*2), %d0
  12696	jmp		(tbl_load_fpn1.b,%pc,%d0.w*1)
  12697
  12698tbl_load_fpn1:
  12699	short		load_fpn1_0 - tbl_load_fpn1
  12700	short		load_fpn1_1 - tbl_load_fpn1
  12701	short		load_fpn1_2 - tbl_load_fpn1
  12702	short		load_fpn1_3 - tbl_load_fpn1
  12703	short		load_fpn1_4 - tbl_load_fpn1
  12704	short		load_fpn1_5 - tbl_load_fpn1
  12705	short		load_fpn1_6 - tbl_load_fpn1
  12706	short		load_fpn1_7 - tbl_load_fpn1
  12707
  12708load_fpn1_0:
  12709	mov.l		0+EXC_FP0(%a6), 0+FP_SRC(%a6)
  12710	mov.l		4+EXC_FP0(%a6), 4+FP_SRC(%a6)
  12711	mov.l		8+EXC_FP0(%a6), 8+FP_SRC(%a6)
  12712	lea		FP_SRC(%a6), %a0
  12713	rts
  12714load_fpn1_1:
  12715	mov.l		0+EXC_FP1(%a6), 0+FP_SRC(%a6)
  12716	mov.l		4+EXC_FP1(%a6), 4+FP_SRC(%a6)
  12717	mov.l		8+EXC_FP1(%a6), 8+FP_SRC(%a6)
  12718	lea		FP_SRC(%a6), %a0
  12719	rts
  12720load_fpn1_2:
  12721	fmovm.x		&0x20, FP_SRC(%a6)
  12722	lea		FP_SRC(%a6), %a0
  12723	rts
  12724load_fpn1_3:
  12725	fmovm.x		&0x10, FP_SRC(%a6)
  12726	lea		FP_SRC(%a6), %a0
  12727	rts
  12728load_fpn1_4:
  12729	fmovm.x		&0x08, FP_SRC(%a6)
  12730	lea		FP_SRC(%a6), %a0
  12731	rts
  12732load_fpn1_5:
  12733	fmovm.x		&0x04, FP_SRC(%a6)
  12734	lea		FP_SRC(%a6), %a0
  12735	rts
  12736load_fpn1_6:
  12737	fmovm.x		&0x02, FP_SRC(%a6)
  12738	lea		FP_SRC(%a6), %a0
  12739	rts
  12740load_fpn1_7:
  12741	fmovm.x		&0x01, FP_SRC(%a6)
  12742	lea		FP_SRC(%a6), %a0
  12743	rts
  12744
  12745#############################################################################
  12746
  12747#########################################################################
  12748# XDEF ****************************************************************	#
  12749#	load_fpn2(): load FP register value into FP_DST(a6).		#
  12750#									#
  12751# XREF ****************************************************************	#
  12752#	None								#
  12753#									#
  12754# INPUT ***************************************************************	#
  12755#	d0 = index of FP register to load				#
  12756#									#
  12757# OUTPUT **************************************************************	#
  12758#	FP_DST(a6) = value loaded from FP register file			#
  12759#									#
  12760# ALGORITHM ***********************************************************	#
  12761#	Using the index in d0, load FP_DST(a6) with a number from the	#
  12762# FP register file.							#
  12763#									#
  12764#########################################################################
  12765
  12766	global		load_fpn2
  12767load_fpn2:
  12768	mov.w		(tbl_load_fpn2.b,%pc,%d0.w*2), %d0
  12769	jmp		(tbl_load_fpn2.b,%pc,%d0.w*1)
  12770
  12771tbl_load_fpn2:
  12772	short		load_fpn2_0 - tbl_load_fpn2
  12773	short		load_fpn2_1 - tbl_load_fpn2
  12774	short		load_fpn2_2 - tbl_load_fpn2
  12775	short		load_fpn2_3 - tbl_load_fpn2
  12776	short		load_fpn2_4 - tbl_load_fpn2
  12777	short		load_fpn2_5 - tbl_load_fpn2
  12778	short		load_fpn2_6 - tbl_load_fpn2
  12779	short		load_fpn2_7 - tbl_load_fpn2
  12780
  12781load_fpn2_0:
  12782	mov.l		0+EXC_FP0(%a6), 0+FP_DST(%a6)
  12783	mov.l		4+EXC_FP0(%a6), 4+FP_DST(%a6)
  12784	mov.l		8+EXC_FP0(%a6), 8+FP_DST(%a6)
  12785	lea		FP_DST(%a6), %a0
  12786	rts
  12787load_fpn2_1:
  12788	mov.l		0+EXC_FP1(%a6), 0+FP_DST(%a6)
  12789	mov.l		4+EXC_FP1(%a6), 4+FP_DST(%a6)
  12790	mov.l		8+EXC_FP1(%a6), 8+FP_DST(%a6)
  12791	lea		FP_DST(%a6), %a0
  12792	rts
  12793load_fpn2_2:
  12794	fmovm.x		&0x20, FP_DST(%a6)
  12795	lea		FP_DST(%a6), %a0
  12796	rts
  12797load_fpn2_3:
  12798	fmovm.x		&0x10, FP_DST(%a6)
  12799	lea		FP_DST(%a6), %a0
  12800	rts
  12801load_fpn2_4:
  12802	fmovm.x		&0x08, FP_DST(%a6)
  12803	lea		FP_DST(%a6), %a0
  12804	rts
  12805load_fpn2_5:
  12806	fmovm.x		&0x04, FP_DST(%a6)
  12807	lea		FP_DST(%a6), %a0
  12808	rts
  12809load_fpn2_6:
  12810	fmovm.x		&0x02, FP_DST(%a6)
  12811	lea		FP_DST(%a6), %a0
  12812	rts
  12813load_fpn2_7:
  12814	fmovm.x		&0x01, FP_DST(%a6)
  12815	lea		FP_DST(%a6), %a0
  12816	rts
  12817
  12818#############################################################################
  12819
  12820#########################################################################
  12821# XDEF ****************************************************************	#
  12822#	store_fpreg(): store an fp value to the fpreg designated d0.	#
  12823#									#
  12824# XREF ****************************************************************	#
  12825#	None								#
  12826#									#
  12827# INPUT ***************************************************************	#
  12828#	fp0 = extended precision value to store				#
  12829#	d0  = index of floating-point register				#
  12830#									#
  12831# OUTPUT **************************************************************	#
  12832#	None								#
  12833#									#
  12834# ALGORITHM ***********************************************************	#
  12835#	Store the value in fp0 to the FP register designated by the	#
  12836# value in d0. The FP number can be DENORM or SNAN so we have to be	#
  12837# careful that we don't take an exception here.				#
  12838#									#
  12839#########################################################################
  12840
  12841	global		store_fpreg
  12842store_fpreg:
  12843	mov.w		(tbl_store_fpreg.b,%pc,%d0.w*2), %d0
  12844	jmp		(tbl_store_fpreg.b,%pc,%d0.w*1)
  12845
  12846tbl_store_fpreg:
  12847	short		store_fpreg_0 - tbl_store_fpreg
  12848	short		store_fpreg_1 - tbl_store_fpreg
  12849	short		store_fpreg_2 - tbl_store_fpreg
  12850	short		store_fpreg_3 - tbl_store_fpreg
  12851	short		store_fpreg_4 - tbl_store_fpreg
  12852	short		store_fpreg_5 - tbl_store_fpreg
  12853	short		store_fpreg_6 - tbl_store_fpreg
  12854	short		store_fpreg_7 - tbl_store_fpreg
  12855
  12856store_fpreg_0:
  12857	fmovm.x		&0x80, EXC_FP0(%a6)
  12858	rts
  12859store_fpreg_1:
  12860	fmovm.x		&0x80, EXC_FP1(%a6)
  12861	rts
  12862store_fpreg_2:
  12863	fmovm.x		&0x01, -(%sp)
  12864	fmovm.x		(%sp)+, &0x20
  12865	rts
  12866store_fpreg_3:
  12867	fmovm.x		&0x01, -(%sp)
  12868	fmovm.x		(%sp)+, &0x10
  12869	rts
  12870store_fpreg_4:
  12871	fmovm.x		&0x01, -(%sp)
  12872	fmovm.x		(%sp)+, &0x08
  12873	rts
  12874store_fpreg_5:
  12875	fmovm.x		&0x01, -(%sp)
  12876	fmovm.x		(%sp)+, &0x04
  12877	rts
  12878store_fpreg_6:
  12879	fmovm.x		&0x01, -(%sp)
  12880	fmovm.x		(%sp)+, &0x02
  12881	rts
  12882store_fpreg_7:
  12883	fmovm.x		&0x01, -(%sp)
  12884	fmovm.x		(%sp)+, &0x01
  12885	rts
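#
# Illustrative sketch (not part of the FPSP): fmovm is used above because it
# moves register images without arithmetic side effects, so a DENORM or SNAN
# in fp0 cannot raise an exception. In the control-addressing forms used in
# these tables, fpN is selected by mask bit 0x80 >> N (the pre-decrement push
# in store_fpreg_2..7 uses the reversed ordering, which is why &0x01 there
# names fp0). A rough helper in C:
#
#	#include <stdint.h>
#
#	/* mask bit for fpN in the control-addressing fmovm encodings above */
#	static uint8_t fmovm_mask(unsigned int fp_reg_index)	/* 0..7 */
#	{
#		return (uint8_t)(0x80u >> fp_reg_index);
#	}
#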
  12886
  12887#########################################################################
  12888# XDEF ****************************************************************	#
  12889#	get_packed(): fetch a packed operand from memory and then	#
  12890#		      convert it to a floating-point binary number.	#
  12891#									#
  12892# XREF ****************************************************************	#
  12893#	_dcalc_ea() - calculate the correct <ea>			#
  12894#	_mem_read() - fetch the packed operand from memory		#
  12895#	facc_in_x() - the fetch failed so jump to special exit code	#
  12896#	decbin()    - convert packed to binary extended precision	#
  12897#									#
  12898# INPUT ***************************************************************	#
  12899#	None								#
  12900#									#
  12901# OUTPUT **************************************************************	#
  12902#	If no failure on _mem_read():					#
  12903#	FP_SRC(a6) = packed operand now as a binary FP number		#
  12904#									#
  12905# ALGORITHM ***********************************************************	#
  12906#	Get the correct <ea> which is the value on the exception stack	#
  12907# frame w/ maybe a correction factor if the <ea> is -(an) or (an)+.	#
  12908# Then, fetch the operand from memory. If the fetch fails, exit		#
  12909# through facc_in_x().							#
  12910#	If the packed operand is a ZERO,NAN, or INF, convert it to	#
  12911# its binary representation here. Else, call decbin() which will	#
  12912# convert the packed value to an extended precision binary value.	#
  12913#									#
  12914#########################################################################
  12915
  12916# the stacked <ea> for packed is correct except for -(An).
  12917# the base reg must be updated for both -(An) and (An)+.
  12918	global		get_packed
  12919get_packed:
  12920	mov.l		&0xc,%d0		# packed is 12 bytes
  12921	bsr.l		_dcalc_ea		# fetch <ea>; correct An
  12922
  12923	lea		FP_SRC(%a6),%a1		# pass: ptr to super dst
  12924	mov.l		&0xc,%d0		# pass: 12 bytes
  12925	bsr.l		_dmem_read		# read packed operand
  12926
  12927	tst.l		%d1			# did dfetch fail?
  12928	bne.l		facc_in_x		# yes
  12929
  12930# The packed operand is an INF or a NAN if the exponent field is all ones.
  12931	bfextu		FP_SRC(%a6){&1:&15},%d0	# get exp
  12932	cmpi.w		%d0,&0x7fff		# INF or NAN?
  12933	bne.b		gp_try_zero		# no
  12934	rts					# operand is an INF or NAN
  12935
  12936# The packed operand is a zero if the mantissa is all zero, else it's
  12937# a normal packed op.
  12938gp_try_zero:
  12939	mov.b		3+FP_SRC(%a6),%d0	# get byte 4
  12940	andi.b		&0x0f,%d0		# clear all but last nybble
  12941	bne.b		gp_not_spec		# not a zero
  12942	tst.l		FP_SRC_HI(%a6)		# is lw 2 zero?
  12943	bne.b		gp_not_spec		# not a zero
  12944	tst.l		FP_SRC_LO(%a6)		# is lw 3 zero?
  12945	bne.b		gp_not_spec		# not a zero
  12946	rts					# operand is a ZERO
  12947gp_not_spec:
  12948	lea		FP_SRC(%a6),%a0		# pass: ptr to packed op
  12949	bsr.l		decbin			# convert to extended
  12950	fmovm.x		&0x80,FP_SRC(%a6)	# make this the srcop
  12951	rts
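#
# Illustrative sketch (not part of the FPSP): the special-case tests above,
# expressed over the three longwords of the packed operand. Roughly, in C:
#
#	#include <stdbool.h>
#	#include <stdint.h>
#
#	static bool packed_is_inf_or_nan(const uint32_t p[3])
#	{
#		/* the 15 bits below the mantissa sign (matching the
#		 * bfextu {1:15} test above) are all ones for INF/NAN */
#		return ((p[0] >> 16) & 0x7fff) == 0x7fff;
#	}
#
#	static bool packed_is_zero(const uint32_t p[3])
#	{
#		/* integer digit (low nibble of byte 4) and both mantissa
#		 * longwords must be zero                                 */
#		return (p[0] & 0xf) == 0 && p[1] == 0 && p[2] == 0;
#	}
#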
  12952
  12953#########################################################################
  12954# decbin(): Converts normalized packed bcd value pointed to by register	#
  12955#	    a0 to extended-precision value in fp0.			#
  12956#									#
  12957# INPUT ***************************************************************	#
  12958#	a0 = pointer to normalized packed bcd value			#
  12959#									#
  12960# OUTPUT **************************************************************	#
  12961#	fp0 = exact fp representation of the packed bcd value.		#
  12962#									#
  12963# ALGORITHM ***********************************************************	#
  12964#	Expected is a normal bcd (i.e. non-exceptional; all inf, zero,	#
  12965#	and NaN operands are dispatched without entering this routine)	#
  12966#	value in 68881/882 format at location (a0).			#
  12967#									#
  12968#	A1. Convert the bcd exponent to binary by successive adds and	#
  12969#	muls. Set the sign according to SE. Subtract 16 to compensate	#
  12970#	for the mantissa which is to be interpreted as 17 integer	#
  12971#	digits, rather than 1 integer and 16 fraction digits.		#
  12972#	Note: this operation can never overflow.			#
  12973#									#
  12974#	A2. Convert the bcd mantissa to binary by successive		#
  12975#	adds and muls in FP0. Set the sign according to SM.		#
  12976#	The mantissa digits will be converted with the decimal point	#
  12977#	assumed following the least-significant digit.			#
  12978#	Note: this operation can never overflow.			#
  12979#									#
  12980#	A3. Count the number of leading/trailing zeros in the		#
  12981#	bcd string.  If SE is positive, count the leading zeros;	#
  12982#	if negative, count the trailing zeros.  Set the adjusted	#
  12983#	exponent equal to the exponent from A1 and the zero count	#
  12984#	added if SM = 1 and subtracted if SM = 0.  Scale the		#
  12985#	mantissa the equivalent of forcing in the bcd value:		#
  12986#									#
  12987#	SM = 0	a non-zero digit in the integer position		#
  12988#	SM = 1	a non-zero digit in Mant0, lsd of the fraction		#
  12989#									#
  12990#	this will insure that any value, regardless of its		#
  12991#	representation (ex. 0.1E2, 1E1, 10E0, 100E-1), is converted	#
  12992#	consistently.							#
  12993#									#
  12994#	A4. Calculate the factor 10^exp in FP1 using a table of		#
  12995#	10^(2^n) values.  To reduce the error in forming factors	#
  12996#	greater than 10^27, a directed rounding scheme is used with	#
  12997#	tables rounded to RN, RM, and RP, according to the table	#
  12998#	in the comments of the pwrten section.				#
  12999#									#
  13000#	A5. Form the final binary number by scaling the mantissa by	#
  13001#	the exponent factor.  This is done by multiplying the		#
  13002#	mantissa in FP0 by the factor in FP1 if the adjusted		#
  13003#	exponent sign is positive, and dividing FP0 by FP1 if		#
  13004#	it is negative.							#
  13005#									#
  13006#	Clean up and return. Check if the final mul or div was inexact.	#
  13007#	If so, set INEX1 in USER_FPSR.					#
  13008#									#
  13009#########################################################################
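#
# Illustrative sketch (not part of the FPSP): steps A1/A2 above are the usual
# digit-at-a-time conversion -- multiply the running value by ten and add the
# next BCD digit. Roughly, in C:
#
#	#include <stdint.h>
#
#	/* digits[] holds BCD nibbles, most significant first */
#	static int32_t bcd_to_binary(const uint8_t *digits, int ndigits)
#	{
#		int32_t val = 0;
#		for (int i = 0; i < ndigits; i++)
#			val = val * 10 + digits[i];
#		return val;
#	}
#
#	/* e.g. the 3 exponent digits give abs(exp), from which 16 is then
#	 * subtracted to treat the 17 mantissa digits as an integer (A1)   */
#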
  13010
  13011#
  13012#	PTENRN, PTENRM, and PTENRP are arrays of powers of 10 rounded
  13013#	to nearest, minus, and plus, respectively.  The tables include
  13014#	10**{1,2,4,8,16,32,64,128,256,512,1024,2048,4096}.  No rounding
  13015#	is required until the power is greater than 27; however, all
  13016#	tables include the first 5 for ease of indexing.
  13017#
  13018RTABLE:
  13019	byte		0,0,0,0
  13020	byte		2,3,2,3
  13021	byte		2,3,3,2
  13022	byte		3,2,2,3
  13023
  13024	set		FNIBS,7
  13025	set		FSTRT,0
  13026
  13027	set		ESTRT,4
  13028	set		EDIGITS,2
  13029
  13030	global		decbin
  13031decbin:
  13032	mov.l		0x0(%a0),FP_SCR0_EX(%a6) # make a copy of input
  13033	mov.l		0x4(%a0),FP_SCR0_HI(%a6) # so we don't alter it
  13034	mov.l		0x8(%a0),FP_SCR0_LO(%a6)
  13035
  13036	lea		FP_SCR0(%a6),%a0
  13037
  13038	movm.l		&0x3c00,-(%sp)		# save d2-d5
  13039	fmovm.x		&0x1,-(%sp)		# save fp1
  13040#
  13041# Calculate exponent:
  13042#  1. Copy bcd value in memory for use as a working copy.
  13043#  2. Calculate absolute value of exponent in d1 by mul and add.
  13044#  3. Correct for exponent sign.
  13045#  4. Subtract 16 to compensate for interpreting the mant as all integer digits.
  13046#     (i.e., all digits assumed left of the decimal point.)
  13047#
  13048# Register usage:
  13049#
  13050#  calc_e:
  13051#	(*)  d0: temp digit storage
  13052#	(*)  d1: accumulator for binary exponent
  13053#	(*)  d2: digit count
  13054#	(*)  d3: offset pointer
  13055#	( )  d4: first word of bcd
  13056#	( )  a0: pointer to working bcd value
  13057#	( )  a6: pointer to original bcd value
  13058#	(*)  FP_SCR1: working copy of original bcd value
  13059#	(*)  L_SCR1: copy of original exponent word
  13060#
  13061calc_e:
  13062	mov.l		&EDIGITS,%d2		# # of nibbles (digits) in fraction part
  13063	mov.l		&ESTRT,%d3		# counter to pick up digits
  13064	mov.l		(%a0),%d4		# get first word of bcd
  13065	clr.l		%d1			# zero d1 for accumulator
  13066e_gd:
  13067	mulu.l		&0xa,%d1		# mul partial product by one digit place
  13068	bfextu		%d4{%d3:&4},%d0		# get the digit and zero extend into d0
  13069	add.l		%d0,%d1			# d1 = d1 + d0
  13070	addq.b		&4,%d3			# advance d3 to the next digit
  13071	dbf.w		%d2,e_gd		# if we have used all 3 digits, exit loop
  13072	btst		&30,%d4			# get SE
  13073	beq.b		e_pos			# don't negate if pos
  13074	neg.l		%d1			# negate before subtracting
  13075e_pos:
  13076	sub.l		&16,%d1			# sub to compensate for shift of mant
  13077	bge.b		e_save			# if still pos, do not neg
  13078	neg.l		%d1			# now negative, make pos and set SE
  13079	or.l		&0x40000000,%d4		# set SE in d4,
  13080	or.l		&0x40000000,(%a0)	# and in working bcd
  13081e_save:
  13082	mov.l		%d1,-(%sp)		# save exp on stack
  13083#
  13084#
  13085# Calculate mantissa:
  13086#  1. Calculate absolute value of mantissa in fp0 by mul and add.
  13087#  2. Correct for mantissa sign.
  13088#     (i.e., all digits assumed left of the decimal point.)
  13089#
  13090# Register usage:
  13091#
  13092#  calc_m:
  13093#	(*)  d0: temp digit storage
  13094#	(*)  d1: lword counter
  13095#	(*)  d2: digit count
  13096#	(*)  d3: offset pointer
  13097#	( )  d4: words 2 and 3 of bcd
  13098#	( )  a0: pointer to working bcd value
  13099#	( )  a6: pointer to original bcd value
  13100#	(*) fp0: mantissa accumulator
  13101#	( )  FP_SCR1: working copy of original bcd value
  13102#	( )  L_SCR1: copy of original exponent word
  13103#
  13104calc_m:
  13105	mov.l		&1,%d1			# word counter, init to 1
  13106	fmov.s		&0x00000000,%fp0	# accumulator
  13107#
  13108#
  13109#  Since the packed number has a long word between the first & second parts,
  13110#  get the integer digit then skip down & get the rest of the
  13111#  mantissa.  We will unroll the loop once.
  13112#
  13113	bfextu		(%a0){&28:&4},%d0	# integer part is ls digit in long word
  13114	fadd.b		%d0,%fp0		# add digit to sum in fp0
  13115#
  13116#
  13117#  Get the rest of the mantissa.
  13118#
  13119loadlw:
  13120	mov.l		(%a0,%d1.L*4),%d4	# load mantissa longword into d4
  13121	mov.l		&FSTRT,%d3		# counter to pick up digits
  13122	mov.l		&FNIBS,%d2		# reset number of digits per a0 ptr
  13123md2b:
  13124	fmul.s		&0x41200000,%fp0	# fp0 = fp0 * 10
  13125	bfextu		%d4{%d3:&4},%d0		# get the digit and zero extend
  13126	fadd.b		%d0,%fp0		# fp0 = fp0 + digit
  13127#
  13128#
  13129#  If all the digits (8) in that long word have been converted (d2=0),
  13130#  then inc d1 (=2) to point to the next long word and reset d3 to 0
  13131#  to initialize the digit offset, and set d2 to 7 for the digit count;
  13132#  else continue with this long word.
  13133#
  13134	addq.b		&4,%d3			# advance d3 to the next digit
  13135	dbf.w		%d2,md2b		# check for last digit in this lw
  13136nextlw:
  13137	addq.l		&1,%d1			# inc lw pointer in mantissa
  13138	cmp.l		%d1,&2			# test for last lw
  13139	ble.b		loadlw			# if not, get last one
  13140#
  13141#  Check the sign of the mant and make the value in fp0 the same sign.
  13142#
  13143m_sign:
  13144	btst		&31,(%a0)		# test sign of the mantissa
  13145	beq.b		ap_st_z			# if clear, go to append/strip zeros
  13146	fneg.x		%fp0			# if set, negate fp0
  13147#
  13148# Append/strip zeros:
  13149#
  13150#  For adjusted exponents which have an absolute value greater than 27*,
  13151#  this routine calculates the amount needed to normalize the mantissa
  13152#  for the adjusted exponent.  That number is subtracted from the exp
  13153#  if the exp was positive, and added if it was negative.  The purpose
  13154#  of this is to reduce the value of the exponent and the possibility
  13155#  of error in calculation of pwrten.
  13156#
  13157#  1. Branch on the sign of the adjusted exponent.
  13158#  2p.(positive exp)
  13159#   2. Check M16 and the digits in lwords 2 and 3 in descending order.
  13160#   3. Add one for each zero encountered until a non-zero digit.
  13161#   4. Subtract the count from the exp.
  13162#   5. Check if the exp has crossed zero in #3 above; make the exp abs
  13163#	   and set SE.
  13164#	6. Multiply the mantissa by 10**count.
  13165#  2n.(negative exp)
  13166#   2. Check the digits in lwords 3 and 2 in descending order.
  13167#   3. Add one for each zero encountered until a non-zero digit.
  13168#   4. Add the count to the exp.
  13169#   5. Check if the exp has crossed zero in #3 above; clear SE.
  13170#   6. Divide the mantissa by 10**count.
  13171#
  13172#  *Why 27?  If the adjusted exponent is within -28 < expA < 28, then
  13173#   any adjustment due to append/strip zeros will drive the resultant
  13174#   exponent towards zero.  Since all pwrten constants with a power
  13175#   of 27 or less are exact, there is no need to use this routine to
  13176#   attempt to lessen the resultant exponent.
  13177#
  13178# Register usage:
  13179#
  13180#  ap_st_z:
  13181#	(*)  d0: temp digit storage
  13182#	(*)  d1: zero count
  13183#	(*)  d2: digit count
  13184#	(*)  d3: offset pointer
  13185#	( )  d4: first word of bcd
  13186#	(*)  d5: lword counter
  13187#	( )  a0: pointer to working bcd value
  13188#	( )  FP_SCR1: working copy of original bcd value
  13189#	( )  L_SCR1: copy of original exponent word
  13190#
  13191#
  13192# First check the absolute value of the exponent to see if this
  13193# routine is necessary.  If so, then check the sign of the exponent
  13194# and do append (+) or strip (-) zeros accordingly.
  13195# This section handles a positive adjusted exponent.
  13196#
  13197ap_st_z:
  13198	mov.l		(%sp),%d1		# load expA for range test
  13199	cmp.l		%d1,&27			# compare expA against 27
  13200	ble.w		pwrten			# if abs(expA) <28, skip ap/st zeros
  13201	btst		&30,(%a0)		# check sign of exp
  13202	bne.b		ap_st_n			# if neg, go to neg side
  13203	clr.l		%d1			# zero count reg
  13204	mov.l		(%a0),%d4		# load lword 1 to d4
  13205	bfextu		%d4{&28:&4},%d0		# get M16 in d0
  13206	bne.b		ap_p_fx			# if M16 is non-zero, go fix exp
  13207	addq.l		&1,%d1			# inc zero count
  13208	mov.l		&1,%d5			# init lword counter
  13209	mov.l		(%a0,%d5.L*4),%d4	# get lword 2 to d4
  13210	bne.b		ap_p_cl			# if lw 2 is non-zero, check its digits
  13211	addq.l		&8,%d1			# else it's all zeros; inc count by 8
  13212	addq.l		&1,%d5			# inc lword counter
  13213	mov.l		(%a0,%d5.L*4),%d4	# get lword 3 to d4
  13214ap_p_cl:
  13215	clr.l		%d3			# init offset reg
  13216	mov.l		&7,%d2			# init digit counter
  13217ap_p_gd:
  13218	bfextu		%d4{%d3:&4},%d0		# get digit
  13219	bne.b		ap_p_fx			# if non-zero, go to fix exp
  13220	addq.l		&4,%d3			# point to next digit
  13221	addq.l		&1,%d1			# inc digit counter
  13222	dbf.w		%d2,ap_p_gd		# get next digit
  13223ap_p_fx:
  13224	mov.l		%d1,%d0			# copy counter to d0
  13225	mov.l		(%sp),%d1		# get adjusted exp from memory
  13226	sub.l		%d0,%d1			# subtract count from exp
  13227	bge.b		ap_p_fm			# if still pos, go to pwrten
  13228	neg.l		%d1			# now its neg; get abs
  13229	mov.l		(%a0),%d4		# load lword 1 to d4
  13230	or.l		&0x40000000,%d4		# and set SE in d4
  13231	or.l		&0x40000000,(%a0)	# and in memory
  13232#
  13233# Calculate the mantissa multiplier to compensate for the stripping of
  13234# zeros from the mantissa.
  13235#
  13236ap_p_fm:
  13237	lea.l		PTENRN(%pc),%a1		# get address of power-of-ten table
  13238	clr.l		%d3			# init table index
  13239	fmov.s		&0x3f800000,%fp1	# init fp1 to 1
  13240	mov.l		&3,%d2			# init d2 to count bits in counter
  13241ap_p_el:
  13242	asr.l		&1,%d0			# shift lsb into carry
  13243	bcc.b		ap_p_en			# if 1, mul fp1 by pwrten factor
  13244	fmul.x		(%a1,%d3),%fp1		# mul by 10**(d3_bit_no)
  13245ap_p_en:
  13246	add.l		&12,%d3			# inc d3 to next rtable entry
  13247	tst.l		%d0			# check if d0 is zero
  13248	bne.b		ap_p_el			# if not, get next bit
  13249	fmul.x		%fp1,%fp0		# mul mantissa by 10**(no_bits_shifted)
  13250	bra.b		pwrten			# go calc pwrten
  13251#
  13252# This section handles a negative adjusted exponent.
  13253#
  13254ap_st_n:
  13255	clr.l		%d1			# clr counter
  13256	mov.l		&2,%d5			# set up d5 to point to lword 3
  13257	mov.l		(%a0,%d5.L*4),%d4	# get lword 3
  13258	bne.b		ap_n_cl			# if not zero, check digits
  13259	sub.l		&1,%d5			# dec d5 to point to lword 2
  13260	addq.l		&8,%d1			# inc counter by 8
  13261	mov.l		(%a0,%d5.L*4),%d4	# get lword 2
  13262ap_n_cl:
  13263	mov.l		&28,%d3			# point to last digit
  13264	mov.l		&7,%d2			# init digit counter
  13265ap_n_gd:
  13266	bfextu		%d4{%d3:&4},%d0		# get digit
  13267	bne.b		ap_n_fx			# if non-zero, go to exp fix
  13268	subq.l		&4,%d3			# point to previous digit
  13269	addq.l		&1,%d1			# inc digit counter
  13270	dbf.w		%d2,ap_n_gd		# get next digit
  13271ap_n_fx:
  13272	mov.l		%d1,%d0			# copy counter to d0
  13273	mov.l		(%sp),%d1		# get adjusted exp from memory
  13274	sub.l		%d0,%d1			# subtract count from exp
  13275	bgt.b		ap_n_fm			# if still pos, go fix mantissa
  13276	neg.l		%d1			# take abs of exp and clr SE
  13277	mov.l		(%a0),%d4		# load lword 1 to d4
  13278	and.l		&0xbfffffff,%d4		# and clr SE in d4
  13279	and.l		&0xbfffffff,(%a0)	# and in memory
  13280#
  13281# Calculate the mantissa multiplier to compensate for the appending of
  13282# zeros to the mantissa.
  13283#
  13284ap_n_fm:
  13285	lea.l		PTENRN(%pc),%a1		# get address of power-of-ten table
  13286	clr.l		%d3			# init table index
  13287	fmov.s		&0x3f800000,%fp1	# init fp1 to 1
  13288	mov.l		&3,%d2			# init d2 to count bits in counter
  13289ap_n_el:
  13290	asr.l		&1,%d0			# shift lsb into carry
  13291	bcc.b		ap_n_en			# if 1, mul fp1 by pwrten factor
  13292	fmul.x		(%a1,%d3),%fp1		# mul by 10**(d3_bit_no)
  13293ap_n_en:
  13294	add.l		&12,%d3			# inc d3 to next rtable entry
  13295	tst.l		%d0			# check if d0 is zero
  13296	bne.b		ap_n_el			# if not, get next bit
  13297	fdiv.x		%fp1,%fp0		# div mantissa by 10**(no_bits_shifted)
  13298#
  13299#
  13300# Calculate power-of-ten factor from adjusted and shifted exponent.
  13301#
  13302# Register usage:
  13303#
  13304#  pwrten:
  13305#	(*)  d0: temp
  13306#	( )  d1: exponent
  13307#	(*)  d2: {FPCR[6:5],SM,SE} as index in RTABLE; temp
  13308#	(*)  d3: FPCR work copy
  13309#	( )  d4: first word of bcd
  13310#	(*)  a1: RTABLE pointer
  13311#  calc_p:
  13312#	(*)  d0: temp
  13313#	( )  d1: exponent
  13314#	(*)  d3: PWRTxx table index
  13315#	( )  a0: pointer to working copy of bcd
  13316#	(*)  a1: PWRTxx pointer
  13317#	(*) fp1: power-of-ten accumulator
  13318#
  13319# Pwrten calculates the exponent factor in the selected rounding mode
  13320# according to the following table:
  13321#
  13322#	Sign of Mant  Sign of Exp  Rounding Mode  PWRTEN Rounding Mode
  13323#
  13324#	ANY	  ANY	RN	RN
  13325#
  13326#	 +	   +	RP	RP
  13327#	 -	   +	RP	RM
  13328#	 +	   -	RP	RM
  13329#	 -	   -	RP	RP
  13330#
  13331#	 +	   +	RM	RM
  13332#	 -	   +	RM	RP
  13333#	 +	   -	RM	RP
  13334#	 -	   -	RM	RM
  13335#
  13336#	 +	   +	RZ	RM
  13337#	 -	   +	RZ	RM
  13338#	 +	   -	RZ	RP
  13339#	 -	   -	RZ	RP
  13340#
  13341#
  13342pwrten:
  13343	mov.l		USER_FPCR(%a6),%d3	# get user's FPCR
  13344	bfextu		%d3{&26:&2},%d2		# isolate rounding mode bits
  13345	mov.l		(%a0),%d4		# reload 1st bcd word to d4
  13346	asl.l		&2,%d2			# format d2 to be
  13347	bfextu		%d4{&0:&2},%d0		# {FPCR[6],FPCR[5],SM,SE}
  13348	add.l		%d0,%d2			# in d2 as index into RTABLE
  13349	lea.l		RTABLE(%pc),%a1		# load rtable base
  13350	mov.b		(%a1,%d2),%d0		# load new rounding bits from table
  13351	clr.l		%d3			# clear d3 to force no exc and extended
  13352	bfins		%d0,%d3{&26:&2}		# stuff new rounding bits in FPCR
  13353	fmov.l		%d3,%fpcr		# write new FPCR
  13354	asr.l		&1,%d0			# write correct PTENxx table
  13355	bcc.b		not_rp			# to a1
  13356	lea.l		PTENRP(%pc),%a1		# it is RP
  13357	bra.b		calc_p			# go to init section
  13358not_rp:
  13359	asr.l		&1,%d0			# keep checking
  13360	bcc.b		not_rm
  13361	lea.l		PTENRM(%pc),%a1		# it is RM
  13362	bra.b		calc_p			# go to init section
  13363not_rm:
  13364	lea.l		PTENRN(%pc),%a1		# it is RN
  13365calc_p:
  13366	mov.l		%d1,%d0			# copy exp to d0; use d0
  13367	bpl.b		no_neg			# if exp is negative,
  13368	neg.l		%d0			# invert it
  13369	or.l		&0x40000000,(%a0)	# and set SE bit
  13370no_neg:
  13371	clr.l		%d3			# table index
  13372	fmov.s		&0x3f800000,%fp1	# init fp1 to 1
  13373e_loop:
  13374	asr.l		&1,%d0			# shift next bit into carry
  13375	bcc.b		e_next			# if zero, skip the mul
  13376	fmul.x		(%a1,%d3),%fp1		# mul by 10**(d3_bit_no)
  13377e_next:
  13378	add.l		&12,%d3			# inc d3 to next pwrten table entry
  13379	tst.l		%d0			# check if d0 is zero
  13380	bne.b		e_loop			# not zero, continue shifting
  13381#
  13382#
  13383#  Check the sign of the adjusted exp and make the value in fp0 the
  13384#  same sign. If the exp was pos then multiply fp1*fp0;
  13385#  else divide fp0/fp1.
  13386#
  13387# Register Usage:
  13388#  norm:
  13389#	( )  a0: pointer to working bcd value
  13390#	(*) fp0: mantissa accumulator
  13391#	( ) fp1: scaling factor - 10**(abs(exp))
  13392#
  13393pnorm:
  13394	btst		&30,(%a0)		# test the sign of the exponent
  13395	beq.b		mul			# if clear, go to multiply
  13396div:
  13397	fdiv.x		%fp1,%fp0		# exp is negative, so divide mant by 10^|exp|
  13398	bra.b		end_dec
  13399mul:
  13400	fmul.x		%fp1,%fp0		# exp is positive, so multiply mant by 10^exp
  13401#
  13402#
  13403# Clean up and return with result in fp0.
  13404#
  13405# If the final mul/div in decbin incurred an inex exception,
  13406# it will be inex2, but will be reported as inex1 by get_op.
  13407#
  13408end_dec:
  13409	fmov.l		%fpsr,%d0		# get status register
  13410	bclr		&inex2_bit+8,%d0	# test for inex2 and clear it
  13411	beq.b		no_exc			# skip this if no exc
  13412	ori.w		&inx1a_mask,2+USER_FPSR(%a6) # set INEX1/AINEX
  13413no_exc:
  13414	add.l		&0x4,%sp		# clear 1 lw param
  13415	fmovm.x		(%sp)+,&0x40		# restore fp1
  13416	movm.l		(%sp)+,&0x3c		# restore d2-d5
  13417	fmov.l		&0x0,%fpcr
  13418	fmov.l		&0x0,%fpsr
  13419	rts
  13420
  13421#########################################################################
  13422# bindec(): Converts an input in extended precision format to bcd format#
  13423#									#
  13424# INPUT ***************************************************************	#
  13425#	a0 = pointer to the input extended precision value in memory.	#
  13426#	     the input may be either normalized, unnormalized, or	#
  13427#	     denormalized.						#
  13428#	d0 = contains the k-factor sign-extended to 32-bits.		#
  13429#									#
  13430# OUTPUT **************************************************************	#
  13431#	FP_SCR0(a6) = bcd format result on the stack.			#
  13432#									#
  13433# ALGORITHM ***********************************************************	#
  13434#									#
  13435#	A1.	Set RM and size ext;  Set SIGMA = sign of input.	#
  13436#		The k-factor is saved for use in d7. Clear the		#
  13437#		BINDEC_FLG for separating normalized/denormalized	#
  13438#		input.  If input is unnormalized or denormalized,	#
  13439#		normalize it.						#
  13440#									#
  13441#	A2.	Set X = abs(input).					#
  13442#									#
  13443#	A3.	Compute ILOG.						#
  13444#		ILOG is the log base 10 of the input value.  It is	#
  13445#		approximated by (e + 0.f) * log10(2) when the original	#
  13446#		value is viewed as 2^^e * 1.f in extended precision.	#
  13447#		This value is stored in d6.				#
  13448#									#
  13449#	A4.	Clr INEX bit.						#
  13450#		The operation in A3 above may have set INEX2.		#
  13451#									#
  13452#	A5.	Set ICTR = 0;						#
  13453#		ICTR is a flag used in A13.  It must be set before the	#
  13454#		loop entry A6.						#
  13455#									#
  13456#	A6.	Calculate LEN.						#
  13457#		LEN is the number of digits to be displayed.  The	#
  13458#		k-factor can dictate either the total number of digits,	#
  13459#		if it is a positive number, or the number of digits	#
  13460#		after the decimal point which are to be included as	#
  13461#		significant.  See the 68882 manual for examples.	#
  13462#		If LEN is computed to be greater than 17, set OPERR in	#
  13463#		USER_FPSR.  LEN is stored in d4.			#
  13464#									#
  13465#	A7.	Calculate SCALE.					#
  13466#		SCALE is equal to 10^ISCALE, where ISCALE is the number	#
  13467#		of decimal places needed to insure LEN integer digits	#
  13468#		in the output before conversion to bcd. LAMBDA is the	#
  13469#		sign of ISCALE, used in A9. Fp1 contains		#
  13470#		10^^(abs(ISCALE)) using a rounding mode which is a	#
  13471#		function of the original rounding mode and the signs	#
  13472#		of ISCALE and X.  A table is given in the code.		#
  13473#									#
  13474#	A8.	Clr INEX; Force RZ.					#
  13475#		The operation in A3 above may have set INEX2.		#
  13476#		RZ mode is forced for the scaling operation to insure	#
  13477#		only one rounding error.  The grs bits are collected in #
  13478#		the INEX flag for use in A10.				#
  13479#									#
  13480#	A9.	Scale X -> Y.						#
  13481#		The mantissa is scaled to the desired number of		#
  13482#		significant digits.  The excess digits are collected	#
  13483#		in INEX2.						#
  13484#									#
  13485#	A10.	Or in INEX.						#
  13486#		If INEX is set, round error occurred.  This is		#
  13487#		compensated for by 'or-ing' in the INEX2 flag to	#
  13488#		the lsb of Y.						#
  13489#									#
  13490#	A11.	Restore original FPCR; set size ext.			#
  13491#		Perform FINT operation in the user's rounding mode.	#
  13492#		Keep the size to extended.				#
  13493#									#
  13494#	A12.	Calculate YINT = FINT(Y) according to user's rounding	#
  13495#		mode.  The FPSP routine sintd0 is used.  The output	#
  13496#		is in fp0.						#
  13497#									#
  13498#	A13.	Check for LEN digits.					#
  13499#		If the int operation results in more than LEN digits,	#
  13500#		or less than LEN -1 digits, adjust ILOG and repeat from	#
  13501#		A6.  This test occurs only on the first pass.  If the	#
  13502#		result is exactly 10^LEN, decrement ILOG and divide	#
  13503#		the mantissa by 10.					#
  13504#									#
  13505#	A14.	Convert the mantissa to bcd.				#
  13506#		The binstr routine is used to convert the LEN digit	#
  13507#		mantissa to bcd in memory.  The input to binstr is	#
  13508#		to be a fraction; i.e. (mantissa)/10^LEN and adjusted	#
  13509#		such that the decimal point is to the left of bit 63.	#
  13510#		The bcd digits are stored in the correct position in	#
  13511#		the final string area in memory.			#
  13512#									#
  13513#	A15.	Convert the exponent to bcd.				#
  13514#		As in A14 above, the exp is converted to bcd and the	#
  13515#		digits are stored in the final string.			#
  13516#		Test the length of the final exponent string.  If the	#
  13517#		length is 4, set operr.					#
  13518#									#
  13519#	A16.	Write sign bits to final string.			#
  13520#									#
  13521#########################################################################
  13522
  13523set	BINDEC_FLG,	EXC_TEMP	# DENORM flag
  13524
  13525# Constants in extended precision
  13526PLOG2:
  13527	long		0x3FFD0000,0x9A209A84,0xFBCFF798,0x00000000
  13528PLOG2UP1:
  13529	long		0x3FFD0000,0x9A209A84,0xFBCFF799,0x00000000
  13530
  13531# Constants in single precision
  13532FONE:
  13533	long		0x3F800000,0x00000000,0x00000000,0x00000000
  13534FTWO:
  13535	long		0x40000000,0x00000000,0x00000000,0x00000000
  13536FTEN:
  13537	long		0x41200000,0x00000000,0x00000000,0x00000000
  13538F4933:
  13539	long		0x459A2800,0x00000000,0x00000000,0x00000000
  13540
  13541RBDTBL:
  13542	byte		0,0,0,0
  13543	byte		3,3,2,2
  13544	byte		3,2,2,3
  13545	byte		2,3,3,2
  13546
  13547#	Implementation Notes:
  13548#
  13549#	The registers are used as follows:
  13550#
  13551#		d0: scratch; LEN input to binstr
  13552#		d1: scratch
  13553#		d2: upper 32-bits of mantissa for binstr
  13554#		d3: scratch;lower 32-bits of mantissa for binstr
  13555#		d4: LEN
  13556#		d5: LAMBDA/ICTR
  13557#		d6: ILOG
  13558#		d7: k-factor
  13559#		a0: ptr for original operand/final result
  13560#		a1: scratch pointer
  13561#		a2: pointer to FP_X; abs(original value) in ext
  13562#		fp0: scratch
  13563#		fp1: scratch
  13564#		fp2: scratch
  13565#		F_SCR1:
  13566#		F_SCR2:
  13567#		L_SCR1:
  13568#		L_SCR2:
  13569
  13570	global		bindec
  13571bindec:
  13572	movm.l		&0x3f20,-(%sp)	#  {%d2-%d7/%a2}
  13573	fmovm.x		&0x7,-(%sp)	#  {%fp0-%fp2}
  13574
  13575# A1. Set RM and size ext. Set SIGMA = sign input;
  13576#     The k-factor is saved for use in d7.  Clear BINDEC_FLG for
  13577#     separating  normalized/denormalized input.  If the input
  13578#     is a denormalized number, set the BINDEC_FLG memory word
  13579#     to signal denorm.  If the input is unnormalized, normalize
  13580#     the input and test for denormalized result.
  13581#
  13582	fmov.l		&rm_mode*0x10,%fpcr	# set RM and ext
  13583	mov.l		(%a0),L_SCR2(%a6)	# save exponent for sign check
  13584	mov.l		%d0,%d7		# move k-factor to d7
  13585
  13586	clr.b		BINDEC_FLG(%a6)	# clr norm/denorm flag
  13587	cmpi.b		STAG(%a6),&DENORM # is input a DENORM?
  13588	bne.w		A2_str		# no; input is a NORM
  13589
  13590#
  13591# Normalize the denorm
  13592#
  13593un_de_norm:
  13594	mov.w		(%a0),%d0
  13595	and.w		&0x7fff,%d0	# strip sign of normalized exp
  13596	mov.l		4(%a0),%d1
  13597	mov.l		8(%a0),%d2
  13598norm_loop:
  13599	sub.w		&1,%d0
  13600	lsl.l		&1,%d2
  13601	roxl.l		&1,%d1
  13602	tst.l		%d1
  13603	bge.b		norm_loop
  13604#
  13605# Test if the normalized input is denormalized
  13606#
  13607	tst.w		%d0
  13608	bgt.b		pos_exp		# if greater than zero, it is a norm
  13609	st		BINDEC_FLG(%a6)	# set flag for denorm
  13610pos_exp:
  13611	and.w		&0x7fff,%d0	# strip sign of normalized exp
  13612	mov.w		%d0,(%a0)
  13613	mov.l		%d1,4(%a0)
  13614	mov.l		%d2,8(%a0)
  13615
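#
# The normalization above is the usual shift-until-msb-set loop; a C
# sketch of the same idea (illustrative only, assuming a non-zero
# 64-bit mantissa and a plain int exponent):
#
#	#include <stdint.h>
#
#	static void normalize(int *exp, uint64_t *mant)
#	{
#		while (!(*mant & (1ULL << 63))) {	/* msb clear?		*/
#			*mant <<= 1;			/* shift mantissa left	*/
#			(*exp)--;			/* and drop the exponent */
#		}
#	}
#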
  13616# A2. Set X = abs(input).
  13617#
  13618A2_str:
  13619	mov.l		(%a0),FP_SCR1(%a6)	# move input to work space
  13620	mov.l		4(%a0),FP_SCR1+4(%a6)	# move input to work space
  13621	mov.l		8(%a0),FP_SCR1+8(%a6)	# move input to work space
  13622	and.l		&0x7fffffff,FP_SCR1(%a6)	# create abs(X)
  13623
  13624# A3. Compute ILOG.
  13625#     ILOG is the log base 10 of the input value.  It is approx-
  13626#     imated by (e + 0.f) * log10(2) when the original value is
  13627#     viewed as 2^^e * 1.f in extended precision.  This value is stored
  13628#     in d6.
  13629#
  13630# Register usage:
  13631#	Input/Output
  13632#	d0: k-factor/exponent
  13633#	d2: x/x
  13634#	d3: x/x
  13635#	d4: x/x
  13636#	d5: x/x
  13637#	d6: x/ILOG
  13638#	d7: k-factor/Unchanged
  13639#	a0: ptr for original operand/final result
  13640#	a1: x/x
  13641#	a2: x/x
  13642#	fp0: x/float(ILOG)
  13643#	fp1: x/x
  13644#	fp2: x/x
  13645#	F_SCR1:x/x
  13646#	F_SCR2:Abs(X)/Abs(X) with $3fff exponent
  13647#	L_SCR1:x/x
  13648#	L_SCR2:first word of X packed/Unchanged
  13649
  13650	tst.b		BINDEC_FLG(%a6)	# check for denorm
  13651	beq.b		A3_cont		# if clr, continue with norm
  13652	mov.l		&-4933,%d6	# force ILOG = -4933
  13653	bra.b		A4_str
  13654A3_cont:
  13655	mov.w		FP_SCR1(%a6),%d0	# move exp to d0
  13656	mov.w		&0x3fff,FP_SCR1(%a6)	# replace exponent with 0x3fff
  13657	fmov.x		FP_SCR1(%a6),%fp0	# now fp0 has 1.f
  13658	sub.w		&0x3fff,%d0	# strip off bias
  13659	fadd.w		%d0,%fp0	# add in exp
  13660	fsub.s		FONE(%pc),%fp0	# subtract off 1.0
  13661	fbge.w		pos_res		# if pos, branch
  13662	fmul.x		PLOG2UP1(%pc),%fp0	# if neg, mul by LOG2UP1
  13663	fmov.l		%fp0,%d6	# put ILOG in d6 as a lword
  13664	bra.b		A4_str		# go move out ILOG
  13665pos_res:
  13666	fmul.x		PLOG2(%pc),%fp0	# if pos, mul by LOG2
  13667	fmov.l		%fp0,%d6	# put ILOG in d6 as a lword
  13668
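#
# A C sketch of this approximation (illustrative only; frexp() returns
# a fraction in [0.5,1), so the exponent and fraction are adjusted by
# one, and floor() stands in for the RM rounding of the fmov.l; the
# real code also picks an up- or down-rounded log10(2) constant
# depending on the sign of the intermediate value):
#
#	#include <math.h>
#
#	static long ilog_approx(double x)
#	{
#		int e;
#		double f = frexp(fabs(x), &e);	/* |x| = f * 2^e		*/
#		double v = (double)(e - 1) + (2.0 * f - 1.0);	/* e + 0.f	*/
#		return (long)floor(v * 0.30102999566398120);	/* * log10(2)	*/
#	}
#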
  13669
  13670# A4. Clr INEX bit.
  13671#     The operation in A3 above may have set INEX2.
  13672
  13673A4_str:
  13674	fmov.l		&0,%fpsr	# zero all of fpsr - nothing needed
  13675
  13676
  13677# A5. Set ICTR = 0;
  13678#     ICTR is a flag used in A13.  It must be set before the
  13679#     loop entry A6. The lower word of d5 is used for ICTR.
  13680
  13681	clr.w		%d5		# clear ICTR
  13682
  13683# A6. Calculate LEN.
  13684#     LEN is the number of digits to be displayed.  The k-factor
  13685#     can dictate either the total number of digits, if it is
  13686#     a positive number, or the number of digits after the
  13687#     original decimal point which are to be included as
  13688#     significant.  See the 68882 manual for examples.
  13689#     If LEN is computed to be greater than 17, set OPERR in
  13690#     USER_FPSR.  LEN is stored in d4.
  13691#
  13692# Register usage:
  13693#	Input/Output
  13694#	d0: exponent/Unchanged
  13695#	d2: x/x/scratch
  13696#	d3: x/x
  13697#	d4: exc picture/LEN
  13698#	d5: ICTR/Unchanged
  13699#	d6: ILOG/Unchanged
  13700#	d7: k-factor/Unchanged
  13701#	a0: ptr for original operand/final result
  13702#	a1: x/x
  13703#	a2: x/x
  13704#	fp0: float(ILOG)/Unchanged
  13705#	fp1: x/x
  13706#	fp2: x/x
  13707#	F_SCR1:x/x
  13708#	F_SCR2:Abs(X) with $3fff exponent/Unchanged
  13709#	L_SCR1:x/x
  13710#	L_SCR2:first word of X packed/Unchanged
  13711
  13712A6_str:
  13713	tst.l		%d7		# branch on sign of k
  13714	ble.b		k_neg		# if k <= 0, LEN = ILOG + 1 - k
  13715	mov.l		%d7,%d4		# if k > 0, LEN = k
  13716	bra.b		len_ck		# skip to LEN check
  13717k_neg:
  13718	mov.l		%d6,%d4		# first load ILOG to d4
  13719	sub.l		%d7,%d4		# subtract off k
  13720	addq.l		&1,%d4		# add in the 1
  13721len_ck:
  13722	tst.l		%d4		# LEN check: branch on sign of LEN
  13723	ble.b		LEN_ng		# if neg, set LEN = 1
  13724	cmp.l		%d4,&17		# test if LEN > 17
  13725	ble.b		A7_str		# if not, forget it
  13726	mov.l		&17,%d4		# set max LEN = 17
  13727	tst.l		%d7		# check sign of k
  13728	ble.b		A7_str		# if k <= 0, never set OPERR
  13729	or.l		&opaop_mask,USER_FPSR(%a6)	# set OPERR & AIOP in USER_FPSR
  13730	bra.b		A7_str		# finished here
  13731LEN_ng:
  13732	mov.l		&1,%d4		# min LEN is 1
  13733
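#
# A C sketch of the LEN computation above (illustrative only; operr
# stands for setting OPERR/AIOP in USER_FPSR):
#
#	static int calc_len(int k, int ilog, int *operr)
#	{
#		int len = (k > 0) ? k : ilog + 1 - k;
#		if (len < 1)
#			len = 1;		/* minimum of one digit	*/
#		if (len > 17) {
#			len = 17;		/* clamp to 17 digits	*/
#			if (k > 0)
#				*operr = 1;	/* only a positive k errs */
#		}
#		return len;
#	}
#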
  13734
  13735# A7. Calculate SCALE.
  13736#     SCALE is equal to 10^ISCALE, where ISCALE is the number
  13737#     of decimal places needed to insure LEN integer digits
  13738#     in the output before conversion to bcd. LAMBDA is the sign
  13739#     of ISCALE, used in A9.  Fp1 contains 10^^(abs(ISCALE)) using
  13740#     the rounding mode as given in the following table (see
  13741#     Coonen, p. 7.23 as ref.; however, the SCALE variable is
  13742#     of opposite sign in bindec.sa from Coonen).
  13743#
  13744#	Initial					USE
  13745#	FPCR[6:5]	LAMBDA	SIGN(X)		FPCR[6:5]
  13746#	----------------------------------------------
  13747#	 RN	00	   0	   0		00/0	RN
  13748#	 RN	00	   0	   1		00/0	RN
  13749#	 RN	00	   1	   0		00/0	RN
  13750#	 RN	00	   1	   1		00/0	RN
  13751#	 RZ	01	   0	   0		11/3	RP
  13752#	 RZ	01	   0	   1		11/3	RP
  13753#	 RZ	01	   1	   0		10/2	RM
  13754#	 RZ	01	   1	   1		10/2	RM
  13755#	 RM	10	   0	   0		11/3	RP
  13756#	 RM	10	   0	   1		10/2	RM
  13757#	 RM	10	   1	   0		10/2	RM
  13758#	 RM	10	   1	   1		11/3	RP
  13759#	 RP	11	   0	   0		10/2	RM
  13760#	 RP	11	   0	   1		11/3	RP
  13761#	 RP	11	   1	   0		11/3	RP
  13762#	 RP	11	   1	   1		10/2	RM
  13763#
  13764# Register usage:
  13765#	Input/Output
  13766#	d0: exponent/scratch - final is 0
  13767#	d2: x/0 or 24 for A9
  13768#	d3: x/scratch - offset ptr into PTENRM array
  13769#	d4: LEN/Unchanged
  13770#	d5: 0/ICTR:LAMBDA
  13771#	d6: ILOG/ILOG or k if ((k<=0)&(ILOG<k))
  13772#	d7: k-factor/Unchanged
  13773#	a0: ptr for original operand/final result
  13774#	a1: x/ptr to PTENRM array
  13775#	a2: x/x
  13776#	fp0: float(ILOG)/Unchanged
  13777#	fp1: x/10^ISCALE
  13778#	fp2: x/x
  13779#	F_SCR1:x/x
  13780#	F_SCR2:Abs(X) with $3fff exponent/Unchanged
  13781#	L_SCR1:x/x
  13782#	L_SCR2:first word of X packed/Unchanged
  13783
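#
# The table above is what RBDTBL (defined earlier) encodes; a C sketch
# of the lookup, with values in FPCR encoding (0 = RN, 2 = RM, 3 = RP):
#
#	static const unsigned char rbdtbl[16] = {
#		0, 0, 0, 0,		/* RN rows	*/
#		3, 3, 2, 2,		/* RZ rows	*/
#		3, 2, 2, 3,		/* RM rows	*/
#		2, 3, 3, 2,		/* RP rows	*/
#	};
#
#	static unsigned scale_rmode(unsigned fpcr_mode, int lambda, int x_neg)
#	{
#		return rbdtbl[fpcr_mode * 4 + lambda * 2 + (x_neg ? 1 : 0)];
#	}
#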
  13784A7_str:
  13785	tst.l		%d7		# test sign of k
  13786	bgt.b		k_pos		# if pos and > 0, skip this
  13787	cmp.l		%d7,%d6		# test k - ILOG
  13788	blt.b		k_pos		# if ILOG >= k, skip this
  13789	mov.l		%d7,%d6		# if ((k<0) & (ILOG < k)) ILOG = k
  13790k_pos:
  13791	mov.l		%d6,%d0		# calc ILOG + 1 - LEN in d0
  13792	addq.l		&1,%d0		# add the 1
  13793	sub.l		%d4,%d0		# sub off LEN
  13794	swap		%d5		# use upper word of d5 for LAMBDA
  13795	clr.w		%d5		# set it zero initially
  13796	clr.w		%d2		# set up d2 for very small case
  13797	tst.l		%d0		# test sign of ISCALE
  13798	bge.b		iscale		# if pos, skip next inst
  13799	addq.w		&1,%d5		# if neg, set LAMBDA true
  13800	cmp.l		%d0,&0xffffecd4	# test iscale <= -4908
  13801	bgt.b		no_inf		# if false, skip rest
  13802	add.l		&24,%d0		# add in 24 to iscale
  13803	mov.l		&24,%d2		# put 24 in d2 for A9
  13804no_inf:
  13805	neg.l		%d0		# and take abs of ISCALE
  13806iscale:
  13807	fmov.s		FONE(%pc),%fp1	# init fp1 to 1
  13808	bfextu		USER_FPCR(%a6){&26:&2},%d1	# get initial rmode bits
  13809	lsl.w		&1,%d1		# put them in bits 2:1
  13810	add.w		%d5,%d1		# add in LAMBDA
  13811	lsl.w		&1,%d1		# put them in bits 3:1
  13812	tst.l		L_SCR2(%a6)	# test sign of original x
  13813	bge.b		x_pos		# if pos, don't set bit 0
  13814	addq.l		&1,%d1		# if neg, set bit 0
  13815x_pos:
  13816	lea.l		RBDTBL(%pc),%a2	# load rbdtbl base
  13817	mov.b		(%a2,%d1),%d3	# load d3 with new rmode
  13818	lsl.l		&4,%d3		# put bits in proper position
  13819	fmov.l		%d3,%fpcr	# load bits into fpu
  13820	lsr.l		&4,%d3		# put bits in proper position
  13821	tst.b		%d3		# decode new rmode for pten table
  13822	bne.b		not_rn		# if zero, it is RN
  13823	lea.l		PTENRN(%pc),%a1	# load a1 with RN table base
  13824	bra.b		rmode		# exit decode
  13825not_rn:
  13826	lsr.b		&1,%d3		# get lsb in carry
  13827	bcc.b		not_rp2		# if carry clear, it is RM
  13828	lea.l		PTENRP(%pc),%a1	# load a1 with RP table base
  13829	bra.b		rmode		# exit decode
  13830not_rp2:
  13831	lea.l		PTENRM(%pc),%a1	# load a1 with RM table base
  13832rmode:
  13833	clr.l		%d3		# clr table index
  13834e_loop2:
  13835	lsr.l		&1,%d0		# shift next bit into carry
  13836	bcc.b		e_next2		# if zero, skip the mul
  13837	fmul.x		(%a1,%d3),%fp1	# mul by 10**(d3_bit_no)
  13838e_next2:
  13839	add.l		&12,%d3		# inc d3 to next pwrten table entry
  13840	tst.l		%d0		# test if ISCALE is zero
  13841	bne.b		e_loop2		# if not, loop
  13842
  13843# A8. Clr INEX; Force RZ.
  13844#     The operation in A3 above may have set INEX2.
  13845#     RZ mode is forced for the scaling operation to insure
  13846#     only one rounding error.  The grs bits are collected in
  13847#     the INEX flag for use in A10.
  13848#
  13849# Register usage:
  13850#	Input/Output
  13851
  13852	fmov.l		&0,%fpsr	# clr INEX
  13853	fmov.l		&rz_mode*0x10,%fpcr	# set RZ rounding mode
  13854
  13855# A9. Scale X -> Y.
  13856#     The mantissa is scaled to the desired number of significant
  13857#     digits.  The excess digits are collected in INEX2. If mul,
  13858#     Check d2 for excess 10 exponential value.  If not zero,
  13859#     the iscale value would have caused the pwrten calculation
  13860#     to overflow.  Only a negative iscale can cause this, so
  13861#     multiply by 10^(d2), which is now only allowed to be 24,
  13862#     with a multiply by 10^8 and 10^16, which is exact since
  13863#     10^24 is exact.  If the input was denormalized, we must
  13864#     create a busy stack frame with the mul command and the
  13865#     two operands, and allow the fpu to complete the multiply.
  13866#
  13867# Register usage:
  13868#	Input/Output
  13869#	d0: FPCR with RZ mode/Unchanged
  13870#	d2: 0 or 24/unchanged
  13871#	d3: x/x
  13872#	d4: LEN/Unchanged
  13873#	d5: ICTR:LAMBDA
  13874#	d6: ILOG/Unchanged
  13875#	d7: k-factor/Unchanged
  13876#	a0: ptr for original operand/final result
  13877#	a1: ptr to PTENRM array/Unchanged
  13878#	a2: x/x
  13879#	fp0: float(ILOG)/X adjusted for SCALE (Y)
  13880#	fp1: 10^ISCALE/Unchanged
  13881#	fp2: x/x
  13882#	F_SCR1:x/x
  13883#	F_SCR2:Abs(X) with $3fff exponent/Unchanged
  13884#	L_SCR1:x/x
  13885#	L_SCR2:first word of X packed/Unchanged
  13886
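#
# A C sketch of the scaling step (illustrative only; pow_iscale plays
# the role of fp1 = 10^|ISCALE|, and extra24 the "d2 = 24" case where
# 10^24 = 10^8 * 10^16 is applied separately so the table value never
# overflows):
#
#	static double scale_x(double absx, double pow_iscale,
#			      int lambda, int extra24)
#	{
#		if (!lambda)
#			return absx / pow_iscale;	/* ISCALE >= 0: X / SCALE */
#		if (extra24)
#			absx = (absx * 1e8) * 1e16;	/* exact extra 10^24	  */
#		return absx * pow_iscale;		/* ISCALE <  0: X * SCALE */
#	}
#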
  13887A9_str:
  13888	fmov.x		(%a0),%fp0	# load X from memory
  13889	fabs.x		%fp0		# use abs(X)
  13890	tst.w		%d5		# LAMBDA is in lower word of d5
  13891	bne.b		sc_mul		# if neg (LAMBDA = 1), scale by mul
  13892	fdiv.x		%fp1,%fp0	# calculate X / SCALE -> Y to fp0
  13893	bra.w		A10_st		# branch to A10
  13894
  13895sc_mul:
  13896	tst.b		BINDEC_FLG(%a6)	# check for denorm
  13897	beq.w		A9_norm		# if norm, continue with mul
  13898
  13899# for DENORM, we must calculate:
  13900#	fp0 = input_op * 10^ISCALE * 10^24
  13901# since the input operand is a DENORM, we can't multiply it directly.
  13902# so, we do the multiplication of the exponents and mantissas separately.
  13903# in this way, we avoid underflow on intermediate stages of the
  13904# multiplication and guarantee a result without exception.
  13905	fmovm.x		&0x2,-(%sp)	# save 10^ISCALE to stack
  13906
  13907	mov.w		(%sp),%d3	# grab exponent
  13908	andi.w		&0x7fff,%d3	# clear sign
  13909	ori.w		&0x8000,(%a0)	# make DENORM exp negative
  13910	add.w		(%a0),%d3	# add DENORM exp to 10^ISCALE exp
  13911	subi.w		&0x3fff,%d3	# subtract BIAS
  13912	add.w		36(%a1),%d3
  13913	subi.w		&0x3fff,%d3	# subtract BIAS
  13914	add.w		48(%a1),%d3
  13915	subi.w		&0x3fff,%d3	# subtract BIAS
  13916
  13917	bmi.w		sc_mul_err	# if result is DENORM, punt!!!
  13918
  13919	andi.w		&0x8000,(%sp)	# keep sign
  13920	or.w		%d3,(%sp)	# insert new exponent
  13921	andi.w		&0x7fff,(%a0)	# clear sign bit on DENORM again
  13922	mov.l		0x8(%a0),-(%sp) # put input op mantissa on stk
  13923	mov.l		0x4(%a0),-(%sp)
  13924	mov.l		&0x3fff0000,-(%sp) # force exp to zero
  13925	fmovm.x		(%sp)+,&0x80	# load normalized DENORM into fp0
  13926	fmul.x		(%sp)+,%fp0
  13927
  13928#	fmul.x	36(%a1),%fp0	# multiply fp0 by 10^8
  13929#	fmul.x	48(%a1),%fp0	# multiply fp0 by 10^16
  13930	mov.l		36+8(%a1),-(%sp) # get 10^8 mantissa
  13931	mov.l		36+4(%a1),-(%sp)
  13932	mov.l		&0x3fff0000,-(%sp) # force exp to zero
  13933	mov.l		48+8(%a1),-(%sp) # get 10^16 mantissa
  13934	mov.l		48+4(%a1),-(%sp)
  13935	mov.l		&0x3fff0000,-(%sp)# force exp to zero
  13936	fmul.x		(%sp)+,%fp0	# multiply fp0 by 10^8
  13937	fmul.x		(%sp)+,%fp0	# multiply fp0 by 10^16
  13938	bra.b		A10_st
  13939
  13940sc_mul_err:
  13941	bra.b		sc_mul_err
  13942
  13943A9_norm:
  13944	tst.w		%d2		# test for small exp case
  13945	beq.b		A9_con		# if zero, continue as normal
  13946	fmul.x		36(%a1),%fp0	# multiply fp0 by 10^8
  13947	fmul.x		48(%a1),%fp0	# multiply fp0 by 10^16
  13948A9_con:
  13949	fmul.x		%fp1,%fp0	# calculate X * SCALE -> Y to fp0
  13950
  13951# A10. Or in INEX.
  13952#      If INEX is set, round error occurred.  This is compensated
  13953#      for by 'or-ing' in the INEX2 flag to the lsb of Y.
  13954#
  13955# Register usage:
  13956#	Input/Output
  13957#	d0: FPCR with RZ mode/FPSR with INEX2 isolated
  13958#	d2: x/x
  13959#	d3: x/x
  13960#	d4: LEN/Unchanged
  13961#	d5: ICTR:LAMBDA
  13962#	d6: ILOG/Unchanged
  13963#	d7: k-factor/Unchanged
  13964#	a0: ptr for original operand/final result
  13965#	a1: ptr to PTENxx array/Unchanged
  13966#	a2: x/ptr to FP_SCR1(a6)
  13967#	fp0: Y/Y with lsb adjusted
  13968#	fp1: 10^ISCALE/Unchanged
  13969#	fp2: x/x
  13970
  13971A10_st:
  13972	fmov.l		%fpsr,%d0	# get FPSR
  13973	fmov.x		%fp0,FP_SCR1(%a6)	# move Y to memory
  13974	lea.l		FP_SCR1(%a6),%a2	# load a2 with ptr to FP_SCR1
  13975	btst		&9,%d0		# check if INEX2 set
  13976	beq.b		A11_st		# if clear, skip rest
  13977	or.l		&1,8(%a2)	# or in 1 to lsb of mantissa
  13978	fmov.x		FP_SCR1(%a6),%fp0	# write adjusted Y back to fpu
  13979
  13980
  13981# A11. Restore original FPCR; set size ext.
  13982#      Perform FINT operation in the user's rounding mode.  Keep
  13983#      the size to extended.  The sintdo entry point in the sint
  13984#      routine expects the FPCR value to be in USER_FPCR for
  13985#      mode and precision.  The original FPCR is saved in L_SCR1.
  13986
  13987A11_st:
  13988	mov.l		USER_FPCR(%a6),L_SCR1(%a6)	# save it for later
  13989	and.l		&0x00000030,USER_FPCR(%a6)	# set size to ext,
  13990#					;block exceptions
  13991
  13992
  13993# A12. Calculate YINT = FINT(Y) according to user's rounding mode.
  13994#      The FPSP routine sintd0 is used.  The output is in fp0.
  13995#
  13996# Register usage:
  13997#	Input/Output
  13998#	d0: FPSR with AINEX cleared/FPCR with size set to ext
  13999#	d2: x/x/scratch
  14000#	d3: x/x
  14001#	d4: LEN/Unchanged
  14002#	d5: ICTR:LAMBDA/Unchanged
  14003#	d6: ILOG/Unchanged
  14004#	d7: k-factor/Unchanged
  14005#	a0: ptr for original operand/src ptr for sintdo
  14006#	a1: ptr to PTENxx array/Unchanged
  14007#	a2: ptr to FP_SCR1(a6)/Unchanged
  14008#	a6: temp pointer to FP_SCR1(a6) - orig value saved and restored
  14009#	fp0: Y/YINT
  14010#	fp1: 10^ISCALE/Unchanged
  14011#	fp2: x/x
  14012#	F_SCR1:x/x
  14013#	F_SCR2:Y adjusted for inex/Y with original exponent
  14014#	L_SCR1:x/original USER_FPCR
  14015#	L_SCR2:first word of X packed/Unchanged
  14016
  14017A12_st:
  14018	movm.l	&0xc0c0,-(%sp)	# save regs used by sintd0	 {%d0-%d1/%a0-%a1}
  14019	mov.l	L_SCR1(%a6),-(%sp)
  14020	mov.l	L_SCR2(%a6),-(%sp)
  14021
  14022	lea.l		FP_SCR1(%a6),%a0	# a0 is ptr to FP_SCR1(a6)
  14023	fmov.x		%fp0,(%a0)	# move Y to memory at FP_SCR1(a6)
  14024	tst.l		L_SCR2(%a6)	# test sign of original operand
  14025	bge.b		do_fint12		# if pos, use Y
  14026	or.l		&0x80000000,(%a0)	# if neg, use -Y
  14027do_fint12:
  14028	mov.l	USER_FPSR(%a6),-(%sp)
  14029#	bsr	sintdo		# sint routine returns int in fp0
  14030
  14031	fmov.l	USER_FPCR(%a6),%fpcr
  14032	fmov.l	&0x0,%fpsr			# clear the AEXC bits!!!
  14033##	mov.l		USER_FPCR(%a6),%d0	# ext prec/keep rnd mode
  14034##	andi.l		&0x00000030,%d0
  14035##	fmov.l		%d0,%fpcr
  14036	fint.x		FP_SCR1(%a6),%fp0	# do fint()
  14037	fmov.l	%fpsr,%d0
  14038	or.w	%d0,FPSR_EXCEPT(%a6)
  14039##	fmov.l		&0x0,%fpcr
  14040##	fmov.l		%fpsr,%d0		# don't keep ccodes
  14041##	or.w		%d0,FPSR_EXCEPT(%a6)
  14042
  14043	mov.b	(%sp),USER_FPSR(%a6)
  14044	add.l	&4,%sp
  14045
  14046	mov.l	(%sp)+,L_SCR2(%a6)
  14047	mov.l	(%sp)+,L_SCR1(%a6)
  14048	movm.l	(%sp)+,&0x303	# restore regs used by sint	 {%d0-%d1/%a0-%a1}
  14049
  14050	mov.l	L_SCR2(%a6),FP_SCR1(%a6)	# restore original exponent
  14051	mov.l	L_SCR1(%a6),USER_FPCR(%a6)	# restore user's FPCR
  14052
  14053# A13. Check for LEN digits.
  14054#      If the int operation results in more than LEN digits,
  14055#      or less than LEN -1 digits, adjust ILOG and repeat from
  14056#      A6.  This test occurs only on the first pass.  If the
  14057#      result is exactly 10^LEN, decrement ILOG and divide
  14058#      the mantissa by 10.  The calculation of 10^LEN cannot
  14059#      be inexact, since all powers of ten up to 10^27 are exact
  14060#      in extended precision, so the use of a previous power-of-ten
  14061#      table will introduce no error.
  14062#
  14063#
  14064# Register usage:
  14065#	Input/Output
  14066#	d0: FPCR with size set to ext/scratch final = 0
  14067#	d2: x/x
  14068#	d3: x/scratch final = x
  14069#	d4: LEN/LEN adjusted
  14070#	d5: ICTR:LAMBDA/LAMBDA:ICTR
  14071#	d6: ILOG/ILOG adjusted
  14072#	d7: k-factor/Unchanged
  14073#	a0: pointer into memory for packed bcd string formation
  14074#	a1: ptr to PTENxx array/Unchanged
  14075#	a2: ptr to FP_SCR1(a6)/Unchanged
  14076#	fp0: int portion of Y/abs(YINT) adjusted
  14077#	fp1: 10^ISCALE/Unchanged
  14078#	fp2: x/10^LEN
  14079#	F_SCR1:x/x
  14080#	F_SCR2:Y with original exponent/Unchanged
  14081#	L_SCR1:original USER_FPCR/Unchanged
  14082#	L_SCR2:first word of X packed/Unchanged
  14083
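#
# A C sketch of the digit-count check (illustrative only; a nonzero
# return means ILOG changed and the caller repeats from A6, which the
# real code does at most once, gated by ICTR):
#
#	#include <math.h>
#
#	static int check_len(double *yint, int *ilog, int len)
#	{
#		double lo = pow(10.0, len - 1), hi = lo * 10.0;
#		double y = fabs(*yint);
#
#		if (y < lo) {			/* fewer than LEN digits */
#			(*ilog)--;
#			return -1;
#		}
#		if (y > hi) {			/* more than LEN digits	 */
#			(*ilog)++;
#			return 1;
#		}
#		if (y == hi) {			/* exactly 10^LEN	 */
#			*yint = y / 10.0;
#			(*ilog)++;
#		}
#		return 0;
#	}
#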
  14084A13_st:
  14085	swap		%d5		# put ICTR in lower word of d5
  14086	tst.w		%d5		# check if ICTR = 0
  14087	bne		not_zr		# if non-zero, go to second test
  14088#
  14089# Compute 10^(LEN-1)
  14090#
  14091	fmov.s		FONE(%pc),%fp2	# init fp2 to 1.0
  14092	mov.l		%d4,%d0		# put LEN in d0
  14093	subq.l		&1,%d0		# d0 = LEN -1
  14094	clr.l		%d3		# clr table index
  14095l_loop:
  14096	lsr.l		&1,%d0		# shift next bit into carry
  14097	bcc.b		l_next		# if zero, skip the mul
  14098	fmul.x		(%a1,%d3),%fp2	# mul by 10**(d3_bit_no)
  14099l_next:
  14100	add.l		&12,%d3		# inc d3 to next pwrten table entry
  14101	tst.l		%d0		# test if LEN is zero
  14102	bne.b		l_loop		# if not, loop
  14103#
  14104# 10^(LEN-1) is computed for this test and A14.  If the input was
  14105# denormalized, check only the case in which YINT > 10^LEN.
  14106#
  14107	tst.b		BINDEC_FLG(%a6)	# check if input was norm
  14108	beq.b		A13_con		# if norm, continue with checking
  14109	fabs.x		%fp0		# take abs of YINT
  14110	bra		test_2
  14111#
  14112# Compare abs(YINT) to 10^(LEN-1) and 10^LEN
  14113#
  14114A13_con:
  14115	fabs.x		%fp0		# take abs of YINT
  14116	fcmp.x		%fp0,%fp2	# compare abs(YINT) with 10^(LEN-1)
  14117	fbge.w		test_2		# if greater, do next test
  14118	subq.l		&1,%d6		# subtract 1 from ILOG
  14119	mov.w		&1,%d5		# set ICTR
  14120	fmov.l		&rm_mode*0x10,%fpcr	# set rmode to RM
  14121	fmul.s		FTEN(%pc),%fp2	# compute 10^LEN
  14122	bra.w		A6_str		# return to A6 and recompute YINT
  14123test_2:
  14124	fmul.s		FTEN(%pc),%fp2	# compute 10^LEN
  14125	fcmp.x		%fp0,%fp2	# compare abs(YINT) with 10^LEN
  14126	fblt.w		A14_st		# if less, all is ok, go to A14
  14127	fbgt.w		fix_ex		# if greater, fix and redo
  14128	fdiv.s		FTEN(%pc),%fp0	# if equal, divide by 10
  14129	addq.l		&1,%d6		# and inc ILOG
  14130	bra.b		A14_st		# and continue elsewhere
  14131fix_ex:
  14132	addq.l		&1,%d6		# increment ILOG by 1
  14133	mov.w		&1,%d5		# set ICTR
  14134	fmov.l		&rm_mode*0x10,%fpcr	# set rmode to RM
  14135	bra.w		A6_str		# return to A6 and recompute YINT
  14136#
  14137# Since ICTR <> 0, we have already been through one adjustment,
  14138# and shouldn't have another; this is to check if abs(YINT) = 10^LEN
  14139# 10^LEN is again computed using whatever table is in a1 since the
  14140# value calculated cannot be inexact.
  14141#
  14142not_zr:
  14143	fmov.s		FONE(%pc),%fp2	# init fp2 to 1.0
  14144	mov.l		%d4,%d0		# put LEN in d0
  14145	clr.l		%d3		# clr table index
  14146z_loop:
  14147	lsr.l		&1,%d0		# shift next bit into carry
  14148	bcc.b		z_next		# if zero, skip the mul
  14149	fmul.x		(%a1,%d3),%fp2	# mul by 10**(d3_bit_no)
  14150z_next:
  14151	add.l		&12,%d3		# inc d3 to next pwrten table entry
  14152	tst.l		%d0		# test if LEN is zero
  14153	bne.b		z_loop		# if not, loop
  14154	fabs.x		%fp0		# get abs(YINT)
  14155	fcmp.x		%fp0,%fp2	# check if abs(YINT) = 10^LEN
  14156	fbneq.w		A14_st		# if not, skip this
  14157	fdiv.s		FTEN(%pc),%fp0	# divide abs(YINT) by 10
  14158	addq.l		&1,%d6		# and inc ILOG by 1
  14159	addq.l		&1,%d4		# and inc LEN
  14160	fmul.s		FTEN(%pc),%fp2	# LEN was inc'd, so get 10^^LEN
  14161
  14162# A14. Convert the mantissa to bcd.
  14163#      The binstr routine is used to convert the LEN digit
  14164#      mantissa to bcd in memory.  The input to binstr is
  14165#      to be a fraction; i.e. (mantissa)/10^LEN and adjusted
  14166#      such that the decimal point is to the left of bit 63.
  14167#      The bcd digits are stored in the correct position in
  14168#      the final string area in memory.
  14169#
  14170#
  14171# Register usage:
  14172#	Input/Output
  14173#	d0: x/LEN call to binstr - final is 0
  14174#	d1: x/0
  14175#	d2: x/ms 32-bits of mant of abs(YINT)
  14176#	d3: x/ls 32-bits of mant of abs(YINT)
  14177#	d4: LEN/Unchanged
  14178#	d5: ICTR:LAMBDA/LAMBDA:ICTR
  14179#	d6: ILOG
  14180#	d7: k-factor/Unchanged
  14181#	a0: pointer into memory for packed bcd string formation
  14182#	    /ptr to first mantissa byte in result string
  14183#	a1: ptr to PTENxx array/Unchanged
  14184#	a2: ptr to FP_SCR1(a6)/Unchanged
  14185#	fp0: int portion of Y/abs(YINT) adjusted
  14186#	fp1: 10^ISCALE/Unchanged
  14187#	fp2: 10^LEN/Unchanged
  14188#	F_SCR1:x/Work area for final result
  14189#	F_SCR2:Y with original exponent/Unchanged
  14190#	L_SCR1:original USER_FPCR/Unchanged
  14191#	L_SCR2:first word of X packed/Unchanged
  14192
  14193A14_st:
  14194	fmov.l		&rz_mode*0x10,%fpcr	# force rz for conversion
  14195	fdiv.x		%fp2,%fp0	# divide abs(YINT) by 10^LEN
  14196	lea.l		FP_SCR0(%a6),%a0
  14197	fmov.x		%fp0,(%a0)	# move abs(YINT)/10^LEN to memory
  14198	mov.l		4(%a0),%d2	# move 2nd word of FP_RES to d2
  14199	mov.l		8(%a0),%d3	# move 3rd word of FP_RES to d3
  14200	clr.l		4(%a0)		# zero word 2 of FP_RES
  14201	clr.l		8(%a0)		# zero word 3 of FP_RES
  14202	mov.l		(%a0),%d0	# move exponent to d0
  14203	swap		%d0		# put exponent in lower word
  14204	beq.b		no_sft		# if zero, don't shift
  14205	sub.l		&0x3ffd,%d0	# sub bias less 2 to make fract
  14206	tst.l		%d0		# check if > 1
  14207	bgt.b		no_sft		# if so, don't shift
  14208	neg.l		%d0		# make exp positive
  14209m_loop:
  14210	lsr.l		&1,%d2		# shift d2:d3 right, add 0s
  14211	roxr.l		&1,%d3		# the number of places
  14212	dbf.w		%d0,m_loop	# given in d0
  14213no_sft:
  14214	tst.l		%d2		# check for mantissa of zero
  14215	bne.b		no_zr		# if not, go on
  14216	tst.l		%d3		# continue zero check
  14217	beq.b		zer_m		# if zero, go directly to binstr
  14218no_zr:
  14219	clr.l		%d1		# put zero in d1 for addx
  14220	add.l		&0x00000080,%d3	# inc at bit 7
  14221	addx.l		%d1,%d2		# continue inc
  14222	and.l		&0xffffff80,%d3	# strip off lsb not used by 882
  14223zer_m:
  14224	mov.l		%d4,%d0		# put LEN in d0 for binstr call
  14225	addq.l		&3,%a0		# a0 points to M16 byte in result
  14226	bsr		binstr		# call binstr to convert mant
  14227
  14228
  14229# A15. Convert the exponent to bcd.
  14230#      As in A14 above, the exp is converted to bcd and the
  14231#      digits are stored in the final string.
  14232#
  14233#      Digits are stored in L_SCR1(a6) on return from binstr as:
  14234#
  14235#	 32               16 15                0
  14236#	-----------------------------------------
  14237#	|  0 | e4 | e3 | e2 | e1 |  0 |  X |  X |
  14238#	-----------------------------------------
  14239#
  14240# And are moved into their proper places in FP_SCR0.  If digit e4
  14241# is non-zero, OPERR is signaled.  In all cases, all 4 digits are
  14242# written as specified in the 881/882 manual for packed decimal.
  14243#
  14244# Register usage:
  14245#	Input/Output
  14246#	d0: x/LEN call to binstr - final is 0
  14247#	d1: x/scratch (0);shift count for final exponent packing
  14248#	d2: x/ms 32-bits of exp fraction/scratch
  14249#	d3: x/ls 32-bits of exp fraction
  14250#	d4: LEN/Unchanged
  14251#	d5: ICTR:LAMBDA/LAMBDA:ICTR
  14252#	d6: ILOG
  14253#	d7: k-factor/Unchanged
  14254#	a0: ptr to result string/ptr to L_SCR1(a6)
  14255#	a1: ptr to PTENxx array/Unchanged
  14256#	a2: ptr to FP_SCR1(a6)/Unchanged
  14257#	fp0: abs(YINT) adjusted/float(ILOG)
  14258#	fp1: 10^ISCALE/Unchanged
  14259#	fp2: 10^LEN/Unchanged
  14260#	F_SCR1:Work area for final result/BCD result
  14261#	F_SCR2:Y with original exponent/ILOG/10^4
  14262#	L_SCR1:original USER_FPCR/Exponent digits on return from binstr
  14263#	L_SCR2:first word of X packed/Unchanged
  14264
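#
# A C sketch of the digit placement (illustrative only; bcd[0] is the
# most significant long word of the 96-bit packed result, so datum
# bits 91..80 are bcd[0] bits 27..16 and bits 79..76 are bits 15..12):
#
#	#include <stdint.h>
#
#	static void pack_exp(uint32_t digits, uint32_t bcd[3], int *operr)
#	{
#		uint32_t d = digits >> 12;	/* nibbles: 0 0 0 0 e4 e3 e2 e1 */
#
#		bcd[0] &= ~0x0FFFF000u;			/* clear both fields	*/
#		bcd[0] |= (d & 0xFFFu) << 16;		/* e3:e2:e1		*/
#		bcd[0] |= ((d >> 12) & 0xFu) << 12;	/* e4			*/
#		if ((d >> 12) & 0xFu)
#			*operr = 1;			/* 4-digit exponent	*/
#	}
#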
  14265A15_st:
  14266	tst.b		BINDEC_FLG(%a6)	# check for denorm
  14267	beq.b		not_denorm
  14268	ftest.x		%fp0		# test for zero
  14269	fbeq.w		den_zero	# if zero, use k-factor or 4933
  14270	fmov.l		%d6,%fp0	# float ILOG
  14271	fabs.x		%fp0		# get abs of ILOG
  14272	bra.b		convrt
  14273den_zero:
  14274	tst.l		%d7		# check sign of the k-factor
  14275	blt.b		use_ilog	# if negative, use ILOG
  14276	fmov.s		F4933(%pc),%fp0	# force exponent to 4933
  14277	bra.b		convrt		# do it
  14278use_ilog:
  14279	fmov.l		%d6,%fp0	# float ILOG
  14280	fabs.x		%fp0		# get abs of ILOG
  14281	bra.b		convrt
  14282not_denorm:
  14283	ftest.x		%fp0		# test for zero
  14284	fbneq.w		not_zero	# if zero, force exponent
  14285	fmov.s		FONE(%pc),%fp0	# force exponent to 1
  14286	bra.b		convrt		# do it
  14287not_zero:
  14288	fmov.l		%d6,%fp0	# float ILOG
  14289	fabs.x		%fp0		# get abs of ILOG
  14290convrt:
  14291	fdiv.x		24(%a1),%fp0	# compute ILOG/10^4
  14292	fmov.x		%fp0,FP_SCR1(%a6)	# store fp0 in memory
  14293	mov.l		4(%a2),%d2	# move word 2 to d2
  14294	mov.l		8(%a2),%d3	# move word 3 to d3
  14295	mov.w		(%a2),%d0	# move exp to d0
  14296	beq.b		x_loop_fin	# if zero, skip the shift
  14297	sub.w		&0x3ffd,%d0	# subtract off bias
  14298	neg.w		%d0		# make exp positive
  14299x_loop:
  14300	lsr.l		&1,%d2		# shift d2:d3 right
  14301	roxr.l		&1,%d3		# the number of places
  14302	dbf.w		%d0,x_loop	# given in d0
  14303x_loop_fin:
  14304	clr.l		%d1		# put zero in d1 for addx
  14305	add.l		&0x00000080,%d3	# inc at bit 7
  14306	addx.l		%d1,%d2		# continue inc
  14307	and.l		&0xffffff80,%d3	# strip off lsb not used by 882
  14308	mov.l		&4,%d0		# put 4 in d0 for binstr call
  14309	lea.l		L_SCR1(%a6),%a0	# a0 is ptr to L_SCR1 for exp digits
  14310	bsr		binstr		# call binstr to convert exp
  14311	mov.l		L_SCR1(%a6),%d0	# load L_SCR1 lword to d0
  14312	mov.l		&12,%d1		# use d1 for shift count
  14313	lsr.l		%d1,%d0		# shift d0 right by 12
  14314	bfins		%d0,FP_SCR0(%a6){&4:&12}	# put e3:e2:e1 in FP_SCR0
  14315	lsr.l		%d1,%d0		# shift d0 right by 12
  14316	bfins		%d0,FP_SCR0(%a6){&16:&4}	# put e4 in FP_SCR0
  14317	tst.b		%d0		# check if e4 is zero
  14318	beq.b		A16_st		# if zero, skip rest
  14319	or.l		&opaop_mask,USER_FPSR(%a6)	# set OPERR & AIOP in USER_FPSR
  14320
  14321
  14322# A16. Write sign bits to final string.
  14323#	   Sigma is bit 31 of initial value; RHO is bit 31 of d6 (ILOG).
  14324#
  14325# Register usage:
  14326#	Input/Output
  14327#	d0: x/scratch - final is x
  14328#	d2: x/x
  14329#	d3: x/x
  14330#	d4: LEN/Unchanged
  14331#	d5: ICTR:LAMBDA/LAMBDA:ICTR
  14332#	d6: ILOG/ILOG adjusted
  14333#	d7: k-factor/Unchanged
  14334#	a0: ptr to L_SCR1(a6)/Unchanged
  14335#	a1: ptr to PTENxx array/Unchanged
  14336#	a2: ptr to FP_SCR1(a6)/Unchanged
  14337#	fp0: float(ILOG)/Unchanged
  14338#	fp1: 10^ISCALE/Unchanged
  14339#	fp2: 10^LEN/Unchanged
  14340#	F_SCR1:BCD result with correct signs
  14341#	F_SCR2:ILOG/10^4
  14342#	L_SCR1:Exponent digits on return from binstr
  14343#	L_SCR2:first word of X packed/Unchanged
  14344
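#
# A C sketch of the sign insertion (illustrative only; bcd0 is the most
# significant long word of the packed result, whose top nibble holds
# SM, SE and two bits cleared here):
#
#	#include <stdint.h>
#
#	static uint32_t set_signs(uint32_t bcd0, int mant_neg, int ilog_neg)
#	{
#		uint32_t sm_se = (mant_neg ? 2u : 0u) | (ilog_neg ? 1u : 0u);
#		return (bcd0 & 0x0FFFFFFFu) | (sm_se << 30);
#	}
#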
  14345A16_st:
  14346	clr.l		%d0		# clr d0 for collection of signs
  14347	and.b		&0x0f,FP_SCR0(%a6)	# clear first nibble of FP_SCR0
  14348	tst.l		L_SCR2(%a6)	# check sign of original mantissa
  14349	bge.b		mant_p		# if pos, don't set SM
  14350	mov.l		&2,%d0		# move 2 in to d0 for SM
  14351mant_p:
  14352	tst.l		%d6		# check sign of ILOG
  14353	bge.b		wr_sgn		# if pos, don't set SE
  14354	addq.l		&1,%d0		# set bit 0 in d0 for SE
  14355wr_sgn:
  14356	bfins		%d0,FP_SCR0(%a6){&0:&2}	# insert SM and SE into FP_SCR0
  14357
  14358# Clean up and restore all registers used.
  14359
  14360	fmov.l		&0,%fpsr	# clear possible inex2/ainex bits
  14361	fmovm.x		(%sp)+,&0xe0	#  {%fp0-%fp2}
  14362	movm.l		(%sp)+,&0x4fc	#  {%d2-%d7/%a2}
  14363	rts
  14364
  14365	global		PTENRN
  14366PTENRN:
  14367	long		0x40020000,0xA0000000,0x00000000	# 10 ^ 1
  14368	long		0x40050000,0xC8000000,0x00000000	# 10 ^ 2
  14369	long		0x400C0000,0x9C400000,0x00000000	# 10 ^ 4
  14370	long		0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8
  14371	long		0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16
  14372	long		0x40690000,0x9DC5ADA8,0x2B70B59E	# 10 ^ 32
  14373	long		0x40D30000,0xC2781F49,0xFFCFA6D5	# 10 ^ 64
  14374	long		0x41A80000,0x93BA47C9,0x80E98CE0	# 10 ^ 128
  14375	long		0x43510000,0xAA7EEBFB,0x9DF9DE8E	# 10 ^ 256
  14376	long		0x46A30000,0xE319A0AE,0xA60E91C7	# 10 ^ 512
  14377	long		0x4D480000,0xC9767586,0x81750C17	# 10 ^ 1024
  14378	long		0x5A920000,0x9E8B3B5D,0xC53D5DE5	# 10 ^ 2048
  14379	long		0x75250000,0xC4605202,0x8A20979B	# 10 ^ 4096
  14380
  14381	global		PTENRP
  14382PTENRP:
  14383	long		0x40020000,0xA0000000,0x00000000	# 10 ^ 1
  14384	long		0x40050000,0xC8000000,0x00000000	# 10 ^ 2
  14385	long		0x400C0000,0x9C400000,0x00000000	# 10 ^ 4
  14386	long		0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8
  14387	long		0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16
  14388	long		0x40690000,0x9DC5ADA8,0x2B70B59E	# 10 ^ 32
  14389	long		0x40D30000,0xC2781F49,0xFFCFA6D6	# 10 ^ 64
  14390	long		0x41A80000,0x93BA47C9,0x80E98CE0	# 10 ^ 128
  14391	long		0x43510000,0xAA7EEBFB,0x9DF9DE8E	# 10 ^ 256
  14392	long		0x46A30000,0xE319A0AE,0xA60E91C7	# 10 ^ 512
  14393	long		0x4D480000,0xC9767586,0x81750C18	# 10 ^ 1024
  14394	long		0x5A920000,0x9E8B3B5D,0xC53D5DE5	# 10 ^ 2048
  14395	long		0x75250000,0xC4605202,0x8A20979B	# 10 ^ 4096
  14396
  14397	global		PTENRM
  14398PTENRM:
  14399	long		0x40020000,0xA0000000,0x00000000	# 10 ^ 1
  14400	long		0x40050000,0xC8000000,0x00000000	# 10 ^ 2
  14401	long		0x400C0000,0x9C400000,0x00000000	# 10 ^ 4
  14402	long		0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8
  14403	long		0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16
  14404	long		0x40690000,0x9DC5ADA8,0x2B70B59D	# 10 ^ 32
  14405	long		0x40D30000,0xC2781F49,0xFFCFA6D5	# 10 ^ 64
  14406	long		0x41A80000,0x93BA47C9,0x80E98CDF	# 10 ^ 128
  14407	long		0x43510000,0xAA7EEBFB,0x9DF9DE8D	# 10 ^ 256
  14408	long		0x46A30000,0xE319A0AE,0xA60E91C6	# 10 ^ 512
  14409	long		0x4D480000,0xC9767586,0x81750C17	# 10 ^ 1024
  14410	long		0x5A920000,0x9E8B3B5D,0xC53D5DE4	# 10 ^ 2048
  14411	long		0x75250000,0xC4605202,0x8A20979A	# 10 ^ 4096
  14412
  14413#########################################################################
  14414# binstr(): Converts a 64-bit binary integer to bcd.			#
  14415#									#
  14416# INPUT *************************************************************** #
  14417#	d2:d3 = 64-bit binary integer					#
  14418#	d0    = desired length (LEN)					#
  14419#	a0    = pointer to start in memory for bcd characters		#
  14420#		(This pointer must point to byte 4 of the first		#
  14421#		 lword of the packed decimal memory string.)		#
  14422#									#
  14423# OUTPUT ************************************************************** #
  14424#	a0 = pointer to LEN bcd digits representing the 64-bit integer.	#
  14425#									#
  14426# ALGORITHM ***********************************************************	#
  14427#	The 64-bit binary is assumed to have a decimal point before	#
  14428#	bit 63.  The fraction is multiplied by 10 using a mul by 2	#
  14429#	shift and a mul by 8 shift.  The bits shifted out of the	#
  14430#	msb form a decimal digit.  This process is iterated until	#
  14431#	LEN digits are formed.						#
  14432#									#
  14433# A1. Init d7 to 1.  D7 is the byte digit counter, and if 1, the	#
  14434#     digit formed will be assumed the least significant.  This is	#
  14435#     to force the first byte formed to have a 0 in the upper 4 bits.	#
  14436#									#
  14437# A2. Beginning of the loop:						#
  14438#     Copy the fraction in d2:d3 to d4:d5.				#
  14439#									#
  14440# A3. Multiply the fraction in d2:d3 by 8 using bit-field		#
  14441#     extracts and shifts.  The three msbs from d2 will go into d1.	#
  14442#									#
  14443# A4. Multiply the fraction in d4:d5 by 2 using shifts.  The msb	#
  14444#     will be collected by the carry.					#
  14445#									#
  14446# A5. Add using the carry the 64-bit quantities in d2:d3 and d4:d5	#
  14447#     into d2:d3.  D1 will contain the bcd digit formed.		#
  14448#									#
  14449# A6. Test d7.  If zero, the digit formed is the ms digit.  If non-	#
  14450#     zero, it is the ls digit.  Put the digit in its place in the	#
  14451#     upper word of d7.  If it is the ls digit, write the byte		#
  14452#     from d7 to memory.						#
  14453#									#
  14454# A7. Decrement d0 (LEN counter) and repeat the loop until zero.	#
  14455#									#
  14456#########################################################################
  14457
  14458#	Implementation Notes:
  14459#
  14460#	The registers are used as follows:
  14461#
  14462#		d0: LEN counter
  14463#		d1: temp used to form the digit
  14464#		d2: upper 32-bits of fraction for mul by 8
  14465#		d3: lower 32-bits of fraction for mul by 8
  14466#		d4: upper 32-bits of fraction for mul by 2
  14467#		d5: lower 32-bits of fraction for mul by 2
  14468#		d6: temp for bit-field extracts
  14469#		d7: byte digit formation word;digit count {0,1}
  14470#		a0: pointer into memory for packed bcd string formation
  14471#
  14472
  14473	global		binstr
  14474binstr:
  14475	movm.l		&0xff00,-(%sp)	#  {%d0-%d7}
  14476
  14477#
  14478# A1: Init d7
  14479#
  14480	mov.l		&1,%d7		# init d7 for second digit
  14481	subq.l		&1,%d0		# for dbf d0 would have LEN+1 passes
  14482#
  14483# A2. Copy d2:d3 to d4:d5.  Start loop.
  14484#
  14485loop:
  14486	mov.l		%d2,%d4		# copy the fraction before muls
  14487	mov.l		%d3,%d5		# to d4:d5
  14488#
  14489# A3. Multiply d2:d3 by 8; extract msbs into d1.
  14490#
  14491	bfextu		%d2{&0:&3},%d1	# copy 3 msbs of d2 into d1
  14492	asl.l		&3,%d2		# shift d2 left by 3 places
  14493	bfextu		%d3{&0:&3},%d6	# copy 3 msbs of d3 into d6
  14494	asl.l		&3,%d3		# shift d3 left by 3 places
  14495	or.l		%d6,%d2		# or in msbs from d3 into d2
  14496#
  14497# A4. Multiply d4:d5 by 2; add carry out to d1.
  14498#
  14499	asl.l		&1,%d5		# mul d5 by 2
  14500	roxl.l		&1,%d4		# mul d4 by 2
  14501	swap		%d6		# put 0 in d6 lower word
  14502	addx.w		%d6,%d1		# add in extend from mul by 2
  14503#
  14504# A5. Add mul by 8 to mul by 2.  D1 contains the digit formed.
  14505#
  14506	add.l		%d5,%d3		# add lower 32 bits
  14507	nop				# ERRATA FIX #13 (Rev. 1.2 6/6/90)
  14508	addx.l		%d4,%d2		# add with extend upper 32 bits
  14509	nop				# ERRATA FIX #13 (Rev. 1.2 6/6/90)
  14510	addx.w		%d6,%d1		# add in extend from add to d1
  14511	swap		%d6		# with d6 = 0; put 0 in upper word
  14512#
  14513# A6. Test d7 and branch.
  14514#
  14515	tst.w		%d7		# if zero, store digit & to loop
  14516	beq.b		first_d		# if non-zero, form byte & write
  14517sec_d:
  14518	swap		%d7		# bring first digit to word d7b
  14519	asl.w		&4,%d7		# first digit in upper 4 bits d7b
  14520	add.w		%d1,%d7		# add in ls digit to d7b
  14521	mov.b		%d7,(%a0)+	# store d7b byte in memory
  14522	swap		%d7		# put LEN counter in word d7a
  14523	clr.w		%d7		# set d7a to signal no digits done
  14524	dbf.w		%d0,loop	# do loop some more!
  14525	bra.b		end_bstr	# finished, so exit
  14526first_d:
  14527	swap		%d7		# put digit word in d7b
  14528	mov.w		%d1,%d7		# put new digit in d7b
  14529	swap		%d7		# put LEN counter in word d7a
  14530	addq.w		&1,%d7		# set d7a to signal first digit done
  14531	dbf.w		%d0,loop	# do loop some more!
  14532	swap		%d7		# put last digit in string
  14533	lsl.w		&4,%d7		# move it to upper 4 bits
  14534	mov.b		%d7,(%a0)+	# store it in memory string
  14535#
  14536# Clean up and return; the bcd string has been written via (a0).
  14537#
  14538end_bstr:
  14539	movm.l		(%sp)+,&0xff	#  {%d0-%d7}
  14540	rts
  14541
  14542#########################################################################
  14543# XDEF ****************************************************************	#
  14544#	facc_in_b(): dmem_read_byte failed				#
  14545#	facc_in_w(): dmem_read_word failed				#
  14546#	facc_in_l(): dmem_read_long failed				#
  14547#	facc_in_d(): dmem_read of dbl prec failed			#
  14548#	facc_in_x(): dmem_read of ext prec failed			#
  14549#									#
  14550#	facc_out_b(): dmem_write_byte failed				#
  14551#	facc_out_w(): dmem_write_word failed				#
  14552#	facc_out_l(): dmem_write_long failed				#
  14553#	facc_out_d(): dmem_write of dbl prec failed			#
  14554#	facc_out_x(): dmem_write of ext prec failed			#
  14555#									#
  14556# XREF ****************************************************************	#
  14557#	_real_access() - exit through access error handler		#
  14558#									#
  14559# INPUT ***************************************************************	#
  14560#	None								#
  14561#									#
  14562# OUTPUT **************************************************************	#
  14563#	None								#
  14564#									#
  14565# ALGORITHM ***********************************************************	#
  14566#	Flow jumps here when an FP data fetch call gets an error	#
  14567# result. This means the operating system wants an access error frame	#
  14568# made out of the current exception stack frame.			#
  14569#	So, we first call restore() which makes sure that any updated	#
  14570# -(an)+ register gets returned to its pre-exception value and then	#
  14571# we change the stack to an access error stack frame.			#
  14572#									#
  14573#########################################################################
  14574
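#
# For reference, the FSLW value each stub below plants in EXC_VOFF
# (and which facc_finish later folds into the access error frame),
# tabulated in C form; the constants are exactly those used below:
#
#	static const unsigned short fslw_read[]  = {
#		0x0121,		/* byte			*/
#		0x0141,		/* word			*/
#		0x0101,		/* long			*/
#		0x0161,		/* double, extended	*/
#	};
#	static const unsigned short fslw_write[] = {
#		0x00a1,		/* byte			*/
#		0x00c1,		/* word			*/
#		0x0081,		/* long			*/
#		0x00e1,		/* double, extended	*/
#	};
#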
  14575facc_in_b:
  14576	movq.l		&0x1,%d0			# one byte
  14577	bsr.w		restore				# fix An
  14578
  14579	mov.w		&0x0121,EXC_VOFF(%a6)		# set FSLW
  14580	bra.w		facc_finish
  14581
  14582facc_in_w:
  14583	movq.l		&0x2,%d0			# two bytes
  14584	bsr.w		restore				# fix An
  14585
  14586	mov.w		&0x0141,EXC_VOFF(%a6)		# set FSLW
  14587	bra.b		facc_finish
  14588
  14589facc_in_l:
  14590	movq.l		&0x4,%d0			# four bytes
  14591	bsr.w		restore				# fix An
  14592
  14593	mov.w		&0x0101,EXC_VOFF(%a6)		# set FSLW
  14594	bra.b		facc_finish
  14595
  14596facc_in_d:
  14597	movq.l		&0x8,%d0			# eight bytes
  14598	bsr.w		restore				# fix An
  14599
  14600	mov.w		&0x0161,EXC_VOFF(%a6)		# set FSLW
  14601	bra.b		facc_finish
  14602
  14603facc_in_x:
  14604	movq.l		&0xc,%d0			# twelve bytes
  14605	bsr.w		restore				# fix An
  14606
  14607	mov.w		&0x0161,EXC_VOFF(%a6)		# set FSLW
  14608	bra.b		facc_finish
  14609
  14610################################################################
  14611
  14612facc_out_b:
  14613	movq.l		&0x1,%d0			# one byte
  14614	bsr.w		restore				# restore An
  14615
  14616	mov.w		&0x00a1,EXC_VOFF(%a6)		# set FSLW
  14617	bra.b		facc_finish
  14618
  14619facc_out_w:
  14620	movq.l		&0x2,%d0			# two bytes
  14621	bsr.w		restore				# restore An
  14622
  14623	mov.w		&0x00c1,EXC_VOFF(%a6)		# set FSLW
  14624	bra.b		facc_finish
  14625
  14626facc_out_l:
  14627	movq.l		&0x4,%d0			# four bytes
  14628	bsr.w		restore				# restore An
  14629
  14630	mov.w		&0x0081,EXC_VOFF(%a6)		# set FSLW
  14631	bra.b		facc_finish
  14632
  14633facc_out_d:
  14634	movq.l		&0x8,%d0			# eight bytes
  14635	bsr.w		restore				# restore An
  14636
  14637	mov.w		&0x00e1,EXC_VOFF(%a6)		# set FSLW
  14638	bra.b		facc_finish
  14639
  14640facc_out_x:
  14641	mov.l		&0xc,%d0			# twelve bytes
  14642	bsr.w		restore				# restore An
  14643
  14644	mov.w		&0x00e1,EXC_VOFF(%a6)		# set FSLW
  14645
  14646# here's where we actually create the access error frame from the
  14647# current exception stack frame.
  14648facc_finish:
  14649	mov.l		USER_FPIAR(%a6),EXC_PC(%a6) # store current PC
  14650
  14651	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
  14652	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
  14653	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
  14654
  14655	unlk		%a6
  14656
  14657	mov.l		(%sp),-(%sp)		# store SR, hi(PC)
  14658	mov.l		0x8(%sp),0x4(%sp)	# store lo(PC)
  14659	mov.l		0xc(%sp),0x8(%sp)	# store EA
  14660	mov.l		&0x00000001,0xc(%sp)	# store FSLW
  14661	mov.w		0x6(%sp),0xc(%sp)	# fix FSLW (size)
  14662	mov.w		&0x4008,0x6(%sp)	# store voff
  14663
  14664	btst		&0x5,(%sp)		# supervisor or user mode?
  14665	beq.b		facc_out2		# user
  14666	bset		&0x2,0xd(%sp)		# set supervisor TM bit
  14667
  14668facc_out2:
  14669	bra.l		_real_access
  14670
  14671##################################################################
  14672
  14673# if the effective addressing mode was predecrement or postincrement,
  14674# the emulation has already changed its value to the correct post-
  14675# instruction value. but since we're exiting to the access error
  14676# handler, An must be returned to its pre-instruction value.
  14677# we do that here.
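#
# A C sketch of the adjustment done below (illustrative only; `an`
# points at the saved value of the address register and `size` is the
# access size in bytes passed in d0):
#
#	static void restore_an(unsigned opmode, unsigned long *an,
#			       unsigned long size)
#	{
#		if (opmode == 0x18)		/* (An)+ : undo post-increment	*/
#			*an -= size;
#		else if (opmode == 0x20)	/* -(An) : undo pre-decrement	*/
#			*an += size;
#	}
#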
  14678restore:
  14679	mov.b		EXC_OPWORD+0x1(%a6),%d1
  14680	andi.b		&0x38,%d1		# extract opmode
  14681	cmpi.b		%d1,&0x18		# postinc?
  14682	beq.w		rest_inc
  14683	cmpi.b		%d1,&0x20		# predec?
  14684	beq.w		rest_dec
  14685	rts
  14686
  14687rest_inc:
  14688	mov.b		EXC_OPWORD+0x1(%a6),%d1
  14689	andi.w		&0x0007,%d1		# fetch An
  14690
  14691	mov.w		(tbl_rest_inc.b,%pc,%d1.w*2),%d1
  14692	jmp		(tbl_rest_inc.b,%pc,%d1.w*1)
  14693
  14694tbl_rest_inc:
  14695	short		ri_a0 - tbl_rest_inc
  14696	short		ri_a1 - tbl_rest_inc
  14697	short		ri_a2 - tbl_rest_inc
  14698	short		ri_a3 - tbl_rest_inc
  14699	short		ri_a4 - tbl_rest_inc
  14700	short		ri_a5 - tbl_rest_inc
  14701	short		ri_a6 - tbl_rest_inc
  14702	short		ri_a7 - tbl_rest_inc
  14703
  14704ri_a0:
  14705	sub.l		%d0,EXC_DREGS+0x8(%a6)	# fix stacked a0
  14706	rts
  14707ri_a1:
  14708	sub.l		%d0,EXC_DREGS+0xc(%a6)	# fix stacked a1
  14709	rts
  14710ri_a2:
  14711	sub.l		%d0,%a2			# fix a2
  14712	rts
  14713ri_a3:
  14714	sub.l		%d0,%a3			# fix a3
  14715	rts
  14716ri_a4:
  14717	sub.l		%d0,%a4			# fix a4
  14718	rts
  14719ri_a5:
  14720	sub.l		%d0,%a5			# fix a5
  14721	rts
  14722ri_a6:
  14723	sub.l		%d0,(%a6)		# fix stacked a6
  14724	rts
  14725# if it's a fmove out instruction, we don't have to fix a7
  14726# because we hadn't changed it yet. if it's an opclass two
  14727# instruction (data moved in) and the exception was in supervisor
  14728# mode, then a7 also wasn't updated. if it was user mode, then
  14729# restore the correct a7 which is in the USP currently.
  14730ri_a7:
  14731	cmpi.b		EXC_VOFF(%a6),&0x30	# move in or out?
  14732	bne.b		ri_a7_done		# out
  14733
  14734	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
  14735	bne.b		ri_a7_done		# supervisor
  14736	movc		%usp,%a0		# restore USP
  14737	sub.l		%d0,%a0
  14738	movc		%a0,%usp
  14739ri_a7_done:
  14740	rts
  14741
  14742# need to invert adjustment value if the <ea> was predec
  14743rest_dec:
  14744	neg.l		%d0
  14745	bra.b		rest_inc