cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

fpsp.S (761577B)


      1~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      2MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
      3M68000 Hi-Performance Microprocessor Division
      4M68060 Software Package
      5Production Release P1.00 -- October 10, 1994
      6
      7M68060 Software Package Copyright © 1993, 1994 Motorola Inc.  All rights reserved.
      8
      9THE SOFTWARE is provided on an "AS IS" basis and without warranty.
     10To the maximum extent permitted by applicable law,
     11MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
     12INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE
     13and any warranty against infringement with regard to the SOFTWARE
     14(INCLUDING ANY MODIFIED VERSIONS THEREOF) and any accompanying written materials.
     15
     16To the maximum extent permitted by applicable law,
     17IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
     18(INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS,
     19BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS)
     20ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE.
     21Motorola assumes no responsibility for the maintenance and support of the SOFTWARE.
     22
     23You are hereby granted a copyright license to use, modify, and distribute the SOFTWARE
     24so long as this entire notice is retained without alteration in any modified and/or
     25redistributed versions, and that such modified versions are clearly identified as such.
     26No licenses are granted by implication, estoppel or otherwise under any patents
     27or trademarks of Motorola, Inc.
     28~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     29#
     30# freal.s:
     31#	This file is appended to the top of the 060FPSP package
     32# and contains the entry points into the package. The user, in
     33# effect, branches to one of the branch table entries located
     34# after _060FPSP_TABLE.
     35#	Also, subroutine stubs exist in this file (_fpsp_done for
     36# example) that are referenced by the FPSP package itself in order
     37# to call a given routine. The stub routine actually performs the
     38# callout. The FPSP code does a "bsr" to the stub routine. This
     39# extra layer of hierarchy adds a slight performance penalty but
     40# it makes the FPSP code easier to read and more maintainable.
     41#
     42
     43set	_off_bsun,	0x00		# callout slot read by _real_bsun
     44set	_off_snan,	0x04		# callout slot read by _real_snan
     45set	_off_operr,	0x08		# callout slot read by _real_operr
     46set	_off_ovfl,	0x0c		# callout slot read by _real_ovfl
     47set	_off_unfl,	0x10		# callout slot read by _real_unfl
     48set	_off_dz,	0x14		# callout slot read by _real_dz
     49set	_off_inex,	0x18		# callout slot read by _real_inex
     50set	_off_fline,	0x1c		# callout slot read by _real_fline
     51set	_off_fpu_dis,	0x20		# callout slot read by _real_fpu_disabled
     52set	_off_trap,	0x24		# callout slot read by _real_trap
     53set	_off_trace,	0x28		# callout slot read by _real_trace
     54set	_off_access,	0x2c		# callout slot read by _real_access
     55set	_off_done,	0x30		# callout slot read by _fpsp_done
     56
     57set	_off_imr,	0x40		# callout slot read by _imem_read
     58set	_off_dmr,	0x44		# callout slot read by _dmem_read
     59set	_off_dmw,	0x48		# callout slot read by _dmem_write
     60set	_off_irw,	0x4c		# callout slot read by _imem_read_word
     61set	_off_irl,	0x50		# callout slot read by _imem_read_long
     62set	_off_drb,	0x54		# callout slot read by _dmem_read_byte
     63set	_off_drw,	0x58		# callout slot read by _dmem_read_word
     64set	_off_drl,	0x5c		# callout slot read by _dmem_read_long
     65set	_off_dwb,	0x60		# callout slot read by _dmem_write_byte
     66set	_off_dww,	0x64		# callout slot read by _dmem_write_word
     67set	_off_dwl,	0x68		# callout slot read by _dmem_write_long
     68
     69_060FPSP_TABLE:
     70
     71###############################################################
     72
     73# Here's the table of ENTRY POINTS for those linking the package.
     74	bra.l		_fpsp_snan		# entry 0: signalling NAN
     75	short		0x0000			# zero word after each branch entry
     76	bra.l		_fpsp_operr		# entry 1: operand error
     77	short		0x0000
     78	bra.l		_fpsp_ovfl		# entry 2: overflow
     79	short		0x0000
     80	bra.l		_fpsp_unfl		# entry 3: underflow
     81	short		0x0000
     82	bra.l		_fpsp_dz		# entry 4: divide by zero
     83	short		0x0000
     84	bra.l		_fpsp_inex		# entry 5: inexact
     85	short		0x0000
     86	bra.l		_fpsp_fline		# entry 6: _fpsp_fline (defined outside this section)
     87	short		0x0000
     88	bra.l		_fpsp_unsupp		# entry 7: _fpsp_unsupp (defined outside this section)
     89	short		0x0000
     90	bra.l		_fpsp_effadd		# entry 8: _fpsp_effadd (defined outside this section)
     91	short		0x0000
     92
     93	space		56			# reserve 56 bytes after the nine entries
     94
     95###############################################################
     96	global		_fpsp_done
     97_fpsp_done:
     98	mov.l		%d0,-(%sp)		# push d0 (needed as scratch below)
     99	mov.l		(_060FPSP_TABLE-0x80+_off_done,%pc),%d0 # d0 = value in the _off_done callout slot
    100	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0) # push (_060FPSP_TABLE-0x80)+d0 as jump target
    101	mov.l		0x4(%sp),%d0		# reload caller's d0
    102	rtd		&0x4			# pop target into pc, then discard saved d0
    103
    104	global		_real_ovfl
    105_real_ovfl:
    106	mov.l		%d0,-(%sp)		# push d0 (needed as scratch below)
    107	mov.l		(_060FPSP_TABLE-0x80+_off_ovfl,%pc),%d0 # d0 = value in the _off_ovfl callout slot
    108	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0) # push (_060FPSP_TABLE-0x80)+d0 as jump target
    109	mov.l		0x4(%sp),%d0		# reload caller's d0
    110	rtd		&0x4			# pop target into pc, then discard saved d0
    111
    112	global		_real_unfl
    113_real_unfl:
    114	mov.l		%d0,-(%sp)		# push d0 (needed as scratch below)
    115	mov.l		(_060FPSP_TABLE-0x80+_off_unfl,%pc),%d0 # d0 = value in the _off_unfl callout slot
    116	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0) # push (_060FPSP_TABLE-0x80)+d0 as jump target
    117	mov.l		0x4(%sp),%d0		# reload caller's d0
    118	rtd		&0x4			# pop target into pc, then discard saved d0
    119
    120	global		_real_inex
    121_real_inex:
    122	mov.l		%d0,-(%sp)		# push d0 (needed as scratch below)
    123	mov.l		(_060FPSP_TABLE-0x80+_off_inex,%pc),%d0 # d0 = value in the _off_inex callout slot
    124	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0) # push (_060FPSP_TABLE-0x80)+d0 as jump target
    125	mov.l		0x4(%sp),%d0		# reload caller's d0
    126	rtd		&0x4			# pop target into pc, then discard saved d0
    127
    128	global		_real_bsun
    129_real_bsun:
    130	mov.l		%d0,-(%sp)		# push d0 (needed as scratch below)
    131	mov.l		(_060FPSP_TABLE-0x80+_off_bsun,%pc),%d0 # d0 = value in the _off_bsun callout slot
    132	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0) # push (_060FPSP_TABLE-0x80)+d0 as jump target
    133	mov.l		0x4(%sp),%d0		# reload caller's d0
    134	rtd		&0x4			# pop target into pc, then discard saved d0
    135
    136	global		_real_operr
    137_real_operr:
    138	mov.l		%d0,-(%sp)		# push d0 (needed as scratch below)
    139	mov.l		(_060FPSP_TABLE-0x80+_off_operr,%pc),%d0 # d0 = value in the _off_operr callout slot
    140	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0) # push (_060FPSP_TABLE-0x80)+d0 as jump target
    141	mov.l		0x4(%sp),%d0		# reload caller's d0
    142	rtd		&0x4			# pop target into pc, then discard saved d0
    143
    144	global		_real_snan
    145_real_snan:
    146	mov.l		%d0,-(%sp)		# push d0 (needed as scratch below)
    147	mov.l		(_060FPSP_TABLE-0x80+_off_snan,%pc),%d0 # d0 = value in the _off_snan callout slot
    148	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0) # push (_060FPSP_TABLE-0x80)+d0 as jump target
    149	mov.l		0x4(%sp),%d0		# reload caller's d0
    150	rtd		&0x4			# pop target into pc, then discard saved d0
    151
    152	global		_real_dz
    153_real_dz:
    154	mov.l		%d0,-(%sp)		# push d0 (needed as scratch below)
    155	mov.l		(_060FPSP_TABLE-0x80+_off_dz,%pc),%d0 # d0 = value in the _off_dz callout slot
    156	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0) # push (_060FPSP_TABLE-0x80)+d0 as jump target
    157	mov.l		0x4(%sp),%d0		# reload caller's d0
    158	rtd		&0x4			# pop target into pc, then discard saved d0
    159
    160	global		_real_fline
    161_real_fline:
    162	mov.l		%d0,-(%sp)		# push d0 (needed as scratch below)
    163	mov.l		(_060FPSP_TABLE-0x80+_off_fline,%pc),%d0 # d0 = value in the _off_fline callout slot
    164	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0) # push (_060FPSP_TABLE-0x80)+d0 as jump target
    165	mov.l		0x4(%sp),%d0		# reload caller's d0
    166	rtd		&0x4			# pop target into pc, then discard saved d0
    167
    168	global		_real_fpu_disabled
    169_real_fpu_disabled:
    170	mov.l		%d0,-(%sp)		# push d0 (needed as scratch below)
    171	mov.l		(_060FPSP_TABLE-0x80+_off_fpu_dis,%pc),%d0 # d0 = value in the _off_fpu_dis callout slot
    172	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0) # push (_060FPSP_TABLE-0x80)+d0 as jump target
    173	mov.l		0x4(%sp),%d0		# reload caller's d0
    174	rtd		&0x4			# pop target into pc, then discard saved d0
    175
    176	global		_real_trap
    177_real_trap:
    178	mov.l		%d0,-(%sp)		# push d0 (needed as scratch below)
    179	mov.l		(_060FPSP_TABLE-0x80+_off_trap,%pc),%d0 # d0 = value in the _off_trap callout slot
    180	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0) # push (_060FPSP_TABLE-0x80)+d0 as jump target
    181	mov.l		0x4(%sp),%d0		# reload caller's d0
    182	rtd		&0x4			# pop target into pc, then discard saved d0
    183
    184	global		_real_trace
    185_real_trace:
    186	mov.l		%d0,-(%sp)		# push d0 (needed as scratch below)
    187	mov.l		(_060FPSP_TABLE-0x80+_off_trace,%pc),%d0 # d0 = value in the _off_trace callout slot
    188	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0) # push (_060FPSP_TABLE-0x80)+d0 as jump target
    189	mov.l		0x4(%sp),%d0		# reload caller's d0
    190	rtd		&0x4			# pop target into pc, then discard saved d0
    191
    192	global		_real_access
    193_real_access:
    194	mov.l		%d0,-(%sp)		# push d0 (needed as scratch below)
    195	mov.l		(_060FPSP_TABLE-0x80+_off_access,%pc),%d0 # d0 = value in the _off_access callout slot
    196	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0) # push (_060FPSP_TABLE-0x80)+d0 as jump target
    197	mov.l		0x4(%sp),%d0		# reload caller's d0
    198	rtd		&0x4			# pop target into pc, then discard saved d0
    199
    200#######################################
    201
    202	global		_imem_read
    203_imem_read:
    204	mov.l		%d0,-(%sp)		# push d0 (needed as scratch below)
    205	mov.l		(_060FPSP_TABLE-0x80+_off_imr,%pc),%d0 # d0 = value in the _off_imr callout slot
    206	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0) # push (_060FPSP_TABLE-0x80)+d0 as jump target
    207	mov.l		0x4(%sp),%d0		# reload caller's d0
    208	rtd		&0x4			# pop target into pc, then discard saved d0
    209
    210	global		_dmem_read
    211_dmem_read:
    212	mov.l		%d0,-(%sp)		# push d0 (needed as scratch below)
    213	mov.l		(_060FPSP_TABLE-0x80+_off_dmr,%pc),%d0 # d0 = value in the _off_dmr callout slot
    214	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0) # push (_060FPSP_TABLE-0x80)+d0 as jump target
    215	mov.l		0x4(%sp),%d0		# reload caller's d0
    216	rtd		&0x4			# pop target into pc, then discard saved d0
    217
    218	global		_dmem_write
    219_dmem_write:
    220	mov.l		%d0,-(%sp)		# push d0 (needed as scratch below)
    221	mov.l		(_060FPSP_TABLE-0x80+_off_dmw,%pc),%d0 # d0 = value in the _off_dmw callout slot
    222	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0) # push (_060FPSP_TABLE-0x80)+d0 as jump target
    223	mov.l		0x4(%sp),%d0		# reload caller's d0
    224	rtd		&0x4			# pop target into pc, then discard saved d0
    225
    226	global		_imem_read_word
    227_imem_read_word:
    228	mov.l		%d0,-(%sp)		# push d0 (needed as scratch below)
    229	mov.l		(_060FPSP_TABLE-0x80+_off_irw,%pc),%d0 # d0 = value in the _off_irw callout slot
    230	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0) # push (_060FPSP_TABLE-0x80)+d0 as jump target
    231	mov.l		0x4(%sp),%d0		# reload caller's d0
    232	rtd		&0x4			# pop target into pc, then discard saved d0
    233
    234	global		_imem_read_long
    235_imem_read_long:
    236	mov.l		%d0,-(%sp)		# push d0 (needed as scratch below)
    237	mov.l		(_060FPSP_TABLE-0x80+_off_irl,%pc),%d0 # d0 = value in the _off_irl callout slot
    238	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0) # push (_060FPSP_TABLE-0x80)+d0 as jump target
    239	mov.l		0x4(%sp),%d0		# reload caller's d0
    240	rtd		&0x4			# pop target into pc, then discard saved d0
    241
    242	global		_dmem_read_byte
    243_dmem_read_byte:
    244	mov.l		%d0,-(%sp)		# push d0 (needed as scratch below)
    245	mov.l		(_060FPSP_TABLE-0x80+_off_drb,%pc),%d0 # d0 = value in the _off_drb callout slot
    246	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0) # push (_060FPSP_TABLE-0x80)+d0 as jump target
    247	mov.l		0x4(%sp),%d0		# reload caller's d0
    248	rtd		&0x4			# pop target into pc, then discard saved d0
    249
    250	global		_dmem_read_word
    251_dmem_read_word:
    252	mov.l		%d0,-(%sp)		# push d0 (needed as scratch below)
    253	mov.l		(_060FPSP_TABLE-0x80+_off_drw,%pc),%d0 # d0 = value in the _off_drw callout slot
    254	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0) # push (_060FPSP_TABLE-0x80)+d0 as jump target
    255	mov.l		0x4(%sp),%d0		# reload caller's d0
    256	rtd		&0x4			# pop target into pc, then discard saved d0
    257
    258	global		_dmem_read_long
    259_dmem_read_long:
    260	mov.l		%d0,-(%sp)		# push d0 (needed as scratch below)
    261	mov.l		(_060FPSP_TABLE-0x80+_off_drl,%pc),%d0 # d0 = value in the _off_drl callout slot
    262	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0) # push (_060FPSP_TABLE-0x80)+d0 as jump target
    263	mov.l		0x4(%sp),%d0		# reload caller's d0
    264	rtd		&0x4			# pop target into pc, then discard saved d0
    265
    266	global		_dmem_write_byte
    267_dmem_write_byte:
    268	mov.l		%d0,-(%sp)		# push d0 (needed as scratch below)
    269	mov.l		(_060FPSP_TABLE-0x80+_off_dwb,%pc),%d0 # d0 = value in the _off_dwb callout slot
    270	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0) # push (_060FPSP_TABLE-0x80)+d0 as jump target
    271	mov.l		0x4(%sp),%d0		# reload caller's d0
    272	rtd		&0x4			# pop target into pc, then discard saved d0
    273
    274	global		_dmem_write_word
    275_dmem_write_word:
    276	mov.l		%d0,-(%sp)		# push d0 (needed as scratch below)
    277	mov.l		(_060FPSP_TABLE-0x80+_off_dww,%pc),%d0 # d0 = value in the _off_dww callout slot
    278	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0) # push (_060FPSP_TABLE-0x80)+d0 as jump target
    279	mov.l		0x4(%sp),%d0		# reload caller's d0
    280	rtd		&0x4			# pop target into pc, then discard saved d0
    281
    282	global		_dmem_write_long
    283_dmem_write_long:
    284	mov.l		%d0,-(%sp)		# push d0 (needed as scratch below)
    285	mov.l		(_060FPSP_TABLE-0x80+_off_dwl,%pc),%d0 # d0 = value in the _off_dwl callout slot
    286	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0) # push (_060FPSP_TABLE-0x80)+d0 as jump target
    287	mov.l		0x4(%sp),%d0		# reload caller's d0
    288	rtd		&0x4			# pop target into pc, then discard saved d0
    289
    290#
    291# This file contains a set of define statements for constants
    292# in order to promote readability within the corecode itself.
    293#
    294
    295set LOCAL_SIZE,		192			# stack frame size (bytes); used by link.w
    296set LV,			-LOCAL_SIZE		# lowest local offset from a6 (locals are LV+n)
    297
    298set EXC_SR,		0x4			# stacked status register
    299set EXC_PC,		0x6			# stacked pc
    300set EXC_VOFF,		0xa			# stacked vector offset
    301set EXC_EA,		0xc			# stacked <ea>
    302
    303set EXC_FP,		0x0			# frame pointer
    304
    305set EXC_AREGS,		-68			# offset of all address regs
    306set EXC_DREGS,		-100			# offset of all data regs
    307set EXC_FPREGS,		-36			# offset of all fp regs
    308
    309set EXC_A7,		EXC_AREGS+(7*4)		# offset of saved a7
    310set OLD_A7,		EXC_AREGS+(6*4)		# extra copy of saved a7; NOTE(review): same slot as EXC_A6
    311set EXC_A6,		EXC_AREGS+(6*4)		# offset of saved a6
    312set EXC_A5,		EXC_AREGS+(5*4)		# a5..a0 slots follow at 4-byte steps
    313set EXC_A4,		EXC_AREGS+(4*4)
    314set EXC_A3,		EXC_AREGS+(3*4)
    315set EXC_A2,		EXC_AREGS+(2*4)
    316set EXC_A1,		EXC_AREGS+(1*4)
    317set EXC_A0,		EXC_AREGS+(0*4)
    318set EXC_D7,		EXC_DREGS+(7*4)		# d7..d0 slots at 4-byte steps from EXC_DREGS
    319set EXC_D6,		EXC_DREGS+(6*4)
    320set EXC_D5,		EXC_DREGS+(5*4)
    321set EXC_D4,		EXC_DREGS+(4*4)
    322set EXC_D3,		EXC_DREGS+(3*4)
    323set EXC_D2,		EXC_DREGS+(2*4)
    324set EXC_D1,		EXC_DREGS+(1*4)
    325set EXC_D0,		EXC_DREGS+(0*4)
    326
    327set EXC_FP0,		EXC_FPREGS+(0*12)	# offset of saved fp0
    328set EXC_FP1,		EXC_FPREGS+(1*12)	# offset of saved fp1
    329set EXC_FP2,		EXC_FPREGS+(2*12)	# offset of saved fp2 (not used)
    330
    331set FP_SCR1,		LV+80			# fp scratch 1
    332set FP_SCR1_EX,		FP_SCR1+0		# field offsets inside the slot: +0
    333set FP_SCR1_SGN,	FP_SCR1+2		# +2
    334set FP_SCR1_HI,		FP_SCR1+4		# +4
    335set FP_SCR1_LO,		FP_SCR1+8		# +8
    336
    337set FP_SCR0,		LV+68			# fp scratch 0 (12 bytes below FP_SCR1)
    338set FP_SCR0_EX,		FP_SCR0+0
    339set FP_SCR0_SGN,	FP_SCR0+2
    340set FP_SCR0_HI,		FP_SCR0+4
    341set FP_SCR0_LO,		FP_SCR0+8
    342
    343set FP_DST,		LV+56			# fp destination operand
    344set FP_DST_EX,		FP_DST+0
    345set FP_DST_SGN,		FP_DST+2
    346set FP_DST_HI,		FP_DST+4
    347set FP_DST_LO,		FP_DST+8
    348
    349set FP_SRC,		LV+44			# fp source operand; also fsave/frestore frame address
    350set FP_SRC_EX,		FP_SRC+0
    351set FP_SRC_SGN,		FP_SRC+2
    352set FP_SRC_HI,		FP_SRC+4
    353set FP_SRC_LO,		FP_SRC+8
    354
    355set USER_FPIAR,		LV+40			# FP instr address register
    356
    357set USER_FPSR,		LV+36			# FP status register
    358set FPSR_CC,		USER_FPSR+0		# FPSR condition codes
    359set FPSR_QBYTE,		USER_FPSR+1		# FPSR quotient byte
    360set FPSR_EXCEPT,	USER_FPSR+2		# FPSR exception status byte
    361set FPSR_AEXCEPT,	USER_FPSR+3		# FPSR accrued exception byte
    362
    363set USER_FPCR,		LV+32			# FP control register
    364set FPCR_ENABLE,	USER_FPCR+2		# FPCR exception enable
    365set FPCR_MODE,		USER_FPCR+3		# FPCR rounding mode control
    366
    367set L_SCR3,		LV+28			# integer scratch 3
    368set L_SCR2,		LV+24			# integer scratch 2
    369set L_SCR1,		LV+20			# integer scratch 1
    370
    371set STORE_FLG,		LV+19			# flag: operand store (ie. not fcmp/ftst)
    372
    373set EXC_TEMP2,		LV+24			# temporary space; NOTE(review): same offset as L_SCR2
    374set EXC_TEMP,		LV+16			# temporary space
    375
    376set DTAG,		LV+15			# destination operand type
    377set STAG,		LV+14			# source operand type
    378
    379set SPCOND_FLG,		LV+10			# flag: special case (see below)
    380
    381set EXC_CC,		LV+8			# saved condition codes
    382set EXC_EXTWPTR,	LV+4			# saved current PC (active)
    383set EXC_EXTWORD,	LV+2			# saved extension word
    384set EXC_CMDREG,		LV+2			# saved extension word (alias of EXC_EXTWORD)
    385set EXC_OPWORD,		LV+0			# saved operation word
    386
    387################################
    388
    389# Helpful macros
    390
    391set FTEMP,		0			# offsets within an
    392set FTEMP_EX,		0			# extended precision
    393set FTEMP_SGN,		2			# value saved in memory.
    394set FTEMP_HI,		4
    395set FTEMP_LO,		8
    396set FTEMP_GRS,		12			# presumably guard/round/sticky; confirm at use sites
    397
    398set LOCAL,		0			# offsets within an
    399set LOCAL_EX,		0			# extended precision
    400set LOCAL_SGN,		2			# value saved in memory.
    401set LOCAL_HI,		4
    402set LOCAL_LO,		8
    403set LOCAL_GRS,		12			# same layout as FTEMP_GRS
    404
    405set DST,		0			# offsets within an
    406set DST_EX,		0			# extended precision
    407set DST_HI,		4			# value saved in memory.
    408set DST_LO,		8
    409
    410set SRC,		0			# offsets within an
    411set SRC_EX,		0			# extended precision
    412set SRC_HI,		4			# value saved in memory.
    413set SRC_LO,		8
    414
    415set SGL_LO,		0x3f81			# min sgl prec exponent (EXT_BIAS-SGL_BIAS+1)
    416set SGL_HI,		0x407e			# max sgl prec exponent (EXT_BIAS+SGL_BIAS)
    417set DBL_LO,		0x3c01			# min dbl prec exponent (EXT_BIAS-DBL_BIAS+1)
    418set DBL_HI,		0x43fe			# max dbl prec exponent (EXT_BIAS+DBL_BIAS)
    419set EXT_LO,		0x0			# min ext prec exponent
    420set EXT_HI,		0x7ffe			# max ext prec exponent
    421
    422set EXT_BIAS,		0x3fff			# extended precision bias
    423set SGL_BIAS,		0x007f			# single precision bias
    424set DBL_BIAS,		0x03ff			# double precision bias
    425
    426set NORM,		0x00			# operand type for STAG/DTAG
    427set ZERO,		0x01			# operand type for STAG/DTAG
    428set INF,		0x02			# operand type for STAG/DTAG
    429set QNAN,		0x03			# operand type for STAG/DTAG
    430set DENORM,		0x04			# operand type for STAG/DTAG
    431set SNAN,		0x05			# operand type for STAG/DTAG
    432set UNNORM,		0x06			# operand type for STAG/DTAG
    433
    434##################
    435# FPSR/FPCR bits #
    436##################
    437set neg_bit,		0x3			# negative result
    438set z_bit,		0x2			# zero result
    439set inf_bit,		0x1			# infinite result
    440set nan_bit,		0x0			# NAN result
    441
    442set q_sn_bit,		0x7			# sign bit of quotient byte
    443
    444set bsun_bit,		7			# branch on unordered
    445set snan_bit,		6			# signalling NAN
    446set operr_bit,		5			# operand error
    447set ovfl_bit,		4			# overflow
    448set unfl_bit,		3			# underflow
    449set dz_bit,		2			# divide by zero
    450set inex2_bit,		1			# inexact result 2
    451set inex1_bit,		0			# inexact result 1
    452
    453set aiop_bit,		7			# accrued illegal operation bit
    454set aovfl_bit,		6			# accrued overflow bit
    455set aunfl_bit,		5			# accrued underflow bit
    456set adz_bit,		4			# accrued dz bit
    457set ainex_bit,		3			# accrued inexact bit
    458
    459#############################
    460# FPSR individual bit masks #
    461#############################
    462set neg_mask,		0x08000000		# negative bit mask (longword)
    463set inf_mask,		0x02000000		# infinity bit mask (longword)
    464set z_mask,		0x04000000		# zero bit mask (longword)
    465set nan_mask,		0x01000000		# nan bit mask (longword)
    466
    467set neg_bmask,		0x08			# negative bit mask (byte)
    468set inf_bmask,		0x02			# infinity bit mask (byte)
    469set z_bmask,		0x04			# zero bit mask (byte)
    470set nan_bmask,		0x01			# nan bit mask (byte)
    471
    472set bsun_mask,		0x00008000		# bsun exception mask
    473set snan_mask,		0x00004000		# snan exception mask
    474set operr_mask,		0x00002000		# operr exception mask
    475set ovfl_mask,		0x00001000		# overflow exception mask
    476set unfl_mask,		0x00000800		# underflow exception mask
    477set dz_mask,		0x00000400		# dz exception mask
    478set inex2_mask,		0x00000200		# inex2 exception mask
    479set inex1_mask,		0x00000100		# inex1 exception mask
    480
    481set aiop_mask,		0x00000080		# accrued illegal operation
    482set aovfl_mask,		0x00000040		# accrued overflow
    483set aunfl_mask,		0x00000020		# accrued underflow
    484set adz_mask,		0x00000010		# accrued divide by zero
    485set ainex_mask,		0x00000008		# accrued inexact
    486
    487######################################
    488# FPSR combinations used in the FPSP #
    489######################################
    490set dzinf_mask,		inf_mask+dz_mask+adz_mask	# INF cc + dz + accrued dz
    491set opnan_mask,		nan_mask+operr_mask+aiop_mask	# NAN cc + operr + accrued iop
    492set nzi_mask,		0x01ffffff		# clears N, Z, and I
    493set unfinx_mask,	unfl_mask+inex2_mask+aunfl_mask+ainex_mask	# unfl + inex2 + accrued unfl/inex
    494set unf2inx_mask,	unfl_mask+inex2_mask+ainex_mask	# unfl + inex2 + accrued inex (no aunfl)
    495set ovfinx_mask,	ovfl_mask+inex2_mask+aovfl_mask+ainex_mask	# ovfl + inex2 + accrued ovfl/inex
    496set inx1a_mask,		inex1_mask+ainex_mask	# inex1 + accrued inex
    497set inx2a_mask,		inex2_mask+ainex_mask	# inex2 + accrued inex
    498set snaniop_mask,	nan_mask+snan_mask+aiop_mask	# NAN cc + snan + accrued iop
    499set snaniop2_mask,	snan_mask+aiop_mask	# snan + accrued iop (no NAN cc)
    500set naniop_mask,	nan_mask+aiop_mask	# NAN cc + accrued iop
    501set neginf_mask,	neg_mask+inf_mask	# N + I cc bits
    502set infaiop_mask,	inf_mask+aiop_mask	# INF cc + accrued iop
    503set negz_mask,		neg_mask+z_mask	# N + Z cc bits
    504set opaop_mask,		operr_mask+aiop_mask	# operr + accrued iop
    505set unfl_inx_mask,	unfl_mask+aunfl_mask+ainex_mask	# unfl + accrued unfl/inex
    506set ovfl_inx_mask,	ovfl_mask+aovfl_mask+ainex_mask	# ovfl + accrued ovfl/inex
    507
    508#########
    509# misc. #
    510#########
    511set rnd_stky_bit,	29			# stky bit pos in longword
    512
    513set sign_bit,		0x7			# sign bit
    514set signan_bit,		0x6			# signalling nan bit
    515
    516set sgl_thresh,		0x3f81			# minimum sgl exponent (same value as SGL_LO)
    517set dbl_thresh,		0x3c01			# minimum dbl exponent (same value as DBL_LO)
    518
    519set x_mode,		0x0			# extended precision
    520set s_mode,		0x4			# single precision
    521set d_mode,		0x8			# double precision
    522
    523set rn_mode,		0x0			# round-to-nearest
    524set rz_mode,		0x1			# round-to-zero
    525set rm_mode,		0x2			# round-to-minus-infinity
    526set rp_mode,		0x3			# round-to-plus-infinity
    527
    528set mantissalen,	64			# length of mantissa in bits
    529
    530set BYTE,		1			# len(byte) == 1 byte
    531set WORD,		2			# len(word) == 2 bytes
    532set LONG,		4			# len(longword) == 4 bytes
    533
    534set BSUN_VEC,		0xc0			# bsun    vector offset
    535set INEX_VEC,		0xc4			# inexact vector offset
    536set DZ_VEC,		0xc8			# dz      vector offset
    537set UNFL_VEC,		0xcc			# unfl    vector offset
    538set OPERR_VEC,		0xd0			# operr   vector offset
    539set OVFL_VEC,		0xd4			# ovfl    vector offset
    540set SNAN_VEC,		0xd8			# snan    vector offset
    541
    542###########################
    543# SPecial CONDition FLaGs #
    544###########################
    545set ftrapcc_flg,	0x01			# flag bit: ftrapcc exception
    546set fbsun_flg,		0x02			# flag bit: bsun exception
    547set mia7_flg,		0x04			# flag bit: (a7)+ <ea>
    548set mda7_flg,		0x08			# flag bit: -(a7) <ea>
    549set fmovm_flg,		0x40			# flag bit: fmovm instruction
    550set immed_flg,		0x80			# flag bit: &<data> <ea>
    551
    552set ftrapcc_bit,	0x0			# bit number matching ftrapcc_flg
    553set fbsun_bit,		0x1			# bit number matching fbsun_flg
    554set mia7_bit,		0x2			# bit number matching mia7_flg
    555set mda7_bit,		0x3			# bit number matching mda7_flg
    556set immed_bit,		0x7			# bit number matching immed_flg
    557
    558##################################
    559# TRANSCENDENTAL "LAST-OP" FLAGS #
    560##################################
    561set FMUL_OP,		0x0			# fmul instr performed last
    562set FDIV_OP,		0x1			# fdiv instr performed last
    563set FADD_OP,		0x2			# fadd instr performed last
    564set FMOV_OP,		0x3			# fmov instr performed last
    565
    566#############
    567# CONSTANTS #
    568#############
    569T1:	long		0x40C62D38,0xD3D64634	# 16381 LOG2 LEAD
    570T2:	long		0x3D6F90AE,0xB1E75CC7	# 16381 LOG2 TRAIL
    571
    572PI:	long		0x40000000,0xC90FDAA2,0x2168C235,0x00000000	# pi: three value longs plus a zero long
    573PIBY2:	long		0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000	# pi/2: same mantissa as PI, exponent one lower
    574
    575TWOBYPI:
    576	long		0x3FE45F30,0x6DC9C883	# 2/pi as two longwords (presumably double precision)
    577
    578#########################################################################
    579# XDEF ****************************************************************	#
    580#	_fpsp_ovfl(): 060FPSP entry point for FP Overflow exception.	#
    581#									#
    582#	This handler should be the first code executed upon taking the	#
    583#	FP Overflow exception in an operating system.			#
    584#									#
    585# XREF ****************************************************************	#
    586#	_imem_read_long() - read instruction longword			#
    587#	fix_skewed_ops() - adjust src operand in fsave frame		#
    588#	set_tag_x() - determine optype of src/dst operands		#
    589#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
    590#	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
    591#	load_fpn2() - load dst operand from FP regfile			#
    592#	fout() - emulate an opclass 3 instruction			#
    593#	tbl_unsupp - add of table of emulation routines for opclass 0,2	#
    594#	_fpsp_done() - "callout" for 060FPSP exit (all work done!)	#
    595#	_real_ovfl() - "callout" for Overflow exception enabled code	#
    596#	_real_inex() - "callout" for Inexact exception enabled code	#
    597#	_real_trace() - "callout" for Trace exception code		#
    598#									#
    599# INPUT ***************************************************************	#
    600#	- The system stack contains the FP Ovfl exception stack frame	#
    601#	- The fsave frame contains the source operand			#
    602#									#
    603# OUTPUT **************************************************************	#
    604#	Overflow Exception enabled:					#
    605#	- The system stack is unchanged					#
    606#	- The fsave frame contains the adjusted src op for opclass 0,2	#
    607#	Overflow Exception disabled:					#
    608#	- The system stack is unchanged					#
    609#	- The "exception present" flag in the fsave frame is cleared	#
    610#									#
    611# ALGORITHM ***********************************************************	#
    612#	On the 060, if an FP overflow is present as the result of any	#
    613# instruction, the 060 will take an overflow exception whether the	#
    614# exception is enabled or disabled in the FPCR. For the disabled case,	#
    615# This handler emulates the instruction to determine what the correct	#
    616# default result should be for the operation. This default result is	#
    617# then stored in either the FP regfile, data regfile, or memory.	#
    618# Finally, the handler exits through the "callout" _fpsp_done()		#
    619# denoting that no exceptional conditions exist within the machine.	#
    620#	If the exception is enabled, then this handler must create the	#
    621# exceptional operand and place it in the fsave state frame, and store	#
    622# the default result (only if the instruction is opclass 3). For	#
    623# exceptions enabled, this handler must exit through the "callout"	#
    624# _real_ovfl() so that the operating system enabled overflow handler	#
    625# can handle this case.							#
    626#	Two other conditions exist. First, if overflow was disabled	#
    627# but the inexact exception was enabled, this handler must exit		#
    628# through the "callout" _real_inex() regardless of whether the result	#
    629# was inexact.								#
    630#	Also, in the case of an opclass three instruction where		#
    631# overflow was disabled and the trace exception was enabled, this	#
    632# handler must exit through the "callout" _real_trace().		#
    633#									#
    634#########################################################################
    635
    636	global		_fpsp_ovfl
    637_fpsp_ovfl:
    638
    639#$#	sub.l		&24,%sp			# make room for src/dst
    640
    641	link.w		%a6,&-LOCAL_SIZE	# init stack frame
    642
    643	fsave		FP_SRC(%a6)		# grab the "busy" frame
    644
    645	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
    646	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
    647	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
    648
    649# the FPIAR holds the "current PC" of the faulting instruction
    650	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6) # start at the faulting instr addr
    651	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
    652	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
    653	bsr.l		_imem_read_long		# fetch the instruction words
    654	mov.l		%d0,EXC_OPWORD(%a6)	# store longword: fills EXC_OPWORD and EXC_CMDREG
    655
    656##############################################################################
    657
    658	btst		&0x5,EXC_CMDREG(%a6)	# is instr an fmove out?
    659	bne.w		fovfl_out		# yes
    660
    661
    662	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
    663	bsr.l		fix_skewed_ops		# fix src op
    664
    665# since, I believe, only NORMs and DENORMs can come through here,
    666# maybe we can avoid the subroutine call.
    667	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
    668	bsr.l		set_tag_x		# tag the operand type
    669	mov.b		%d0,STAG(%a6)		# maybe NORM,DENORM
    670
    671# bit five of the fp extension word separates the monadic and dyadic operations
    672# that can pass through fpsp_ovfl(). remember that fcmp, ftst, and fsincos
    673# will never take this exception.
    674	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
    675	beq.b		fovfl_extract		# monadic
    676
    677	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
    678	bsr.l		load_fpn2		# load dst into FP_DST
    679
    680	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
    681	bsr.l		set_tag_x		# tag the operand type
    682	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
    683	bne.b		fovfl_op2_done		# no
    684	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
    685fovfl_op2_done:
    686	mov.b		%d0,DTAG(%a6)		# save dst optype tag
    687
    688fovfl_extract:
    689
    690#$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
    691#$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
    692#$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
    693#$#	mov.l		FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
    694#$#	mov.l		FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
    695#$#	mov.l		FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)
    696
    697	clr.l		%d0			# clear d0 before loading the mode byte
    698	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode
    699
    700	mov.b		1+EXC_CMDREG(%a6),%d1	# d1 = low byte of extension word
    701	andi.w		&0x007f,%d1		# extract extension
    702
    703	andi.l		&0x00ff01ff,USER_FPSR(%a6) # clear cc + exception bits (inex1 kept); keep quotient/accrued
    704
    705	fmov.l		&0x0,%fpcr		# zero current control regs
    706	fmov.l		&0x0,%fpsr
    707
    708	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
    709	lea		FP_DST(%a6),%a1		# pass: ptr to dst op
    710
    711# maybe we can make these entry points ONLY the OVFL entry points of each routine.
    712	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
    713	jsr		(tbl_unsupp.l,%pc,%d1.l*1) # call routine at tbl_unsupp + fetched value
    714
    715# the operation has been emulated. the result is in fp0.
    716# the EXOP, if an exception occurred, is in fp1.
    717# we must save the default result regardless of whether
    718# traps are enabled or disabled.
    719	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # same bit field as the dst reg extract above
    720	bsr.l		store_fpreg		# store result to FP regfile
    721
    722# the exceptional possibilities we have left ourselves with are ONLY overflow
    723# and inexact. and, the inexact is such that overflow occurred and was disabled
    724# but inexact was enabled.
    725	btst		&ovfl_bit,FPCR_ENABLE(%a6) # is overflow enabled?
    726	bne.b		fovfl_ovfl_on		# yes
    727
    728	btst		&inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
    729	bne.b		fovfl_inex_on		# yes
    730
    731	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
    732	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
    733	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
    734
    735	unlk		%a6			# tear down stack frame
    736#$#	add.l		&24,%sp
    737	bra.l		_fpsp_done		# exit through the _fpsp_done callout
    738
    739# overflow is enabled AND overflow, of course, occurred. so, we have the EXOP
    740# in fp1. now, simply jump to _real_ovfl()!
    741fovfl_ovfl_on:
    742	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP (fp1) to stack
    743
    744	mov.w		&0xe005,2+FP_SRC(%a6)	# save exc status
    745
    746	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
    747	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
    748	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
    749
    750	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!
    751
    752	unlk		%a6			# tear down stack frame
    753
    754	bra.l		_real_ovfl		# exit through the overflow-enabled callout
    755
    756# overflow occurred but is disabled. meanwhile, inexact is enabled. Therefore,
    757# we must jump to real_inex().
    758fovfl_inex_on:
    759
    760	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP (fp1) to stack
    761
    762	mov.b		&0xc4,1+EXC_VOFF(%a6)	# vector offset = 0xc4 (same value as INEX_VEC)
    763	mov.w		&0xe001,2+FP_SRC(%a6)	# save exc status
    764
    765	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
    766	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
    767	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
    768
    769	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!
    770
    771	unlk		%a6			# tear down stack frame
    772
    773	bra.l		_real_inex		# exit through the inexact-enabled callout
    774
    775########################################################################
    776fovfl_out:
    777
    778
    779#$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
    780#$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
    781#$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
    782
    783# the src operand is definitely a NORM(!), so tag it as such
    784	mov.b		&NORM,STAG(%a6)		# set src optype tag
    785
    786	clr.l		%d0			# clear d0 before loading the mode byte
    787	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode
    788
    789	and.l		&0xffff00ff,USER_FPSR(%a6) # zero exception status byte; keep cc/quotient/accrued
    790
    791	fmov.l		&0x0,%fpcr		# zero current control regs
    792	fmov.l		&0x0,%fpsr
    793
    794	lea		FP_SRC(%a6),%a0		# pass ptr to src operand
    795
    796	bsr.l		fout			# emulate the fmove out (opclass 3) instruction
    797
    798	btst		&ovfl_bit,FPCR_ENABLE(%a6) # is overflow enabled?
    799	bne.w		fovfl_ovfl_on		# yes
    800
    801	btst		&inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
    802	bne.w		fovfl_inex_on		# yes
    803
    804	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
    805	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
    806	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
    807
    808	unlk		%a6			# tear down stack frame
    809#$#	add.l		&24,%sp
    810
    811	btst		&0x7,(%sp)		# is trace on?
    812	beq.l		_fpsp_done		# no
    813
    814	fmov.l		%fpiar,0x8(%sp)		# "Current PC" is in FPIAR
    815	mov.w		&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x024
    816	bra.l		_real_trace		# exit through the trace callout
    817
    818#########################################################################
    819# XDEF ****************************************************************	#
    820#	_fpsp_unfl(): 060FPSP entry point for FP Underflow exception.	#
    821#									#
    822#	This handler should be the first code executed upon taking the	#
    823#	FP Underflow exception in an operating system.			#
    824#									#
    825# XREF ****************************************************************	#
    826#	_imem_read_long() - read instruction longword			#
    827#	fix_skewed_ops() - adjust src operand in fsave frame		#
    828#	set_tag_x() - determine optype of src/dst operands		#
    829#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
    830#	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
    831#	load_fpn2() - load dst operand from FP regfile			#
    832#	fout() - emulate an opclass 3 instruction			#
    833#	tbl_unsupp - add of table of emulation routines for opclass 0,2	#
    834#	_fpsp_done() - "callout" for 060FPSP exit (all work done!)	#
    835#	_real_unfl() - "callout" for Underflow exception enabled code	#
    836#	_real_inex() - "callout" for Inexact exception enabled code	#
    837#	_real_trace() - "callout" for Trace exception code		#
    838#									#
    839# INPUT ***************************************************************	#
    840#	- The system stack contains the FP Unfl exception stack frame	#
    841#	- The fsave frame contains the source operand			#
    842#									#
    843# OUTPUT **************************************************************	#
    844#	Underflow Exception enabled:					#
    845#	- The system stack is unchanged					#
    846#	- The fsave frame contains the adjusted src op for opclass 0,2	#
    847#	Underflow Exception disabled:					#
    848#	- The system stack is unchanged					#
    849#	- The "exception present" flag in the fsave frame is cleared	#
    850#									#
    851# ALGORITHM ***********************************************************	#
    852#	On the 060, if an FP underflow is present as the result of any	#
    853# instruction, the 060 will take an underflow exception whether the	#
    854# exception is enabled or disabled in the FPCR. For the disabled case,	#
    855# This handler emulates the instruction to determine what the correct	#
    856# default result should be for the operation. This default result is	#
    857# then stored in either the FP regfile, data regfile, or memory.	#
    858# Finally, the handler exits through the "callout" _fpsp_done()		#
    859# denoting that no exceptional conditions exist within the machine.	#
    860#	If the exception is enabled, then this handler must create the	#
    861# exceptional operand and place it in the fsave state frame, and store	#
    862# the default result (only if the instruction is opclass 3). For	#
    863# exceptions enabled, this handler must exit through the "callout"	#
    864# _real_unfl() so that the operating system enabled underflow handler	#
    865# can handle this case.							#
    866#	Two other conditions exist. First, if underflow was disabled	#
    867# but the inexact exception was enabled and the result was inexact,	#
    868# this handler must exit through the "callout" _real_inex().		#
    869#									#
    870#	Also, in the case of an opclass three instruction where		#
    871# underflow was disabled and the trace exception was enabled, this	#
    872# handler must exit through the "callout" _real_trace().		#
    873#									#
    874#########################################################################
    875
    876	global		_fpsp_unfl
    877_fpsp_unfl:
    878
    879#$#	sub.l		&24,%sp			# make room for src/dst
    880
    881	link.w		%a6,&-LOCAL_SIZE	# init stack frame
    882
    883	fsave		FP_SRC(%a6)		# grab the "busy" frame
    884
    885	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
    886	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
    887	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
    888
    889# the FPIAR holds the "current PC" of the faulting instruction
    890	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
    891	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
    892	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
    893	bsr.l		_imem_read_long		# fetch the instruction words
    894	mov.l		%d0,EXC_OPWORD(%a6)	# store OPWORD and EXTWORD
    895
    896##############################################################################
    897
    898	btst		&0x5,EXC_CMDREG(%a6)	# is instr an fmove out?
    899	bne.w		funfl_out		# yes
    900
    901
    902	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
    903	bsr.l		fix_skewed_ops		# fix src op
    904
    905	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
    906	bsr.l		set_tag_x		# tag the operand type
    907	mov.b		%d0,STAG(%a6)		# save src optype tag (maybe NORM,DENORM)
    908
    909# bit five of the fp ext word separates the monadic and dyadic operations
    910# that can pass through fpsp_unfl(). remember that fcmp, and ftst
    911# will never take this exception.
    912	btst		&0x5,1+EXC_CMDREG(%a6)	# is op monadic or dyadic?
    913	beq.b		funfl_extract		# monadic
    914
    915# now, what's left that's not dyadic is fsincos. we can distinguish it
    916# from all dyadics by the '0110xxx pattern
    917	btst		&0x4,1+EXC_CMDREG(%a6)	# is op an fsincos?
    918	bne.b		funfl_extract		# yes
    919
    920	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
    921	bsr.l		load_fpn2		# load dst into FP_DST
    922
    923	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
    924	bsr.l		set_tag_x		# tag the operand type
    925	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
    926	bne.b		funfl_op2_done		# no
    927	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
    928funfl_op2_done:
    929	mov.b		%d0,DTAG(%a6)		# save dst optype tag
    930
    931funfl_extract:
    932
    933#$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
    934#$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
    935#$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
    936#$#	mov.l		FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
    937#$#	mov.l		FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
    938#$#	mov.l		FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)
    939
    940	clr.l		%d0
    941	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode
    942
    943	mov.b		1+EXC_CMDREG(%a6),%d1
    944	andi.w		&0x007f,%d1		# extract extension
    945
    946	andi.l		&0x00ff01ff,USER_FPSR(%a6) # clear bits 31-24 and 15-9 of saved FPSR
    947
    948	fmov.l		&0x0,%fpcr		# zero current control regs
    949	fmov.l		&0x0,%fpsr
    950
    951	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
    952	lea		FP_DST(%a6),%a1		# pass: ptr to dst op
    953
    954# maybe we can make these entry points ONLY the OVFL entry points of each routine.
    955	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
    956	jsr		(tbl_unsupp.l,%pc,%d1.l*1) # call the emulation routine
    957
    958	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # extract dst reg field
    959	bsr.l		store_fpreg		# store the result
    960
    961# The `060 FPU multiplier hardware is such that if the result of a
    962# multiply operation is the smallest possible normalized number
    963# (0x00000000_80000000_00000000), then the machine will take an
    964# underflow exception. Since this is incorrect, we need to check
    965# if our emulation, after re-doing the operation, decided that
    966# no underflow was called for. We do these checks only in
    967# funfl_{unfl,inex}_on() because w/ both exceptions disabled, this
    968# special case will simply exit gracefully with the correct result.
    969
    970# the exceptional possibilities we have left ourselves with are ONLY underflow
    971# and inexact. and, the inexact is such that underflow occurred and was disabled
    972# but inexact was enabled.
    973	btst		&unfl_bit,FPCR_ENABLE(%a6)
    974	bne.b		funfl_unfl_on		# underflow is enabled
    975
    976funfl_chkinex:
    977	btst		&inex2_bit,FPCR_ENABLE(%a6)
    978	bne.b		funfl_inex_on		# inexact is enabled
    979
    980funfl_exit:
    981	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
    982	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
    983	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
    984
    985	unlk		%a6
    986#$#	add.l		&24,%sp
    987	bra.l		_fpsp_done		# all work done
    988
    989# underflow is enabled. if underflow really occurred (see the check below),
    990# the EXOP is in fp1 (don't forget to save fp0). what to do now?
    991# well, we simply have to go to _real_unfl()!
    992funfl_unfl_on:
    993
    994# The `060 FPU multiplier hardware is such that if the result of a
    995# multiply operation is the smallest possible normalized number
    996# (0x00000000_80000000_00000000), then the machine will take an
    997# underflow exception. Since this is incorrect, we check here to see
    998# if our emulation, after re-doing the operation, decided that
    999# no underflow was called for.
   1000	btst		&unfl_bit,FPSR_EXCEPT(%a6)
   1001	beq.w		funfl_chkinex		# no underflow after all; check inexact
   1002
   1003funfl_unfl_on2:
   1004	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP (fp1) to stack
   1005
   1006	mov.w		&0xe003,2+FP_SRC(%a6)	# save exc status
   1007
   1008	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
   1009	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   1010	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   1011
   1012	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!
   1013
   1014	unlk		%a6
   1015
   1016	bra.l		_real_unfl		# exit through the underflow callout
   1017
   1018# underflow occurred but is disabled. meanwhile, inexact is enabled. Therefore,
   1019# we must jump to _real_inex() (if inexact really occurred; see the check below).
   1020funfl_inex_on:
   1021
   1022# The `060 FPU multiplier hardware is such that if the result of a
   1023# multiply operation is the smallest possible normalized number
   1024# (0x00000000_80000000_00000000), then the machine will take an
   1025# underflow exception.
   1026# But, whether bogus or not, if inexact is enabled AND it occurred,
   1027# then we have to branch to real_inex.
   1028
   1029	btst		&inex2_bit,FPSR_EXCEPT(%a6)
   1030	beq.w		funfl_exit		# inexact did not occur; normal exit
   1031
   1032funfl_inex_on2:
   1033
   1034	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP to stack
   1035
   1036	mov.b		&0xc4,1+EXC_VOFF(%a6)	# vector offset = 0xc4
   1037	mov.w		&0xe001,2+FP_SRC(%a6)	# save exc status
   1038
   1039	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
   1040	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   1041	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   1042
   1043	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!
   1044
   1045	unlk		%a6
   1046
   1047	bra.l		_real_inex		# exit through the inexact callout
   1048
   1049#######################################################################
   1050funfl_out:
   1051
   1052
   1053#$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
   1054#$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
   1055#$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
   1056
   1057# the src operand is definitely a NORM(!), so tag it as such
   1058	mov.b		&NORM,STAG(%a6)		# set src optype tag
   1059
   1060	clr.l		%d0
   1061	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode
   1062
   1063	and.l		&0xffff00ff,USER_FPSR(%a6) # clear bits 15-8 (exception field) only
   1064
   1065	fmov.l		&0x0,%fpcr		# zero current control regs
   1066	fmov.l		&0x0,%fpsr
   1067
   1068	lea		FP_SRC(%a6),%a0		# pass ptr to src operand
   1069
   1070	bsr.l		fout			# call fmove out routine
   1071
   1072	btst		&unfl_bit,FPCR_ENABLE(%a6)
   1073	bne.w		funfl_unfl_on2		# underflow is enabled
   1074
   1075	btst		&inex2_bit,FPCR_ENABLE(%a6)
   1076	bne.w		funfl_inex_on2		# inexact is enabled
   1077
   1078	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
   1079	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   1080	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   1081
   1082	unlk		%a6
   1083#$#	add.l		&24,%sp
   1084
   1085	btst		&0x7,(%sp)		# is trace on?
   1086	beq.l		_fpsp_done		# no
   1087
   1088	fmov.l		%fpiar,0x8(%sp)		# "Current PC" is in FPIAR
   1089	mov.w		&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x024
   1090	bra.l		_real_trace
   1091
   1092#########################################################################
   1093# XDEF ****************************************************************	#
   1094#	_fpsp_unsupp(): 060FPSP entry point for FP "Unimplemented	#
   1095#		        Data Type" exception.				#
   1096#									#
   1097#	This handler should be the first code executed upon taking the	#
   1098#	FP Unimplemented Data Type exception in an operating system.	#
   1099#									#
   1100# XREF ****************************************************************	#
   1101#	_imem_read_{word,long}() - read instruction word/longword	#
   1102#	fix_skewed_ops() - adjust src operand in fsave frame		#
   1103#	set_tag_x() - determine optype of src/dst operands		#
   1104#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
   1105#	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
   1106#	load_fpn2() - load dst operand from FP regfile			#
   1107#	load_fpn1() - load src operand from FP regfile			#
   1108#	fout() - emulate an opclass 3 instruction			#
   1109#	tbl_unsupp - add of table of emulation routines for opclass 0,2	#
   1110#	_real_inex() - "callout" to operating system inexact handler	#
   1111#	_fpsp_done() - "callout" for exit; work all done		#
   1112#	_real_trace() - "callout" for Trace enabled exception		#
   1113#	funimp_skew() - adjust fsave src ops to "incorrect" value	#
   1114#	_real_snan() - "callout" for SNAN exception			#
   1115#	_real_operr() - "callout" for OPERR exception			#
   1116#	_real_ovfl() - "callout" for OVFL exception			#
   1117#	_real_unfl() - "callout" for UNFL exception			#
   1118#	get_packed() - fetch packed operand from memory			#
   1119#									#
   1120# INPUT ***************************************************************	#
   1121#	- The system stack contains the "Unimp Data Type" stk frame	#
   1122#	- The fsave frame contains the src op (for UNNORM/DENORM)	#
   1123#									#
   1124# OUTPUT **************************************************************	#
   1125#	If Inexact exception (opclass 3):				#
   1126#	- The system stack is changed to an Inexact exception stk frame	#
   1127#	If SNAN exception (opclass 3):					#
   1128#	- The system stack is changed to an SNAN exception stk frame	#
   1129#	If OPERR exception (opclass 3):					#
   1130#	- The system stack is changed to an OPERR exception stk frame	#
   1131#	If OVFL exception (opclass 3):					#
   1132#	- The system stack is changed to an OVFL exception stk frame	#
   1133#	If UNFL exception (opclass 3):					#
   1134#	- The system stack is changed to an UNFL exception stack frame	#
   1135#	If Trace exception enabled:					#
   1136#	- The system stack is changed to a Trace exception stack frame	#
   1137#	Else: (normal case)						#
   1138#	- Correct result has been stored as appropriate			#
   1139#									#
   1140# ALGORITHM ***********************************************************	#
   1141#	Two main instruction types can enter here: (1) DENORM or UNNORM	#
   1142# unimplemented data types. These can be either opclass 0,2 or 3	#
   1143# instructions, and (2) PACKED unimplemented data format instructions	#
   1144# also of opclasses 0,2, or 3.						#
   1145#	For UNNORM/DENORM opclass 0 and 2, the handler fetches the src	#
   1146# operand from the fsave state frame and the dst operand (if dyadic)	#
   1147# from the FP register file. The instruction is then emulated by	#
   1148# choosing an emulation routine from a table of routines indexed by	#
   1149# instruction type. Once the instruction has been emulated and result	#
   1150# saved, then we check to see if any enabled exceptions resulted from	#
   1151# instruction emulation. If none, then we exit through the "callout"	#
   1152# _fpsp_done(). If there is an enabled FP exception, then we insert	#
   1153# this exception into the FPU in the fsave state frame and then exit	#
   1154# through _fpsp_done().							#
   1155#	PACKED opclass 0 and 2 is similar in how the instruction is	#
   1156# emulated and exceptions handled. The differences occur in how the	#
   1157# handler loads the packed op (by calling get_packed() routine) and	#
   1158# by the fact that a Trace exception could be pending for PACKED ops.	#
   1159# If a Trace exception is pending, then the current exception stack	#
   1160# frame is changed to a Trace exception stack frame and an exit is	#
   1161# made through _real_trace().						#
   1162#	For UNNORM/DENORM opclass 3, the actual move out to memory is	#
   1163# performed by calling the routine fout(). If no exception should occur	#
   1164# as the result of emulation, then an exit either occurs through	#
   1165# _fpsp_done() or through _real_trace() if a Trace exception is pending	#
   1166# (a Trace stack frame must be created here, too). If an FP exception	#
   1167# should occur, then we must create an exception stack frame of that	#
   1168# type and jump to either _real_snan(), _real_operr(), _real_inex(),	#
   1169# _real_unfl(), or _real_ovfl() as appropriate. PACKED opclass 3	#
   1170# emulation is performed in a similar manner.				#
   1171#									#
   1172#########################################################################
   1173
   1174#
   1175# (1) DENORM and UNNORM (unimplemented) data types:
   1176#
   1177#				post-instruction
   1178#				*****************
   1179#				*      EA	*
   1180#	 pre-instruction	*		*
   1181#	*****************	*****************
   1182#	* 0x0 *  0x0dc  *	* 0x3 *  0x0dc  *
   1183#	*****************	*****************
   1184#	*     Next	*	*     Next	*
   1185#	*      PC	*	*      PC	*
   1186#	*****************	*****************
   1187#	*      SR	*	*      SR	*
   1188#	*****************	*****************
   1189#
   1190# (2) PACKED format (unsupported) opclasses two and three:
   1191#	*****************
   1192#	*      EA	*
   1193#	*		*
   1194#	*****************
   1195#	* 0x2 *  0x0dc	*
   1196#	*****************
   1197#	*     Next	*
   1198#	*      PC	*
   1199#	*****************
   1200#	*      SR	*
   1201#	*****************
   1202#
   1203	global		_fpsp_unsupp
   1204_fpsp_unsupp:
   1205
   1206	link.w		%a6,&-LOCAL_SIZE	# init stack frame
   1207
   1208	fsave		FP_SRC(%a6)		# save fp state
   1209
   1210	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
   1211	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
   1212	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
   1213
   1214	btst		&0x5,EXC_SR(%a6)	# user or supervisor mode?
   1215	bne.b		fu_s			# supervisor
   1216fu_u:
   1217	mov.l		%usp,%a0		# fetch user stack pointer
   1218	mov.l		%a0,EXC_A7(%a6)		# save on stack
   1219	bra.b		fu_cont
   1220# if the exception is an opclass zero or two unimplemented data type
   1221# exception, then the a7' calculated here is wrong since it doesn't
   1222# stack an ea. however, we don't need an a7' for this case anyways.
   1223fu_s:
   1224	lea		0x4+EXC_EA(%a6),%a0	# load old a7'
   1225	mov.l		%a0,EXC_A7(%a6)		# save on stack
   1226
   1227fu_cont:
   1228
   1229# the FPIAR holds the "current PC" of the faulting instruction
   1230# the FPIAR should be set correctly for ALL exceptions passing through
   1231# this point.
   1232	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
   1233	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   1234	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
   1235	bsr.l		_imem_read_long		# fetch the instruction words
   1236	mov.l		%d0,EXC_OPWORD(%a6)	# store OPWORD and EXTWORD
   1237
   1238############################
   1239
   1240	clr.b		SPCOND_FLG(%a6)		# clear special condition flag
   1241
   1242# Separate opclass three (fpn-to-mem) ops since they have a different
   1243# stack frame and protocol.
   1244	btst		&0x5,EXC_CMDREG(%a6)	# is it an fmove out?
   1245	bne.w		fu_out			# yes
   1246
   1247# Separate packed opclass two instructions.
   1248	bfextu		EXC_CMDREG(%a6){&0:&6},%d0
   1249	cmpi.b		%d0,&0x13		# is class = 2 & fmt = packed?
   1250	beq.w		fu_in_pack		# yes
   1251
   1252
   1253# I'm not sure at this point what FPSR bits are valid for this instruction.
   1254# so, since the emulation routines re-create them anyways, zero exception field
   1255	andi.l		&0x00ff00ff,USER_FPSR(%a6) # zero exception field
   1256
   1257	fmov.l		&0x0,%fpcr		# zero current control regs
   1258	fmov.l		&0x0,%fpsr
   1259
   1260# Opclass two w/ memory-to-fpn operation will have an incorrect extended
   1261# precision format if the src format was single or double and the
   1262# source data type was an INF, NAN, DENORM, or UNNORM
   1263	lea		FP_SRC(%a6),%a0		# pass ptr to input
   1264	bsr.l		fix_skewed_ops		# fix src op
   1265
   1266# we don't know whether the src operand or the dst operand (or both) is the
   1267# UNNORM or DENORM. call the function that tags the operand type. if the
   1268# input is an UNNORM, then convert it to a NORM, DENORM, or ZERO.
   1269	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
   1270	bsr.l		set_tag_x		# tag the operand type
   1271	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
   1272	bne.b		fu_op2			# no
   1273	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
   1274
   1275fu_op2:
   1276	mov.b		%d0,STAG(%a6)		# save src optype tag
   1277
   1278	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
   1279
   1280# bit five of the fp extension word separates the monadic and dyadic operations
   1281# at this point
   1282	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
   1283	beq.b		fu_extract		# monadic
   1284	cmpi.b		1+EXC_CMDREG(%a6),&0x3a	# is operation an ftst?
   1285	beq.b		fu_extract		# yes, so it's monadic, too
   1286
   1287	bsr.l		load_fpn2		# load dst into FP_DST
   1288
   1289	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
   1290	bsr.l		set_tag_x		# tag the operand type
   1291	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
   1292	bne.b		fu_op2_done		# no
   1293	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
   1294fu_op2_done:
   1295	mov.b		%d0,DTAG(%a6)		# save dst optype tag
   1296
   1297fu_extract:
   1298	clr.l		%d0
   1299	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode/prec
   1300
   1301	bfextu		1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension
   1302
   1303	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
   1304	lea		FP_DST(%a6),%a1		# pass: ptr to dst op
   1305
   1306	mov.l		(tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr
   1307	jsr		(tbl_unsupp.l,%pc,%d1.l*1) # call the emulation routine
   1308
   1309#
   1310# Exceptions in order of precedence:
   1311#	BSUN	: none
   1312#	SNAN	: all dyadic ops
   1313#	OPERR	: fsqrt(-NORM)
   1314#	OVFL	: all except ftst,fcmp
   1315#	UNFL	: all except ftst,fcmp
   1316#	DZ	: fdiv
   1317#	INEX2	: all except ftst,fcmp
   1318#	INEX1	: none (packed doesn't go through here)
   1319#
   1320
   1321# we determine the highest priority exception(if any) set by the
   1322# emulation routine that has also been enabled by the user.
   1323	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
   1324	bne.b		fu_in_ena		# some are enabled
   1325
   1326fu_in_cont:
   1327# fcmp and ftst do not store any result.
   1328	mov.b		1+EXC_CMDREG(%a6),%d0	# fetch extension
   1329	andi.b		&0x38,%d0		# extract bits 3-5
   1330	cmpi.b		%d0,&0x38		# is instr fcmp or ftst?
   1331	beq.b		fu_in_exit		# yes
   1332
   1333	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
   1334	bsr.l		store_fpreg		# store the result
   1335
   1336fu_in_exit:
   1337
   1338	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   1339	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   1340	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   1341
   1342	unlk		%a6
   1343
   1344	bra.l		_fpsp_done		# all work done
   1345
   1346fu_in_ena:
   1347	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled
   1348	bfffo		%d0{&24:&8},%d0		# find highest priority exception
   1349	bne.b		fu_in_exc		# there is at least one set
   1350
   1351#
   1352# No exceptions occurred that were also enabled. Now:
   1353#
   1354#	if (OVFL && ovfl_disabled && inexact_enabled) {
   1355#	    branch to _real_inex() (even if the result was exact!);
   1356#	} else {
   1357#	    save the result in the proper fp reg (unless the op is fcmp or ftst);
   1358#	    return;
   1359#	}
   1360#
   1361	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
   1362	beq.b		fu_in_cont		# no
   1363
   1364fu_in_ovflchk:
   1365	btst		&inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
   1366	beq.b		fu_in_cont		# no
   1367	bra.w		fu_in_exc_ovfl		# go insert overflow frame
   1368
   1369#
   1370# An exception occurred and that exception was enabled:
   1371#
   1372#	shift enabled exception field into lo byte of d0;
   1373#	if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
   1374#	    ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
   1375#		/*
   1376#		 * this is the case where we must call _real_inex() now or else
   1377#		 * there will be no other way to pass it the exceptional operand
   1378#		 */
   1379#		call _real_inex();
   1380#	} else {
   1381#		restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
   1382#	}
   1383#
   1384fu_in_exc:
   1385	subi.l		&24,%d0			# fix offset to be 0-7
   1386	cmpi.b		%d0,&0x6		# is exception INEX? (6)
   1387	bne.b		fu_in_exc_exit		# no
   1388
   1389# the enabled exception was inexact
   1390	btst		&unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur?
   1391	bne.w		fu_in_exc_unfl		# yes
   1392	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur?
   1393	bne.w		fu_in_exc_ovfl		# yes
   1394
   1395# here, we insert the correct fsave status value into the fsave frame for the
   1396# corresponding exception. the operand in the fsave frame should be the original
   1397# src operand.
   1398fu_in_exc_exit:
   1399	mov.l		%d0,-(%sp)		# save d0
   1400	bsr.l		funimp_skew		# skew sgl or dbl inputs
   1401	mov.l		(%sp)+,%d0		# restore d0
   1402
   1403	mov.w		(tbl_except.b,%pc,%d0.w*2),2+FP_SRC(%a6) # create exc status
   1404
   1405	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   1406	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   1407	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   1408
   1409	frestore	FP_SRC(%a6)		# restore src op
   1410
   1411	unlk		%a6
   1412
   1413	bra.l		_fpsp_done
   1414
   1415tbl_except:
   1416	short		0xe000,0xe006,0xe004,0xe005	# entries 0-3 (3 = ovfl)
   1417	short		0xe003,0xe002,0xe001,0xe001	# entries 4-7 (4 = unfl, 6 = inex)
   1418
   1419fu_in_exc_unfl:
   1420	mov.w		&0x4,%d0		# select tbl_except entry 4 (0xe003)
   1421	bra.b		fu_in_exc_exit
   1422fu_in_exc_ovfl:
   1423	mov.w		&0x03,%d0		# select tbl_except entry 3 (0xe005)
   1424	bra.b		fu_in_exc_exit
   1425
   1426# fix_skewed_ops(): a0 = ptr to operand; d0 is used as scratch. If the input
   1427# operand to this operation was opclass two and a single or double precision
   1428# denorm, zero, inf, or nan, the operand needs to be "corrected" in order to
   1429# have the proper equivalent extended precision number.
   1430	global		fix_skewed_ops
   1431fix_skewed_ops:
   1432	bfextu		EXC_CMDREG(%a6){&0:&6},%d0 # extract opclass,src fmt
   1433	cmpi.b		%d0,&0x11		# is class = 2 & fmt = sgl?
   1434	beq.b		fso_sgl			# yes
   1435	cmpi.b		%d0,&0x15		# is class = 2 & fmt = dbl?
   1436	beq.b		fso_dbl			# yes
   1437	rts					# no; nothing to fix
   1438
   1439fso_sgl:
   1440	mov.w		LOCAL_EX(%a0),%d0	# fetch src exponent
   1441	andi.w		&0x7fff,%d0		# strip sign
   1442	cmpi.w		%d0,&0x3f80		# is |exp| == $3f80?
   1443	beq.b		fso_sgl_dnrm_zero	# yes
   1444	cmpi.w		%d0,&0x407f		# no; is |exp| == $407f?
   1445	beq.b		fso_infnan		# yes
   1446	rts					# no; operand left unchanged
   1447
   1448fso_sgl_dnrm_zero:
   1449	andi.l		&0x7fffffff,LOCAL_HI(%a0) # clear j-bit
   1450	beq.b		fso_zero		# it's a skewed zero
   1451fso_sgl_dnrm:
   1452# here, we count on norm not to alter a0...
   1453	bsr.l		norm			# normalize mantissa
   1454	neg.w		%d0			# -shft amt
   1455	addi.w		&0x3f81,%d0		# adjust new exponent
   1456	andi.w		&0x8000,LOCAL_EX(%a0)	# clear old exponent
   1457	or.w		%d0,LOCAL_EX(%a0)	# insert new exponent
   1458	rts
   1459
   1460fso_zero:
   1461	andi.w		&0x8000,LOCAL_EX(%a0)	# clear bogus exponent
   1462	rts
   1463
   1464fso_infnan:
   1465	andi.b		&0x7f,LOCAL_HI(%a0)	# clear j-bit
   1466	ori.w		&0x7fff,LOCAL_EX(%a0)	# make exponent = $7fff
   1467	rts
   1468
   1469fso_dbl:
   1470	mov.w		LOCAL_EX(%a0),%d0	# fetch src exponent
   1471	andi.w		&0x7fff,%d0		# strip sign
   1472	cmpi.w		%d0,&0x3c00		# is |exp| == $3c00?
   1473	beq.b		fso_dbl_dnrm_zero	# yes
   1474	cmpi.w		%d0,&0x43ff		# no; is |exp| == $43ff?
   1475	beq.b		fso_infnan		# yes
   1476	rts					# no; operand left unchanged
   1477
   1478fso_dbl_dnrm_zero:
   1479	andi.l		&0x7fffffff,LOCAL_HI(%a0) # clear j-bit
   1480	bne.b		fso_dbl_dnrm		# it's a skewed denorm
   1481	tst.l		LOCAL_LO(%a0)		# is it a zero?
   1482	beq.b		fso_zero		# yes
   1483fso_dbl_dnrm:
   1484# here, we count on norm not to alter a0...
   1485	bsr.l		norm			# normalize mantissa
   1486	neg.w		%d0			# -shft amt
   1487	addi.w		&0x3c01,%d0		# adjust new exponent
   1488	andi.w		&0x8000,LOCAL_EX(%a0)	# clear old exponent
   1489	or.w		%d0,LOCAL_EX(%a0)	# insert new exponent
   1490	rts
   1491
   1492#################################################################
   1493
   1494# fmove out took an unimplemented data type exception.
   1495# the src operand is in FP_SRC. Call fout() to write out the result and
   1496# to determine which exceptions, if any, to take.
   1497fu_out:
   1498
   1499# Separate packed move outs from the UNNORM and DENORM move outs.
   1500	bfextu		EXC_CMDREG(%a6){&3:&3},%d0
   1501	cmpi.b		%d0,&0x3		# is extracted field 3 (packed)?
   1502	beq.w		fu_out_pack		# yes
   1503	cmpi.b		%d0,&0x7		# is extracted field 7 (packed)?
   1504	beq.w		fu_out_pack		# yes
   1505
   1506
   1507# I'm not sure at this point what FPSR bits are valid for this instruction.
   1508# so, since the emulation routines re-create them anyways, zero exception field.
   1509# fmove out doesn't affect ccodes.
   1510	and.l		&0xffff00ff,USER_FPSR(%a6) # zero exception field
   1511
   1512	fmov.l		&0x0,%fpcr		# zero current control regs
   1513	fmov.l		&0x0,%fpsr
   1514
   1515# the src can ONLY be a DENORM or an UNNORM! so, don't make any big subroutine
   1516# call here. just figure out what it is...
   1517	mov.w		FP_SRC_EX(%a6),%d0	# get exponent
   1518	andi.w		&0x7fff,%d0		# strip sign
   1519	beq.b		fu_out_denorm		# it's a DENORM
   1520
   1521	lea		FP_SRC(%a6),%a0
   1522	bsr.l		unnorm_fix		# it's an UNNORM; fix it
   1523
   1524	mov.b		%d0,STAG(%a6)		# save src optype tag
   1525
   1526	bra.b		fu_out_cont
   1527fu_out_denorm:
   1528	mov.b		&DENORM,STAG(%a6)	# tag src as DENORM
   1529fu_out_cont:
   1530
   1531	clr.l		%d0
   1532	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode/prec
   1533
   1534	lea		FP_SRC(%a6),%a0		# pass ptr to src operand
   1535
   1536	mov.l		(%a6),EXC_A6(%a6)	# in case a6 changes
   1537	bsr.l		fout			# call fmove out routine
   1538
   1539# Exceptions in order of precedence:
   1540#	BSUN	: none
   1541#	SNAN	: none
   1542#	OPERR	: fmove.{b,w,l} out of large UNNORM
   1543#	OVFL	: fmove.{s,d}
   1544#	UNFL	: fmove.{s,d,x}
   1545#	DZ	: none
   1546#	INEX2	: all
   1547#	INEX1	: none (packed doesn't travel through here)
   1548
   1549# determine the highest priority exception(if any) set by the
   1550# emulation routine that has also been enabled by the user.
   1551	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
   1552	bne.w		fu_out_ena		# some are enabled
   1553
   1554fu_out_done:
   1555
   1556	mov.l		EXC_A6(%a6),(%a6)	# in case a6 changed
   1557
   1558# on extended precision opclass three instructions using pre-decrement or
   1559# post-increment addressing mode, the address register is not updated. if the
   1560# address register was the stack pointer used from user mode, then let's update
   1561# it here. if it was used from supervisor mode, then we have to handle this
   1562# as a special case.
   1563	btst		&0x5,EXC_SR(%a6)	# user or supervisor mode?
   1564	bne.b		fu_out_done_s		# supervisor
   1565
   1566	mov.l		EXC_A7(%a6),%a0		# restore a7
   1567	mov.l		%a0,%usp
   1568
   1569fu_out_done_cont:
   1570	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   1571	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   1572	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   1573
   1574	unlk		%a6
   1575
   1576	btst		&0x7,(%sp)		# is trace on?
   1577	bne.b		fu_out_trace		# yes
   1578
   1579	bra.l		_fpsp_done
   1580
   1581# is the ea mode pre-decrement of the stack pointer from supervisor mode?
   1582# ("fmov.x fpm,-(a7)") if so, shift the stack frame and store the result below.
   1583fu_out_done_s:
   1584	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
   1585	bne.b		fu_out_done_cont	# no
   1586
   1587# the extended precision result is still in fp0. but, we need to save it
   1588# somewhere on the stack until we can copy it to its final resting place.
   1589# here, we're counting on the top of the stack to be the old place-holders
   1590# for fp0/fp1 which have already been restored. that way, we can write
   1591# over those destinations with the shifted stack frame.
   1592	fmovm.x		&0x80,FP_SRC(%a6)	# put answer on stack
   1593
   1594	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   1595	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   1596	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   1597
   1598	mov.l		(%a6),%a6		# restore frame pointer
   1599
   1600	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
   1601	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
   1602
   1603# now, copy the result to the proper place on the stack
   1604	mov.l		LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
   1605	mov.l		LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
   1606	mov.l		LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
   1607
   1608	add.l		&LOCAL_SIZE-0x8,%sp
   1609
   1610	btst		&0x7,(%sp)		# is trace on?
   1611	bne.b		fu_out_trace		# yes
   1612
   1613	bra.l		_fpsp_done
   1614
   1615fu_out_ena:
       # entered with d0 = FPCR exception enable byte (non-zero).
   1616	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled
       # bfffo scans the low byte of d0 (bit field offsets 24-31) from the most
       # significant bit; Z is clear iff at least one bit in the field is set.
   1617	bfffo		%d0{&24:&8},%d0		# find highest priority exception
   1618	bne.b		fu_out_exc		# there is at least one set
   1619
   1620# no enabled exceptions were set.
   1621# if a disabled overflow occurred and inexact was enabled but the result
   1622# was exact, then a branch to _real_inex() is made (through fu_inex).
   1623	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
   1624	beq.w		fu_out_done		# no
   1625
   1626fu_out_ovflchk:
   1627	btst		&inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
   1628	beq.w		fu_out_done		# no
   1629	bra.w		fu_inex			# yes
   1630
   1631#
   1632# The fp move out that took the "Unimplemented Data Type" exception was
   1633# being traced. Since the stack frames are similar, get the "current" PC
   1634# from FPIAR and put it in the trace stack frame then jump to _real_trace().
   1635#
   1636#		  UNSUPP FRAME		   TRACE FRAME
   1637#		*****************	*****************
   1638#		*      EA	*	*    Current	*
   1639#		*		*	*      PC	*
   1640#		*****************	*****************
   1641#		* 0x3 *  0x0dc	*	* 0x2 *  0x024	*
   1642#		*****************	*****************
   1643#		*     Next	*	*     Next	*
   1644#		*      PC	*	*      PC	*
   1645#		*****************	*****************
   1646#		*      SR	*	*      SR	*
   1647#		*****************	*****************
   1648#
   1649fu_out_trace:
   1650	mov.w		&0x2024,0x6(%sp)	# format/vector word = 0x2, 0x024 (trace)
   1651	fmov.l		%fpiar,0x8(%sp)		# "current" PC replaces the EA field
   1652	bra.l		_real_trace
   1653
   1654# an exception occurred and that exception was enabled.
       # on entry d0 holds the bfffo result (24-31) for the highest priority
       # exception that is both set and enabled.
   1655fu_out_exc:
   1656	subi.l		&24,%d0			# fix offset to be 0-7
   1657
   1658# we don't mess with the existing fsave frame. just re-insert it and
   1659# jump to the "_real_{}()" handler...
       # tbl_fu_out holds 16-bit offsets relative to tbl_fu_out itself: fetch the
       # entry for this exception, then jump to tbl_fu_out + offset.
   1660	mov.w		(tbl_fu_out.b,%pc,%d0.w*2),%d0
   1661	jmp		(tbl_fu_out.b,%pc,%d0.w*1)
   1662
       # NOTE(review): swbeg appears to be an assembler directive announcing an
       # 8-entry switch table; confirm against the assembler in use.
   1663	swbeg		&0x8
   1664tbl_fu_out:
   1665	short		tbl_fu_out	- tbl_fu_out	# BSUN can't happen
   1666	short		tbl_fu_out	- tbl_fu_out	# SNAN can't happen
   1667	short		fu_operr	- tbl_fu_out	# OPERR
   1668	short		fu_ovfl		- tbl_fu_out	# OVFL
   1669	short		fu_unfl		- tbl_fu_out	# UNFL
   1670	short		tbl_fu_out	- tbl_fu_out	# DZ can't happen
   1671	short		fu_inex		- tbl_fu_out	# INEX2
   1672	short		tbl_fu_out	- tbl_fu_out	# INEX1 won't make it here
   1673
   1674# for snan,operr,ovfl,unfl, src op is still in FP_SRC so just
   1675# frestore it.
       # fu_snan is not reachable through tbl_fu_out; it is branched to from the
       # packed move out code (fu_snan_p / fu_snan_s_p).
   1676fu_snan:
   1677	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   1678	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   1679	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   1680
   1681	mov.w		&0x30d8,EXC_VOFF(%a6)	# vector offset = 0xd8
   1682	mov.w		&0xe006,2+FP_SRC(%a6)	# set fsave status
   1683
   1684	frestore	FP_SRC(%a6)		# restore src operand
   1685
   1686	unlk		%a6
   1687
   1688
   1689	bra.l		_real_snan
   1690
   1691fu_operr:
       # enabled OPERR: restore the saved state, turn the stack frame into an
       # OPERR frame, load the fsave frame at FP_SRC and exit to _real_operr().
   1692	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   1693	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   1694	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   1695
   1696	mov.w		&0x30d0,EXC_VOFF(%a6)	# vector offset = 0xd0
   1697	mov.w		&0xe004,2+FP_SRC(%a6)	# set fsave status
   1698
   1699	frestore	FP_SRC(%a6)		# restore src operand
   1700
   1701	unlk		%a6
   1702
   1703
   1704	bra.l		_real_operr
   1705
   1706fu_ovfl:
       # enabled OVFL: the EXOP (in fp1) is written over FP_SRC so that it is
       # what gets loaded into the FPU by the frestore; exit to _real_ovfl().
   1707	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP to the stack
   1708
   1709	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   1710	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   1711	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   1712
   1713	mov.w		&0x30d4,EXC_VOFF(%a6)	# vector offset = 0xd4
   1714	mov.w		&0xe005,2+FP_SRC(%a6)	# set fsave status
   1715
   1716	frestore	FP_SRC(%a6)		# restore EXOP
   1717
   1718	unlk		%a6
   1719
   1720	bra.l		_real_ovfl
   1721
   1722# underflow can happen for extended precision. extended precision opclass
   1723# three instruction exceptions don't update the stack pointer. so, if the
   1724# exception occurred from user mode, then simply update a7 and exit normally.
   1725# if the exception occurred from supervisor mode, check if the <ea> mode was
       # -(sp); if so, the exception frame must be shifted "down" (see fu_unfl_s).
   1726fu_unfl:
   1727	mov.l		EXC_A6(%a6),(%a6)	# restore a6
   1728
   1729	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
   1730	bne.w		fu_unfl_s		# supervisor
   1731
   1732	mov.l		EXC_A7(%a6),%a0		# restore a7 whether we need
   1733	mov.l		%a0,%usp		# to or not...
   1734
   1735fu_unfl_cont:
   1736	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP to the stack
   1737
   1738	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   1739	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   1740	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   1741
   1742	mov.w		&0x30cc,EXC_VOFF(%a6)	# vector offset = 0xcc
   1743	mov.w		&0xe003,2+FP_SRC(%a6)	# set fsave status
   1744
   1745	frestore	FP_SRC(%a6)		# restore EXOP
   1746
   1747	unlk		%a6
   1748
   1749	bra.l		_real_unfl
   1750
   1751fu_unfl_s:
       # enabled underflow taken from supervisor mode. only the -(sp) destination
       # case needs the frame shift below; anything else rejoins fu_unfl_cont.
   1752	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was the <ea> mode -(sp)?
   1753	bne.b		fu_unfl_cont
   1754
   1755# the extended precision result is still in fp0. but, we need to save it
   1756# somewhere on the stack until we can copy it to its final resting place
   1757# (where the exc frame is currently). make sure it's not at the top of the
   1758# frame or it will get overwritten when the exc stack frame is shifted "down".
   1759	fmovm.x		&0x80,FP_SRC(%a6)	# put answer on stack
   1760	fmovm.x		&0x40,FP_DST(%a6)	# put EXOP on stack
   1761
   1762	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   1763	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   1764	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   1765
   1766	mov.w		&0x30cc,EXC_VOFF(%a6)	# vector offset = 0xcc
   1767	mov.w		&0xe003,2+FP_DST(%a6)	# set fsave status (EXOP lives in FP_DST here)
   1768
   1769	frestore	FP_DST(%a6)		# restore EXOP
   1770
   1771	mov.l		(%a6),%a6		# restore frame pointer
   1772
       # shift the exception frame "down" 0xc bytes; unlike the non-exceptional
       # exit, the EA longword is copied as well.
   1773	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
   1774	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
   1775	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
   1776
   1777# now, copy the result to the proper place on the stack
   1778	mov.l		LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
   1779	mov.l		LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
   1780	mov.l		LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
   1781
   1782	add.l		&LOCAL_SIZE-0x8,%sp	# drop the locals; presumably sp -> shifted frame
   1783
   1784	bra.l		_real_unfl
   1785
   1786# fmove in and out enter here.
       # enabled INEX (also reached for the disabled-overflow/enabled-inexact
       # case): load the EXOP into the FPU as an inexact fsave frame and exit
       # to _real_inex().
   1787fu_inex:
   1788	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP to the stack
   1789
   1790	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   1791	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   1792	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   1793
   1794	mov.w		&0x30c4,EXC_VOFF(%a6)	# vector offset = 0xc4
   1795	mov.w		&0xe001,2+FP_SRC(%a6)	# set fsave status
   1796
   1797	frestore	FP_SRC(%a6)		# restore EXOP
   1798
   1799	unlk		%a6
   1800
   1801
   1802	bra.l		_real_inex
   1803
   1804#########################################################################
   1805#########################################################################
   1806fu_in_pack:
       # opclass two instruction with a PACKED source operand: fetch and tag the
       # source, load and tag the destination for dyadic operations, then call
       # the emulation routine selected by the instruction's extension field.
   1807
   1808
   1809# I'm not sure at this point what FPSR bits are valid for this instruction.
   1810# so, since the emulation routines re-create them anyways, zero exception field
       # (the mask below also clears the most significant byte of USER_FPSR.)
   1811	andi.l		&0x0ff00ff,USER_FPSR(%a6) # zero exception field
   1812
   1813	fmov.l		&0x0,%fpcr		# zero current control regs
   1814	fmov.l		&0x0,%fpsr
   1815
   1816	bsr.l		get_packed		# fetch packed src operand
   1817
   1818	lea		FP_SRC(%a6),%a0		# pass ptr to src
   1819	bsr.l		set_tag_x		# set src optype tag
   1820
   1821	mov.b		%d0,STAG(%a6)		# save src optype tag
   1822
   1823	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
   1824
   1825# bit five of the fp extension word separates the monadic and dyadic operations
   1826# at this point
   1827	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
   1828	beq.b		fu_extract_p		# monadic
   1829	cmpi.b		1+EXC_CMDREG(%a6),&0x3a	# is operation an ftst?
   1830	beq.b		fu_extract_p		# yes, so it's monadic, too
   1831
   1832	bsr.l		load_fpn2		# load dst into FP_DST
   1833
   1834	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
   1835	bsr.l		set_tag_x		# tag the operand type
   1836	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
   1837	bne.b		fu_op2_done_p		# no
   1838	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
   1839fu_op2_done_p:
   1840	mov.b		%d0,DTAG(%a6)		# save dst optype tag
   1841
   1842fu_extract_p:
   1843	clr.l		%d0
   1844	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode/prec
   1845
   1846	bfextu		1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension
   1847
   1848	lea		FP_SRC(%a6),%a0		# a0 = ptr to src operand
   1849	lea		FP_DST(%a6),%a1		# a1 = ptr to dst operand
   1850
       # tbl_unsupp holds longword offsets relative to tbl_unsupp; d1 indexes it
       # by the 7-bit extension and the routine at tbl_unsupp + offset is called.
   1851	mov.l		(tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr
   1852	jsr		(tbl_unsupp.l,%pc,%d1.l*1)
   1853
   1854#
   1855# Exceptions in order of precedence:
   1856#	BSUN	: none
   1857#	SNAN	: all dyadic ops
   1858#	OPERR	: fsqrt(-NORM)
   1859#	OVFL	: all except ftst,fcmp
   1860#	UNFL	: all except ftst,fcmp
   1861#	DZ	: fdiv
   1862#	INEX2	: all except ftst,fcmp
   1863#	INEX1	: all
   1864#
   1865
   1866# we determine the highest priority exception(if any) set by the
   1867# emulation routine that has also been enabled by the user.
   1868	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
   1869	bne.w		fu_in_ena_p		# some are enabled
   1870
   1871fu_in_cont_p:
   1872# fcmp and ftst do not store any result.
   1873	mov.b		1+EXC_CMDREG(%a6),%d0	# fetch extension
   1874	andi.b		&0x38,%d0		# extract bits 3-5
   1875	cmpi.b		%d0,&0x38		# is instr fcmp or ftst?
   1876	beq.b		fu_in_exit_p		# yes
   1877
   1878	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst reg number
   1879	bsr.l		store_fpreg		# store the result
   1880
   1881fu_in_exit_p:
   1882
   1883	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
   1884	bne.w		fu_in_exit_s_p		# supervisor
   1885
   1886	mov.l		EXC_A7(%a6),%a0		# update user a7
   1887	mov.l		%a0,%usp
   1888
   1889fu_in_exit_cont_p:
   1890	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   1891	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   1892	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   1893
   1894	unlk		%a6			# unravel stack frame
   1895
   1896	btst		&0x7,(%sp)		# is trace on?
   1897	bne.w		fu_trace_p		# yes
   1898
   1899	bra.l		_fpsp_done		# exit to os
   1900
   1901# the exception occurred in supervisor mode. check to see if the
   1902# addressing mode was (a7)+. if so, we'll need to shift the
   1903# stack frame "up".
   1904fu_in_exit_s_p:
   1905	btst		&mia7_bit,SPCOND_FLG(%a6) # was ea mode (a7)+
   1906	beq.b		fu_in_exit_cont_p	# no
   1907
   1908	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   1909	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   1910	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   1911
   1912	unlk		%a6			# unravel stack frame
   1913
   1914# shift the stack frame "up". we don't really care about the <ea> field.
       # the two longwords at 0x0/0x4(sp) are copied 0xc bytes higher (upper one
       # first, so nothing is overwritten before it is read), then sp is bumped
       # by 0xc to point at the moved copy.
   1915	mov.l		0x4(%sp),0x10(%sp)
   1916	mov.l		0x0(%sp),0xc(%sp)
   1917	add.l		&0xc,%sp
   1918
   1919	btst		&0x7,(%sp)		# is trace on?
   1920	bne.w		fu_trace_p		# yes
   1921
   1922	bra.l		_fpsp_done		# exit to os
   1923
   1924fu_in_ena_p:
       # entered with d0 = FPCR exception enable byte (non-zero).
   1925	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled & set
   1926	bfffo		%d0{&24:&8},%d0		# find highest priority exception
   1927	bne.b		fu_in_exc_p		# at least one was set
   1928
   1929#
   1930# No exceptions occurred that were also enabled. Now:
   1931#
   1932#	if (OVFL && ovfl_disabled && inexact_enabled) {
   1933#	    branch to _real_inex() (even if the result was exact!);
   1934#	} else {
   1935#	    save the result in the proper fp reg (unless the op is fcmp or ftst);
   1936#	    return;
   1937#	}
   1938#
   1939	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
   1940	beq.w		fu_in_cont_p		# no
   1941
   1942fu_in_ovflchk_p:
   1943	btst		&inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
   1944	beq.w		fu_in_cont_p		# no
   1945	bra.w		fu_in_exc_ovfl_p	# do _real_inex() now
   1946
   1947#
   1948# An exception occurred and that exception was enabled:
   1949#
   1950#	shift enabled exception field into lo byte of d0;
   1951#	if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
   1952#	    ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
   1953#		/*
   1954#		 * this is the case where we must call _real_inex() now or else
   1955#		 * there will be no other way to pass it the exceptional operand
   1956#		 */
   1957#		call _real_inex();
   1958#	} else {
   1959#		restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
   1960#	}
   1961#
   1962fu_in_exc_p:
   1963	subi.l		&24,%d0			# fix offset to be 0-7
   1964	cmpi.b		%d0,&0x6		# is exception INEX? (6 or 7)
   1965	blt.b		fu_in_exc_exit_p	# no
   1966
   1967# the enabled exception was inexact
   1968	btst		&unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur?
   1969	bne.w		fu_in_exc_unfl_p	# yes
   1970	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur?
   1971	bne.w		fu_in_exc_ovfl_p	# yes
   1972
   1973# here, we insert the correct fsave status value into the fsave frame for the
   1974# corresponding exception. the operand in the fsave frame should be the original
   1975# src operand.
   1976# as a reminder for future predicted pain and agony, we are passing in fsave the
   1977# "non-skewed" operand for cases of sgl and dbl src INFs,NANs, and DENORMs.
   1978# this is INCORRECT for enabled SNAN which would give to the user the skewed SNAN!!!
       # on entry d0 = exception index (0-7) used to index tbl_except_p.
   1979fu_in_exc_exit_p:
   1980	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
   1981	bne.w		fu_in_exc_exit_s_p	# supervisor
   1982
   1983	mov.l		EXC_A7(%a6),%a0		# update user a7
   1984	mov.l		%a0,%usp
   1985
   1986fu_in_exc_exit_cont_p:
   1987	mov.w		(tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6) # set fsave status
   1988
   1989	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   1990	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   1991	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   1992
   1993	frestore	FP_SRC(%a6)		# restore src op
   1994
   1995	unlk		%a6
   1996
   1997	btst		&0x7,(%sp)		# is trace enabled?
   1998	bne.w		fu_trace_p		# yes
   1999
   2000	bra.l		_fpsp_done
   2001
   2002tbl_except_p:
       # fsave status words indexed by exception number (0-7), in the same
       # order as the exception precedence lists in this file:
       #   BSUN, SNAN, OPERR, OVFL / UNFL, DZ, INEX2, INEX1
   2003	short		0xe000,0xe006,0xe004,0xe005
   2004	short		0xe003,0xe002,0xe001,0xe001
   2005
   2006fu_in_exc_ovfl_p:
   2007	mov.w		&0x3,%d0		# select the OVFL entry of tbl_except_p
   2008	bra.w		fu_in_exc_exit_p
   2009
   2010fu_in_exc_unfl_p:
   2011	mov.w		&0x4,%d0		# select the UNFL entry of tbl_except_p
   2012	bra.w		fu_in_exc_exit_p
   2013
   2014fu_in_exc_exit_s_p:
       # supervisor mode variant of fu_in_exc_exit_cont_p: if the source <ea>
       # mode was (a7)+, the stack frame is additionally shifted "up" 0xc bytes.
   2015	btst		&mia7_bit,SPCOND_FLG(%a6) # was ea mode (a7)+
   2016	beq.b		fu_in_exc_exit_cont_p	# no
   2017
   2018	mov.w		(tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6) # set fsave status
   2019
   2020	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   2021	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   2022	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   2023
   2024	frestore	FP_SRC(%a6)		# restore src op
   2025
   2026	unlk		%a6			# unravel stack frame
   2027
   2028# shift stack frame "up". who cares about <ea> field.
   2029	mov.l		0x4(%sp),0x10(%sp)
   2030	mov.l		0x0(%sp),0xc(%sp)
   2031	add.l		&0xc,%sp
   2032
   2033	btst		&0x7,(%sp)		# is trace on?
   2034	bne.b		fu_trace_p		# yes
   2035
   2036	bra.l		_fpsp_done		# exit to os
   2037
   2038#
   2039# The opclass two PACKED instruction that took an "Unimplemented Data Type"
   2040# exception was being traced. Make the "current" PC the FPIAR and put it in the
   2041# trace stack frame then jump to _real_trace().
       # (this label is also the trace exit for the packed move out paths.)
   2042#
   2043#		  UNSUPP FRAME		   TRACE FRAME
   2044#		*****************	*****************
   2045#		*      EA	*	*    Current	*
   2046#		*		*	*      PC	*
   2047#		*****************	*****************
   2048#		* 0x2 *	0x0dc	*	* 0x2 *  0x024	*
   2049#		*****************	*****************
   2050#		*     Next	*	*     Next	*
   2051#		*      PC	*	*      PC	*
   2052#		*****************	*****************
   2053#		*      SR	*	*      SR	*
   2054#		*****************	*****************
   2055fu_trace_p:
   2056	mov.w		&0x2024,0x6(%sp)	# format/vector word = 0x2, 0x024 (trace)
   2057	fmov.l		%fpiar,0x8(%sp)		# "current" PC replaces the EA field
   2058
   2059	bra.l		_real_trace
   2060
   2061#########################################################
   2062#########################################################
   2063fu_out_pack:
       # opclass three move out with a PACKED destination: load and tag the
       # source register operand, then let fout() do the conversion and store.
   2064
   2065
   2066# I'm not sure at this point what FPSR bits are valid for this instruction.
   2067# so, since the emulation routines re-create them anyways, zero exception field.
   2068# fmove out doesn't affect ccodes.
   2069	and.l		&0xffff00ff,USER_FPSR(%a6) # zero exception field
   2070
   2071	fmov.l		&0x0,%fpcr		# zero current control regs
   2072	fmov.l		&0x0,%fpsr
   2073
   2074	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # fetch src reg number
   2075	bsr.l		load_fpn1		# NOTE(review): presumably loads that reg into FP_SRC; confirm
   2076
   2077# unlike other opclass 3, unimplemented data type exceptions, packed must be
   2078# able to detect all operand types.
   2079	lea		FP_SRC(%a6),%a0
   2080	bsr.l		set_tag_x		# tag the operand type
   2081	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
   2082	bne.b		fu_op2_p		# no
   2083	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
   2084
   2085fu_op2_p:
   2086	mov.b		%d0,STAG(%a6)		# save src optype tag
   2087
   2088	clr.l		%d0
   2089	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode/prec
   2090
   2091	lea		FP_SRC(%a6),%a0		# pass ptr to src operand
   2092
   2093	mov.l		(%a6),EXC_A6(%a6)	# in case a6 changes
   2094	bsr.l		fout			# call fmove out routine
   2095
   2096# Exceptions in order of precedence:
   2097#	BSUN	: no
   2098#	SNAN	: yes
   2099#	OPERR	: if ((k_factor > +17) || (dec. exp exceeds 3 digits))
   2100#	OVFL	: no
   2101#	UNFL	: no
   2102#	DZ	: no
   2103#	INEX2	: yes
   2104#	INEX1	: no
   2105
   2106# determine the highest priority exception(if any) set by the
   2107# emulation routine that has also been enabled by the user.
   2108	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
   2109	bne.w		fu_out_ena_p		# some are enabled
   2110
   2111fu_out_exit_p:
       # packed move out finished with nothing to report: fix up a6/a7, restore
       # the saved state and exit (through fu_trace_p if tracing).
   2112	mov.l		EXC_A6(%a6),(%a6)	# restore a6
   2113
   2114	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
   2115	bne.b		fu_out_exit_s_p		# supervisor
   2116
   2117	mov.l		EXC_A7(%a6),%a0		# update user a7
   2118	mov.l		%a0,%usp
   2119
   2120fu_out_exit_cont_p:
   2121	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   2122	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   2123	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   2124
   2125	unlk		%a6			# unravel stack frame
   2126
   2127	btst		&0x7,(%sp)		# is trace on?
   2128	bne.w		fu_trace_p		# yes
   2129
   2130	bra.l		_fpsp_done		# exit to os
   2131
   2132# the exception occurred in supervisor mode. check to see if the
   2133# addressing mode was -(a7). if so, we'll need to shift the
   2134# stack frame "down".
   2135fu_out_exit_s_p:
   2136	btst		&mda7_bit,SPCOND_FLG(%a6) # was ea mode -(a7)
   2137	beq.b		fu_out_exit_cont_p	# no
   2138
   2139	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   2140	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   2141	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   2142
   2143	mov.l		(%a6),%a6		# restore frame pointer
   2144
       # move the frame's SR/PC longwords "down" 0xc bytes (sp-relative, since
       # a6 has already been restored).
   2145	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
   2146	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
   2147
   2148# now, copy the result to the proper place on the stack
       # (in supervisor mode _mem_write2 leaves the 12-byte result in FP_DST.)
   2149	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
   2150	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
   2151	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
   2152
   2153	add.l		&LOCAL_SIZE-0x8,%sp	# drop the locals; presumably sp -> shifted frame
   2154
   2155	btst		&0x7,(%sp)		# is trace on?
   2156	bne.w		fu_trace_p		# yes
   2157
   2158	bra.l		_fpsp_done
   2159
   2160fu_out_ena_p:
       # entered with d0 = FPCR exception enable byte (non-zero).
   2161	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled
   2162	bfffo		%d0{&24:&8},%d0		# find highest priority exception
   2163	beq.w		fu_out_exit_p		# none set and enabled; exit normally
   2164
   2165	mov.l		EXC_A6(%a6),(%a6)	# restore a6
   2166
   2167# an exception occurred and that exception was enabled.
   2168# the only exception possible on packed move out are INEX, OPERR, and SNAN.
       # d0 still holds the raw bfffo bit offset (24-31); 0x1a (26) is the
       # OPERR position, larger offsets are treated as INEX, smaller as SNAN.
   2169fu_out_exc_p:
   2170	cmpi.b		%d0,&0x1a
   2171	bgt.w		fu_inex_p2
   2172	beq.w		fu_operr_p
   2173
   2174fu_snan_p:
       # enabled SNAN on a packed move out. from user mode just update usp and
       # use the common fu_snan exit.
   2175	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
   2176	bne.b		fu_snan_s_p		# supervisor
   2177
   2178	mov.l		EXC_A7(%a6),%a0		# update user a7
   2179	mov.l		%a0,%usp
   2180	bra.w		fu_snan
   2181
   2182fu_snan_s_p:
   2183	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was the <ea> mode -(a7)?
   2184	bne.w		fu_snan			# no
   2185
   2186# the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
   2187# the strategy is to move the exception frame "down" 12 bytes. then, we
   2188# can store the default result where the exception frame was.
   2189	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   2190	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   2191	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   2192
   2193	mov.w		&0x30d8,EXC_VOFF(%a6)	# vector offset = 0xd8
   2194	mov.w		&0xe006,2+FP_SRC(%a6)	# set fsave status
   2195
   2196	frestore	FP_SRC(%a6)		# restore src operand
   2197
   2198	mov.l		(%a6),%a6		# restore frame pointer
   2199
   2200	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
   2201	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
   2202	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
   2203
   2204# now, we copy the default result to its proper location
   2205	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
   2206	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
   2207	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
   2208
   2209	add.l		&LOCAL_SIZE-0x8,%sp	# drop the locals; presumably sp -> shifted frame
   2210
   2211
   2212	bra.l		_real_snan
   2213
   2214fu_operr_p:
       # enabled OPERR on a packed move out. from user mode just update usp and
       # use the common fu_operr exit.
   2215	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
   2216	bne.w		fu_operr_p_s		# supervisor
   2217
   2218	mov.l		EXC_A7(%a6),%a0		# update user a7
   2219	mov.l		%a0,%usp
   2220	bra.w		fu_operr
   2221
   2222fu_operr_p_s:
   2223	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was the <ea> mode -(a7)?
   2224	bne.w		fu_operr		# no
   2225
   2226# the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
   2227# the strategy is to move the exception frame "down" 12 bytes. then, we
   2228# can store the default result where the exception frame was.
   2229	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   2230	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   2231	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   2232
   2233	mov.w		&0x30d0,EXC_VOFF(%a6)	# vector offset = 0xd0
   2234	mov.w		&0xe004,2+FP_SRC(%a6)	# set fsave status
   2235
   2236	frestore	FP_SRC(%a6)		# restore src operand
   2237
   2238	mov.l		(%a6),%a6		# restore frame pointer
   2239
   2240	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
   2241	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
   2242	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
   2243
   2244# now, we copy the default result to its proper location
   2245	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
   2246	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
   2247	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
   2248
   2249	add.l		&LOCAL_SIZE-0x8,%sp	# drop the locals; presumably sp -> shifted frame
   2250
   2251
   2252	bra.l		_real_operr
   2253
   2254fu_inex_p2:
       # enabled INEX on a packed move out. from user mode just update usp and
       # use the common fu_inex exit.
   2255	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
   2256	bne.w		fu_inex_s_p2		# supervisor
   2257
   2258	mov.l		EXC_A7(%a6),%a0		# update user a7
   2259	mov.l		%a0,%usp
   2260	bra.w		fu_inex
   2261
   2262fu_inex_s_p2:
   2263	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was the <ea> mode -(a7)?
   2264	bne.w		fu_inex			# no
   2265
   2266# the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
   2267# the strategy is to move the exception frame "down" 12 bytes. then, we
   2268# can store the default result where the exception frame was.
   2269	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
   2270	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   2271	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   2272
   2273	mov.w		&0x30c4,EXC_VOFF(%a6)	# vector offset = 0xc4
   2274	mov.w		&0xe001,2+FP_SRC(%a6)	# set fsave status
   2275
   2276	frestore	FP_SRC(%a6)		# restore src operand
   2277
   2278	mov.l		(%a6),%a6		# restore frame pointer
   2279
   2280	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
   2281	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
   2282	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
   2283
   2284# now, we copy the default result to its proper location
   2285	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
   2286	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
   2287	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
   2288
   2289	add.l		&LOCAL_SIZE-0x8,%sp	# drop the locals; presumably sp -> shifted frame
   2290
   2291
   2292	bra.l		_real_inex
   2293
   2294#########################################################################
   2295
   2296#
   2297# if we're stuffing a source operand back into an fsave frame then we
   2298# have to make sure that for single or double source operands that the
   2299# format stuffed is as weird as the hardware usually makes it.
   2300#
       # operates in place on FP_SRC(a6); uses d0/d1 (and a0 plus a call to
       # dnrm_lp in the double case). operands whose source specifier is neither
       # sgl (0x1) nor dbl (0x5) are left untouched.
   2301	global		funimp_skew
   2302funimp_skew:
   2303	bfextu		EXC_EXTWORD(%a6){&3:&3},%d0 # extract src specifier
   2304	cmpi.b		%d0,&0x1		# was src sgl?
   2305	beq.b		funimp_skew_sgl		# yes
   2306	cmpi.b		%d0,&0x5		# was src dbl?
   2307	beq.b		funimp_skew_dbl		# yes
   2308	rts
   2309
   2310funimp_skew_sgl:
   2311	mov.w		FP_SRC_EX(%a6),%d0	# fetch DENORM exponent
   2312	andi.w		&0x7fff,%d0		# strip sign
   2313	beq.b		funimp_skew_sgl_not	# exponent is zero; leave as is
   2314	cmpi.w		%d0,&0x3f80
   2315	bgt.b		funimp_skew_sgl_not	# exponent above 0x3f80; leave as is
   2316	neg.w		%d0			# make exponent negative
   2317	addi.w		&0x3f81,%d0		# find amt to shift
   2318	mov.l		FP_SRC_HI(%a6),%d1	# fetch DENORM hi(man)
   2319	lsr.l		%d0,%d1			# shift it
   2320	bset		&31,%d1			# set j-bit
   2321	mov.l		%d1,FP_SRC_HI(%a6)	# insert new hi(man)
   2322	andi.w		&0x8000,FP_SRC_EX(%a6)	# clear old exponent
   2323	ori.w		&0x3f80,FP_SRC_EX(%a6)	# insert new "skewed" exponent
   2324funimp_skew_sgl_not:
   2325	rts
   2326
   2327funimp_skew_dbl:
   2328	mov.w		FP_SRC_EX(%a6),%d0	# fetch DENORM exponent
   2329	andi.w		&0x7fff,%d0		# strip sign
   2330	beq.b		funimp_skew_dbl_not	# exponent is zero; leave as is
   2331	cmpi.w		%d0,&0x3c00
   2332	bgt.b		funimp_skew_dbl_not	# exponent above 0x3c00; leave as is
   2333
       # move the sign into the byte at 0x2+FP_SRC (0xff if negative, else 0x00)
       # and store the exponent with the sign bit cleared.
   2334	tst.b		FP_SRC_EX(%a6)		# make "internal format"
   2335	smi.b		0x2+FP_SRC(%a6)
   2336	mov.w		%d0,FP_SRC_EX(%a6)	# insert exponent with cleared sign
   2337	clr.l		%d0			# clear g,r,s
   2338	lea		FP_SRC(%a6),%a0		# pass ptr to src op
   2339	mov.w		&0x3c01,%d1		# pass denorm threshold
   2340	bsr.l		dnrm_lp			# denorm it
   2341	mov.w		&0x3c00,%d0		# new exponent
   2342	tst.b		0x2+FP_SRC(%a6)		# is sign set?
   2343	beq.b		fss_dbl_denorm_done	# no
   2344	bset		&15,%d0			# set sign
   2345fss_dbl_denorm_done:
   2346	bset		&0x7,FP_SRC_HI(%a6)	# set j-bit
   2347	mov.w		%d0,FP_SRC_EX(%a6)	# insert new exponent
   2348funimp_skew_dbl_not:
   2349	rts
   2350
   2351#########################################################################
   2352	global		_mem_write2
   2353_mem_write2:
       # memory write helper that treats supervisor mode specially: from user
       # mode it simply branches to _dmem_write(); from supervisor mode the 12
       # bytes at (a0) are parked in FP_DST on the local frame instead, so the
       # exit code can copy them to their final place later.
   2354	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
   2355	beq.l		_dmem_write		# user: do the real write
   2356	mov.l		0x0(%a0),FP_DST_EX(%a6)
   2357	mov.l		0x4(%a0),FP_DST_HI(%a6)
   2358	mov.l		0x8(%a0),FP_DST_LO(%a6)
   2359	clr.l		%d1			# NOTE(review): presumably the "no error" status for the caller; confirm
   2360	rts
   2361
   2362#########################################################################
   2363# XDEF ****************************************************************	#
   2364#	_fpsp_effadd(): 060FPSP entry point for FP "Unimplemented	#
   2365#			effective address" exception.			#
   2366#									#
   2367#	This handler should be the first code executed upon taking the	#
   2368#	FP Unimplemented Effective Address exception in an operating	#
   2369#	system.								#
   2370#									#
   2371# XREF ****************************************************************	#
   2372#	_imem_read_long() - read instruction longword			#
   2373#	fix_skewed_ops() - adjust src operand in fsave frame		#
   2374#	set_tag_x() - determine optype of src/dst operands		#
   2375#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
   2376#	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
   2377#	load_fpn2() - load dst operand from FP regfile			#
   2378#	tbl_unsupp - add of table of emulation routines for opclass 0,2	#
   2379#	decbin() - convert packed data to FP binary data		#
   2380#	_real_fpu_disabled() - "callout" for "FPU disabled" exception	#
   2381#	_real_access() - "callout" for access error exception		#
   2382#	_mem_read() - read extended immediate operand from memory	#
   2383#	_fpsp_done() - "callout" for exit; work all done		#
   2384#	_real_trace() - "callout" for Trace enabled exception		#
   2385#	fmovm_dynamic() - emulate dynamic fmovm instruction		#
   2386#	fmovm_ctrl() - emulate fmovm control instruction		#
   2387#									#
   2388# INPUT ***************************************************************	#
   2389#	- The system stack contains the "Unimplemented <ea>" stk frame	#
   2390#									#
   2391# OUTPUT **************************************************************	#
   2392#	If access error:						#
   2393#	- The system stack is changed to an access error stack frame	#
   2394#	If FPU disabled:						#
   2395#	- The system stack is changed to an FPU disabled stack frame	#
   2396#	If Trace exception enabled:					#
   2397#	- The system stack is changed to a Trace exception stack frame	#
   2398#	Else: (normal case)						#
   2399#	- None (correct result has been stored as appropriate)		#
   2400#									#
   2401# ALGORITHM ***********************************************************	#
   2402#	This exception handles 3 types of operations:			#
   2403# (1) FP Instructions using extended precision or packed immediate	#
   2404#     addressing mode.							#
   2405# (2) The "fmovm.x" instruction w/ dynamic register specification.	#
   2406# (3) The "fmovm.l" instruction w/ 2 or 3 control registers.		#
   2407#									#
   2408#	For immediate data operations, the data is read in w/ a		#
   2409# _mem_read() "callout", converted to FP binary (if packed), and used	#
   2410# as the source operand to the instruction specified by the instruction	#
   2411# word. If no FP exception should be reported as a result of the	#
   2412# emulation, then the result is stored to the destination register and	#
   2413# the handler exits through _fpsp_done(). If an enabled exc has been	#
   2414# signalled as a result of emulation, then an fsave state frame		#
   2415# corresponding to the FP exception type must be entered into the 060	#
   2416# FPU before exiting. In either the enabled or disabled cases, we	#
   2417# must also check if a Trace exception is pending, in which case, we	#
   2418# must create a Trace exception stack frame from the current exception	#
   2419# stack frame. If no Trace is pending, we simply exit through		#
   2420# _fpsp_done().								#
   2421#	For "fmovm.x", call the routine fmovm_dynamic() which will	#
   2422# decode and emulate the instruction. No FP exceptions can be pending	#
   2423# as a result of this operation emulation. A Trace exception can be	#
   2424# pending, though, which means the current stack frame must be changed	#
   2425# to a Trace stack frame and an exit made through _real_trace().	#
   2426# For the case of "fmovm.x Dn,-(a7)", where the offending instruction	#
   2427# was executed from supervisor mode, this handler must store the FP	#
   2428# register file values to the system stack by itself since		#
   2429# fmovm_dynamic() can't handle this. A normal exit is made through	#
   2430# _fpsp_done().								#
   2431#	For "fmovm.l", fmovm_ctrl() is used to emulate the instruction.	#
   2432# Again, a Trace exception may be pending and an exit made through	#
   2433# _real_trace(). Else, a normal exit is made through _fpsp_done().	#
   2434#									#
   2435#	Before any of the above is attempted, it must be checked to	#
   2436# see if the FPU is disabled. Since the "Unimp <ea>" exception is taken	#
   2437# before the "FPU disabled" exception, but the "FPU disabled" exception	#
   2438# has higher priority, we check the disabled bit in the PCR. If set,	#
   2439# then we must create an 8 word "FPU disabled" exception stack frame	#
   2440# from the current 4 word exception stack frame. This includes		#
   2441# reproducing the effective address of the instruction to put on the	#
   2442# new stack frame.							#
   2443#									#
   2444#	In the process of all emulation work, if a _mem_read()		#
   2445# "callout" returns a failing result indicating an access error, then	#
   2446# we must create an access error stack frame from the current stack	#
   2447# frame. This information includes a faulting address and a fault-	#
   2448# status-longword. These are created within this handler.		#
   2449#									#
   2450#########################################################################
   2451
   2452	global		_fpsp_effadd
   2453_fpsp_effadd:
   2454
   2455# This exception type takes priority over the "Line F Emulator"
   2456# exception. Therefore, the FPU could be disabled when entering here.
   2457# So, we must check to see if it's disabled and handle that case separately.
   2458	mov.l		%d0,-(%sp)		# save d0
   2459	movc		%pcr,%d0		# load PCR
   2460	btst		&0x1,%d0		# is FPU disabled?
   2461	bne.w		iea_disabled		# yes; iea_disabled restores d0
   2462	mov.l		(%sp)+,%d0		# restore d0
   2463
   2464	link		%a6,&-LOCAL_SIZE	# init stack frame
   2465
   2466	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
   2467	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
   2468	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
   2469
   2470# PC of instruction that took the exception is the PC in the frame
   2471	mov.l		EXC_PC(%a6),EXC_EXTWPTR(%a6) # start at faulting instr
   2472
   2473	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   2474	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
   2475	bsr.l		_imem_read_long		# fetch the instruction words
   2476	mov.l		%d0,EXC_OPWORD(%a6)	# store OPWORD and EXTWORD
   2477
   2478#########################################################################
   2479
   2480	tst.w		%d0			# is operation fmovm?
   2481	bmi.w		iea_fmovm		# yes (bit 15 of low word set)
   2482
   2483#
   2484# here, we will have:
   2485#	fabs	fdabs	fsabs		facos		fmod
   2486#	fadd	fdadd	fsadd		fasin		frem
   2487#	fcmp				fatan		fscale
   2488#	fdiv	fddiv	fsdiv		fatanh		fsin
   2489#	fint				fcos		fsincos
   2490#	fintrz				fcosh		fsinh
   2491#	fmove	fdmove	fsmove		fetox		ftan
   2492#	fmul	fdmul	fsmul		fetoxm1		ftanh
   2493#	fneg	fdneg	fsneg		fgetexp		ftentox
   2494#	fsgldiv				fgetman		ftwotox
   2495#	fsglmul				flog10
   2496#	fsqrt				flog2
   2497#	fsub	fdsub	fssub		flogn
   2498#	ftst				flognp1
   2499# which can all use f<op>.{x,p}
   2500# so, now it's immediate data extended precision AND PACKED FORMAT!
   2501#
   2502iea_op:
   2503	andi.l		&0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24 and 15-8
   2504
   2505	btst		&0xa,%d0		# is src fmt x or p?
   2506	bne.b		iea_op_pack		# packed
   2507
   2508
   2509	mov.l		EXC_EXTWPTR(%a6),%a0	# pass: ptr to #<data>
   2510	lea		FP_SRC(%a6),%a1		# pass: ptr to super dst
   2511	mov.l		&0xc,%d0		# pass: 12 bytes
   2512	bsr.l		_imem_read		# read extended immediate
   2513
   2514	tst.l		%d1			# did ifetch fail?
   2515	bne.w		iea_iacc		# yes
   2516
   2517	bra.b		iea_op_setsrc		# skip the packed conversion
   2518
   2519iea_op_pack:
   2520
   2521	mov.l		EXC_EXTWPTR(%a6),%a0	# pass: ptr to #<data>
   2522	lea		FP_SRC(%a6),%a1		# pass: ptr to super dst
   2523	mov.l		&0xc,%d0		# pass: 12 bytes
   2524	bsr.l		_imem_read		# read packed operand
   2525
   2526	tst.l		%d1			# did ifetch fail?
   2527	bne.w		iea_iacc		# yes
   2528
   2529# The packed operand is an INF or a NAN if the exponent field is all ones.
   2530	bfextu		FP_SRC(%a6){&1:&15},%d0	# get exp
   2531	cmpi.w		%d0,&0x7fff		# INF or NAN?
   2532	beq.b		iea_op_setsrc		# operand is an INF or NAN
   2533
   2534# The packed operand is a zero if the mantissa is all zero, else it's
   2535# a normal packed op.
   2536	mov.b		3+FP_SRC(%a6),%d0	# get byte 4
   2537	andi.b		&0x0f,%d0		# clear all but last nybble
   2538	bne.b		iea_op_gp_not_spec	# not a zero
   2539	tst.l		FP_SRC_HI(%a6)		# is lw 2 zero?
   2540	bne.b		iea_op_gp_not_spec	# not a zero
   2541	tst.l		FP_SRC_LO(%a6)		# is lw 3 zero?
   2542	beq.b		iea_op_setsrc		# operand is a ZERO
   2543iea_op_gp_not_spec:
   2544	lea		FP_SRC(%a6),%a0		# pass: ptr to packed op
   2545	bsr.l		decbin			# convert to extended
   2546	fmovm.x		&0x80,FP_SRC(%a6)	# make this the srcop
   2547
   2548iea_op_setsrc:
   2549	addi.l		&0xc,EXC_EXTWPTR(%a6)	# skip the 12-byte immediate operand
   2550
   2551# FP_SRC now holds the src operand.
   2552	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
   2553	bsr.l		set_tag_x		# tag the operand type
   2554	mov.b		%d0,STAG(%a6)		# could be ANYTHING!!!
   2555	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
   2556	bne.b		iea_op_getdst		# no
   2557	bsr.l		unnorm_fix		# yes; convert to NORM/DENORM/ZERO
   2558	mov.b		%d0,STAG(%a6)		# set new optype tag
   2559iea_op_getdst:
   2560	clr.b		STORE_FLG(%a6)		# clear "don't store result" boolean
   2561
   2562	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
   2563	beq.b		iea_op_extract		# monadic
   2564	btst		&0x4,1+EXC_CMDREG(%a6)	# is operation fsincos,ftst,fcmp?
   2565	bne.b		iea_op_spec		# yes
   2566
   2567iea_op_loaddst:
   2568	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno
   2569	bsr.l		load_fpn2		# load dst operand
   2570
   2571	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
   2572	bsr.l		set_tag_x		# tag the operand type
   2573	mov.b		%d0,DTAG(%a6)		# could be ANYTHING!!!
   2574	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
   2575	bne.b		iea_op_extract		# no
   2576	bsr.l		unnorm_fix		# yes; convert to NORM/DENORM/ZERO
   2577	mov.b		%d0,DTAG(%a6)		# set new optype tag
   2578	bra.b		iea_op_extract
   2579
   2580# the operation is fsincos, ftst, or fcmp. only fcmp is dyadic
   2581iea_op_spec:
   2582	btst		&0x3,1+EXC_CMDREG(%a6)	# is operation fsincos?
   2583	beq.b		iea_op_extract		# yes
   2584# now, we're left with ftst and fcmp. so, first let's tag them so that they don't
   2585# store a result. then, only fcmp will branch back and pick up a dst operand.
   2586	st		STORE_FLG(%a6)		# don't store a final result
   2587	btst		&0x1,1+EXC_CMDREG(%a6)	# is operation fcmp?
   2588	beq.b		iea_op_loaddst		# yes
   2589
   2590iea_op_extract:
   2591	clr.l		%d0
   2592	mov.b		FPCR_MODE(%a6),%d0	# pass: rnd mode,prec
   2593
   2594	mov.b		1+EXC_CMDREG(%a6),%d1	# fetch low byte of cmd word
   2595	andi.w		&0x007f,%d1		# extract extension
   2596
   2597	fmov.l		&0x0,%fpcr		# zero fpcr before emulation
   2598	fmov.l		&0x0,%fpsr		# zero fpsr before emulation
   2599
   2600	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
   2601	lea		FP_DST(%a6),%a1		# pass: ptr to dst op
   2602
   2603	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine offset
   2604	jsr		(tbl_unsupp.l,%pc,%d1.l*1) # call tbl_unsupp + offset
   2605
   2606#
   2607# Exceptions in order of precedence:
   2608#	BSUN	: none
   2609#	SNAN	: all operations
   2610#	OPERR	: all reg-reg or mem-reg operations that can normally operr
   2611#	OVFL	: same as OPERR
   2612#	UNFL	: same as OPERR
   2613#	DZ	: same as OPERR
   2614#	INEX2	: same as OPERR
   2615#	INEX1	: all packed immediate operations
   2616#
   2617
   2618# we determine the highest priority exception(if any) set by the
   2619# emulation routine that has also been enabled by the user.
   2620	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
   2621	bne.b		iea_op_ena		# some are enabled
   2622
   2623# now, we save the result, unless, of course, the operation was ftst or fcmp.
   2624# these don't save results.
   2625iea_op_save:
   2626	tst.b		STORE_FLG(%a6)		# is "don't store" flag set?
   2627	bne.b		iea_op_exit1		# yes; exit with no frestore
   2628
   2629iea_op_store:
   2630	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno
   2631	bsr.l		store_fpreg		# store the result
   2632
   2633iea_op_exit1:
   2634	mov.l		EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC"
   2635	mov.l		EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame
   2636
   2637	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
   2638	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   2639	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   2640
   2641	unlk		%a6			# unravel the frame
   2642
   2643	btst		&0x7,(%sp)		# is trace on?
   2644	bne.w		iea_op_trace		# yes
   2645
   2646	bra.l		_fpsp_done		# exit to os
   2647
   2648iea_op_ena:
   2649	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled and set
   2650	bfffo		%d0{&24:&8},%d0		# find highest priority exception
   2651	bne.b		iea_op_exc		# at least one was set
   2652
   2653# no exception occurred. now, did a disabled, exact overflow occur with inexact
   2654# enabled? if so, then we have to stuff an overflow frame into the FPU.
   2655	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
   2656	beq.b		iea_op_save		# no
   2657
   2658iea_op_ovfl:
   2659	btst		&inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
   2660	beq.b		iea_op_store		# no
   2661	bra.b		iea_op_exc_ovfl		# yes
   2662
   2663# an enabled exception occurred. we have to insert the exception type back into
   2664# the machine.
   2665iea_op_exc:
   2666	subi.l		&24,%d0			# fix offset to be 0-7
   2667	cmpi.b		%d0,&0x6		# is exception INEX?
   2668	bne.b		iea_op_exc_force	# no
   2669
   2670# the enabled exception was inexact. so, if it occurs with an overflow
   2671# or underflow that was disabled, then we have to force an overflow or
   2672# underflow frame.
   2673	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
   2674	bne.b		iea_op_exc_ovfl		# yes
   2675	btst		&unfl_bit,FPSR_EXCEPT(%a6) # did underflow occur?
   2676	bne.b		iea_op_exc_unfl		# yes
   2677
   2678iea_op_exc_force:
   2679	mov.w		(tbl_iea_except.b,%pc,%d0.w*2),2+FP_SRC(%a6) # word for frestore frame
   2680	bra.b		iea_op_exit2		# exit with frestore
   2681
   2682tbl_iea_except:
   2683	short		0xe002, 0xe006, 0xe004, 0xe005
   2684	short		0xe003, 0xe002, 0xe001, 0xe001
   2685
   2686iea_op_exc_ovfl:
   2687	mov.w		&0xe005,2+FP_SRC(%a6)	# same word as table entry 3
   2688	bra.b		iea_op_exit2		# exit with frestore
   2689
   2690iea_op_exc_unfl:
   2691	mov.w		&0xe003,2+FP_SRC(%a6)	# same word as table entry 4
   2692
   2693iea_op_exit2:
   2694	mov.l		EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC"
   2695	mov.l		EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame
   2696
   2697	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
   2698	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   2699	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   2700
   2701	frestore	FP_SRC(%a6)		# restore exceptional state
   2702
   2703	unlk		%a6			# unravel the frame
   2704
   2705	btst		&0x7,(%sp)		# is trace on?
   2706	bne.b		iea_op_trace		# yes
   2707
   2708	bra.l		_fpsp_done		# exit to os
   2709
   2710#
   2711# The opclass two instruction that took an "Unimplemented Effective Address"
   2712# exception was being traced. Make the "current" PC the FPIAR and put it in
   2713# the trace stack frame then jump to _real_trace().
   2714#
   2715#		 UNIMP EA FRAME		   TRACE FRAME
   2716#		*****************	*****************
   2717#		* 0x0 *  0x0f0	*	*    Current	*
   2718#		*****************	*      PC	*
   2719#		*    Current	*	*****************
   2720#		*      PC	*	* 0x2 *  0x024	*
   2721#		*****************	*****************
   2722#		*      SR	*	*     Next	*
   2723#		*****************	*      PC	*
   2724#					*****************
   2725#					*      SR	*
   2726#					*****************
   2727iea_op_trace:
   2728	mov.l		(%sp),-(%sp)		# copy SR,hi(Next PC) 4 bytes "down"
   2729	mov.w		0x8(%sp),0x4(%sp)	# copy lo(Next PC) 4 bytes "down"
   2730	mov.w		&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x024
   2731	fmov.l		%fpiar,0x8(%sp)		# "Current PC" is in FPIAR
   2732
   2733	bra.l		_real_trace		# exit to os trace handler
   2734
   2735#########################################################################
   2736iea_fmovm:
   2737	btst		&14,%d0			# ctrl or data reg
   2738	beq.w		iea_fmovm_ctrl		# ctrl (bit 14 clear)
   2739
   2740iea_fmovm_data:
   2741
   2742	btst		&0x5,EXC_SR(%a6)	# user or supervisor mode
   2743	bne.b		iea_fmovm_data_s	# supervisor
   2744
   2745iea_fmovm_data_u:
   2746	mov.l		%usp,%a0		# fetch user stack pointer
   2747	mov.l		%a0,EXC_A7(%a6)		# store current a7
   2748	bsr.l		fmovm_dynamic		# do dynamic fmovm
   2749	mov.l		EXC_A7(%a6),%a0		# load possibly new a7
   2750	mov.l		%a0,%usp		# update usp
   2751	bra.w		iea_fmovm_exit
   2752
   2753iea_fmovm_data_s:
   2754	clr.b		SPCOND_FLG(%a6)		# reset flag before the call
   2755	lea		0x2+EXC_VOFF(%a6),%a0	# addr 2 bytes past EXC_VOFF
   2756	mov.l		%a0,EXC_A7(%a6)		# store it as current a7
   2757	bsr.l		fmovm_dynamic		# do dynamic fmovm
   2758
   2759	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was mda7_flg set?
   2760	beq.w		iea_fmovm_data_predec	# yes
   2761	cmpi.b		SPCOND_FLG(%a6),&mia7_flg # was mia7_flg set?
   2762	bne.w		iea_fmovm_exit		# no; normal exit
   2763
   2764# right now, d0 = the size.
   2765# the data has been fetched from the supervisor stack, but we have not
   2766# incremented the stack pointer by the appropriate number of bytes.
   2767# do it here.
   2768iea_fmovm_data_postinc:
   2769	btst		&0x7,EXC_SR(%a6)	# is trace on?
   2770	bne.b		iea_fmovm_data_pi_trace	# yes
   2771
   2772	mov.w		EXC_SR(%a6),(EXC_SR,%a6,%d0) # SR, d0 bytes higher
   2773	mov.l		EXC_EXTWPTR(%a6),(EXC_PC,%a6,%d0) # Next PC, d0 bytes higher
   2774	mov.w		&0x00f0,(EXC_VOFF,%a6,%d0) # stk fmt = 0x0; voff = 0x0f0
   2775
   2776	lea		(EXC_SR,%a6,%d0),%a0	# addr of the shifted frame
   2777	mov.l		%a0,EXC_SR(%a6)		# leave it where sp points after unlk
   2778
   2779	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
   2780	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   2781	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   2782
   2783	unlk		%a6			# unravel the frame
   2784	mov.l		(%sp)+,%sp		# load addr of the shifted frame
   2785	bra.l		_fpsp_done		# exit to os
   2786
   2787iea_fmovm_data_pi_trace:
   2788	mov.w		EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0) # SR of trace frame
   2789	mov.l		EXC_EXTWPTR(%a6),(EXC_PC-0x4,%a6,%d0) # Next PC
   2790	mov.w		&0x2024,(EXC_VOFF-0x4,%a6,%d0) # stk fmt = 0x2; voff = 0x024
   2791	mov.l		EXC_PC(%a6),(EXC_VOFF+0x2-0x4,%a6,%d0) # Current PC
   2792
   2793	lea		(EXC_SR-0x4,%a6,%d0),%a0 # addr of the trace frame
   2794	mov.l		%a0,EXC_SR(%a6)		# leave it where sp points after unlk
   2795
   2796	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
   2797	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   2798	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   2799
   2800	unlk		%a6			# unravel the frame
   2801	mov.l		(%sp)+,%sp		# load addr of the trace frame
   2802	bra.l		_real_trace		# exit to os trace handler
   2803
   2804# right now, d1 = the strg and d0 = the size.
   2805iea_fmovm_data_predec:
   2806	mov.b		%d1,EXC_VOFF(%a6)	# store strg
   2807	mov.b		%d0,0x1+EXC_VOFF(%a6)	# store size
   2808
   2809	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
   2810	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   2811	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   2812
   2813	mov.l		(%a6),-(%sp)		# make a copy of a6
   2814	mov.l		%d0,-(%sp)		# save d0
   2815	mov.l		%d1,-(%sp)		# save d1
   2816	mov.l		EXC_EXTWPTR(%a6),-(%sp)	# make a copy of Next PC
   2817
   2818	clr.l		%d0
   2819	mov.b		0x1+EXC_VOFF(%a6),%d0	# fetch size
   2820	neg.l		%d0			# get negative of size
   2821
   2822	btst		&0x7,EXC_SR(%a6)	# is trace enabled?
   2823	beq.b		iea_fmovm_data_p2	# no
   2824
   2825	mov.w		EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0) # SR of trace frame
   2826	mov.l		EXC_PC(%a6),(EXC_VOFF-0x2,%a6,%d0) # Current PC
   2827	mov.l		(%sp)+,(EXC_PC-0x4,%a6,%d0) # pop Next PC copy into frame
   2828	mov.w		&0x2024,(EXC_VOFF-0x4,%a6,%d0) # stk fmt = 0x2; voff = 0x024
   2829
   2830	pea		(%a6,%d0)		# create final sp
   2831	bra.b		iea_fmovm_data_p3
   2832
   2833iea_fmovm_data_p2:
   2834	mov.w		EXC_SR(%a6),(EXC_SR,%a6,%d0) # SR, size bytes lower
   2835	mov.l		(%sp)+,(EXC_PC,%a6,%d0)	# pop Next PC copy into frame
   2836	mov.w		&0x00f0,(EXC_VOFF,%a6,%d0) # stk fmt = 0x0; voff = 0x0f0
   2837
   2838	pea		(0x4,%a6,%d0)		# create final sp
   2839
   2840iea_fmovm_data_p3:
   2841	clr.l		%d1
   2842	mov.b		EXC_VOFF(%a6),%d1	# fetch strg
   2843
   2844	tst.b		%d1			# is strg bit 7 set?
   2845	bpl.b		fm_1			# no
   2846	fmovm.x		&0x80,(0x4+0x8,%a6,%d0)	# yes; store fp0
   2847	addi.l		&0xc,%d0		# advance 12 bytes
   2848fm_1:
   2849	lsl.b		&0x1,%d1		# next strg bit into sign
   2850	bpl.b		fm_2			# clear; skip
   2851	fmovm.x		&0x40,(0x4+0x8,%a6,%d0)	# store fp1
   2852	addi.l		&0xc,%d0		# advance 12 bytes
   2853fm_2:
   2854	lsl.b		&0x1,%d1		# next strg bit into sign
   2855	bpl.b		fm_3			# clear; skip
   2856	fmovm.x		&0x20,(0x4+0x8,%a6,%d0)	# store fp2
   2857	addi.l		&0xc,%d0		# advance 12 bytes
   2858fm_3:
   2859	lsl.b		&0x1,%d1		# next strg bit into sign
   2860	bpl.b		fm_4			# clear; skip
   2861	fmovm.x		&0x10,(0x4+0x8,%a6,%d0)	# store fp3
   2862	addi.l		&0xc,%d0		# advance 12 bytes
   2863fm_4:
   2864	lsl.b		&0x1,%d1		# next strg bit into sign
   2865	bpl.b		fm_5			# clear; skip
   2866	fmovm.x		&0x08,(0x4+0x8,%a6,%d0)	# store fp4
   2867	addi.l		&0xc,%d0		# advance 12 bytes
   2868fm_5:
   2869	lsl.b		&0x1,%d1		# next strg bit into sign
   2870	bpl.b		fm_6			# clear; skip
   2871	fmovm.x		&0x04,(0x4+0x8,%a6,%d0)	# store fp5
   2872	addi.l		&0xc,%d0		# advance 12 bytes
   2873fm_6:
   2874	lsl.b		&0x1,%d1		# next strg bit into sign
   2875	bpl.b		fm_7			# clear; skip
   2876	fmovm.x		&0x02,(0x4+0x8,%a6,%d0)	# store fp6
   2877	addi.l		&0xc,%d0		# advance 12 bytes
   2878fm_7:
   2879	lsl.b		&0x1,%d1		# last strg bit into sign
   2880	bpl.b		fm_end			# clear; skip
   2881	fmovm.x		&0x01,(0x4+0x8,%a6,%d0)	# store fp7
   2882fm_end:
   2883	mov.l		0x4(%sp),%d1		# restore d1
   2884	mov.l		0x8(%sp),%d0		# restore d0
   2885	mov.l		0xc(%sp),%a6		# restore a6 from its copy
   2886	mov.l		(%sp)+,%sp		# load final sp
   2887
   2888	btst		&0x7,(%sp)		# is trace enabled?
   2889	beq.l		_fpsp_done		# no; exit to os
   2890	bra.l		_real_trace		# yes; exit to os trace handler
   2891
   2892#########################################################################
   2893iea_fmovm_ctrl:
   2894
   2895	bsr.l		fmovm_ctrl		# load ctrl regs
   2896
   2897iea_fmovm_exit:
   2898	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
   2899	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   2900	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   2901
   2902	btst		&0x7,EXC_SR(%a6)	# is trace on?
   2903	bne.b		iea_fmovm_trace		# yes
   2904
   2905	mov.l		EXC_EXTWPTR(%a6),EXC_PC(%a6) # set Next PC
   2906
   2907	unlk		%a6			# unravel the frame
   2908
   2909	bra.l		_fpsp_done		# exit to os
   2910
   2911#
   2912# The control reg instruction that took an "Unimplemented Effective Address"
   2913# exception was being traced. The "Current PC" for the trace frame is the
   2914# PC stacked for Unimp EA. The "Next PC" is in EXC_EXTWPTR.
   2915# After fixing the stack frame, jump to _real_trace().
   2916#
   2917#		 UNIMP EA FRAME		   TRACE FRAME
   2918#		*****************	*****************
   2919#		* 0x0 *  0x0f0	*	*    Current	*
   2920#		*****************	*      PC	*
   2921#		*    Current	*	*****************
   2922#		*      PC	*	* 0x2 *  0x024	*
   2923#		*****************	*****************
   2924#		*      SR	*	*     Next	*
   2925#		*****************	*      PC	*
   2926#					*****************
   2927#					*      SR	*
   2928#					*****************
   2929# this ain't a pretty solution, but it works:
   2930# -restore a6 (not with unlk)
   2931# -shift stack frame down over where old a6 used to be
   2932# -add LOCAL_SIZE to stack pointer
   2933iea_fmovm_trace:
   2934	mov.l		(%a6),%a6		# restore frame pointer
   2935	mov.w		EXC_SR+LOCAL_SIZE(%sp),0x0+LOCAL_SIZE(%sp) # move SR
   2936	mov.l		EXC_PC+LOCAL_SIZE(%sp),0x8+LOCAL_SIZE(%sp) # move Current PC
   2937	mov.l		EXC_EXTWPTR+LOCAL_SIZE(%sp),0x2+LOCAL_SIZE(%sp) # insert Next PC
   2938	mov.w		&0x2024,0x6+LOCAL_SIZE(%sp) # stk fmt = 0x2; voff = 0x024
   2939	add.l		&LOCAL_SIZE,%sp		# clear stack frame
   2940
   2941	bra.l		_real_trace		# exit to os trace handler
   2942
   2943#########################################################################
   2944# The FPU is disabled and so we should really have taken the "Line
   2945# F Emulator" exception. So, here we create an 8-word stack frame
   2946# from our 4-word stack frame. This means we must calculate the length
   2947# of the faulting instruction to get the "next PC". This is trivial for
   2948# immediate operands but requires some extra work for fmovm dynamic
   2949# which can use most addressing modes.
   2950iea_disabled:
   2951	mov.l		(%sp)+,%d0		# restore d0
   2952
   2953	link		%a6,&-LOCAL_SIZE	# init stack frame
   2954
   2955	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
   2956
   2957# PC of instruction that took the exception is the PC in the frame
   2958	mov.l		EXC_PC(%a6),EXC_EXTWPTR(%a6)
   2959	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   2960	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
   2961	bsr.l		_imem_read_long		# fetch the instruction words
   2962	mov.l		%d0,EXC_OPWORD(%a6)	# store OPWORD and EXTWORD
   2963
   2964	tst.w		%d0			# is instr fmovm?
   2965	bmi.b		iea_dis_fmovm		# yes
   2966# instruction is using an extended precision immediate operand. Therefore,
   2967# the total instruction length is 16 bytes.
   2968iea_dis_immed:
   2969	mov.l		&0x10,%d0		# 16 bytes of instruction
   2970	bra.b		iea_dis_cont
   2971iea_dis_fmovm:
   2972	btst		&0xe,%d0		# is instr fmovm ctrl
   2973	bne.b		iea_dis_fmovm_data	# no
   2974# the instruction is a fmovm.l with 2 or 3 registers.
   2975	bfextu		%d0{&19:&3},%d1		# extract ctrl reg list
   2976	mov.l		&0xc,%d0		# assume 12 bytes of instruction
   2977	cmpi.b		%d1,&0x7		# move all regs?
   2978	bne.b		iea_dis_cont		# no; length stays 12
   2979	addq.l		&0x4,%d0		# yes; 16 bytes of instruction
   2980	bra.b		iea_dis_cont
   2981# the instruction is an fmovm.x dynamic which can use many addressing
   2982# modes and thus can have several different total instruction lengths.
   2983# call fmovm_calc_ea which will go through the ea calc process and,
   2984# as a by-product, will tell us how long the instruction is.
   2985iea_dis_fmovm_data:
   2986	clr.l		%d0
   2987	bsr.l		fmovm_calc_ea
   2988	mov.l		EXC_EXTWPTR(%a6),%d0	# instruction ptr after ea calc
   2989	sub.l		EXC_PC(%a6),%d0		# length = ptr - Current PC
   2990iea_dis_cont:
   2991	mov.w		%d0,EXC_VOFF(%a6)	# store stack shift value
   2992
   2993	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   2994
   2995	unlk		%a6
   2996
   2997# here, we actually create the 8-word frame from the 4-word frame,
   2998# with the "next PC" as additional info.
   2999# the <ea> field is left as undefined.
   3000	subq.l		&0x8,%sp		# make room for new stack
   3001	mov.l		%d0,-(%sp)		# save d0
   3002	mov.w		0xc(%sp),0x4(%sp)	# move SR
   3003	mov.l		0xe(%sp),0x6(%sp)	# move Current PC
   3004	clr.l		%d0
   3005	mov.w		0x12(%sp),%d0		# fetch stored stack shift value
   3006	mov.l		0x6(%sp),0x10(%sp)	# move Current PC
   3007	add.l		%d0,0x6(%sp)		# make Next PC
   3008	mov.w		&0x402c,0xa(%sp)	# insert offset,frame format
   3009	mov.l		(%sp)+,%d0		# restore d0
   3010
   3011	bra.l		_real_fpu_disabled
   3012
   3013##########
   3014
   3015iea_iacc:
   3016	movc		%pcr,%d0		# load PCR
   3017	btst		&0x1,%d0		# is FPU disabled?
   3018	bne.b		iea_iacc_cont		# yes; skip FP state restore
   3019	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   3020	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
   3021iea_iacc_cont:
   3022	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   3023
   3024	unlk		%a6
   3025
   3026	subq.w		&0x8,%sp		# make stack frame bigger
   3027	mov.l		0x8(%sp),(%sp)		# store SR,hi(PC)
   3028	mov.w		0xc(%sp),0x4(%sp)	# store lo(PC)
   3029	mov.w		&0x4008,0x6(%sp)	# store voff
   3030	mov.l		0x2(%sp),0x8(%sp)	# store ea
   3031	mov.l		&0x09428001,0xc(%sp)	# store fslw
   3032
   3033iea_acc_done:
   3034	btst		&0x5,(%sp)		# user or supervisor mode?
   3035	beq.b		iea_acc_done2		# user
   3036	bset		&0x2,0xd(%sp)		# set supervisor TM bit
   3037
   3038iea_acc_done2:
   3039	bra.l		_real_access
   3040
   3041iea_dacc:
   3042	lea		-LOCAL_SIZE(%a6),%sp	# reset sp to bottom of local frame
   3043
   3044	movc		%pcr,%d1		# load PCR
   3045	btst		&0x1,%d1		# is FPU disabled?
   3046	bne.b		iea_dacc_cont		# yes; skip FP state restore
   3047	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
   3048	fmovm.l		LOCAL_SIZE+USER_FPCR(%sp),%fpcr,%fpsr,%fpiar # restore ctrl regs
   3049iea_dacc_cont:
   3050	mov.l		(%a6),%a6		# restore frame pointer
   3051
   3052	mov.l		0x4+LOCAL_SIZE(%sp),-0x8+0x4+LOCAL_SIZE(%sp) # store SR,hi(PC)
   3053	mov.w		0x8+LOCAL_SIZE(%sp),-0x8+0x8+LOCAL_SIZE(%sp) # store lo(PC)
   3054	mov.w		&0x4008,-0x8+0xa+LOCAL_SIZE(%sp) # store voff
   3055	mov.l		%a0,-0x8+0xc+LOCAL_SIZE(%sp) # store ea (from a0)
   3056	mov.w		%d0,-0x8+0x10+LOCAL_SIZE(%sp) # store hi(fslw) (from d0)
   3057	mov.w		&0x0001,-0x8+0x12+LOCAL_SIZE(%sp) # store lo(fslw)
   3058
   3059	movm.l		LOCAL_SIZE+EXC_DREGS(%sp),&0x0303 # restore d0-d1/a0-a1
   3060	add.w		&LOCAL_SIZE-0x4,%sp	# sp = start of new frame
   3061
   3062	bra.b		iea_acc_done
   3063
   3064#########################################################################
   3065# XDEF ****************************************************************	#
   3066#	_fpsp_operr(): 060FPSP entry point for FP Operr exception.	#
   3067#									#
   3068#	This handler should be the first code executed upon taking the	#
   3069#	FP Operand Error exception in an operating system.		#
   3070#									#
   3071# XREF ****************************************************************	#
   3072#	_imem_read_long() - read instruction longword			#
   3073#	fix_skewed_ops() - adjust src operand in fsave frame		#
   3074#	_real_operr() - "callout" to operating system operr handler	#
   3075#	_dmem_write_{byte,word,long}() - store data to mem (opclass 3)	#
   3076#	store_dreg_{b,w,l}() - store data to data regfile (opclass 3)	#
   3077#	facc_out_{b,w,l}() - store to memory took access error (opcl 3)	#
   3078#									#
   3079# INPUT ***************************************************************	#
   3080#	- The system stack contains the FP Operr exception frame	#
   3081#	- The fsave frame contains the source operand			#
   3082#									#
   3083# OUTPUT **************************************************************	#
   3084#	No access error:						#
   3085#	- The system stack is unchanged					#
   3086#	- The fsave frame contains the adjusted src op for opclass 0,2	#
   3087#									#
   3088# ALGORITHM ***********************************************************	#
   3089#	In a system where the FP Operr exception is enabled, the goal	#
   3090# is to get to the handler specified at _real_operr(). But, on the 060,	#
   3091# for opclass zero and two instruction taking this exception, the	#
   3092# input operand in the fsave frame may be incorrect for some cases	#
   3093# and needs to be corrected. This handler calls fix_skewed_ops() to	#
   3094# do just this and then exits through _real_operr().			#
   3095#	For opclass 3 instructions, the 060 doesn't store the default	#
   3096# operr result out to memory or data register file as it should.	#
   3097# This code must emulate the move out before finally exiting through	#
   3098# _real_operr(). The move out, if to memory, is performed using		#
   3099# _mem_write() "callout" routines that may return a failing result.	#
   3100# In this special case, the handler must exit through facc_out()	#
   3101# which creates an access error stack frame from the current operr	#
   3102# stack frame.								#
   3103#									#
   3104#########################################################################
   3105
   3106	global		_fpsp_operr
   3107_fpsp_operr:
   3108
   3109	link.w		%a6,&-LOCAL_SIZE	# init stack frame
   3110
   3111	fsave		FP_SRC(%a6)		# grab the "busy" frame
   3112
   3113	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
   3114	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
   3115	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
   3116
   3117# the FPIAR holds the "current PC" of the faulting instruction
   3118	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
   3119
   3120	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   3121	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
   3122	bsr.l		_imem_read_long		# fetch the instruction words
   3123	mov.l		%d0,EXC_OPWORD(%a6)	# store the fetched longword
   3124
   3125##############################################################################
   3126
   3127	btst		&13,%d0			# is instr an fmove out?
   3128	bne.b		foperr_out		# yes
   3129
   3130
   3131# here, we simply see if the operand in the fsave frame needs to be "unskewed".
   3132# this would be the case for opclass two operations with a source infinity or
   3133# denorm operand in the sgl or dbl format. NANs also become skewed, but can't
   3134# cause an operr so we don't need to check for them here.
   3135	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
   3136	bsr.l		fix_skewed_ops		# fix src op
   3137
   3138foperr_exit:
   3139	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
   3140	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   3141	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   3142
   3143	frestore	FP_SRC(%a6)		# reload the fsave frame
   3144
   3145	unlk		%a6			# unravel the frame
   3146	bra.l		_real_operr		# exit to os operr handler
   3147
   3148########################################################################
   3149
   3150#
   3151# the hardware does not save the default result to memory on enabled
   3152# operand error exceptions. we do this here before passing control to
   3153# the user operand error handler.
   3154#
   3155# byte, word, and long destination format operations can pass
   3156# through here. we simply need to test the sign of the src
   3157# operand and save the appropriate minimum or maximum integer value
   3158# to the effective address as pointed to by the stacked effective address.
   3159#
   3160# although packed opclass three operations can take operand error
   3161# exceptions, they won't pass through here since they are caught
   3162# first by the unsupported data format exception handler. that handler
   3163# sends them directly to _real_operr() if necessary.
   3164#
   3165foperr_out:
   3166
   3167	mov.w		FP_SRC_EX(%a6),%d1	# fetch exponent
   3168	andi.w		&0x7fff,%d1		# strip the sign bit
   3169	cmpi.w		%d1,&0x7fff		# is exponent all ones?
   3170	bne.b		foperr_out_not_qnan	# no
   3171# the operand is either an infinity or a QNAN.
   3172	tst.l		FP_SRC_LO(%a6)		# is lo mantissa zero?
   3173	bne.b		foperr_out_qnan		# no; it's a QNAN
   3174	mov.l		FP_SRC_HI(%a6),%d1	# fetch hi mantissa
   3175	andi.l		&0x7fffffff,%d1		# ignore its top bit
   3176	beq.b		foperr_out_not_qnan	# all zero; it's an infinity
   3177foperr_out_qnan:
   3178	mov.l		FP_SRC_HI(%a6),L_SCR1(%a6) # result = hi mantissa
   3179	bra.b		foperr_out_jmp
   3180
   3181foperr_out_not_qnan:
   3182	mov.l		&0x7fffffff,%d1		# assume src is positive
   3183	tst.b		FP_SRC_EX(%a6)		# is src negative?
   3184	bpl.b		foperr_out_not_qnan2	# no
   3185	addq.l		&0x1,%d1		# yes; make it 0x80000000
   3186foperr_out_not_qnan2:
   3187	mov.l		%d1,L_SCR1(%a6)		# save default result
   3188
   3189foperr_out_jmp:
   3190	bfextu		%d0{&19:&3},%d0		# extract dst format field
   3191	mov.b		1+EXC_OPWORD(%a6),%d1	# extract <ea> mode,reg
   3192	mov.w		(tbl_operr.b,%pc,%d0.w*2),%a0 # fetch handler offset
   3193	jmp		(tbl_operr.b,%pc,%a0)	# jump to tbl_operr + offset
   3194
   3195tbl_operr:
   3196	short		foperr_out_l - tbl_operr # long word integer
   3197	short		tbl_operr    - tbl_operr # sgl prec shouldn't happen
   3198	short		tbl_operr    - tbl_operr # ext prec shouldn't happen
   3199	short		foperr_exit  - tbl_operr # packed won't enter here
   3200	short		foperr_out_w - tbl_operr # word integer
   3201	short		tbl_operr    - tbl_operr # dbl prec shouldn't happen
   3202	short		foperr_out_b - tbl_operr # byte integer
   3203	short		tbl_operr    - tbl_operr # packed won't enter here
   3204
   3205foperr_out_b:
   3206	mov.b		L_SCR1(%a6),%d0		# load default result (first byte)
   3207	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
   3208	ble.b		foperr_out_b_save_dn	# yes
   3209	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
   3210	bsr.l		_dmem_write_byte	# write the default result
   3211
   3212	tst.l		%d1			# did dstore fail?
   3213	bne.l		facc_out_b		# yes
   3214
   3215	bra.w		foperr_exit
   3216foperr_out_b_save_dn:
   3217	andi.w		&0x0007,%d1		# keep only the reg number
   3218	bsr.l		store_dreg_b		# store result to regfile
   3219	bra.w		foperr_exit
   3220
   3221foperr_out_w:
   3222	mov.w		L_SCR1(%a6),%d0		# load default result (first word)
   3223	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
   3224	ble.b		foperr_out_w_save_dn	# yes
   3225	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
   3226	bsr.l		_dmem_write_word	# write the default result
   3227
   3228	tst.l		%d1			# did dstore fail?
   3229	bne.l		facc_out_w		# yes
   3230
   3231	bra.w		foperr_exit
   3232foperr_out_w_save_dn:
   3233	andi.w		&0x0007,%d1		# keep only the reg number
   3234	bsr.l		store_dreg_w		# store result to regfile
   3235	bra.w		foperr_exit
   3236
   3237foperr_out_l:
   3238	mov.l		L_SCR1(%a6),%d0		# load default result
   3239	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
   3240	ble.b		foperr_out_l_save_dn	# yes
   3241	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
   3242	bsr.l		_dmem_write_long	# write the default result
   3243
   3244	tst.l		%d1			# did dstore fail?
   3245	bne.l		facc_out_l		# yes
   3246
   3247	bra.w		foperr_exit
   3248foperr_out_l_save_dn:
   3249	andi.w		&0x0007,%d1		# keep only the reg number
   3250	bsr.l		store_dreg_l		# store result to regfile
   3251	bra.w		foperr_exit
   3252
   3253#########################################################################
   3254# XDEF ****************************************************************	#
   3255#	_fpsp_snan(): 060FPSP entry point for FP SNAN exception.	#
   3256#									#
   3257#	This handler should be the first code executed upon taking the	#
   3258#	FP Signalling NAN exception in an operating system.		#
   3259#									#
   3260# XREF ****************************************************************	#
   3261#	_imem_read_long() - read instruction longword			#
   3262#	fix_skewed_ops() - adjust src operand in fsave frame		#
   3263#	_real_snan() - "callout" to operating system SNAN handler	#
   3264#	_dmem_write_{byte,word,long}() - store data to mem (opclass 3)	#
   3265#	store_dreg_{b,w,l}() - store data to data regfile (opclass 3)	#
   3266#	facc_out_{b,w,l,d,x}() - store to mem took acc error (opcl 3)	#
   3267#	_calc_ea_fout() - fix An if <ea> is -() or ()+; also get <ea>	#
   3268#									#
   3269# INPUT ***************************************************************	#
   3270#	- The system stack contains the FP SNAN exception frame		#
   3271#	- The fsave frame contains the source operand			#
   3272#									#
   3273# OUTPUT **************************************************************	#
   3274#	No access error:						#
   3275#	- The system stack is unchanged					#
   3276#	- The fsave frame contains the adjusted src op for opclass 0,2	#
   3277#									#
   3278# ALGORITHM ***********************************************************	#
   3279#	In a system where the FP SNAN exception is enabled, the goal	#
   3280# is to get to the handler specified at _real_snan(). But, on the 060,	#
   3281# for opclass zero and two instructions taking this exception, the	#
   3282# input operand in the fsave frame may be incorrect for some cases	#
   3283# and needs to be corrected. This handler calls fix_skewed_ops() to	#
   3284# do just this and then exits through _real_snan().			#
   3285#	For opclass 3 instructions, the 060 doesn't store the default	#
   3286# SNAN result out to memory or data register file as it should.		#
   3287# This code must emulate the move out before finally exiting through	#
   3288# _real_snan(). The move out, if to memory, is performed using		#
   3289# _mem_write() "callout" routines that may return a failing result.	#
   3290# In this special case, the handler must exit through facc_out()	#
   3291# which creates an access error stack frame from the current SNAN	#
   3292# stack frame.								#
   3293#	For the case of an extended precision opclass 3 instruction,	#
   3294# if the effective addressing mode was -() or ()+, then the address	#
   3295# register must get updated by calling _calc_ea_fout(). If the <ea>	#
   3296# was -(a7) from supervisor mode, then the exception frame currently	#
   3297# on the system stack must be carefully moved "down" to make room	#
   3298# for the operand being moved.						#
   3299#									#
   3300#########################################################################
   3301
   3302	global		_fpsp_snan
   3303_fpsp_snan:
   3304# set up a local frame and save the FP state and scratch registers
   3305	link.w		%a6,&-LOCAL_SIZE	# init stack frame
   3306
   3307	fsave		FP_SRC(%a6)		# grab the "busy" frame
   3308
   3309	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
   3310	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
   3311	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
   3312
   3313# the FPIAR holds the "current PC" of the faulting instruction
   3314	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
   3315
   3316	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   3317	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
   3318	bsr.l		_imem_read_long		# fetch the instruction words
   3319	mov.l		%d0,EXC_OPWORD(%a6)	# keep fetched longword for decoding
   3320
   3321##############################################################################
   3322
   3323	btst		&13,%d0			# is instr an fmove out?
   3324	bne.w		fsnan_out		# fmove out
   3325
   3326
   3327# here, we simply see if the operand in the fsave frame needs to be "unskewed".
   3328# this would be the case for opclass two operations with a source infinity or
   3329# denorm operand in the sgl or dbl format. NANs also become skewed and must be
   3330# fixed here.
   3331	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
   3332	bsr.l		fix_skewed_ops		# fix src op
   3333# common exit: restore the saved state and continue in _real_snan()
   3334fsnan_exit:
   3335	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
   3336	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   3337	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   3338
   3339	frestore	FP_SRC(%a6)		# reload FP state from the frame
   3340
   3341	unlk		%a6			# release local frame
   3342	bra.l		_real_snan		# continue in the OS SNAN handler
   3343
   3344########################################################################
   3345
   3346#
   3347# the hardware does not save the default result to memory on enabled
   3348# snan exceptions. we do this here before passing control to
   3349# the user snan handler.
   3350#
   3351# the dst format field of the opword picks a handler through tbl_snan.
   3352# packed format operations were already handled by fpsp_unsupp(), so
   3353# nothing is left to do for them and their entries go to fsnan_exit.
   3354# for all other formats, the handler builds the default result from the
   3355# SNAN source operand in the fsave frame, sets the SNAN bit, and stores
   3356# it to a data register or to the destination effective address.
   3357#
   3358fsnan_out:
   3359
   3360	bfextu		%d0{&19:&3},%d0		# extract dst format field
   3361	mov.b		1+EXC_OPWORD(%a6),%d1	# extract <ea> mode,reg
   3362	mov.w		(tbl_snan.b,%pc,%d0.w*2),%a0 # fetch 16-bit handler offset
   3363	jmp		(tbl_snan.b,%pc,%a0)	# jump to tbl_snan + offset
   3364# each entry is a handler address relative to tbl_snan (one per dst format)
   3365tbl_snan:
   3366	short		fsnan_out_l - tbl_snan # long word integer
   3367	short		fsnan_out_s - tbl_snan # sgl prec
   3368	short		fsnan_out_x - tbl_snan # ext prec
   3369	short		fsnan_exit  - tbl_snan # packed needs no help; just exit
   3370	short		fsnan_out_w - tbl_snan # word integer
   3371	short		fsnan_out_d - tbl_snan # dbl prec
   3372	short		fsnan_out_b - tbl_snan # byte integer
   3373	short		fsnan_exit  - tbl_snan # packed needs no help; just exit
   3374# byte destination: store the upper mantissa byte with the SNAN bit set
   3375fsnan_out_b:
   3376	mov.b		FP_SRC_HI(%a6),%d0	# load upper byte of SNAN
   3377	bset		&6,%d0			# set SNAN bit
   3378	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
   3379	ble.b		fsnan_out_b_dn		# yes
   3380	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
   3381	bsr.l		_dmem_write_byte	# write the default result
   3382# d1 is tested below as the status left by the write callout
   3383	tst.l		%d1			# did dstore fail?
   3384	bne.l		facc_out_b		# yes
   3385
   3386	bra.w		fsnan_exit		# no; exit through _real_snan
   3387fsnan_out_b_dn:
   3388	andi.w		&0x0007,%d1		# keep only the register number
   3389	bsr.l		store_dreg_b		# store result to regfile
   3390	bra.w		fsnan_exit		# exit through _real_snan
   3391# word destination: store the upper mantissa word with the SNAN bit set
   3392fsnan_out_w:
   3393	mov.w		FP_SRC_HI(%a6),%d0	# load upper word of SNAN
   3394	bset		&14,%d0			# set SNAN bit
   3395	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
   3396	ble.b		fsnan_out_w_dn		# yes
   3397	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
   3398	bsr.l		_dmem_write_word	# write the default result
   3399# d1 is tested below as the status left by the write callout
   3400	tst.l		%d1			# did dstore fail?
   3401	bne.l		facc_out_w		# yes
   3402
   3403	bra.w		fsnan_exit		# no; exit through _real_snan
   3404fsnan_out_w_dn:
   3405	andi.w		&0x0007,%d1		# keep only the register number
   3406	bsr.l		store_dreg_w		# store result to regfile
   3407	bra.w		fsnan_exit		# exit through _real_snan
   3408# long destination: store the upper mantissa longword with the SNAN bit set
   3409fsnan_out_l:
   3410	mov.l		FP_SRC_HI(%a6),%d0	# load upper longword of SNAN
   3411	bset		&30,%d0			# set SNAN bit
   3412	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
   3413	ble.b		fsnan_out_l_dn		# yes
   3414	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
   3415	bsr.l		_dmem_write_long	# write the default result
   3416# d1 is tested below as the status left by the write callout
   3417	tst.l		%d1			# did dstore fail?
   3418	bne.l		facc_out_l		# yes
   3419
   3420	bra.w		fsnan_exit		# no; exit through _real_snan
   3421fsnan_out_l_dn:
   3422	andi.w		&0x0007,%d1		# keep only the register number
   3423	bsr.l		store_dreg_l		# store result to regfile
   3424	bra.w		fsnan_exit		# exit through _real_snan
   3425# single precision SNAN result, written to memory or to a data register
   3426fsnan_out_s:
   3427	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
   3428	ble.b		fsnan_out_d_dn		# yes
   3429	mov.l		FP_SRC_EX(%a6),%d0	# fetch SNAN sign
   3430	andi.l		&0x80000000,%d0		# keep sign
   3431	ori.l		&0x7fc00000,%d0		# insert new exponent,SNAN bit
   3432	mov.l		FP_SRC_HI(%a6),%d1	# load mantissa
   3433	lsr.l		&0x8,%d1		# shift mantissa for sgl
   3434	or.l		%d1,%d0			# create sgl SNAN
   3435	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
   3436	bsr.l		_dmem_write_long	# write the default result
   3437# d1 is tested below as the status left by the write callout
   3438	tst.l		%d1			# did dstore fail?
   3439	bne.l		facc_out_l		# yes
   3440# (fsnan_out_d_dn below is the single precision data register path)
   3441	bra.w		fsnan_exit		# no; exit through _real_snan
   3442fsnan_out_d_dn:
   3443	mov.l		FP_SRC_EX(%a6),%d0	# fetch SNAN sign
   3444	andi.l		&0x80000000,%d0		# keep sign
   3445	ori.l		&0x7fc00000,%d0		# insert new exponent,SNAN bit
   3446	mov.l		%d1,-(%sp)		# save <ea> mode,reg; d1 is reused
   3447	mov.l		FP_SRC_HI(%a6),%d1	# load mantissa
   3448	lsr.l		&0x8,%d1		# shift mantissa for sgl
   3449	or.l		%d1,%d0			# create sgl SNAN
   3450	mov.l		(%sp)+,%d1		# recover <ea> mode,reg
   3451	andi.w		&0x0007,%d1		# keep only the register number
   3452	bsr.l		store_dreg_l		# store result to regfile
   3453	bra.w		fsnan_exit		# exit through _real_snan
   3454# double precision SNAN result, assembled in FP_SCR0 and written to memory
   3455fsnan_out_d:
   3456	mov.l		FP_SRC_EX(%a6),%d0	# fetch SNAN sign
   3457	andi.l		&0x80000000,%d0		# keep sign
   3458	ori.l		&0x7ff80000,%d0		# insert new exponent,SNAN bit
   3459	mov.l		FP_SRC_HI(%a6),%d1	# load hi mantissa
   3460	mov.l		%d0,FP_SCR0_EX(%a6)	# store to temp space
   3461	mov.l		&11,%d0			# load shift amt
   3462	lsr.l		%d0,%d1			# hi mantissa >> 11
   3463	or.l		%d1,FP_SCR0_EX(%a6)	# create dbl hi
   3464	mov.l		FP_SRC_HI(%a6),%d1	# load hi mantissa
   3465	andi.l		&0x000007ff,%d1		# keep its low 11 bits
   3466	ror.l		%d0,%d1			# rotate them up to bits 31-21
   3467	mov.l		%d1,FP_SCR0_HI(%a6)	# store to temp space
   3468	mov.l		FP_SRC_LO(%a6),%d1	# load lo mantissa
   3469	lsr.l		%d0,%d1			# lo mantissa >> 11
   3470	or.l		%d1,FP_SCR0_HI(%a6)	# create dbl lo
   3471	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
   3472	mov.l		EXC_EA(%a6),%a1		# pass: dst addr
   3473	movq.l		&0x8,%d0		# pass: size of 8 bytes
   3474	bsr.l		_dmem_write		# write the default result
   3475# d1 is tested below as the status left by the write callout
   3476	tst.l		%d1			# did dstore fail?
   3477	bne.l		facc_out_d		# yes
   3478
   3479	bra.w		fsnan_exit		# no; exit through _real_snan
   3480# extended precision SNAN result, assembled in FP_SCR0 and written to memory.
   3481# for extended precision, if the addressing mode is pre-decrement or
   3482# post-increment, then the address register did not get updated.
   3483# in addition, for pre-decrement, the stacked <ea> is incorrect.
   3484fsnan_out_x:
   3485	clr.b		SPCOND_FLG(%a6)		# clear special case flag
   3486# build the default result in FP_SCR0 from the source operand
   3487	mov.w		FP_SRC_EX(%a6),FP_SCR0_EX(%a6)
   3488	clr.w		2+FP_SCR0(%a6)		# zero the word after the exponent
   3489	mov.l		FP_SRC_HI(%a6),%d0	# load hi mantissa
   3490	bset		&30,%d0			# set SNAN bit
   3491	mov.l		%d0,FP_SCR0_HI(%a6)	# store hi mantissa
   3492	mov.l		FP_SRC_LO(%a6),FP_SCR0_LO(%a6)
   3493
   3494	btst		&0x5,EXC_SR(%a6)	# supervisor mode exception?
   3495	bne.b		fsnan_out_x_s		# yes
   3496# user mode: expose the user a7 and the saved a6 to _calc_ea_fout()
   3497	mov.l		%usp,%a0		# fetch user stack pointer
   3498	mov.l		%a0,EXC_A7(%a6)		# save on stack for calc_ea()
   3499	mov.l		(%a6),EXC_A6(%a6)	# copy a6 saved by link to EXC_A6
   3500
   3501	bsr.l		_calc_ea_fout		# find the correct ea,update An
   3502	mov.l		%a0,%a1			# pass: dst addr
   3503	mov.l		%a0,EXC_EA(%a6)		# stack correct <ea>
   3504
   3505	mov.l		EXC_A7(%a6),%a0		# fetch a7 as left by calc_ea()
   3506	mov.l		%a0,%usp		# restore user stack pointer
   3507	mov.l		EXC_A6(%a6),(%a6)	# write EXC_A6 back to saved a6 slot
   3508
   3509fsnan_out_x_save:
   3510	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
   3511	movq.l		&0xc,%d0		# pass: size of extended
   3512	bsr.l		_dmem_write		# write the default result
   3513# d1 is tested below as the status left by the write callout
   3514	tst.l		%d1			# did dstore fail?
   3515	bne.l		facc_out_x		# yes
   3516
   3517	bra.w		fsnan_exit		# no; exit through _real_snan
   3518# supervisor mode variant of the extended precision move out
   3519fsnan_out_x_s:
   3520	mov.l		(%a6),EXC_A6(%a6)	# copy a6 saved by link to EXC_A6
   3521
   3522	bsr.l		_calc_ea_fout		# find the correct ea,update An
   3523	mov.l		%a0,%a1			# pass: dst addr
   3524	mov.l		%a0,EXC_EA(%a6)		# stack correct <ea>
   3525
   3526	mov.l		EXC_A6(%a6),(%a6)	# write EXC_A6 back to saved a6 slot
   3527
   3528	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
   3529	bne.b		fsnan_out_x_save	# no
   3530
   3531# the operation was "fmove.x SNAN,-(a7)" from supervisor mode.
   3532	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
   3533	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   3534	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   3535
   3536	frestore	FP_SRC(%a6)		# reload FP state from the frame
   3537
   3538	mov.l		EXC_A6(%a6),%a6		# restore frame pointer
   3539# a6 is gone; frame slots are now addressed as LOCAL_SIZE+offset from sp.
   3540	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
   3541	mov.l		LOCAL_SIZE+EXC_PC+0x2(%sp),LOCAL_SIZE+EXC_PC+0x2-0xc(%sp)
   3542	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
   3543# the three moves above copied the exception frame down by 0xc bytes
   3544	mov.l		LOCAL_SIZE+FP_SCR0_EX(%sp),LOCAL_SIZE+EXC_SR(%sp)
   3545	mov.l		LOCAL_SIZE+FP_SCR0_HI(%sp),LOCAL_SIZE+EXC_PC+0x2(%sp)
   3546	mov.l		LOCAL_SIZE+FP_SCR0_LO(%sp),LOCAL_SIZE+EXC_EA(%sp)
   3547# the result now occupies the old frame slots; sp is moved to old a6 slot - 8
   3548	add.l		&LOCAL_SIZE-0x8,%sp
   3549
   3550	bra.l		_real_snan		# continue in the OS SNAN handler
   3551
   3552#########################################################################
   3553# XDEF ****************************************************************	#
   3554#	_fpsp_inex(): 060FPSP entry point for FP Inexact exception.	#
   3555#									#
   3556#	This handler should be the first code executed upon taking the	#
   3557#	FP Inexact exception in an operating system.			#
   3558#									#
   3559# XREF ****************************************************************	#
   3560#	_imem_read_long() - read instruction longword			#
   3561#	fix_skewed_ops() - adjust src operand in fsave frame		#
   3562#	set_tag_x() - determine optype of src/dst operands		#
   3563#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
   3564#	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
   3565#	load_fpn2() - load dst operand from FP regfile			#
   3566#	smovcr() - emulate an "fmovcr" instruction			#
   3567#	fout() - emulate an opclass 3 instruction			#
   3568#	tbl_unsupp - addr of emulation routine table for opclass 0,2	#
   3569#	_real_inex() - "callout" to operating system inexact handler	#
   3570#									#
   3571# INPUT ***************************************************************	#
   3572#	- The system stack contains the FP Inexact exception frame	#
   3573#	- The fsave frame contains the source operand			#
   3574#									#
   3575# OUTPUT **************************************************************	#
   3576#	- The system stack is unchanged					#
   3577#	- The fsave frame contains the adjusted src op for opclass 0,2	#
   3578#									#
   3579# ALGORITHM ***********************************************************	#
   3580#	In a system where the FP Inexact exception is enabled, the goal	#
   3581# is to get to the handler specified at _real_inex(). But, on the 060,	#
   3582# for opclass zero and two instruction taking this exception, the	#
   3583# hardware doesn't store the correct result to the destination FP	#
   3584# register as did the '040 and '881/2. This handler must emulate the	#
   3585# instruction in order to get this value and then store it to the	#
   3586# correct register before calling _real_inex().				#
   3587#	For opclass 3 instructions, the 060 doesn't store the default	#
   3588# inexact result out to memory or data register file as it should.	#
   3589# This code must emulate the move out by calling fout() before finally	#
   3590# exiting through _real_inex().						#
   3591#									#
   3592#########################################################################
   3593
   3594	global		_fpsp_inex
   3595_fpsp_inex:
   3596# set up a local frame and save the FP state and scratch registers
   3597	link.w		%a6,&-LOCAL_SIZE	# init stack frame
   3598
   3599	fsave		FP_SRC(%a6)		# grab the "busy" frame
   3600
   3601	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
   3602	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
   3603	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
   3604
   3605# the FPIAR holds the "current PC" of the faulting instruction
   3606	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
   3607
   3608	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   3609	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
   3610	bsr.l		_imem_read_long		# fetch the instruction words
   3611	mov.l		%d0,EXC_OPWORD(%a6)	# keep fetched longword for decoding
   3612
   3613##############################################################################
   3614
   3615	btst		&13,%d0			# is instr an fmove out?
   3616	bne.w		finex_out		# fmove out
   3617
   3618
   3619# the hardware, for "fabs" and "fneg" w/ a long source format, puts the
   3620# longword integer directly into the upper longword of the mantissa along
   3621# w/ an exponent value of 0x401e. we convert this to extended precision here.
   3622	bfextu		%d0{&19:&3},%d0		# fetch instr size
   3623	bne.b		finex_cont		# instr size is not long
   3624	cmpi.w		FP_SRC_EX(%a6),&0x401e	# is exponent 0x401e?
   3625	bne.b		finex_cont		# no
   3626	fmov.l		&0x0,%fpcr		# zero the control register first
   3627	fmov.l		FP_SRC_HI(%a6),%fp0	# load integer src
   3628	fmov.x		%fp0,FP_SRC(%a6)	# store integer as extended precision
   3629	mov.w		&0xe001,0x2+FP_SRC(%a6)	# NOTE(review): meaning of 0xe001 not shown here
   3630
   3631finex_cont:
   3632	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
   3633	bsr.l		fix_skewed_ops		# fix src op
   3634
   3635# Here, we zero the ccode and exception byte field since we're going to
   3636# emulate the whole instruction. Notice, though, that we don't kill the
   3637# INEX1 bit. This is because a packed op has long since been converted
   3638# to extended before arriving here. Therefore, we need to retain the
   3639# INEX1 bit from when the operand was first converted.
   3640	andi.l		&0x00ff01ff,USER_FPSR(%a6) # clear ccode,exc bytes except INEX1
   3641
   3642	fmov.l		&0x0,%fpcr		# zero current control regs
   3643	fmov.l		&0x0,%fpsr
   3644
   3645	bfextu		EXC_EXTWORD(%a6){&0:&6},%d1 # extract upper 6 of cmdreg
   3646	cmpi.b		%d1,&0x17		# is op an fmovecr?
   3647	beq.w		finex_fmovcr		# yes
   3648
   3649	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
   3650	bsr.l		set_tag_x		# tag the operand type
   3651	mov.b		%d0,STAG(%a6)		# maybe NORM,DENORM
   3652
   3653# bits four and five of the fp extension word separate the monadic and dyadic
   3654# operations that can pass through fpsp_inex(). remember that fcmp and ftst
   3655# will never take this exception, but fsincos will.
   3656	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
   3657	beq.b		finex_extract		# monadic
   3658
   3659	btst		&0x4,1+EXC_CMDREG(%a6)	# is operation an fsincos?
   3660	bne.b		finex_extract		# yes
   3661
   3662	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
   3663	bsr.l		load_fpn2		# load dst into FP_DST
   3664
   3665	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
   3666	bsr.l		set_tag_x		# tag the operand type
   3667	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
   3668	bne.b		finex_op2_done		# no
   3669	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
   3670finex_op2_done:
   3671	mov.b		%d0,DTAG(%a6)		# save dst optype tag
   3672# set up the arguments and call the emulation routine through tbl_unsupp
   3673finex_extract:
   3674	clr.l		%d0
   3675	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode
   3676
   3677	mov.b		1+EXC_CMDREG(%a6),%d1
   3678	andi.w		&0x007f,%d1		# extract extension
   3679
   3680	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
   3681	lea		FP_DST(%a6),%a1		# pass: ptr to dst op
   3682
   3683	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
   3684	jsr		(tbl_unsupp.l,%pc,%d1.l*1) # call the emulation routine
   3685
   3686# the operation has been emulated. the result is in fp0.
   3687finex_save:
   3688	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # pass: dst fp reg number
   3689	bsr.l		store_fpreg		# store result to FP regfile
   3690# common exit: restore the saved state and continue in _real_inex()
   3691finex_exit:
   3692	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
   3693	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   3694	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   3695
   3696	frestore	FP_SRC(%a6)		# reload FP state from the frame
   3697
   3698	unlk		%a6			# release local frame
   3699	bra.l		_real_inex		# continue in the OS inexact handler
   3700# fmovecr: emulate through smovcr(), then store the result via finex_save
   3701finex_fmovcr:
   3702	clr.l		%d0
   3703	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec,mode
   3704	mov.b		1+EXC_CMDREG(%a6),%d1
   3705	andi.l		&0x0000007f,%d1		# pass rom offset
   3706	bsr.l		smovcr			# emulate the "fmovcr"
   3707	bra.b		finex_save		# store result and exit
   3708
   3709########################################################################
   3710
   3711#
   3712# the hardware does not save the default result to memory on enabled
   3713# inexact exceptions. we do this here before passing control to
   3714# the user inexact handler.
   3715#
   3716# byte, word, and long destination format operations can pass
   3717# through here. so can double and single precision.
   3718# although packed opclass three operations can take inexact
   3719# exceptions, they won't pass through here since they are caught
   3720# first by the unsupported data format exception handler. that handler
   3721# sends them directly to _real_inex() if necessary.
   3722#
   3723finex_out:
   3724
   3725	mov.b		&NORM,STAG(%a6)		# src is a NORM
   3726
   3727	clr.l		%d0
   3728	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec,mode
   3729
   3730	andi.l		&0xffff00ff,USER_FPSR(%a6) # zero exception field
   3731
   3732	lea		FP_SRC(%a6),%a0		# pass ptr to src operand
   3733
   3734	bsr.l		fout			# store the default result
   3735
   3736	bra.b		finex_exit		# exit through _real_inex
   3737
   3738#########################################################################
   3739# XDEF ****************************************************************	#
   3740#	_fpsp_dz(): 060FPSP entry point for FP DZ exception.		#
   3741#									#
   3742#	This handler should be the first code executed upon taking	#
   3743#	the FP DZ exception in an operating system.			#
   3744#									#
   3745# XREF ****************************************************************	#
   3746#	_imem_read_long() - read instruction longword from memory	#
   3747#	fix_skewed_ops() - adjust fsave operand				#
   3748#	_real_dz() - "callout" exit point from FP DZ handler		#
   3749#									#
   3750# INPUT ***************************************************************	#
   3751#	- The system stack contains the FP DZ exception stack.		#
   3752#	- The fsave frame contains the source operand.			#
   3753#									#
   3754# OUTPUT **************************************************************	#
   3755#	- The system stack contains the FP DZ exception stack.		#
   3756#	- The fsave frame contains the adjusted source operand.		#
   3757#									#
   3758# ALGORITHM ***********************************************************	#
   3759#	In a system where the DZ exception is enabled, the goal is to	#
   3760# get to the handler specified at _real_dz(). But, on the 060, when the	#
   3761# exception is taken, the input operand in the fsave state frame may	#
   3762# be incorrect for some cases and need to be adjusted. So, this package	#
   3763# adjusts the operand using fix_skewed_ops() and then branches to	#
   3764# _real_dz().								#
   3765#									#
   3766#########################################################################
   3767
   3768	global		_fpsp_dz
   3769_fpsp_dz:
   3770# set up a local frame and save the FP state and scratch registers
   3771	link.w		%a6,&-LOCAL_SIZE	# init stack frame
   3772
   3773	fsave		FP_SRC(%a6)		# grab the "busy" frame
   3774
   3775	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
   3776	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
   3777	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
   3778
   3779# the FPIAR holds the "current PC" of the faulting instruction
   3780	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
   3781
   3782	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   3783	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
   3784	bsr.l		_imem_read_long		# fetch the instruction words
   3785	mov.l		%d0,EXC_OPWORD(%a6)	# keep fetched longword
   3786
   3787##############################################################################
   3788
   3789
   3790# here, we simply see if the operand in the fsave frame needs to be "unskewed".
   3791# this would be the case for opclass two operations with a source zero
   3792# in the sgl or dbl format.
   3793	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
   3794	bsr.l		fix_skewed_ops		# fix src op
   3795# common exit: restore the saved state and continue in _real_dz()
   3796fdz_exit:
   3797	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
   3798	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   3799	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   3800
   3801	frestore	FP_SRC(%a6)		# reload FP state from the frame
   3802
   3803	unlk		%a6			# release local frame
   3804	bra.l		_real_dz		# continue in the OS DZ handler
   3805
   3806#########################################################################
   3807# XDEF ****************************************************************	#
   3808#	_fpsp_fline(): 060FPSP entry point for "Line F emulator" exc.	#
   3809#									#
   3810#	This handler should be the first code executed upon taking the	#
   3811#	"Line F Emulator" exception in an operating system.		#
   3812#									#
   3813# XREF ****************************************************************	#
   3814#	_fpsp_unimp() - handle "FP Unimplemented" exceptions		#
   3815#	_real_fpu_disabled() - handle "FPU disabled" exceptions		#
   3816#	_real_fline() - handle "FLINE" exceptions			#
   3817#	_imem_read_long() - read instruction longword			#
   3818#									#
   3819# INPUT ***************************************************************	#
   3820#	- The system stack contains a "Line F Emulator" exception	#
   3821#	  stack frame.							#
   3822#									#
   3823# OUTPUT **************************************************************	#
   3824#	- The system stack is unchanged					#
   3825#									#
   3826# ALGORITHM ***********************************************************	#
   3827#	When a "Line F Emulator" exception occurs, there are 3 possible	#
   3828# exception types, denoted by the exception stack frame format number:	#
   3829#	(1) FPU unimplemented instruction (6 word stack frame)		#
   3830#	(2) FPU disabled (8 word stack frame)				#
   3831#	(3) Line F (4 word stack frame)					#
   3832#									#
   3833#	This module determines which and forks the flow off to the	#
   3834# appropriate "callout" (for "disabled" and "Line F") or to the		#
   3835# correct emulation code (for "FPU unimplemented").			#
   3836#	This code also must check for "fmovecr" instructions w/ a	#
   3837# non-zero <ea> field. These may get flagged as "Line F" but should	#
   3838# really be flagged as "FPU Unimplemented". (This is a "feature" on	#
   3839# the '060.)								#
   3840#									#
   3841#########################################################################
   3842
   3843	global		_fpsp_fline
   3844_fpsp_fline:
   3845
   3846# check to see if this exception is a "FP Unimplemented Instruction"
   3847# exception. if so, branch directly to that handler's entry point.
   3848	cmpi.w		0x6(%sp),&0x202c	# word at frame offset 6 = 0x202c?
   3849	beq.l		_fpsp_unimp		# yes
   3850
   3851# check to see if the FPU is disabled. if so, jump to the OS entry
   3852# point for that condition.
   3853	cmpi.w		0x6(%sp),&0x402c	# word at frame offset 6 = 0x402c?
   3854	beq.l		_real_fpu_disabled	# yes
   3855
   3856# the exception was an "F-Line Illegal" exception. we check to see
   3857# if the F-Line instruction is an "fmovecr" w/ a non-zero <ea>. if
   3858# so, convert the F-Line exception stack frame to an FP Unimplemented
   3859# Instruction exception stack frame else branch to the OS entry
   3860# point for the F-Line exception handler.
   3861	link.w		%a6,&-LOCAL_SIZE	# init stack frame
   3862
   3863	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
   3864
   3865	mov.l		EXC_PC(%a6),EXC_EXTWPTR(%a6)
   3866	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   3867	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
   3868	bsr.l		_imem_read_long		# fetch instruction words
   3869# first test: upper 10 bits of the opword must be 0x3c8
   3870	bfextu		%d0{&0:&10},%d1		# is it an fmovecr?
   3871	cmpi.w		%d1,&0x03c8
   3872	bne.b		fline_fline		# no
   3873# second test: upper 6 bits of the extension word must be 0x17
   3874	bfextu		%d0{&16:&6},%d1		# is it an fmovecr?
   3875	cmpi.b		%d1,&0x17
   3876	bne.b		fline_fline		# no
   3877
   3878# it's an fmovecr w/ a non-zero <ea> that has entered through
   3879# the F-Line Illegal exception.
   3880# so, we need to convert the F-Line exception stack frame into an
   3881# FP Unimplemented Instruction stack frame and jump to that entry
   3882# point.
   3883#
   3884# but, if the FPU is disabled, then we need to jump to the FPU disabled
   3885# entry point.
   3886	movc		%pcr,%d0		# read the PCR
   3887	btst		&0x1,%d0		# is the FPU disabled?
   3888	beq.b		fline_fmovcr		# no
   3889
   3890	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   3891
   3892	unlk		%a6			# release local frame
   3893# rebuild the 4 word frame as an 8 word frame 8 bytes lower on the stack
   3894	sub.l		&0x8,%sp		# make room for "Next PC", <ea>
   3895	mov.w		0x8(%sp),(%sp)		# move first word of old frame down
   3896	mov.l		0xa(%sp),0x2(%sp)	# move "Current PC"
   3897	mov.w		&0x402c,0x6(%sp)	# word checked above for "disabled"
   3898	mov.l		0x2(%sp),0xc(%sp)	# copy "Current PC" to last longword
   3899	addq.l		&0x4,0x2(%sp)		# set "Next PC"
   3900
   3901	bra.l		_real_fpu_disabled	# continue in the OS handler
   3902# fmovecr w/ FPU enabled: turn the frame into an FP unimplemented frame
   3903fline_fmovcr:
   3904	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   3905
   3906	unlk		%a6			# release local frame
   3907
   3908	fmov.l		0x2(%sp),%fpiar		# set current PC
   3909	addq.l		&0x4,0x2(%sp)		# set Next PC
   3910# grow the frame by one longword, sliding its first 8 bytes down by 4
   3911	mov.l		(%sp),-(%sp)		# push copy of first longword
   3912	mov.l		0x8(%sp),0x4(%sp)	# move second longword down
   3913	mov.b		&0x20,0x6(%sp)		# high byte of word at offset 6 := 0x20
   3914
   3915	bra.l		_fpsp_unimp		# emulate as FP unimplemented
   3916# not an fmovecr: undo the local frame and pass the exception to the OS
   3917fline_fline:
   3918	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   3919
   3920	unlk		%a6			# release local frame
   3921
   3922	bra.l		_real_fline		# continue in the OS F-Line handler
   3923
   3924#########################################################################
   3925# XDEF ****************************************************************	#
   3926#	_fpsp_unimp(): 060FPSP entry point for FP "Unimplemented	#
   3927#		       Instruction" exception.				#
   3928#									#
   3929#	This handler should be the first code executed upon taking the	#
   3930#	FP Unimplemented Instruction exception in an operating system.	#
   3931#									#
   3932# XREF ****************************************************************	#
   3933#	_imem_read_{word,long}() - read instruction word/longword	#
   3934#	load_fop() - load src/dst ops from memory and/or FP regfile	#
   3935#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
   3936#	tbl_trans - addr of table of emulation routines for trnscndls	#
   3937#	_real_access() - "callout" for access error exception		#
   3938#	_fpsp_done() - "callout" for exit; work all done		#
   3939#	_real_trace() - "callout" for Trace enabled exception		#
   3940#	smovcr() - emulate "fmovecr" instruction			#
   3941#	funimp_skew() - adjust fsave src ops to "incorrect" value	#
   3942#	_ftrapcc() - emulate an "ftrapcc" instruction			#
   3943#	_fdbcc() - emulate an "fdbcc" instruction			#
   3944#	_fscc() - emulate an "fscc" instruction				#
   3945#	_real_trap() - "callout" for Trap exception			#
   3946#	_real_bsun() - "callout" for enabled Bsun exception		#
   3947#									#
   3948# INPUT ***************************************************************	#
   3949#	- The system stack contains the "Unimplemented Instr" stk frame	#
   3950#									#
   3951# OUTPUT **************************************************************	#
   3952#	If access error:						#
   3953#	- The system stack is changed to an access error stack frame	#
   3954#	If Trace exception enabled:					#
   3955#	- The system stack is changed to a Trace exception stack frame	#
   3956#	Else: (normal case)						#
   3957#	- Correct result has been stored as appropriate			#
   3958#									#
   3959# ALGORITHM ***********************************************************	#
   3960#	There are two main cases of instructions that may enter here to	#
   3961# be emulated: (1) the FPgen instructions, most of which were also	#
   3962# unimplemented on the 040, and (2) "ftrapcc", "fscc", and "fdbcc".	#
   3963#	For the first set, this handler calls the routine load_fop()	#
   3964# to load the source and destination (for dyadic) operands to be used	#
   3965# for instruction emulation. The correct emulation routine is then	#
   3966# chosen by decoding the instruction type and indexing into an		#
   3967# emulation subroutine index table. After emulation returns, this	#
   3968# handler checks to see if an exception should occur as a result of the #
   3969# FP instruction emulation. If so, then an FP exception of the correct	#
   3970# type is inserted into the FPU state frame using the "frestore"	#
   3971# instruction before exiting through _fpsp_done(). In either the	#
   3972# exceptional or non-exceptional cases, we must check to see if the	#
   3973# Trace exception is enabled. If so, then we must create a Trace	#
   3974# exception frame from the current exception frame and exit through	#
   3975# _real_trace().							#
   3976#	For "fdbcc", "ftrapcc", and "fscc", the emulation subroutines	#
   3977# _fdbcc(), _ftrapcc(), and _fscc() respectively are used. All three	#
   3978# may flag that a BSUN exception should be taken. If so, then the	#
   3979# current exception stack frame is converted into a BSUN exception	#
   3980# stack frame and an exit is made through _real_bsun(). If the		#
   3981# instruction was "ftrapcc" and a Trap exception should result, a Trap	#
   3982# exception stack frame is created from the current frame and an exit	#
   3983# is made through _real_trap(). If a Trace exception is pending, then	#
   3984# a Trace exception frame is created from the current frame and a jump	#
   3985# is made to _real_trace(). Finally, if none of these conditions exist,	#
   3986# then the handler exits through the callout _fpsp_done().		#
   3987#									#
   3988#	In any of the above scenarios, if a _mem_read() or _mem_write()	#
   3989# "callout" returns a failing value, then an access error stack frame	#
   3990# is created from the current stack frame and an exit is made through	#
   3991# _real_access().							#
   3992#									#
   3993#########################################################################
   3994
   3995#
   3996# FP UNIMPLEMENTED INSTRUCTION STACK FRAME:
   3997#
   3998#	*****************
   3999#	*		* => <ea> of fp unimp instr.
   4000#	-      EA	-
   4001#	*		*
   4002#	*****************
   4003#	* 0x2 *  0x02c	* => frame format and vector offset(vector #11)
   4004#	*****************
   4005#	*		*
   4006#	-    Next PC	- => PC of instr to execute after exc handling
   4007#	*		*
   4008#	*****************
   4009#	*      SR	* => SR at the time the exception was taken
   4010#	*****************
   4011#
   4012# Note: the !NULL bit does not get set in the fsave frame when the
   4013# machine encounters an fp unimp exception. Therefore, it must be set
   4014# before leaving this handler.
   4015#
   4016	global		_fpsp_unimp
   4017_fpsp_unimp:
   4018
       # Entry sequence: build a local frame on a6, save d0-d1/a0-a1, the FP
       # control registers and fp0-fp1, record the a7 value in effect before
       # the exception, then fetch the faulting instruction through the
       # address that was saved from FPIAR.
   4019	link.w		%a6,&-LOCAL_SIZE	# init stack frame
   4020
   4021	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
   4022	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
   4023	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1
   4024
       # btst on a memory operand is byte sized: bit 5 of the byte at EXC_SR
       # is tested here to tell supervisor mode from user mode.
   4025	btst		&0x5,EXC_SR(%a6)	# user mode exception?
   4026	bne.b		funimp_s		# no; supervisor mode
   4027
   4028# save the value of the user stack pointer onto the stack frame
   4029funimp_u:
   4030	mov.l		%usp,%a0		# fetch user stack pointer
   4031	mov.l		%a0,EXC_A7(%a6)		# store in stack frame
   4032	bra.b		funimp_cont
   4033
   4034# store the value of the supervisor stack pointer BEFORE the exc occurred.
   4035# old_sp is address just above stacked effective address.
   4036funimp_s:
   4037	lea		4+EXC_EA(%a6),%a0	# load old a7'
   4038	mov.l		%a0,EXC_A7(%a6)		# store a7'
   4039	mov.l		%a0,OLD_A7(%a6)		# make a copy (compared against EXC_A7 in funimp_gen_exit_a7_s)
   4040
   4041funimp_cont:
   4042
   4043# the FPIAR holds the "current PC" of the faulting instruction.
   4044	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6) # instruction ptr = saved FPIAR
   4045
   4046	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   4047	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
   4048	bsr.l		_imem_read_long		# fetch the instruction words
   4049	mov.l		%d0,EXC_OPWORD(%a6)	# keep the fetched longword in the frame
   4050
   4051############################################################################
   4052
   4053	fmov.l		&0x0,%fpcr		# clear FPCR
   4054	fmov.l		&0x0,%fpsr		# clear FPSR
   4055
   4056	clr.b		SPCOND_FLG(%a6)		# clear "special case" flag
   4057
   4058# Divide the fp instructions into 8 types based on the TYPE field in
   4059# bits 6-8 of the opword(classes 6,7 are undefined).
   4060# (for the '060, only two types  can take this exception)
       # d0 still holds the fetched longword; bit 22 of it is bit 6 of its upper
       # word, so a single btst is enough to separate type 0 from type 1.
   4061#	bftst		%d0{&7:&3}		# test TYPE
   4062	btst		&22,%d0			# type 0 or 1 ?
   4063	bne.w		funimp_misc		# type 1
   4064
   4065#########################################
   4066# TYPE == 0: General instructions	#
   4067#########################################
       # General (FPgen) path: fmovecr is split off to funimp_fmovcr; everything
       # else loads its operands with _load_fop, is dispatched through
       # tbl_trans, and then joins funimp_fsave for the exception check.
   4068funimp_gen:
   4069
   4070	clr.b		STORE_FLG(%a6)		# clear "store result" flag
   4071
   4072# clear the ccode byte and exception status byte
   4073	andi.l		&0x00ff00ff,USER_FPSR(%a6)
   4074
   4075	bfextu		%d0{&16:&6},%d1		# extract upper 6 of cmdreg
   4076	cmpi.b		%d1,&0x17		# is op an fmovecr?
   4077	beq.w		funimp_fmovcr		# yes
   4078
   4079funimp_gen_op:
   4080	bsr.l		_load_fop		# load src/dst operands
   4081
   4082	clr.l		%d0
   4083	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode
   4084
       # table index = (6-bit extension field << 3) | source operand tag;
       # tbl_trans holds 8 entries per extension value.
   4085	mov.b		1+EXC_CMDREG(%a6),%d1
   4086	andi.w		&0x003f,%d1		# extract extension bits
   4087	lsl.w		&0x3,%d1		# shift left 3 bits
   4088	or.b		STAG(%a6),%d1		# insert src optag bits
   4089
   4090	lea		FP_DST(%a6),%a1		# pass dst ptr in a1
   4091	lea		FP_SRC(%a6),%a0		# pass src ptr in a0
   4092
       # each tbl_trans entry is a 16-bit offset relative to tbl_trans itself:
       # load the offset, then call tbl_trans + offset.
   4093	mov.w		(tbl_trans.w,%pc,%d1.w*2),%d1
   4094	jsr		(tbl_trans.w,%pc,%d1.w*1) # emulate
   4095
   4096funimp_fsave:
   4097	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
   4098	bne.w		funimp_ena		# some are enabled
   4099
   4100funimp_store:
   4101	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # fetch 3-bit dst register field
   4102	bsr.l		store_fpreg		# store result to fp regfile
   4103
   4104funimp_gen_exit:
   4105	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
   4106	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   4107	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   4108
       # funimp_gen_exit2 also enters here after its frestore.
   4109funimp_gen_exit_cmp:
   4110	cmpi.b		SPCOND_FLG(%a6),&mia7_flg # was the ea mode (sp)+ ?
   4111	beq.b		funimp_gen_exit_a7	# yes
   4112
   4113	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was the ea mode -(sp) ?
   4114	beq.b		funimp_gen_exit_a7	# yes
   4115
   4116funimp_gen_exit_cont:
   4117	unlk		%a6
   4118
       # from here on (%sp) is the stacked SR of the exception frame.
   4119funimp_gen_exit_cont2:
   4120	btst		&0x7,(%sp)		# is trace on?
   4121	beq.l		_fpsp_done		# no
   4122
   4123# this catches a problem with the case where an exception will be re-inserted
   4124# into the machine. the frestore has already been executed...so, the fmov.l
   4125# alone of the control register would trigger an unwanted exception.
   4126# until I feel like fixing this, we'll sidestep the exception.
       # the FPU state is parked on the stack while FPIAR is copied into the
       # exception frame, then restored. NOTE(review): 0x14 is the 0x8 address
       # field offset used at funimp_trace plus 0xc, presumably the size of the
       # fsave frame just pushed -- confirm against the '060 manual.
   4127	fsave		-(%sp)
   4128	fmov.l		%fpiar,0x14(%sp)	# "Current PC" is in FPIAR
   4129	frestore	(%sp)+
   4130	mov.w		&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x24
   4131	bra.l		_real_trace
   4132
       # reached from funimp_gen_exit_cmp when SPCOND_FLG is mia7_flg or
       # mda7_flg, i.e. the emulated instruction changed a7. the scratch
       # registers have already been restored, so anything used here is saved
       # and restored around its use.
   4133funimp_gen_exit_a7:
   4134	btst		&0x5,EXC_SR(%a6)	# supervisor or user mode?
   4135	bne.b		funimp_gen_exit_a7_s	# supervisor
   4136
       # user mode: write the updated a7 value back to the user stack pointer.
   4137	mov.l		%a0,-(%sp)		# save a0
   4138	mov.l		EXC_A7(%a6),%a0		# load new a7
   4139	mov.l		%a0,%usp		# update user stack pointer
   4140	mov.l		(%sp)+,%a0		# restore a0
   4141	bra.b		funimp_gen_exit_cont
   4142
   4143# if the instruction was executed from supervisor mode and the addressing
   4144# mode was (a7)+, then the stack frame for the rte must be shifted "up"
   4145# "n" bytes where "n" is the size of the src operand type.
   4146# f<op>.{b,w,l,s,d,x,p}
       # d0 = new a7' - old a7' is the byte distance the frame is moved by. its
       # low word is left in the original SR slot so that, after unlk, it can
       # be read from (%sp) and added to the stack pointer.
   4147funimp_gen_exit_a7_s:
   4148	mov.l		%d0,-(%sp)		# save d0
   4149	mov.l		EXC_A7(%a6),%d0		# load new a7'
   4150	sub.l		OLD_A7(%a6),%d0		# subtract old a7'
   4151	mov.l		0x2+EXC_PC(%a6),(0x2+EXC_PC,%a6,%d0) # shift stack frame
   4152	mov.l		EXC_SR(%a6),(EXC_SR,%a6,%d0) # shift stack frame
   4153	mov.w		%d0,EXC_SR(%a6)		# store incr number
   4154	mov.l		(%sp)+,%d0		# restore d0
   4155
   4156	unlk		%a6
   4157
   4158	add.w		(%sp),%sp		# stack frame shifted
   4159	bra.b		funimp_gen_exit_cont2
   4160
   4161######################
   4162# fmovecr.x #ccc,fpn #
   4163######################
       # fmovecr needs no operand load: pass the rounding mode in d0 and the
       # low 7 bits of the command word in d1 to smovcr, then rejoin the
       # general path at funimp_fsave.
   4164funimp_fmovcr:
   4165	clr.l		%d0
   4166	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode
   4167	mov.b		1+EXC_CMDREG(%a6),%d1	# low byte of the command word
   4168	andi.l		&0x0000007f,%d1		# pass rom offset in d1
   4169	bsr.l		smovcr			# emulate the fmovecr
   4170	bra.w		funimp_fsave		# continue with the exception check
   4171
   4172#########################################################################
   4173
   4174#
   4175# the user has enabled some exceptions. we figure not to see this too
   4176# often so that's why it gets lower priority.
   4177#
       # entered from funimp_fsave with the FPCR exception enable byte in d0.
   4178funimp_ena:
   4179
   4180# was an exception set that was also enabled?
   4181	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled and set
   4182	bfffo		%d0{&24:&8},%d0		# find highest priority exception
   4183	bne.b		funimp_exc		# at least one was set
   4184
   4185# no exception that was enabled was set BUT if we got an exact overflow
   4186# and overflow wasn't enabled but inexact was (yech!) then this is
   4187# an inexact exception; otherwise, return to normal non-exception flow.
   4188	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
   4189	beq.w		funimp_store		# no; return to normal flow
   4190
   4191# the overflow w/ exact result happened but was inexact set in the FPCR?
   4192funimp_ovfl:
   4193	btst		&inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
   4194	beq.w		funimp_store		# no; return to normal flow
   4195	bra.b		funimp_exc_ovfl		# yes
   4196
   4197# some exception happened that was actually enabled.
   4198# we'll insert this new exception into the FPU and then return.
       # d0 holds the bit-field offset found by bfffo over bits 24-31, so
       # subtracting 24 yields an index into the 8-entry tbl_funimp_except.
   4199funimp_exc:
   4200	subi.l		&24,%d0			# fix offset to be 0-7
   4201	cmpi.b		%d0,&0x6		# is exception INEX?
   4202	bne.b		funimp_exc_force	# no
   4203
   4204# the enabled exception was inexact. so, if it occurs with an overflow
   4205# or underflow that was disabled, then we have to force an overflow or
   4206# underflow frame. the eventual overflow or underflow handler will see that
   4207# it's actually an inexact and act appropriately. this is the only easy
   4208# way to have the EXOP available for the enabled inexact handler when
   4209# a disabled overflow or underflow has also happened.
   4210	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
   4211	bne.b		funimp_exc_ovfl		# yes
   4212	btst		&unfl_bit,FPSR_EXCEPT(%a6) # did underflow occur?
   4213	bne.b		funimp_exc_unfl		# yes
   4214
   4215# force the fsave exception status bits to signal an exception of the
   4216# appropriate type. don't forget to "skew" the source operand in case we
   4217# "unskewed" the one the hardware initially gave us.
       # the table word is written at offset 2 of the FP_SRC area, which is the
       # frame that funimp_gen_exit2 hands to frestore.
   4218funimp_exc_force:
   4219	mov.l		%d0,-(%sp)		# save d0
   4220	bsr.l		funimp_skew		# check for special case
   4221	mov.l		(%sp)+,%d0		# restore d0
   4222	mov.w		(tbl_funimp_except.b,%pc,%d0.w*2),2+FP_SRC(%a6)
   4223	bra.b		funimp_gen_exit2	# exit with frestore
   4224
       # one status word per index 0-7 computed at funimp_exc. entries 3 and 4
       # are the same words written by funimp_exc_ovfl and funimp_exc_unfl.
   4225tbl_funimp_except:
   4226	short		0xe002, 0xe006, 0xe004, 0xe005
   4227	short		0xe003, 0xe002, 0xe001, 0xe001
   4228
   4229# insert an overflow frame
   4230funimp_exc_ovfl:
   4231	bsr.l		funimp_skew		# check for special case
   4232	mov.w		&0xe005,2+FP_SRC(%a6)
   4233	bra.b		funimp_gen_exit2
   4234
   4235# insert an underflow frame
   4236funimp_exc_unfl:
   4237	bsr.l		funimp_skew		# check for special case
   4238	mov.w		&0xe003,2+FP_SRC(%a6)
   4239
   4240# this is the general exit point for an enabled exception that will be
   4241# restored into the machine for the instruction just emulated.
       # note that store_fpreg is not called on this path; after the frestore
       # the common a7/trace handling at funimp_gen_exit_cmp is reused.
   4242funimp_gen_exit2:
   4243	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
   4244	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   4245	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   4246
   4247	frestore	FP_SRC(%a6)		# insert exceptional status
   4248
   4249	bra.w		funimp_gen_exit_cmp
   4250
   4251############################################################################
   4252
   4253#
   4254# TYPE == 1: FDB<cc>, FS<cc>, FTRAP<cc>
   4255#
   4256# These instructions were implemented on the '881/2 and '040 in hardware but
   4257# are emulated in software on the '060.
   4258#
       # decode on the opword's <ea> fields, which sit in the upper word of d0:
       # bit-field offset 10, width 3 is the mode field; offset 13, width 3 is
       # the register field. mode 1 is fdb<cc>; any mode other than 7 is
       # fs<cc>; for mode 7 a register field below 2 is still fs<cc> and
       # everything else falls through to funimp_ftrapcc.
   4259funimp_misc:
   4260	bfextu		%d0{&10:&3},%d1		# extract mode field
   4261	cmpi.b		%d1,&0x1		# is it an fdb<cc>?
   4262	beq.w		funimp_fdbcc		# yes
   4263	cmpi.b		%d1,&0x7		# is the mode field 7?
   4264	bne.w		funimp_fscc		# no; it is an fs<cc>
   4265	bfextu		%d0{&13:&3},%d1		# extract register field
   4266	cmpi.b		%d1,&0x2		# is it an fs<cc>?
   4267	blt.w		funimp_fscc		# yes
   4268
   4269#########################
   4270# ftrap<cc>		#
   4271# ftrap<cc>.w #<data>	#
   4272# ftrap<cc>.l #<data>	#
   4273#########################
       # _ftrapcc reports its outcome through SPCOND_FLG: fbsun_flg requests an
       # enabled bsun, ftrapcc_flg requests the trap, and any other value means
       # there is nothing more to do.
   4274funimp_ftrapcc:
   4275
   4276	bsr.l		_ftrapcc		# FTRAP<cc>()
   4277
   4278	cmpi.b		SPCOND_FLG(%a6),&fbsun_flg # is enabled bsun occurring?
   4279	beq.w		funimp_bsun		# yes
   4280
   4281	cmpi.b		SPCOND_FLG(%a6),&ftrapcc_flg # should a trap occur?
   4282	bne.w		funimp_done		# no
   4283
   4284#	 FP UNIMP FRAME		   TRAP  FRAME
   4285#	*****************	*****************
   4286#	**    <EA>     **	**  Current PC **
   4287#	*****************	*****************
   4288#	* 0x2 *  0x02c	*	* 0x2 *  0x01c  *
   4289#	*****************	*****************
   4290#	**   Next PC   **	**   Next PC   **
   4291#	*****************	*****************
   4292#	*      SR	*	*      SR	*
   4293#	*****************	*****************
   4294#	    (6 words)		    (6 words)
   4295#
   4296# the ftrapcc instruction should take a trap. so, here we must create a
   4297# trap stack frame from an unimplemented fp instruction stack frame and
   4298# jump to the user supplied entry point for the trap exception
       # both frames are 6 words, so only the address field and the
       # format/vector word are rewritten in place.
   4299funimp_ftrapcc_tp:
   4300	mov.l		USER_FPIAR(%a6),EXC_EA(%a6) # Address = Current PC
   4301	mov.w		&0x201c,EXC_VOFF(%a6)	# Vector Offset = 0x01c
   4302
   4303	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
   4304	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   4305	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   4306
   4307	unlk		%a6
   4308	bra.l		_real_trap
   4309
   4310#########################
   4311# fdb<cc> Dn,<label>	#
   4312#########################
       # fdb<cc> carries one more instruction word, the branch displacement. it
       # is read through EXC_EXTWPTR, sign extended to a longword in d0 and
       # handed to _fdbcc. a non-zero d1 after the read is treated as a failed
       # instruction fetch.
   4313funimp_fdbcc:
   4314
   4315	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
   4316	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
   4317	bsr.l		_imem_read_word		# read displacement
   4318
   4319	tst.l		%d1			# did ifetch fail?
   4320	bne.w		funimp_iacc		# yes
   4321
   4322	ext.l		%d0			# sign extend displacement
   4323
   4324	bsr.l		_fdbcc			# FDB<cc>()
   4325
   4326	cmpi.b		SPCOND_FLG(%a6),&fbsun_flg # is enabled bsun occurring?
   4327	beq.w		funimp_bsun		# yes
   4328
   4329	bra.w		funimp_done		# branch to finish
   4330
   4331#################
   4332# fs<cc>.b <ea>	#
   4333#################
       # after _fscc: an enabled bsun goes to funimp_bsun. otherwise a user
       # mode exception reloads the USP from EXC_A7, and a supervisor mode
       # exception needs extra work only when SPCOND_FLG is mda7_flg.
   4334funimp_fscc:
   4335
   4336	bsr.l		_fscc			# FS<cc>()
   4337
   4338# I am assuming here that an "fs<cc>.b -(An)" or "fs<cc>.b (An)+" instruction
   4339# does not need to update "An" before taking a bsun exception.
   4340	cmpi.b		SPCOND_FLG(%a6),&fbsun_flg # is enabled bsun occurring?
   4341	beq.w		funimp_bsun		# yes
   4342
   4343	btst		&0x5,EXC_SR(%a6)	# no bsun; is it a user mode exception?
   4344	bne.b		funimp_fscc_s		# no
   4345
   4346funimp_fscc_u:
   4347	mov.l		EXC_A7(%a6),%a0		# yes; set new USP
   4348	mov.l		%a0,%usp
   4349	bra.w		funimp_done		# branch to finish
   4350
   4351# remember, I'm assuming that post-increment is bogus...(it IS!!!)
   4352# so, the least significant WORD of the stacked effective address got
   4353# overwritten by the "fs<cc> -(An)". We must shift the stack frame "down"
   4354# so that the rte will work correctly without destroying the result.
   4355# even though the operation size is byte, the stack ptr is decr by 2.
   4356#
   4357# remember, also, this instruction may be traced.
   4358funimp_fscc_s:
   4359	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was a7 modified?
   4360	bne.w		funimp_done		# no
   4361
   4362	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
   4363	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   4364	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   4365
   4366	unlk		%a6
   4367
   4368	btst		&0x7,(%sp)		# is trace enabled?
   4369	bne.b		funimp_fscc_s_trace	# yes
   4370
       # no trace: move SR, PC and the format/vector word down by 2 bytes.
   4371	subq.l		&0x2,%sp
   4372	mov.l		0x2(%sp),(%sp)		# shift SR,hi(PC) "down"
   4373	mov.l		0x6(%sp),0x4(%sp)	# shift lo(PC),voff "down"
   4374	bra.l		_fpsp_done
   4375
       # trace: move SR and PC down by 2 bytes, then write a fresh trace
       # format/vector word and the current PC (from FPIAR) after them.
   4376funimp_fscc_s_trace:
   4377	subq.l		&0x2,%sp
   4378	mov.l		0x2(%sp),(%sp)		# shift SR,hi(PC) "down"
   4379	mov.w		0x6(%sp),0x4(%sp)	# shift lo(PC)
   4380	mov.w		&0x2024,0x6(%sp)	# fmt/voff = $2024
   4381	fmov.l		%fpiar,0x8(%sp)		# insert "current PC"
   4382
   4383	bra.l		_real_trace
   4384
   4385#
   4386# The ftrap<cc>, fs<cc>, or fdb<cc> is to take an enabled bsun. we must convert
   4387# the fp unimplemented instruction exception stack frame into a bsun stack frame,
   4388# restore a bsun exception into the machine, and branch to the user
   4389# supplied bsun hook.
   4390#
   4391#	 FP UNIMP FRAME		   BSUN FRAME
   4392#	*****************	*****************
   4393#	**    <EA>     **	* 0x0 * 0x0c0	*
   4394#	*****************	*****************
   4395#	* 0x2 *  0x02c  *	** Current PC  **
   4396#	*****************	*****************
   4397#	**   Next PC   **	*      SR	*
   4398#	*****************	*****************
   4399#	*      SR	*	    (4 words)
   4400#	*****************
   4401#	    (6 words)
   4402#
       # the 4-word bsun frame is built in the upper part of the 6-word unimp
       # frame, so once the local frame is unlinked the 4 bytes left below it
       # are dropped with the final addq.
   4403funimp_bsun:
   4404	mov.w		&0x00c0,2+EXC_EA(%a6)	# Fmt = 0x0; Vector Offset = 0x0c0
   4405	mov.l		USER_FPIAR(%a6),EXC_VOFF(%a6) # PC = Current PC
   4406	mov.w		EXC_SR(%a6),2+EXC_PC(%a6) # shift SR "up"
   4407
   4408	mov.w		&0xe000,2+FP_SRC(%a6)	# bsun exception enabled
   4409
   4410	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
   4411	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   4412	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   4413
   4414	frestore	FP_SRC(%a6)		# restore bsun exception
   4415
   4416	unlk		%a6
   4417
   4418	addq.l		&0x4,%sp		# erase sludge
   4419
   4420	bra.l		_real_bsun		# branch to user bsun hook
   4421
   4422#
   4423# all ftrapcc/fscc/fdbcc processing has been completed. unwind the stack frame
   4424# and return.
   4425#
   4426# as usual, we have to check for trace mode being on here. since instructions
   4427# modifying the supervisor stack frame don't pass through here, this is a
   4428# relatively easy task.
   4429#
   4430funimp_done:
   4431	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
   4432	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   4433	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   4434
   4435	unlk		%a6
   4436
       # after unlk, (%sp) is the stacked SR of the exception frame.
   4437	btst		&0x7,(%sp)		# is trace enabled?
   4438	bne.b		funimp_trace		# yes
   4439
   4440	bra.l		_fpsp_done
   4441
   4442#	 FP UNIMP FRAME		  TRACE  FRAME
   4443#	*****************	*****************
   4444#	**    <EA>     **	**  Current PC **
   4445#	*****************	*****************
   4446#	* 0x2 *  0x02c	*	* 0x2 *  0x024  *
   4447#	*****************	*****************
   4448#	**   Next PC   **	**   Next PC   **
   4449#	*****************	*****************
   4450#	*      SR	*	*      SR	*
   4451#	*****************	*****************
   4452#	    (6 words)		    (6 words)
   4453#
   4454# the ftrapcc/fscc/fdbcc instruction should take a trace trap. so, here we
   4455# must create a trace stack frame from an unimplemented fp instruction stack
   4456# frame and jump to the user supplied entry point for the trace exception
       # only the low byte of the format/vector word is rewritten: 0x202c from
       # the unimp frame becomes 0x2024, leaving the format nibble untouched.
   4457funimp_trace:
   4458	fmov.l		%fpiar,0x8(%sp)		# current PC is in fpiar
   4459	mov.b		&0x24,0x7(%sp)		# vector offset = 0x024
   4460
   4461	bra.l		_real_trace
   4462
   4463################################################################
   4464
   4465	global		tbl_trans
   4466	swbeg		&0x1c0
   4467tbl_trans:
   4468	short		tbl_trans - tbl_trans	# $00-0 fmovecr all
   4469	short		tbl_trans - tbl_trans	# $00-1 fmovecr all
   4470	short		tbl_trans - tbl_trans	# $00-2 fmovecr all
   4471	short		tbl_trans - tbl_trans	# $00-3 fmovecr all
   4472	short		tbl_trans - tbl_trans	# $00-4 fmovecr all
   4473	short		tbl_trans - tbl_trans	# $00-5 fmovecr all
   4474	short		tbl_trans - tbl_trans	# $00-6 fmovecr all
   4475	short		tbl_trans - tbl_trans	# $00-7 fmovecr all
   4476
   4477	short		tbl_trans - tbl_trans	# $01-0 fint norm
   4478	short		tbl_trans - tbl_trans	# $01-1 fint zero
   4479	short		tbl_trans - tbl_trans	# $01-2 fint inf
   4480	short		tbl_trans - tbl_trans	# $01-3 fint qnan
   4481	short		tbl_trans - tbl_trans	# $01-5 fint denorm
   4482	short		tbl_trans - tbl_trans	# $01-4 fint snan
   4483	short		tbl_trans - tbl_trans	# $01-6 fint unnorm
   4484	short		tbl_trans - tbl_trans	# $01-7 ERROR
   4485
   4486	short		ssinh	 - tbl_trans	# $02-0 fsinh norm
   4487	short		src_zero - tbl_trans	# $02-1 fsinh zero
   4488	short		src_inf	 - tbl_trans	# $02-2 fsinh inf
   4489	short		src_qnan - tbl_trans	# $02-3 fsinh qnan
   4490	short		ssinhd	 - tbl_trans	# $02-5 fsinh denorm
   4491	short		src_snan - tbl_trans	# $02-4 fsinh snan
   4492	short		tbl_trans - tbl_trans	# $02-6 fsinh unnorm
   4493	short		tbl_trans - tbl_trans	# $02-7 ERROR
   4494
   4495	short		tbl_trans - tbl_trans	# $03-0 fintrz norm
   4496	short		tbl_trans - tbl_trans	# $03-1 fintrz zero
   4497	short		tbl_trans - tbl_trans	# $03-2 fintrz inf
   4498	short		tbl_trans - tbl_trans	# $03-3 fintrz qnan
   4499	short		tbl_trans - tbl_trans	# $03-5 fintrz denorm
   4500	short		tbl_trans - tbl_trans	# $03-4 fintrz snan
   4501	short		tbl_trans - tbl_trans	# $03-6 fintrz unnorm
   4502	short		tbl_trans - tbl_trans	# $03-7 ERROR
   4503
   4504	short		tbl_trans - tbl_trans	# $04-0 fsqrt norm
   4505	short		tbl_trans - tbl_trans	# $04-1 fsqrt zero
   4506	short		tbl_trans - tbl_trans	# $04-2 fsqrt inf
   4507	short		tbl_trans - tbl_trans	# $04-3 fsqrt qnan
   4508	short		tbl_trans - tbl_trans	# $04-5 fsqrt denorm
   4509	short		tbl_trans - tbl_trans	# $04-4 fsqrt snan
   4510	short		tbl_trans - tbl_trans	# $04-6 fsqrt unnorm
   4511	short		tbl_trans - tbl_trans	# $04-7 ERROR
   4512
   4513	short		tbl_trans - tbl_trans	# $05-0 ERROR
   4514	short		tbl_trans - tbl_trans	# $05-1 ERROR
   4515	short		tbl_trans - tbl_trans	# $05-2 ERROR
   4516	short		tbl_trans - tbl_trans	# $05-3 ERROR
   4517	short		tbl_trans - tbl_trans	# $05-4 ERROR
   4518	short		tbl_trans - tbl_trans	# $05-5 ERROR
   4519	short		tbl_trans - tbl_trans	# $05-6 ERROR
   4520	short		tbl_trans - tbl_trans	# $05-7 ERROR
   4521
   4522	short		slognp1	 - tbl_trans	# $06-0 flognp1 norm
   4523	short		src_zero - tbl_trans	# $06-1 flognp1 zero
   4524	short		sopr_inf - tbl_trans	# $06-2 flognp1 inf
   4525	short		src_qnan - tbl_trans	# $06-3 flognp1 qnan
   4526	short		slognp1d - tbl_trans	# $06-5 flognp1 denorm
   4527	short		src_snan - tbl_trans	# $06-4 flognp1 snan
   4528	short		tbl_trans - tbl_trans	# $06-6 flognp1 unnorm
   4529	short		tbl_trans - tbl_trans	# $06-7 ERROR
   4530
   4531	short		tbl_trans - tbl_trans	# $07-0 ERROR
   4532	short		tbl_trans - tbl_trans	# $07-1 ERROR
   4533	short		tbl_trans - tbl_trans	# $07-2 ERROR
   4534	short		tbl_trans - tbl_trans	# $07-3 ERROR
   4535	short		tbl_trans - tbl_trans	# $07-4 ERROR
   4536	short		tbl_trans - tbl_trans	# $07-5 ERROR
   4537	short		tbl_trans - tbl_trans	# $07-6 ERROR
   4538	short		tbl_trans - tbl_trans	# $07-7 ERROR
   4539
   4540	short		setoxm1	 - tbl_trans	# $08-0 fetoxm1 norm
   4541	short		src_zero - tbl_trans	# $08-1 fetoxm1 zero
   4542	short		setoxm1i - tbl_trans	# $08-2 fetoxm1 inf
   4543	short		src_qnan - tbl_trans	# $08-3 fetoxm1 qnan
   4544	short		setoxm1d - tbl_trans	# $08-5 fetoxm1 denorm
   4545	short		src_snan - tbl_trans	# $08-4 fetoxm1 snan
   4546	short		tbl_trans - tbl_trans	# $08-6 fetoxm1 unnorm
   4547	short		tbl_trans - tbl_trans	# $08-7 ERROR
   4548
   4549	short		stanh	 - tbl_trans	# $09-0 ftanh norm
   4550	short		src_zero - tbl_trans	# $09-1 ftanh zero
   4551	short		src_one	 - tbl_trans	# $09-2 ftanh inf
   4552	short		src_qnan - tbl_trans	# $09-3 ftanh qnan
   4553	short		stanhd	 - tbl_trans	# $09-5 ftanh denorm
   4554	short		src_snan - tbl_trans	# $09-4 ftanh snan
   4555	short		tbl_trans - tbl_trans	# $09-6 ftanh unnorm
   4556	short		tbl_trans - tbl_trans	# $09-7 ERROR
   4557
   4558	short		satan	 - tbl_trans	# $0a-0 fatan norm
   4559	short		src_zero - tbl_trans	# $0a-1 fatan zero
   4560	short		spi_2	 - tbl_trans	# $0a-2 fatan inf
   4561	short		src_qnan - tbl_trans	# $0a-3 fatan qnan
   4562	short		satand	 - tbl_trans	# $0a-5 fatan denorm
   4563	short		src_snan - tbl_trans	# $0a-4 fatan snan
   4564	short		tbl_trans - tbl_trans	# $0a-6 fatan unnorm
   4565	short		tbl_trans - tbl_trans	# $0a-7 ERROR
   4566
   4567	short		tbl_trans - tbl_trans	# $0b-0 ERROR
   4568	short		tbl_trans - tbl_trans	# $0b-1 ERROR
   4569	short		tbl_trans - tbl_trans	# $0b-2 ERROR
   4570	short		tbl_trans - tbl_trans	# $0b-3 ERROR
   4571	short		tbl_trans - tbl_trans	# $0b-4 ERROR
   4572	short		tbl_trans - tbl_trans	# $0b-5 ERROR
   4573	short		tbl_trans - tbl_trans	# $0b-6 ERROR
   4574	short		tbl_trans - tbl_trans	# $0b-7 ERROR
   4575
   4576	short		sasin	 - tbl_trans	# $0c-0 fasin norm
   4577	short		src_zero - tbl_trans	# $0c-1 fasin zero
   4578	short		t_operr	 - tbl_trans	# $0c-2 fasin inf
   4579	short		src_qnan - tbl_trans	# $0c-3 fasin qnan
   4580	short		sasind	 - tbl_trans	# $0c-5 fasin denorm
   4581	short		src_snan - tbl_trans	# $0c-4 fasin snan
   4582	short		tbl_trans - tbl_trans	# $0c-6 fasin unnorm
   4583	short		tbl_trans - tbl_trans	# $0c-7 ERROR
   4584
   4585	short		satanh	 - tbl_trans	# $0d-0 fatanh norm
   4586	short		src_zero - tbl_trans	# $0d-1 fatanh zero
   4587	short		t_operr	 - tbl_trans	# $0d-2 fatanh inf
   4588	short		src_qnan - tbl_trans	# $0d-3 fatanh qnan
   4589	short		satanhd	 - tbl_trans	# $0d-5 fatanh denorm
   4590	short		src_snan - tbl_trans	# $0d-4 fatanh snan
   4591	short		tbl_trans - tbl_trans	# $0d-6 fatanh unnorm
   4592	short		tbl_trans - tbl_trans	# $0d-7 ERROR
   4593
   4594	short		ssin	 - tbl_trans	# $0e-0 fsin norm
   4595	short		src_zero - tbl_trans	# $0e-1 fsin zero
   4596	short		t_operr	 - tbl_trans	# $0e-2 fsin inf
   4597	short		src_qnan - tbl_trans	# $0e-3 fsin qnan
   4598	short		ssind	 - tbl_trans	# $0e-5 fsin denorm
   4599	short		src_snan - tbl_trans	# $0e-4 fsin snan
   4600	short		tbl_trans - tbl_trans	# $0e-6 fsin unnorm
   4601	short		tbl_trans - tbl_trans	# $0e-7 ERROR
   4602
   4603	short		stan	 - tbl_trans	# $0f-0 ftan norm
   4604	short		src_zero - tbl_trans	# $0f-1 ftan zero
   4605	short		t_operr	 - tbl_trans	# $0f-2 ftan inf
   4606	short		src_qnan - tbl_trans	# $0f-3 ftan qnan
   4607	short		stand	 - tbl_trans	# $0f-5 ftan denorm
   4608	short		src_snan - tbl_trans	# $0f-4 ftan snan
   4609	short		tbl_trans - tbl_trans	# $0f-6 ftan unnorm
   4610	short		tbl_trans - tbl_trans	# $0f-7 ERROR
   4611
   4612	short		setox	 - tbl_trans	# $10-0 fetox norm
   4613	short		ld_pone	 - tbl_trans	# $10-1 fetox zero
   4614	short		szr_inf	 - tbl_trans	# $10-2 fetox inf
   4615	short		src_qnan - tbl_trans	# $10-3 fetox qnan
   4616	short		setoxd	 - tbl_trans	# $10-5 fetox denorm
   4617	short		src_snan - tbl_trans	# $10-4 fetox snan
   4618	short		tbl_trans - tbl_trans	# $10-6 fetox unnorm
   4619	short		tbl_trans - tbl_trans	# $10-7 ERROR
   4620
   4621	short		stwotox	 - tbl_trans	# $11-0 ftwotox norm
   4622	short		ld_pone	 - tbl_trans	# $11-1 ftwotox zero
   4623	short		szr_inf	 - tbl_trans	# $11-2 ftwotox inf
   4624	short		src_qnan - tbl_trans	# $11-3 ftwotox qnan
   4625	short		stwotoxd - tbl_trans	# $11-5 ftwotox denorm
   4626	short		src_snan - tbl_trans	# $11-4 ftwotox snan
   4627	short		tbl_trans - tbl_trans	# $11-6 ftwotox unnorm
   4628	short		tbl_trans - tbl_trans	# $11-7 ERROR
   4629
   4630	short		stentox	 - tbl_trans	# $12-0 ftentox norm
   4631	short		ld_pone	 - tbl_trans	# $12-1 ftentox zero
   4632	short		szr_inf	 - tbl_trans	# $12-2 ftentox inf
   4633	short		src_qnan - tbl_trans	# $12-3 ftentox qnan
   4634	short		stentoxd - tbl_trans	# $12-5 ftentox denorm
   4635	short		src_snan - tbl_trans	# $12-4 ftentox snan
   4636	short		tbl_trans - tbl_trans	# $12-6 ftentox unnorm
   4637	short		tbl_trans - tbl_trans	# $12-7 ERROR
   4638
   4639	short		tbl_trans - tbl_trans	# $13-0 ERROR
   4640	short		tbl_trans - tbl_trans	# $13-1 ERROR
   4641	short		tbl_trans - tbl_trans	# $13-2 ERROR
   4642	short		tbl_trans - tbl_trans	# $13-3 ERROR
   4643	short		tbl_trans - tbl_trans	# $13-4 ERROR
   4644	short		tbl_trans - tbl_trans	# $13-5 ERROR
   4645	short		tbl_trans - tbl_trans	# $13-6 ERROR
   4646	short		tbl_trans - tbl_trans	# $13-7 ERROR
   4647
   4648	short		slogn	 - tbl_trans	# $14-0 flogn norm
   4649	short		t_dz2	 - tbl_trans	# $14-1 flogn zero
   4650	short		sopr_inf - tbl_trans	# $14-2 flogn inf
   4651	short		src_qnan - tbl_trans	# $14-3 flogn qnan
   4652	short		slognd	 - tbl_trans	# $14-5 flogn denorm
   4653	short		src_snan - tbl_trans	# $14-4 flogn snan
   4654	short		tbl_trans - tbl_trans	# $14-6 flogn unnorm
   4655	short		tbl_trans - tbl_trans	# $14-7 ERROR
   4656
   4657	short		slog10	 - tbl_trans	# $15-0 flog10 norm
   4658	short		t_dz2	 - tbl_trans	# $15-1 flog10 zero
   4659	short		sopr_inf - tbl_trans	# $15-2 flog10 inf
   4660	short		src_qnan - tbl_trans	# $15-3 flog10 qnan
   4661	short		slog10d	 - tbl_trans	# $15-5 flog10 denorm
   4662	short		src_snan - tbl_trans	# $15-4 flog10 snan
   4663	short		tbl_trans - tbl_trans	# $15-6 flog10 unnorm
   4664	short		tbl_trans - tbl_trans	# $15-7 ERROR
   4665
   4666	short		slog2	 - tbl_trans	# $16-0 flog2 norm
   4667	short		t_dz2	 - tbl_trans	# $16-1 flog2 zero
   4668	short		sopr_inf - tbl_trans	# $16-2 flog2 inf
   4669	short		src_qnan - tbl_trans	# $16-3 flog2 qnan
   4670	short		slog2d	 - tbl_trans	# $16-5 flog2 denorm
   4671	short		src_snan - tbl_trans	# $16-4 flog2 snan
   4672	short		tbl_trans - tbl_trans	# $16-6 flog2 unnorm
   4673	short		tbl_trans - tbl_trans	# $16-7 ERROR
   4674
   4675	short		tbl_trans - tbl_trans	# $17-0 ERROR
   4676	short		tbl_trans - tbl_trans	# $17-1 ERROR
   4677	short		tbl_trans - tbl_trans	# $17-2 ERROR
   4678	short		tbl_trans - tbl_trans	# $17-3 ERROR
   4679	short		tbl_trans - tbl_trans	# $17-4 ERROR
   4680	short		tbl_trans - tbl_trans	# $17-5 ERROR
   4681	short		tbl_trans - tbl_trans	# $17-6 ERROR
   4682	short		tbl_trans - tbl_trans	# $17-7 ERROR
   4683
   4684	short		tbl_trans - tbl_trans	# $18-0 fabs norm
   4685	short		tbl_trans - tbl_trans	# $18-1 fabs zero
   4686	short		tbl_trans - tbl_trans	# $18-2 fabs inf
   4687	short		tbl_trans - tbl_trans	# $18-3 fabs qnan
   4688	short		tbl_trans - tbl_trans	# $18-5 fabs denorm
   4689	short		tbl_trans - tbl_trans	# $18-4 fabs snan
   4690	short		tbl_trans - tbl_trans	# $18-6 fabs unnorm
   4691	short		tbl_trans - tbl_trans	# $18-7 ERROR
   4692
   4693	short		scosh	 - tbl_trans	# $19-0 fcosh norm
   4694	short		ld_pone	 - tbl_trans	# $19-1 fcosh zero
   4695	short		ld_pinf	 - tbl_trans	# $19-2 fcosh inf
   4696	short		src_qnan - tbl_trans	# $19-3 fcosh qnan
   4697	short		scoshd	 - tbl_trans	# $19-5 fcosh denorm
   4698	short		src_snan - tbl_trans	# $19-4 fcosh snan
   4699	short		tbl_trans - tbl_trans	# $19-6 fcosh unnorm
   4700	short		tbl_trans - tbl_trans	# $19-7 ERROR
   4701
   4702	short		tbl_trans - tbl_trans	# $1a-0 fneg norm
   4703	short		tbl_trans - tbl_trans	# $1a-1 fneg zero
   4704	short		tbl_trans - tbl_trans	# $1a-2 fneg inf
   4705	short		tbl_trans - tbl_trans	# $1a-3 fneg qnan
   4706	short		tbl_trans - tbl_trans	# $1a-5 fneg denorm
   4707	short		tbl_trans - tbl_trans	# $1a-4 fneg snan
   4708	short		tbl_trans - tbl_trans	# $1a-6 fneg unnorm
   4709	short		tbl_trans - tbl_trans	# $1a-7 ERROR
   4710
   4711	short		tbl_trans - tbl_trans	# $1b-0 ERROR
   4712	short		tbl_trans - tbl_trans	# $1b-1 ERROR
   4713	short		tbl_trans - tbl_trans	# $1b-2 ERROR
   4714	short		tbl_trans - tbl_trans	# $1b-3 ERROR
   4715	short		tbl_trans - tbl_trans	# $1b-4 ERROR
   4716	short		tbl_trans - tbl_trans	# $1b-5 ERROR
   4717	short		tbl_trans - tbl_trans	# $1b-6 ERROR
   4718	short		tbl_trans - tbl_trans	# $1b-7 ERROR
   4719
   4720	short		sacos	 - tbl_trans	# $1c-0 facos norm
   4721	short		ld_ppi2	 - tbl_trans	# $1c-1 facos zero
   4722	short		t_operr	 - tbl_trans	# $1c-2 facos inf
   4723	short		src_qnan - tbl_trans	# $1c-3 facos qnan
   4724	short		sacosd	 - tbl_trans	# $1c-5 facos denorm
   4725	short		src_snan - tbl_trans	# $1c-4 facos snan
   4726	short		tbl_trans - tbl_trans	# $1c-6 facos unnorm
   4727	short		tbl_trans - tbl_trans	# $1c-7 ERROR
   4728
   4729	short		scos	 - tbl_trans	# $1d-0 fcos norm
   4730	short		ld_pone	 - tbl_trans	# $1d-1 fcos zero
   4731	short		t_operr	 - tbl_trans	# $1d-2 fcos inf
   4732	short		src_qnan - tbl_trans	# $1d-3 fcos qnan
   4733	short		scosd	 - tbl_trans	# $1d-5 fcos denorm
   4734	short		src_snan - tbl_trans	# $1d-4 fcos snan
   4735	short		tbl_trans - tbl_trans	# $1d-6 fcos unnorm
   4736	short		tbl_trans - tbl_trans	# $1d-7 ERROR
   4737
   4738	short		sgetexp	 - tbl_trans	# $1e-0 fgetexp norm
   4739	short		src_zero - tbl_trans	# $1e-1 fgetexp zero
   4740	short		t_operr	 - tbl_trans	# $1e-2 fgetexp inf
   4741	short		src_qnan - tbl_trans	# $1e-3 fgetexp qnan
   4742	short		sgetexpd - tbl_trans	# $1e-5 fgetexp denorm
   4743	short		src_snan - tbl_trans	# $1e-4 fgetexp snan
   4744	short		tbl_trans - tbl_trans	# $1e-6 fgetexp unnorm
   4745	short		tbl_trans - tbl_trans	# $1e-7 ERROR
   4746
   4747	short		sgetman	 - tbl_trans	# $1f-0 fgetman norm
   4748	short		src_zero - tbl_trans	# $1f-1 fgetman zero
   4749	short		t_operr	 - tbl_trans	# $1f-2 fgetman inf
   4750	short		src_qnan - tbl_trans	# $1f-3 fgetman qnan
   4751	short		sgetmand - tbl_trans	# $1f-5 fgetman denorm
   4752	short		src_snan - tbl_trans	# $1f-4 fgetman snan
   4753	short		tbl_trans - tbl_trans	# $1f-6 fgetman unnorm
   4754	short		tbl_trans - tbl_trans	# $1f-7 ERROR
   4755
   4756	short		tbl_trans - tbl_trans	# $20-0 fdiv norm
   4757	short		tbl_trans - tbl_trans	# $20-1 fdiv zero
   4758	short		tbl_trans - tbl_trans	# $20-2 fdiv inf
   4759	short		tbl_trans - tbl_trans	# $20-3 fdiv qnan
   4760	short		tbl_trans - tbl_trans	# $20-5 fdiv denorm
   4761	short		tbl_trans - tbl_trans	# $20-4 fdiv snan
   4762	short		tbl_trans - tbl_trans	# $20-6 fdiv unnorm
   4763	short		tbl_trans - tbl_trans	# $20-7 ERROR
   4764
   4765	short		smod_snorm - tbl_trans	# $21-0 fmod norm
   4766	short		smod_szero - tbl_trans	# $21-1 fmod zero
   4767	short		smod_sinf - tbl_trans	# $21-2 fmod inf
   4768	short		sop_sqnan - tbl_trans	# $21-3 fmod qnan
   4769	short		smod_sdnrm - tbl_trans	# $21-5 fmod denorm
   4770	short		sop_ssnan - tbl_trans	# $21-4 fmod snan
   4771	short		tbl_trans - tbl_trans	# $21-6 fmod unnorm
   4772	short		tbl_trans - tbl_trans	# $21-7 ERROR
   4773
   4774	short		tbl_trans - tbl_trans	# $22-0 fadd norm
   4775	short		tbl_trans - tbl_trans	# $22-1 fadd zero
   4776	short		tbl_trans - tbl_trans	# $22-2 fadd inf
   4777	short		tbl_trans - tbl_trans	# $22-3 fadd qnan
   4778	short		tbl_trans - tbl_trans	# $22-5 fadd denorm
   4779	short		tbl_trans - tbl_trans	# $22-4 fadd snan
   4780	short		tbl_trans - tbl_trans	# $22-6 fadd unnorm
   4781	short		tbl_trans - tbl_trans	# $22-7 ERROR
   4782
   4783	short		tbl_trans - tbl_trans	# $23-0 fmul norm
   4784	short		tbl_trans - tbl_trans	# $23-1 fmul zero
   4785	short		tbl_trans - tbl_trans	# $23-2 fmul inf
   4786	short		tbl_trans - tbl_trans	# $23-3 fmul qnan
   4787	short		tbl_trans - tbl_trans	# $23-5 fmul denorm
   4788	short		tbl_trans - tbl_trans	# $23-4 fmul snan
   4789	short		tbl_trans - tbl_trans	# $23-6 fmul unnorm
   4790	short		tbl_trans - tbl_trans	# $23-7 ERROR
   4791
   4792	short		tbl_trans - tbl_trans	# $24-0 fsgldiv norm
   4793	short		tbl_trans - tbl_trans	# $24-1 fsgldiv zero
   4794	short		tbl_trans - tbl_trans	# $24-2 fsgldiv inf
   4795	short		tbl_trans - tbl_trans	# $24-3 fsgldiv qnan
   4796	short		tbl_trans - tbl_trans	# $24-5 fsgldiv denorm
   4797	short		tbl_trans - tbl_trans	# $24-4 fsgldiv snan
   4798	short		tbl_trans - tbl_trans	# $24-6 fsgldiv unnorm
   4799	short		tbl_trans - tbl_trans	# $24-7 ERROR
   4800
   4801	short		srem_snorm - tbl_trans	# $25-0 frem norm
   4802	short		srem_szero - tbl_trans	# $25-1 frem zero
   4803	short		srem_sinf - tbl_trans	# $25-2 frem inf
   4804	short		sop_sqnan - tbl_trans	# $25-3 frem qnan
   4805	short		srem_sdnrm - tbl_trans	# $25-5 frem denorm
   4806	short		sop_ssnan - tbl_trans	# $25-4 frem snan
   4807	short		tbl_trans - tbl_trans	# $25-6 frem unnorm
   4808	short		tbl_trans - tbl_trans	# $25-7 ERROR
   4809
   4810	short		sscale_snorm - tbl_trans # $26-0 fscale norm
   4811	short		sscale_szero - tbl_trans # $26-1 fscale zero
   4812	short		sscale_sinf - tbl_trans	# $26-2 fscale inf
   4813	short		sop_sqnan - tbl_trans	# $26-3 fscale qnan
   4814	short		sscale_sdnrm - tbl_trans # $26-5 fscale denorm
   4815	short		sop_ssnan - tbl_trans	# $26-4 fscale snan
   4816	short		tbl_trans - tbl_trans	# $26-6 fscale unnorm
   4817	short		tbl_trans - tbl_trans	# $26-7 ERROR
   4818
   4819	short		tbl_trans - tbl_trans	# $27-0 fsglmul norm
   4820	short		tbl_trans - tbl_trans	# $27-1 fsglmul zero
   4821	short		tbl_trans - tbl_trans	# $27-2 fsglmul inf
   4822	short		tbl_trans - tbl_trans	# $27-3 fsglmul qnan
   4823	short		tbl_trans - tbl_trans	# $27-5 fsglmul denorm
   4824	short		tbl_trans - tbl_trans	# $27-4 fsglmul snan
   4825	short		tbl_trans - tbl_trans	# $27-6 fsglmul unnorm
   4826	short		tbl_trans - tbl_trans	# $27-7 ERROR
   4827
   4828	short		tbl_trans - tbl_trans	# $28-0 fsub norm
   4829	short		tbl_trans - tbl_trans	# $28-1 fsub zero
   4830	short		tbl_trans - tbl_trans	# $28-2 fsub inf
   4831	short		tbl_trans - tbl_trans	# $28-3 fsub qnan
   4832	short		tbl_trans - tbl_trans	# $28-5 fsub denorm
   4833	short		tbl_trans - tbl_trans	# $28-4 fsub snan
   4834	short		tbl_trans - tbl_trans	# $28-6 fsub unnorm
   4835	short		tbl_trans - tbl_trans	# $28-7 ERROR
   4836
   4837	short		tbl_trans - tbl_trans	# $29-0 ERROR
   4838	short		tbl_trans - tbl_trans	# $29-1 ERROR
   4839	short		tbl_trans - tbl_trans	# $29-2 ERROR
   4840	short		tbl_trans - tbl_trans	# $29-3 ERROR
   4841	short		tbl_trans - tbl_trans	# $29-4 ERROR
   4842	short		tbl_trans - tbl_trans	# $29-5 ERROR
   4843	short		tbl_trans - tbl_trans	# $29-6 ERROR
   4844	short		tbl_trans - tbl_trans	# $29-7 ERROR
   4845
   4846	short		tbl_trans - tbl_trans	# $2a-0 ERROR
   4847	short		tbl_trans - tbl_trans	# $2a-1 ERROR
   4848	short		tbl_trans - tbl_trans	# $2a-2 ERROR
   4849	short		tbl_trans - tbl_trans	# $2a-3 ERROR
   4850	short		tbl_trans - tbl_trans	# $2a-4 ERROR
   4851	short		tbl_trans - tbl_trans	# $2a-5 ERROR
   4852	short		tbl_trans - tbl_trans	# $2a-6 ERROR
   4853	short		tbl_trans - tbl_trans	# $2a-7 ERROR
   4854
   4855	short		tbl_trans - tbl_trans	# $2b-0 ERROR
   4856	short		tbl_trans - tbl_trans	# $2b-1 ERROR
   4857	short		tbl_trans - tbl_trans	# $2b-2 ERROR
   4858	short		tbl_trans - tbl_trans	# $2b-3 ERROR
   4859	short		tbl_trans - tbl_trans	# $2b-4 ERROR
   4860	short		tbl_trans - tbl_trans	# $2b-5 ERROR
   4861	short		tbl_trans - tbl_trans	# $2b-6 ERROR
   4862	short		tbl_trans - tbl_trans	# $2b-7 ERROR
   4863
   4864	short		tbl_trans - tbl_trans	# $2c-0 ERROR
   4865	short		tbl_trans - tbl_trans	# $2c-1 ERROR
   4866	short		tbl_trans - tbl_trans	# $2c-2 ERROR
   4867	short		tbl_trans - tbl_trans	# $2c-3 ERROR
   4868	short		tbl_trans - tbl_trans	# $2c-4 ERROR
   4869	short		tbl_trans - tbl_trans	# $2c-5 ERROR
   4870	short		tbl_trans - tbl_trans	# $2c-6 ERROR
   4871	short		tbl_trans - tbl_trans	# $2c-7 ERROR
   4872
   4873	short		tbl_trans - tbl_trans	# $2d-0 ERROR
   4874	short		tbl_trans - tbl_trans	# $2d-1 ERROR
   4875	short		tbl_trans - tbl_trans	# $2d-2 ERROR
   4876	short		tbl_trans - tbl_trans	# $2d-3 ERROR
   4877	short		tbl_trans - tbl_trans	# $2d-4 ERROR
   4878	short		tbl_trans - tbl_trans	# $2d-5 ERROR
   4879	short		tbl_trans - tbl_trans	# $2d-6 ERROR
   4880	short		tbl_trans - tbl_trans	# $2d-7 ERROR
   4881
   4882	short		tbl_trans - tbl_trans	# $2e-0 ERROR
   4883	short		tbl_trans - tbl_trans	# $2e-1 ERROR
   4884	short		tbl_trans - tbl_trans	# $2e-2 ERROR
   4885	short		tbl_trans - tbl_trans	# $2e-3 ERROR
   4886	short		tbl_trans - tbl_trans	# $2e-4 ERROR
   4887	short		tbl_trans - tbl_trans	# $2e-5 ERROR
   4888	short		tbl_trans - tbl_trans	# $2e-6 ERROR
   4889	short		tbl_trans - tbl_trans	# $2e-7 ERROR
   4890
   4891	short		tbl_trans - tbl_trans	# $2f-0 ERROR
   4892	short		tbl_trans - tbl_trans	# $2f-1 ERROR
   4893	short		tbl_trans - tbl_trans	# $2f-2 ERROR
   4894	short		tbl_trans - tbl_trans	# $2f-3 ERROR
   4895	short		tbl_trans - tbl_trans	# $2f-4 ERROR
   4896	short		tbl_trans - tbl_trans	# $2f-5 ERROR
   4897	short		tbl_trans - tbl_trans	# $2f-6 ERROR
   4898	short		tbl_trans - tbl_trans	# $2f-7 ERROR
   4899
   4900	short		ssincos	 - tbl_trans	# $30-0 fsincos norm
   4901	short		ssincosz - tbl_trans	# $30-1 fsincos zero
   4902	short		ssincosi - tbl_trans	# $30-2 fsincos inf
   4903	short		ssincosqnan - tbl_trans	# $30-3 fsincos qnan
   4904	short		ssincosd - tbl_trans	# $30-5 fsincos denorm
   4905	short		ssincossnan - tbl_trans	# $30-4 fsincos snan
   4906	short		tbl_trans - tbl_trans	# $30-6 fsincos unnorm
   4907	short		tbl_trans - tbl_trans	# $30-7 ERROR
   4908
   4909	short		ssincos	 - tbl_trans	# $31-0 fsincos norm
   4910	short		ssincosz - tbl_trans	# $31-1 fsincos zero
   4911	short		ssincosi - tbl_trans	# $31-2 fsincos inf
   4912	short		ssincosqnan - tbl_trans	# $31-3 fsincos qnan
   4913	short		ssincosd - tbl_trans	# $31-5 fsincos denorm
   4914	short		ssincossnan - tbl_trans	# $31-4 fsincos snan
   4915	short		tbl_trans - tbl_trans	# $31-6 fsincos unnorm
   4916	short		tbl_trans - tbl_trans	# $31-7 ERROR
   4917
   4918	short		ssincos	 - tbl_trans	# $32-0 fsincos norm
   4919	short		ssincosz - tbl_trans	# $32-1 fsincos zero
   4920	short		ssincosi - tbl_trans	# $32-2 fsincos inf
   4921	short		ssincosqnan - tbl_trans	# $32-3 fsincos qnan
   4922	short		ssincosd - tbl_trans	# $32-5 fsincos denorm
   4923	short		ssincossnan - tbl_trans	# $32-4 fsincos snan
   4924	short		tbl_trans - tbl_trans	# $32-6 fsincos unnorm
   4925	short		tbl_trans - tbl_trans	# $32-7 ERROR
   4926
   4927	short		ssincos	 - tbl_trans	# $33-0 fsincos norm
   4928	short		ssincosz - tbl_trans	# $33-1 fsincos zero
   4929	short		ssincosi - tbl_trans	# $33-2 fsincos inf
   4930	short		ssincosqnan - tbl_trans	# $33-3 fsincos qnan
   4931	short		ssincosd - tbl_trans	# $33-5 fsincos denorm
   4932	short		ssincossnan - tbl_trans	# $33-4 fsincos snan
   4933	short		tbl_trans - tbl_trans	# $33-6 fsincos unnorm
   4934	short		tbl_trans - tbl_trans	# $33-7 ERROR
   4935
   4936	short		ssincos	 - tbl_trans	# $34-0 fsincos norm
   4937	short		ssincosz - tbl_trans	# $34-1 fsincos zero
   4938	short		ssincosi - tbl_trans	# $34-2 fsincos inf
   4939	short		ssincosqnan - tbl_trans	# $34-3 fsincos qnan
   4940	short		ssincosd - tbl_trans	# $34-5 fsincos denorm
   4941	short		ssincossnan - tbl_trans	# $34-4 fsincos snan
   4942	short		tbl_trans - tbl_trans	# $34-6 fsincos unnorm
   4943	short		tbl_trans - tbl_trans	# $34-7 ERROR
   4944
   4945	short		ssincos	 - tbl_trans	# $35-0 fsincos norm
   4946	short		ssincosz - tbl_trans	# $35-1 fsincos zero
   4947	short		ssincosi - tbl_trans	# $35-2 fsincos inf
   4948	short		ssincosqnan - tbl_trans	# $35-3 fsincos qnan
   4949	short		ssincosd - tbl_trans	# $35-5 fsincos denorm
   4950	short		ssincossnan - tbl_trans	# $35-4 fsincos snan
   4951	short		tbl_trans - tbl_trans	# $35-6 fsincos unnorm
   4952	short		tbl_trans - tbl_trans	# $35-7 ERROR
   4953
   4954	short		ssincos	 - tbl_trans	# $36-0 fsincos norm
   4955	short		ssincosz - tbl_trans	# $36-1 fsincos zero
   4956	short		ssincosi - tbl_trans	# $36-2 fsincos inf
   4957	short		ssincosqnan - tbl_trans	# $36-3 fsincos qnan
   4958	short		ssincosd - tbl_trans	# $36-5 fsincos denorm
   4959	short		ssincossnan - tbl_trans	# $36-4 fsincos snan
   4960	short		tbl_trans - tbl_trans	# $36-6 fsincos unnorm
   4961	short		tbl_trans - tbl_trans	# $36-7 ERROR
   4962
   4963	short		ssincos	 - tbl_trans	# $37-0 fsincos norm
   4964	short		ssincosz - tbl_trans	# $37-1 fsincos zero
   4965	short		ssincosi - tbl_trans	# $37-2 fsincos inf
   4966	short		ssincosqnan - tbl_trans	# $37-3 fsincos qnan
   4967	short		ssincosd - tbl_trans	# $37-5 fsincos denorm
   4968	short		ssincossnan - tbl_trans	# $37-4 fsincos snan
   4969	short		tbl_trans - tbl_trans	# $37-6 fsincos unnorm
   4970	short		tbl_trans - tbl_trans	# $37-7 ERROR
   4971
   4972##########
   4973
   4974# The instruction fetch access for the displacement word of the
   4975# fdbcc emulation failed. Here, we create an access error frame
   4976# from the current frame and branch to _real_access().
       #
       # Steps: restore the integer and FP registers saved off %a6, replace
       # the saved PC with USER_FPIAR, release the %a6 frame, then rebuild the
       # exception frame in place. Layout written at (%sp), byte offsets:
       #	0x0: SR		0x2: PC		0x6: voff (0x4008)
       #	0x8: EA (copy of the PC)	0xc: FSLW (0x09428001)
       # NOTE(review): 0x4008 looks like frame format $4 / vector offset $008
       # (access error) -- confirm against the 68060 User's Manual.
   4977funimp_iacc:
   4978	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
   4979	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
   4980	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
   4981
   4982	mov.l		USER_FPIAR(%a6),EXC_PC(%a6) # store current PC
   4983
   4984	unlk		%a6
   4985
   4986	mov.l		(%sp),-(%sp)		# push copy of SR,hi(PC); frame grows by 4
   4987	mov.w		0x8(%sp),0x4(%sp)	# store lo(PC) (old copy now sits at 0x8)
   4988	mov.w		&0x4008,0x6(%sp)	# store voff
   4989	mov.l		0x2(%sp),0x8(%sp)	# store EA (= the PC just rebuilt at 0x2)
   4990	mov.l		&0x09428001,0xc(%sp)	# store FSLW
   4991
   4992	btst		&0x5,(%sp)		# bit 5 of hi(SR): user or supervisor mode?
   4993	beq.b		funimp_iacc_end		# user; leave FSLW as is
   4994	bset		&0x2,0xd(%sp)		# set supervisor TM bit (2nd byte of FSLW)
   4995
   4996funimp_iacc_end:
   4997	bra.l		_real_access
   4998
   4999#########################################################################
   5000# ssin():     computes the sine of a normalized input			#
   5001# ssind():    computes the sine of a denormalized input			#
   5002# scos():     computes the cosine of a normalized input			#
   5003# scosd():    computes the cosine of a denormalized input		#
   5004# ssincos():  computes the sine and cosine of a normalized input	#
   5005# ssincosd(): computes the sine and cosine of a denormalized input	#
   5006#									#
   5007# INPUT *************************************************************** #
   5008#	a0 = pointer to extended precision input			#
   5009#	d0 = round precision,mode					#
   5010#									#
   5011# OUTPUT ************************************************************** #
   5012#	fp0 = sin(X) or cos(X)						#
   5013#									#
   5014#    For ssincos(X):							#
   5015#	fp0 = sin(X)							#
   5016#	fp1 = cos(X)							#
   5017#									#
   5018# ACCURACY and MONOTONICITY ******************************************* #
   5019#	The result is within 1 ulp in 64 significant bits, i.e.		#
   5020#	within 0.5001 ulp to 53 bits if the result is subsequently	#
   5021#	rounded to double precision. The result is provably monotonic	#
   5022#	in double precision.						#
   5023#									#
   5024# ALGORITHM ***********************************************************	#
   5025#									#
   5026#	SIN and COS:							#
   5027#	1. If SIN is invoked, set AdjN := 0; otherwise, set AdjN := 1.	#
   5028#									#
   5029#	2. If |X| >= 15Pi or |X| < 2**(-40), go to 7.			#
   5030#									#
   5031#	3. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let	#
   5032#		k = N mod 4, so in particular, k = 0,1,2,or 3.		#
   5033#		Overwrite k by k := k + AdjN.				#
   5034#									#
   5035#	4. If k is even, go to 6.					#
   5036#									#
   5037#	5. (k is odd) Set j := (k-1)/2, sgn := (-1)**j.			#
   5038#		Return sgn*cos(r) where cos(r) is approximated by an	#
   5039#		even polynomial in r, 1 + r*r*(B1+s*(B2+ ... + s*B8)),	#
   5040#		s = r*r.						#
   5041#		Exit.							#
   5042#									#
   5043#	6. (k is even) Set j := k/2, sgn := (-1)**j. Return sgn*sin(r)	#
   5044#		where sin(r) is approximated by an odd polynomial in r	#
   5045#		r + r*s*(A1+s*(A2+ ... + s*A7)),	s = r*r.	#
   5046#		Exit.							#
   5047#									#
   5048#	7. If |X| > 1, go to 9.						#
   5049#									#
   5050#	8. (|X|<2**(-40)) If SIN is invoked, return X;			#
   5051#		otherwise return 1.					#
   5052#									#
   5053#	9. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi,		#
   5054#		go back to 3.						#
   5055#									#
   5056#	SINCOS:								#
   5057#	1. If |X| >= 15Pi or |X| < 2**(-40), go to 6.			#
   5058#									#
   5059#	2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let	#
   5060#		k = N mod 4, so in particular, k = 0,1,2,or 3.		#
   5061#									#
   5062#	3. If k is even, go to 5.					#
   5063#									#
   5064#	4. (k is odd) Set j1 := (k-1)/2, j2 := j1 (EOR) (k mod 2), i.e.	#
   5065#		j1 exclusive or with the l.s.b. of k.			#
   5066#		sgn1 := (-1)**j1, sgn2 := (-1)**j2.			#
   5067#		SIN(X) = sgn1 * cos(r) and COS(X) = sgn2*sin(r) where	#
   5068#		sin(r) and cos(r) are computed as odd and even		#
   5069#		polynomials in r, respectively. Exit			#
   5070#									#
   5071#	5. (k is even) Set j1 := k/2, sgn1 := (-1)**j1.			#
   5072#		SIN(X) = sgn1 * sin(r) and COS(X) = sgn1*cos(r) where	#
   5073#		sin(r) and cos(r) are computed as odd and even		#
   5074#		polynomials in r, respectively. Exit			#
   5075#									#
   5076#	6. If |X| > 1, go to 8.						#
   5077#									#
   5078#	7. (|X|<2**(-40)) SIN(X) = X and COS(X) = 1. Exit.		#
   5079#									#
   5080#	8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi,		#
   5081#		go back to 2.						#
   5082#									#
   5083#########################################################################
   5084
       # Polynomial coefficients for the sin(r) (A1..A7) and cos(r) (B1..B8)
       # approximations. The storage format of each entry follows from the
       # operand size used to read it in the code below:
       #   SINA7-SINA3, COSB8-COSB4: double   (fmov.d / fadd.d)
       #   SINA2-SINA1, COSB3-COSB2: extended (fadd.x)
       #   COSB1:                    single   (fadd.s); B1 is -1/2
       # The trailing zero longwords on SINA3 and COSB4 lie beyond the 8 bytes
       # a .d access reads.
   5085SINA7:	long		0xBD6AAA77,0xCCC994F5
   5086SINA6:	long		0x3DE61209,0x7AAE8DA1
   5087SINA5:	long		0xBE5AE645,0x2A118AE4
   5088SINA4:	long		0x3EC71DE3,0xA5341531
   5089SINA3:	long		0xBF2A01A0,0x1A018B59,0x00000000,0x00000000
   5090SINA2:	long		0x3FF80000,0x88888888,0x888859AF,0x00000000
   5091SINA1:	long		0xBFFC0000,0xAAAAAAAA,0xAAAAAA99,0x00000000
   5092
   5093COSB8:	long		0x3D2AC4D0,0xD6011EE3
   5094COSB7:	long		0xBDA9396F,0x9F45AC19
   5095COSB6:	long		0x3E21EED9,0x0612C972
   5096COSB5:	long		0xBE927E4F,0xB79D9FCF
   5097COSB4:	long		0x3EFA01A0,0x1A01D423,0x00000000,0x00000000
   5098COSB3:	long		0xBFF50000,0xB60B60B6,0x0B61D438,0x00000000
   5099COSB2:	long		0x3FFA0000,0xAAAAAAAA,0xAAAAAB5E
   5100COSB1:	long		0xBF000000
   5101
       # Scratch-area aliases for the sin/cos routines. They are used below as
       # displacements off %a6 (e.g. X(%a6), INT(%a6)). Several names share a
       # slot, so only one meaning of a slot is live at a time:
       #   INARG, X, RPRIME -> FP_SCR0	SPRIME       -> FP_SCR1
       #   POSNEG1, TWOTO63 -> L_SCR1	ENDFLAG, INT -> L_SCR2
       #   ADJN             -> L_SCR3
   5102	set		INARG,FP_SCR0
   5103
   5104	set		X,FP_SCR0
   5105#	set		XDCARE,X+2
   5106	set		XFRAC,X+4
   5107
   5108	set		RPRIME,FP_SCR0
   5109	set		SPRIME,FP_SCR1
   5110
   5111	set		POSNEG1,L_SCR1
   5112	set		TWOTO63,L_SCR1
   5113
   5114	set		ENDFLAG,L_SCR2
   5115	set		INT,L_SCR2
   5116
   5117	set		ADJN,L_SCR3
   5118
   5119############################################
       # ssin(): sine entry point. Records ADJN = 0 and branches to the common
       # sin/cos code at SINBGN.
       #	a0 = pointer to extended precision input
       #	d0 = round precision,mode (loaded into FPCR before the last op)
   5120	global		ssin
   5121ssin:
   5122	mov.l		&0,ADJN(%a6)		# SIN: SET ADJN TO 0
   5123	bra.b		SINBGN
   5124
   5125############################################
       # scos(): cosine entry point. Records ADJN = 1 and falls through into
       # SINBGN. Same inputs as ssin().
   5126	global		scos
   5127scos:
   5128	mov.l		&1,ADJN(%a6)		# COS: SET ADJN TO 1
   5129
   5130############################################
       # SINBGN: common start. Loads X, builds a "compact" form of |X| in d1
       # (exponent word in the high half, top 16 mantissa bits in the low
       # half, sign cleared) and dispatches on its magnitude:
       #	|X| <  2**(-40)	-> SINSM
       #	|X| >= 15 PI	-> SREDUCEX
       #	otherwise	-> SINMAIN
   5131SINBGN:
   5132#--LOAD X AND CHECK IF |X| IS TOO SMALL OR LARGE
   5133
   5134	fmov.x		(%a0),%fp0		# LOAD INPUT
   5135	fmov.x		%fp0,X(%a6)		# save input at X
   5136
   5137# "COMPACTIFY" X
   5138	mov.l		(%a0),%d1		# put exp in hi word
   5139	mov.w		4(%a0),%d1		# fetch hi(man) into lo word
   5140	and.l		&0x7FFFFFFF,%d1		# strip sign
   5141
   5142	cmpi.l		%d1,&0x3FD78000		# is |X| >= 2**(-40)?
   5143	bge.b		SOK1			# yes; not too small
   5144	bra.w		SINSM			# no; input is very small
   5145
   5146SOK1:
   5147	cmp.l		%d1,&0x4004BC7E		# is |X| < 15 PI?
   5148	blt.b		SINMAIN			# yes; usual case
   5149	bra.w		SREDUCEX		# no; input is very large
   5150
   5151#--THIS IS THE USUAL CASE, |X| <= 15 PI.
   5152#--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
       #--ON ENTRY FP0 = X. ON EXIT FP0 = R = X - N*PI/2 AND INT(%a6) = N.
       #--EACH PITBL ENTRY IS 16 BYTES: Y1 (READ AS EXTENDED) THEN Y2 (READ AS
       #--SINGLE). PITBL+0x200 IS 32 ENTRIES IN, I.E. THE ENTRY FOR N = 0.
   5153SINMAIN:
   5154	fmov.x		%fp0,%fp1
   5155	fmul.d		TWOBYPI(%pc),%fp1	# X*2/PI
   5156
   5157	lea		PITBL+0x200(%pc),%a1	# TABLE OF N*PI/2, N = -32,...,32
   5158
   5159	fmov.l		%fp1,INT(%a6)		# CONVERT TO INTEGER N
   5160
   5161	mov.l		INT(%a6),%d1		# make a copy of N
   5162	asl.l		&4,%d1			# N *= 16 (bytes per table entry)
   5163	add.l		%d1,%a1			# tbl_addr = a1 + (N*16)
   5164
   5165# A1 IS THE ADDRESS OF N*PIBY2
   5166# ...WHICH IS IN TWO PIECES Y1 & Y2
   5167	fsub.x		(%a1)+,%fp0		# X-Y1; a1 advances to Y2
   5168	fsub.s		(%a1),%fp0		# fp0 = R = (X-Y1)-Y2
   5169
   5170SINCONT:
   5171#--continuation from REDUCEX
       #--ON ENTRY: FP0 = R, INT(%a6) = N, ADJN(%a6) = 0 (SIN) OR 1 (COS),
       #--D0 = USER ROUND PRECISION,MODE.
   5172
   5173#--GET N+ADJN AND SEE IF SIN(R) OR COS(R) IS NEEDED
   5174	mov.l		INT(%a6),%d1
   5175	add.l		ADJN(%a6),%d1		# D1 = K = N+ADJN; SEE IF K IS ODD OR EVEN
   5176	ror.l		&1,%d1			# K WAS ODD IFF D1 IS NOW NEGATIVE
   5177	cmp.l		%d1,&0
   5178	blt.w		COSPOLY			# K odd: result is SGN*COS(R)
   5179
   5180#--LET J BE BIT 1 OF K = N+ADJN (THE L.S.B. OF K/2), LET SGN := (-1)**J.
   5181#--THEN WE RETURN	SGN*SIN(R). SGN*SIN(R) IS COMPUTED BY
   5182#--R' + R'*S*(A1 + S(A2 + S(A3 + S(A4 + ... + SA7)))), WHERE
   5183#--R' = SGN*R, S=R*R. THIS CAN BE REWRITTEN AS
   5184#--R' + R'*S*( [A1+T(A3+T(A5+TA7))] + [S(A2+T(A4+TA6))])
   5185#--WHERE T=S*S.
   5186#--NOTE THAT A3 THROUGH A7 ARE STORED IN DOUBLE PRECISION
   5187#--WHILE A1 AND A2 ARE IN DOUBLE-EXTENDED FORMAT.
   5188SINPOLY:
   5189	fmovm.x		&0x0c,-(%sp)		# save fp2/fp3
   5190
   5191	fmov.x		%fp0,X(%a6)		# X IS R
   5192	fmul.x		%fp0,%fp0		# FP0 IS S
   5193
   5194	fmov.d		SINA7(%pc),%fp3
   5195	fmov.d		SINA6(%pc),%fp2
   5196
   5197	fmov.x		%fp0,%fp1
   5198	fmul.x		%fp1,%fp1		# FP1 IS T
   5199
   5200	ror.l		&1,%d1			# second rotate: bit 1 of K -> bit 31
   5201	and.l		&0x80000000,%d1
   5202# ...BIT 1 OF K (I.E. J) IN SIGN POSITION
   5203	eor.l		%d1,X(%a6)		# X IS NOW R'= SGN*R
   5204
   5205	fmul.x		%fp1,%fp3		# TA7
   5206	fmul.x		%fp1,%fp2		# TA6
   5207
   5208	fadd.d		SINA5(%pc),%fp3		# A5+TA7
   5209	fadd.d		SINA4(%pc),%fp2		# A4+TA6
   5210
   5211	fmul.x		%fp1,%fp3		# T(A5+TA7)
   5212	fmul.x		%fp1,%fp2		# T(A4+TA6)
   5213
   5214	fadd.d		SINA3(%pc),%fp3		# A3+T(A5+TA7)
   5215	fadd.x		SINA2(%pc),%fp2		# A2+T(A4+TA6)
   5216
   5217	fmul.x		%fp3,%fp1		# T(A3+T(A5+TA7))
   5218
   5219	fmul.x		%fp0,%fp2		# S(A2+T(A4+TA6))
   5220	fadd.x		SINA1(%pc),%fp1		# A1+T(A3+T(A5+TA7))
   5221	fmul.x		X(%a6),%fp0		# R'*S
   5222
   5223	fadd.x		%fp2,%fp1		# [A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))]
   5224
   5225	fmul.x		%fp1,%fp0		# SIN(R')-R'
   5226
   5227	fmovm.x		(%sp)+,&0x30		# restore fp2/fp3
   5228
   5229	fmov.l		%d0,%fpcr		# restore users round mode,prec
   5230	fadd.x		X(%a6),%fp0		# last inst - possible exception set
   5231	bra		t_inx2			# exit via t_inx2 (defined outside this section)
   5232
   5233#--LET J BE BIT 1 OF K = N+ADJN (THE L.S.B. OF (K-1)/2), LET SGN := (-1)**J.
   5234#--THEN WE RETURN	SGN*COS(R). SGN*COS(R) IS COMPUTED BY
   5235#--SGN + S'*(B1 + S(B2 + S(B3 + S(B4 + ... + SB8)))), WHERE
   5236#--S=R*R AND S'=SGN*S. THIS CAN BE REWRITTEN AS
   5237#--SGN + S'*([B1+T(B3+T(B5+TB7))] + [S(B2+T(B4+T(B6+TB8)))])
   5238#--WHERE T=S*S.
   5239#--NOTE THAT B4 THROUGH B8 ARE STORED IN DOUBLE PRECISION
   5240#--WHILE B2 AND B3 ARE IN DOUBLE-EXTENDED FORMAT, B1 IS -1/2
   5241#--AND IS THEREFORE STORED AS SINGLE PRECISION.
       #--ON ENTRY: FP0 = R, D1 = K ROTATED RIGHT BY ONE (SEE SINCONT),
       #--D0 = USER ROUND PRECISION,MODE.
   5242COSPOLY:
   5243	fmovm.x		&0x0c,-(%sp)		# save fp2/fp3
   5244
   5245	fmul.x		%fp0,%fp0		# FP0 IS S
   5246
   5247	fmov.d		COSB8(%pc),%fp2
   5248	fmov.d		COSB7(%pc),%fp3
   5249
   5250	fmov.x		%fp0,%fp1
   5251	fmul.x		%fp1,%fp1		# FP1 IS T
   5252
   5253	fmov.x		%fp0,X(%a6)		# X IS S
   5254	ror.l		&1,%d1			# second rotate: bit 1 of K -> bit 31
   5255	and.l		&0x80000000,%d1
   5256# ...BIT 1 OF K (I.E. J) IN SIGN POSITION
   5257
   5258	fmul.x		%fp1,%fp2		# TB8
   5259
   5260	eor.l		%d1,X(%a6)		# X IS NOW S'= SGN*S
   5261	and.l		&0x80000000,%d1		# no effect on D1: already masked above
   5262
   5263	fmul.x		%fp1,%fp3		# TB7
   5264
   5265	or.l		&0x3F800000,%d1		# D1 IS SGN (+-1.0) IN SINGLE
   5266	mov.l		%d1,POSNEG1(%a6)
   5267
   5268	fadd.d		COSB6(%pc),%fp2		# B6+TB8
   5269	fadd.d		COSB5(%pc),%fp3		# B5+TB7
   5270
   5271	fmul.x		%fp1,%fp2		# T(B6+TB8)
   5272	fmul.x		%fp1,%fp3		# T(B5+TB7)
   5273
   5274	fadd.d		COSB4(%pc),%fp2		# B4+T(B6+TB8)
   5275	fadd.x		COSB3(%pc),%fp3		# B3+T(B5+TB7)
   5276
   5277	fmul.x		%fp1,%fp2		# T(B4+T(B6+TB8))
   5278	fmul.x		%fp3,%fp1		# T(B3+T(B5+TB7))
   5279
   5280	fadd.x		COSB2(%pc),%fp2		# B2+T(B4+T(B6+TB8))
   5281	fadd.s		COSB1(%pc),%fp1		# B1+T(B3+T(B5+TB7))
   5282
   5283	fmul.x		%fp2,%fp0		# S(B2+T(B4+T(B6+TB8)))
   5284
   5285	fadd.x		%fp1,%fp0		# [B1+T(B3+...)] + [S(B2+...)]
   5286
   5287	fmul.x		X(%a6),%fp0		# S'*(...) = SGN*COS(R) - SGN
   5288
   5289	fmovm.x		(%sp)+,&0x30		# restore fp2/fp3
   5290
   5291	fmov.l		%d0,%fpcr		# restore users round mode,prec
   5292	fadd.s		POSNEG1(%a6),%fp0	# last inst - possible exception set
   5293	bra		t_inx2			# exit via t_inx2 (defined outside this section)
   5294
   5295##############################################
       # Tiny-argument handling for ssin()/scos(): SIN returns X, COS returns 1.
       # NOTE(review): SINBGN branches straight to SINSM and SREDUCEX; no
       # branch to the SINBORS label is visible in this part of the file.
   5296
   5297# SINe: Big OR Small?
   5298#--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
   5299#--IF |X| < 2**(-40), RETURN X OR 1.
   5300SINBORS:
   5301	cmp.l		%d1,&0x3FFF8000		# compact |X| > 1.0?
   5302	bgt.l		SREDUCEX		# yes; needs general reduction
   5303
   5304SINSM:
   5305	mov.l		ADJN(%a6),%d1
   5306	cmp.l		%d1,&0
   5307	bgt.b		COSTINY			# ADJN > 0: cosine requested
   5308
   5309# here, the operation may underflow iff the precision is sgl or dbl.
   5310# extended denorms are handled through another entry point.
   5311SINTINY:
   5312#	mov.w		&0x0000,XDCARE(%a6)	# JUST IN CASE
   5313
   5314	fmov.l		%d0,%fpcr		# restore users round mode,prec
   5315	mov.b		&FMOV_OP,%d1		# last inst is MOVE
   5316	fmov.x		X(%a6),%fp0		# last inst - possible exception set
   5317	bra		t_catch			# exit via t_catch (defined outside this section)
   5318
   5319COSTINY:
   5320	fmov.s		&0x3F800000,%fp0	# fp0 = 1.0
   5321	fmov.l		%d0,%fpcr		# restore users round mode,prec
   5322	fadd.s		&0x80800000,%fp0	# add -2**(-126); last inst - possible exception set
   5323	bra		t_pinx2			# exit via t_pinx2 (defined outside this section)
   5324
   5325################################################
       # ssind(): sine of a denormalized input. Does no work of its own; it
       # only branches to t_extdnrm (defined outside this section).
   5326	global		ssind
   5327#--SIN(X) = X FOR DENORMALIZED X
   5328ssind:
   5329	bra		t_extdnrm
   5330
   5331############################################
       # scosd(): cosine of a denormalized input. Loads 1.0 into fp0 and exits
       # through t_pinx2 (defined outside this section).
   5332	global		scosd
   5333#--COS(X) = 1 FOR DENORMALIZED X
   5334scosd:
   5335	fmov.s		&0x3F800000,%fp0	# fp0 = 1.0
   5336	bra		t_pinx2
   5337
   5338##################################################
       # ssincos(): sine and cosine of a normalized input.
       #	a0 = pointer to extended precision input
       #	d0 = round precision,mode
       # Loads X, builds the compact form of |X| in d1 (as in SINBGN) and
       # dispatches on its magnitude:
       #	|X| <  2**(-40)	-> SCSM
       #	|X| >= 15 PI	-> SREDUCEX
       #	otherwise	-> SCMAIN
   5339
   5340	global		ssincos
   5341ssincos:
   5342#--SET ADJN TO 4
       #--NOTE(review): PRESUMABLY THIS LETS SHARED CODE SUCH AS SREDUCEX TELL
       #--SINCOS APART FROM SIN (0) AND COS (1) -- CONFIRM AT ITS USE.
   5343	mov.l		&4,ADJN(%a6)
   5344
   5345	fmov.x		(%a0),%fp0		# LOAD INPUT
   5346	fmov.x		%fp0,X(%a6)		# save input at X
   5347
   5348	mov.l		(%a0),%d1		# exp in hi word
   5349	mov.w		4(%a0),%d1		# hi(man) in lo word
   5350	and.l		&0x7FFFFFFF,%d1		# COMPACTIFY X
   5351
   5352	cmp.l		%d1,&0x3FD78000		# |X| >= 2**(-40)?
   5353	bge.b		SCOK1			# yes; not too small
   5354	bra.w		SCSM			# no; input is very small
   5355
   5356SCOK1:
   5357	cmp.l		%d1,&0x4004BC7E		# |X| < 15 PI?
   5358	blt.b		SCMAIN			# yes; usual case
   5359	bra.w		SREDUCEX		# no; input is very large
   5360
   5361
   5362#--THIS IS THE USUAL CASE, |X| <= 15 PI.
   5363#--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
       #--ON ENTRY FP0 = X. ON EXIT FP0 = R = X - N*PI/2 AND INT(%a6) = N.
       #--SAME TABLE LOOK UP AS SINMAIN: 16-BYTE ENTRIES, Y1 THEN Y2.
   5364SCMAIN:
   5365	fmov.x		%fp0,%fp1
   5366
   5367	fmul.d		TWOBYPI(%pc),%fp1	# X*2/PI
   5368
   5369	lea		PITBL+0x200(%pc),%a1	# TABLE OF N*PI/2, N = -32,...,32
   5370
   5371	fmov.l		%fp1,INT(%a6)		# CONVERT TO INTEGER N
   5372
   5373	mov.l		INT(%a6),%d1		# copy of N
   5374	asl.l		&4,%d1			# N *= 16 (bytes per table entry)
   5375	add.l		%d1,%a1			# ADDRESS OF N*PIBY2, IN Y1, Y2
   5376
   5377	fsub.x		(%a1)+,%fp0		# X-Y1; a1 advances to Y2
   5378	fsub.s		(%a1),%fp0		# FP0 IS R = (X-Y1)-Y2
   5379
   5380SCCONT:
   5381#--continuation point from REDUCEX
       #--ON ENTRY: FP0 = R, INT(%a6) = N, D0 = USER ROUND PRECISION,MODE.
       #--N EVEN GOES TO NEVEN; N ODD FALLS THROUGH TO SNODD.
   5382
   5383	mov.l		INT(%a6),%d1
   5384	ror.l		&1,%d1			# bit 0 of N -> bit 31, bit 1 -> bit 0
   5385	cmp.l		%d1,&0			# D1 < 0 IFF N IS ODD
   5386	bge.w		NEVEN
   5387
       #--N ODD: SIN(X) = SGN1*COS(R), COS(X) = SGN2*SIN(R).
       #--FP1 ACCUMULATES THE SIN POLYNOMIAL (A'S), FP0/FP2 THE COS POLYNOMIAL
       #--(B'S). R' = SGN2*R, S' = SGN1*S, POSNEG1 = SGN1 (+-1.0 IN SINGLE).
   5388SNODD:
   5389#--REGISTERS SAVED HERE: FP2. D2 IS PUSHED AND POPPED AROUND ITS USE BELOW.
   5390	fmovm.x		&0x04,-(%sp)		# save fp2
   5391
   5392	fmov.x		%fp0,RPRIME(%a6)
   5393	fmul.x		%fp0,%fp0		# FP0 IS S = R*R
   5394	fmov.d		SINA7(%pc),%fp1		# A7
   5395	fmov.d		COSB8(%pc),%fp2		# B8
   5396	fmul.x		%fp0,%fp1		# SA7
   5397	fmul.x		%fp0,%fp2		# SB8
   5398
   5399	mov.l		%d2,-(%sp)		# save d2 (scratch for sgn2)
   5400	mov.l		%d1,%d2			# d2 = N rotated right by 1
   5401	ror.l		&1,%d2			# bit 1 of N -> bit 31
   5402	and.l		&0x80000000,%d2
   5403	eor.l		%d1,%d2			# bit 31 = (bit 1 of N) EOR (bit 0 of N)
   5404	and.l		&0x80000000,%d2		# d2 = sign bit for R' (sgn2)
   5405
   5406	fadd.d		SINA6(%pc),%fp1		# A6+SA7
   5407	fadd.d		COSB7(%pc),%fp2		# B7+SB8
   5408
   5409	fmul.x		%fp0,%fp1		# S(A6+SA7)
   5410	eor.l		%d2,RPRIME(%a6)		# R' = sgn2*R
   5411	mov.l		(%sp)+,%d2		# restore d2
   5412	fmul.x		%fp0,%fp2		# S(B7+SB8)
   5413	ror.l		&1,%d1			# bit 1 of N -> bit 31
   5414	and.l		&0x80000000,%d1		# d1 = sign bit for sgn1
   5415	mov.l		&0x3F800000,POSNEG1(%a6) # POSNEG1 = 1.0 in single
   5416	eor.l		%d1,POSNEG1(%a6)	# POSNEG1 = sgn1 (+-1.0)
   5417
   5418	fadd.d		SINA5(%pc),%fp1		# A5+S(A6+SA7)
   5419	fadd.d		COSB6(%pc),%fp2		# B6+S(B7+SB8)
   5420
   5421	fmul.x		%fp0,%fp1		# S(A5+S(A6+SA7))
   5422	fmul.x		%fp0,%fp2		# S(B6+S(B7+SB8))
   5423	fmov.x		%fp0,SPRIME(%a6)
   5424
   5425	fadd.d		SINA4(%pc),%fp1		# A4+S(A5+S(A6+SA7))
   5426	eor.l		%d1,SPRIME(%a6)		# S' = sgn1*S
   5427	fadd.d		COSB5(%pc),%fp2		# B5+S(B6+S(B7+SB8))
   5428
   5429	fmul.x		%fp0,%fp1		# S(A4+...)
   5430	fmul.x		%fp0,%fp2		# S(B5+...)
   5431
   5432	fadd.d		SINA3(%pc),%fp1		# A3+S(A4+...)
   5433	fadd.d		COSB4(%pc),%fp2		# B4+S(B5+...)
   5434
   5435	fmul.x		%fp0,%fp1		# S(A3+...)
   5436	fmul.x		%fp0,%fp2		# S(B4+...)
   5437
   5438	fadd.x		SINA2(%pc),%fp1		# A2+S(A3+...)
   5439	fadd.x		COSB3(%pc),%fp2		# B3+S(B4+...)
   5440
   5441	fmul.x		%fp0,%fp1		# S(A2+...)
   5442	fmul.x		%fp0,%fp2		# S(B3+...)
   5443
   5444	fadd.x		SINA1(%pc),%fp1		# A1+S(A2+...)
   5445	fadd.x		COSB2(%pc),%fp2		# B2+S(B3+...)
   5446
   5447	fmul.x		%fp0,%fp1		# S(A1+...)
   5448	fmul.x		%fp2,%fp0		# S(B2+...)
   5449
   5450	fmul.x		RPRIME(%a6),%fp1	# R'S(A1+...)
   5451	fadd.s		COSB1(%pc),%fp0		# B1+S(B2...)
   5452	fmul.x		SPRIME(%a6),%fp0	# S'(B1+S(B2+...))
   5453
   5454	fmovm.x		(%sp)+,&0x20		# restore fp2
   5455
   5456	fmov.l		%d0,%fpcr		# restore users round mode,prec
   5457	fadd.x		RPRIME(%a6),%fp1	# COS(X)
   5458	bsr		sto_cos			# store cosine result
   5459	fadd.s		POSNEG1(%a6),%fp0	# SIN(X)
   5460	bra		t_inx2			# exit via t_inx2 (defined outside this section)
   5461
   5462NEVEN:
       #--N EVEN: SIN(X) = SGN1*SIN(R), COS(X) = SGN1*COS(R).
       #--ON ENTRY: FP0 = R, D1 = N ROTATED RIGHT BY ONE (SEE SCCONT),
       #--D0 = USER ROUND PRECISION,MODE.
       #--FP1 ACCUMULATES THE COS POLYNOMIAL (B'S), FP0/FP2 THE SIN POLYNOMIAL
       #--(A'S). R' = SGN1*R, S' = SGN1*S, POSNEG1 = SGN1 (+-1.0 IN SINGLE).
   5463#--REGISTERS SAVED SO FAR: FP2.
   5464	fmovm.x		&0x04,-(%sp)		# save fp2
   5465
   5466	fmov.x		%fp0,RPRIME(%a6)
   5467	fmul.x		%fp0,%fp0		# FP0 IS S = R*R
   5468
   5469	fmov.d		COSB8(%pc),%fp1		# B8
   5470	fmov.d		SINA7(%pc),%fp2		# A7
   5471
   5472	fmul.x		%fp0,%fp1		# SB8
   5473	fmov.x		%fp0,SPRIME(%a6)
   5474	fmul.x		%fp0,%fp2		# SA7
   5475
   5476	ror.l		&1,%d1			# bit 1 of N -> bit 31
   5477	and.l		&0x80000000,%d1		# d1 = sign bit for sgn1
   5478
   5479	fadd.d		COSB7(%pc),%fp1		# B7+SB8
   5480	fadd.d		SINA6(%pc),%fp2		# A6+SA7
   5481
   5482	eor.l		%d1,RPRIME(%a6)		# R' = sgn1*R
   5483	eor.l		%d1,SPRIME(%a6)		# S' = sgn1*S
   5484
   5485	fmul.x		%fp0,%fp1		# S(B7+SB8)
   5486
   5487	or.l		&0x3F800000,%d1		# d1 = sgn1 (+-1.0) in single
   5488	mov.l		%d1,POSNEG1(%a6)
   5489
   5490	fmul.x		%fp0,%fp2		# S(A6+SA7)
   5491
   5492	fadd.d		COSB6(%pc),%fp1		# B6+S(B7+SB8)
   5493	fadd.d		SINA5(%pc),%fp2		# A5+S(A6+SA7)
   5494
   5495	fmul.x		%fp0,%fp1		# S(B6+S(B7+SB8))
   5496	fmul.x		%fp0,%fp2		# S(A5+S(A6+SA7))
   5497
   5498	fadd.d		COSB5(%pc),%fp1		# B5+S(B6+S(B7+SB8))
   5499	fadd.d		SINA4(%pc),%fp2		# A4+S(A5+S(A6+SA7))
   5500
   5501	fmul.x		%fp0,%fp1		# S(B5+...)
   5502	fmul.x		%fp0,%fp2		# S(A4+...)
   5503
   5504	fadd.d		COSB4(%pc),%fp1		# B4+S(B5+...)
   5505	fadd.d		SINA3(%pc),%fp2		# A3+S(A4+...)
   5506
   5507	fmul.x		%fp0,%fp1		# S(B4+...)
   5508	fmul.x		%fp0,%fp2		# S(A3+...)
   5509
   5510	fadd.x		COSB3(%pc),%fp1		# B3+S(B4+...)
   5511	fadd.x		SINA2(%pc),%fp2		# A2+S(A3+...)
   5512
   5513	fmul.x		%fp0,%fp1		# S(B3+...)
   5514	fmul.x		%fp0,%fp2		# S(A2+...)
   5515
   5516	fadd.x		COSB2(%pc),%fp1		# B2+S(B3+...)
   5517	fadd.x		SINA1(%pc),%fp2		# A1+S(A2+...)
   5518
   5519	fmul.x		%fp0,%fp1		# S(B2+...)
   5520	fmul.x		%fp2,%fp0		# S(A1+...)
   5521
   5522
   5523	fadd.s		COSB1(%pc),%fp1		# B1+S(B2...)
   5524	fmul.x		RPRIME(%a6),%fp0	# R'S(A1+...)
   5525	fmul.x		SPRIME(%a6),%fp1	# S'(B1+S(B2+...))
   5526
   5527	fmovm.x		(%sp)+,&0x20		# restore fp2
   5528
   5529	fmov.l		%d0,%fpcr		# restore users round mode,prec
   5530	fadd.s		POSNEG1(%a6),%fp1	# COS(X)
   5531	bsr		sto_cos			# store cosine result
   5532	fadd.x		RPRIME(%a6),%fp0	# SIN(X)
   5533	bra		t_inx2			# exit via t_inx2 (defined outside this section)
   5534
   5535################################################
   5536
   5537SCBORS:
   5538	cmp.l		%d1,&0x3FFF8000		# NOTE(review): d1 presumably holds compact |X| (set before this point) - confirm
   5539	bgt.w		SREDUCEX		# d1 > 0x3FFF8000: use the general argument reduction
   5540
   5541################################################
   5542
   5543SCSM:
   5544#	mov.w		&0x0000,XDCARE(%a6)
   5545	fmov.s		&0x3F800000,%fp1	# fp1 = 1.0 (sgl)
   5546
   5547	fmov.l		%d0,%fpcr		# fpcr = d0
   5548	fsub.s		&0x00800000,%fp1	# fp1 = 1.0 - 2**(-126), computed under the fpcr just loaded
   5549	bsr		sto_cos			# store cosine result
   5550	fmov.l		%fpcr,%d0		# d0 must have fpcr,too
   5551	mov.b		&FMOV_OP,%d1		# last inst is MOVE
   5552	fmov.x		X(%a6),%fp0		# fp0 = X(a6); NOTE(review): presumably the saved input - confirm
   5553	bra		t_catch
   5554
   5555##############################################
   5556
   5557	global		ssincosd
   5558#--SIN AND COS OF X FOR DENORMALIZED X: 1.0 IS STORED AS THE COSINE, THEN t_extdnrm FINISHES
   5559ssincosd:
   5560	mov.l		%d0,-(%sp)		# save d0 across the sto_cos call
   5561	fmov.s		&0x3F800000,%fp1	# fp1 = 1.0 (sgl), the cosine value to store
   5562	bsr		sto_cos			# store cosine result
   5563	mov.l		(%sp)+,%d0		# restore d0
   5564	bra		t_extdnrm		# NOTE(review): defined elsewhere; presumably returns X as the sine - confirm
   5565
   5566############################################
   5567
   5568#--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
   5569#--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
   5570#--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
   5571SREDUCEX:
   5572	fmovm.x		&0x3c,-(%sp)		# save {fp2-fp5}
   5573	mov.l		%d2,-(%sp)		# save d2
   5574	fmov.s		&0x00000000,%fp1	# fp1 = 0 (initial low part r of the remainder)
   5575
   5576#--If compact form of abs(arg) in d1 = $7ffeffff, argument is so large that
   5577#--there is a danger of unwanted overflow in first SLOOP iteration.  In this
   5578#--case, reduce argument by one remainder step to make subsequent reduction
   5579#--safe.
   5580	cmp.l		%d1,&0x7ffeffff		# is arg dangerously large?
   5581	bne.b		SLOOP			# no
   5582
   5583# yes; create 2**16383*PI/2 (high part) at FP_SCR0
   5584	mov.w		&0x7ffe,FP_SCR0_EX(%a6)
   5585	mov.l		&0xc90fdaa2,FP_SCR0_HI(%a6)
   5586	clr.l		FP_SCR0_LO(%a6)
   5587
   5588# create low half of 2**16383*PI/2 at FP_SCR1
   5589	mov.w		&0x7fdc,FP_SCR1_EX(%a6)
   5590	mov.l		&0x85a308d3,FP_SCR1_HI(%a6)
   5591	clr.l		FP_SCR1_LO(%a6)
   5592
   5593	ftest.x		%fp0			# test sign of argument
   5594	fblt.w		sred_neg		# negative arg: keep both constants positive
   5595
   5596	or.b		&0x80,FP_SCR0_EX(%a6)	# positive arg: set the sign bit so the fadd's below subtract
   5597	or.b		&0x80,FP_SCR1_EX(%a6)	# same for the low half
   5598sred_neg:
   5599	fadd.x		FP_SCR0(%a6),%fp0	# high part of reduction is exact
   5600	fmov.x		%fp0,%fp1		# save high result in fp1
   5601	fadd.x		FP_SCR1(%a6),%fp0	# low part of reduction
   5602	fsub.x		%fp0,%fp1		# determine low component of result
   5603	fadd.x		FP_SCR1(%a6),%fp1	# fp0/fp1 are reduced argument.
   5604
   5605#--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
   5606#--the integer quotient N is stored in INT(a6) at SRESTORE
   5607#--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1)
   5608SLOOP:
   5609	fmov.x		%fp0,INARG(%a6)		# +-2**K * F, 1 <= F < 2
   5610	mov.w		INARG(%a6),%d1		# d1.w = sign/biased-exponent word of R
   5611	mov.l		%d1,%a1			# save a copy of d1 (the sign is read back from a1 in SWORK)
   5612	and.l		&0x00007FFF,%d1		# keep the 15-bit biased exponent only
   5613	sub.l		&0x00003FFF,%d1		# d1 = K
   5614	cmp.l		%d1,&28
   5615	ble.b		SLASTLOOP		# K <= 28: this pass is the last one
   5616SCONTLOOP:
   5617	sub.l		&27,%d1			# d1 = L := K-27
   5618	mov.b		&0,ENDFLAG(%a6)		# ENDFLAG = 0: more passes follow
   5619	bra.b		SWORK
   5620SLASTLOOP:
   5621	clr.l		%d1			# d1 = L := 0
   5622	mov.b		&1,ENDFLAG(%a6)		# ENDFLAG = 1: final pass
   5623
   5624SWORK:
   5625#--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN
   5626#--THAT INT( X * (2/PI) / 2**(L) ) < 2**29.
   5627
   5628#--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
   5629#--2**L * (PIby2_1), 2**L * (PIby2_2)
   5630
   5631	mov.l		&0x00003FFE,%d2		# BIASED EXP OF 2/PI
   5632	sub.l		%d1,%d2			# BIASED EXP OF 2**(-L)*(2/PI)
   5633
   5634	mov.l		&0xA2F9836E,FP_SCR0_HI(%a6)
   5635	mov.l		&0x4E44152A,FP_SCR0_LO(%a6)
   5636	mov.w		%d2,FP_SCR0_EX(%a6)	# FP_SCR0 = 2**(-L)*(2/PI)
   5637
   5638	fmov.x		%fp0,%fp2
   5639	fmul.x		FP_SCR0(%a6),%fp2	# fp2 = X * 2**(-L)*(2/PI)
   5640
   5641#--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
   5642#--FLOATING POINT FORMAT, THE TWO FMOVE'S	FMOVE.L FP <--> N
   5643#--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
   5644#--(SIGN(INARG)*2**63	+	FP2) - SIGN(INARG)*2**63 WILL GIVE
   5645#--US THE DESIRED VALUE IN FLOATING POINT.
   5646	mov.l		%a1,%d2			# d2 = sign/exponent word saved in SLOOP
   5647	swap		%d2			# move that word to the upper half
   5648	and.l		&0x80000000,%d2		# keep only the sign bit
   5649	or.l		&0x5F000000,%d2		# d2 = SIGN(INARG)*2**63 IN SGL
   5650	mov.l		%d2,TWOTO63(%a6)
   5651	fadd.s		TWOTO63(%a6),%fp2	# THE FRACTIONAL PART OF FP2 IS ROUNDED
   5652	fsub.s		TWOTO63(%a6),%fp2	# fp2 = N
   5653#	fint.x		%fp2
   5654
   5655#--CREATING 2**(L)*Piby2_1 and 2**(L)*Piby2_2
   5656	mov.l		%d1,%d2			# d2 = L
   5657
   5658	add.l		&0x00003FFF,%d2		# BIASED EXP OF 2**L * (PI/2)
   5659	mov.w		%d2,FP_SCR0_EX(%a6)
   5660	mov.l		&0xC90FDAA2,FP_SCR0_HI(%a6)
   5661	clr.l		FP_SCR0_LO(%a6)		# FP_SCR0 = 2**(L) * Piby2_1
   5662
   5663	add.l		&0x00003FDD,%d1		# BIASED EXP OF 2**L * Piby2_2
   5664	mov.w		%d1,FP_SCR1_EX(%a6)
   5665	mov.l		&0x85A308D3,FP_SCR1_HI(%a6)
   5666	clr.l		FP_SCR1_LO(%a6)		# FP_SCR1 = 2**(L) * Piby2_2
   5667
   5668	mov.b		ENDFLAG(%a6),%d1	# d1.b = ENDFLAG (L is no longer needed)
   5669
   5670#--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
   5671#--P2 = 2**(L) * Piby2_2
   5672	fmov.x		%fp2,%fp4		# fp4 = N
   5673	fmul.x		FP_SCR0(%a6),%fp4	# fp4 = W = N*P1
   5674	fmov.x		%fp2,%fp5		# fp5 = N
   5675	fmul.x		FP_SCR1(%a6),%fp5	# fp5 = w = N*P2
   5676	fmov.x		%fp4,%fp3		# fp3 = W = N*P1
   5677
   5678#--we want P+p = W+w  but  |p| <= half ulp of P
   5679#--Then, we need to compute  A := R-P   and  a := r-p
   5680	fadd.x		%fp5,%fp3		# fp3 = P
   5681	fsub.x		%fp3,%fp4		# fp4 = W-P
   5682
   5683	fsub.x		%fp3,%fp0		# fp0 = A := R - P
   5684	fadd.x		%fp5,%fp4		# fp4 = p = (W-P)+w
   5685
   5686	fmov.x		%fp0,%fp3		# fp3 = A
   5687	fsub.x		%fp4,%fp1		# fp1 = a := r - p
   5688
   5689#--Now we need to normalize (A,a) to  "new (R,r)" where R+r = A+a but
   5690#--|r| <= half ulp of R.
   5691	fadd.x		%fp1,%fp0		# fp0 = R := A+a
   5692#--No need to calculate r if this is the last loop
   5693	cmp.b		%d1,&0
   5694	bgt.w		SRESTORE		# ENDFLAG > 0: final pass done
   5695
   5696#--Need to calculate r
   5697	fsub.x		%fp0,%fp3		# fp3 = A-R
   5698	fadd.x		%fp3,%fp1		# fp1 = r := (A-R)+a
   5699	bra.w		SLOOP
   5700
   5701SRESTORE:
   5702	fmov.l		%fp2,INT(%a6)		# INT(a6) = N converted to a long integer
   5703	mov.l		(%sp)+,%d2		# restore d2
   5704	fmovm.x		(%sp)+,&0x3c		# restore {fp2-fp5}
   5705
   5706	mov.l		ADJN(%a6),%d1		# d1 = ADJN(a6)
   5707	cmp.l		%d1,&4			# ADJN < 4 ?
   5708
   5709	blt.w		SINCONT			# yes: continue at SINCONT
   5710	bra.w		SCCONT			# no: continue at SCCONT
   5711
   5712#########################################################################
   5713# stan():  computes the tangent of a normalized input			#
   5714# stand(): computes the tangent of a denormalized input			#
   5715#									#
   5716# INPUT *************************************************************** #
   5717#	a0 = pointer to extended precision input			#
   5718#	d0 = round precision,mode					#
   5719#									#
   5720# OUTPUT ************************************************************** #
   5721#	fp0 = tan(X)							#
   5722#									#
   5723# ACCURACY and MONOTONICITY ******************************************* #
   5724#	The returned result is within 3 ulp in 64 significant bit, i.e. #
   5725#	within 0.5001 ulp to 53 bits if the result is subsequently	#
   5726#	rounded to double precision. The result is provably monotonic	#
   5727#	in double precision.						#
   5728#									#
   5729# ALGORITHM *********************************************************** #
   5730#									#
   5731#	1. If |X| >= 15Pi or |X| < 2**(-40), go to 6.			#
   5732#									#
   5733#	2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let	#
   5734#		k = N mod 2, so in particular, k = 0 or 1.		#
   5735#									#
   5736#	3. If k is odd, go to 5.					#
   5737#									#
   5738#	4. (k is even) Tan(X) = tan(r) and tan(r) is approximated by a	#
   5739#		rational function U/V where				#
   5740#		U = r + r*s*(P1 + s*(P2 + s*P3)), and			#
   5741#		V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))),  s = r*r.	#
   5742#		Exit.							#
   5743#									#
   5744#	5. (k is odd) Tan(X) = -cot(r). Since tan(r) is approximated by #
   5745#		a rational function U/V where				#
   5746#		U = r + r*s*(P1 + s*(P2 + s*P3)), and			#
   5747#		V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r,	#
   5748#		-Cot(r) = -V/U. Exit.					#
   5749#									#
   5750#	6. If |X| > 1, go to 8.						#
   5751#									#
   5752#	7. (|X|<2**(-40)) Tan(X) = X. Exit.				#
   5753#									#
   5754#	8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back	#
   5755#		to 2.							#
   5756#									#
   5757#########################################################################
   5758
   5759TANQ4:
   5760	long		0x3EA0B759,0xF50F8688	# Q4, double precision (loaded with fmov.d)
   5761TANP3:
   5762	long		0xBEF2BAA5,0xA8924F04	# P3, double precision (loaded with fmov.d)
   5763
   5764TANQ3:
   5765	long		0xBF346F59,0xB39BA65F,0x00000000,0x00000000	# Q3, double in the first 8 bytes (used with fadd.d); rest is zero
   5766
   5767TANP2:
   5768	long		0x3FF60000,0xE073D3FC,0x199C4A00,0x00000000	# P2, extended precision (used with fadd.x)
   5769
   5770TANQ2:
   5771	long		0x3FF90000,0xD23CD684,0x15D95FA1,0x00000000	# Q2, extended precision (used with fadd.x)
   5772
   5773TANP1:
   5774	long		0xBFFC0000,0x8895A6C5,0xFB423BCA,0x00000000	# P1, extended precision (used with fadd.x)
   5775
   5776TANQ1:
   5777	long		0xBFFD0000,0xEEF57E0D,0xA84BC8CE,0x00000000	# Q1, extended precision (used with fadd.x)
   5778
   5779INVTWOPI:
   5780	long		0x3FFC0000,0xA2F9836E,0x4E44152A,0x00000000	# 2/PI mantissa with exponent 2 lower = 1/(2*PI); not referenced by the tan code nearby
   5781
   5782TWOPI1:
   5783	long		0x40010000,0xC90FDAA2,0x00000000,0x00000000	# Piby2_1 mantissa with exponent 2 higher = leading part of 2*PI
   5784TWOPI2:
   5785	long		0x3FDF0000,0x85A308D4,0x00000000,0x00000000	# trailing part of 2*PI; NOTE(review): mantissa ends ...D4 while REDUCEX uses ...D3
   5786
   5787#--N*PI/2, -32 <= N <= 32, IN A LEADING TERM IN EXT. AND TRAILING
   5788#--TERM IN SGL. NOTE THAT PI IS 64-BIT LONG, THUS N*PI/2 IS AT
   5789#--MOST 69 BITS LONG.
   5790#	global		PITBL
   5791PITBL:
   5792	long		0xC0040000,0xC90FDAA2,0x2168C235,0x21800000
   5793	long		0xC0040000,0xC2C75BCD,0x105D7C23,0xA0D00000
   5794	long		0xC0040000,0xBC7EDCF7,0xFF523611,0xA1E80000
   5795	long		0xC0040000,0xB6365E22,0xEE46F000,0x21480000
   5796	long		0xC0040000,0xAFEDDF4D,0xDD3BA9EE,0xA1200000
   5797	long		0xC0040000,0xA9A56078,0xCC3063DD,0x21FC0000
   5798	long		0xC0040000,0xA35CE1A3,0xBB251DCB,0x21100000
   5799	long		0xC0040000,0x9D1462CE,0xAA19D7B9,0xA1580000
   5800	long		0xC0040000,0x96CBE3F9,0x990E91A8,0x21E00000
   5801	long		0xC0040000,0x90836524,0x88034B96,0x20B00000
   5802	long		0xC0040000,0x8A3AE64F,0x76F80584,0xA1880000
   5803	long		0xC0040000,0x83F2677A,0x65ECBF73,0x21C40000
   5804	long		0xC0030000,0xFB53D14A,0xA9C2F2C2,0x20000000
   5805	long		0xC0030000,0xEEC2D3A0,0x87AC669F,0x21380000
   5806	long		0xC0030000,0xE231D5F6,0x6595DA7B,0xA1300000
   5807	long		0xC0030000,0xD5A0D84C,0x437F4E58,0x9FC00000
   5808	long		0xC0030000,0xC90FDAA2,0x2168C235,0x21000000
   5809	long		0xC0030000,0xBC7EDCF7,0xFF523611,0xA1680000
   5810	long		0xC0030000,0xAFEDDF4D,0xDD3BA9EE,0xA0A00000
   5811	long		0xC0030000,0xA35CE1A3,0xBB251DCB,0x20900000
   5812	long		0xC0030000,0x96CBE3F9,0x990E91A8,0x21600000
   5813	long		0xC0030000,0x8A3AE64F,0x76F80584,0xA1080000
   5814	long		0xC0020000,0xFB53D14A,0xA9C2F2C2,0x1F800000
   5815	long		0xC0020000,0xE231D5F6,0x6595DA7B,0xA0B00000
   5816	long		0xC0020000,0xC90FDAA2,0x2168C235,0x20800000
   5817	long		0xC0020000,0xAFEDDF4D,0xDD3BA9EE,0xA0200000
   5818	long		0xC0020000,0x96CBE3F9,0x990E91A8,0x20E00000
   5819	long		0xC0010000,0xFB53D14A,0xA9C2F2C2,0x1F000000
   5820	long		0xC0010000,0xC90FDAA2,0x2168C235,0x20000000
   5821	long		0xC0010000,0x96CBE3F9,0x990E91A8,0x20600000
   5822	long		0xC0000000,0xC90FDAA2,0x2168C235,0x1F800000
   5823	long		0xBFFF0000,0xC90FDAA2,0x2168C235,0x1F000000
   5824	long		0x00000000,0x00000000,0x00000000,0x00000000
   5825	long		0x3FFF0000,0xC90FDAA2,0x2168C235,0x9F000000
   5826	long		0x40000000,0xC90FDAA2,0x2168C235,0x9F800000
   5827	long		0x40010000,0x96CBE3F9,0x990E91A8,0xA0600000
   5828	long		0x40010000,0xC90FDAA2,0x2168C235,0xA0000000
   5829	long		0x40010000,0xFB53D14A,0xA9C2F2C2,0x9F000000
   5830	long		0x40020000,0x96CBE3F9,0x990E91A8,0xA0E00000
   5831	long		0x40020000,0xAFEDDF4D,0xDD3BA9EE,0x20200000
   5832	long		0x40020000,0xC90FDAA2,0x2168C235,0xA0800000
   5833	long		0x40020000,0xE231D5F6,0x6595DA7B,0x20B00000
   5834	long		0x40020000,0xFB53D14A,0xA9C2F2C2,0x9F800000
   5835	long		0x40030000,0x8A3AE64F,0x76F80584,0x21080000
   5836	long		0x40030000,0x96CBE3F9,0x990E91A8,0xA1600000
   5837	long		0x40030000,0xA35CE1A3,0xBB251DCB,0xA0900000
   5838	long		0x40030000,0xAFEDDF4D,0xDD3BA9EE,0x20A00000
   5839	long		0x40030000,0xBC7EDCF7,0xFF523611,0x21680000
   5840	long		0x40030000,0xC90FDAA2,0x2168C235,0xA1000000
   5841	long		0x40030000,0xD5A0D84C,0x437F4E58,0x1FC00000
   5842	long		0x40030000,0xE231D5F6,0x6595DA7B,0x21300000
   5843	long		0x40030000,0xEEC2D3A0,0x87AC669F,0xA1380000
   5844	long		0x40030000,0xFB53D14A,0xA9C2F2C2,0xA0000000
   5845	long		0x40040000,0x83F2677A,0x65ECBF73,0xA1C40000
   5846	long		0x40040000,0x8A3AE64F,0x76F80584,0x21880000
   5847	long		0x40040000,0x90836524,0x88034B96,0xA0B00000
   5848	long		0x40040000,0x96CBE3F9,0x990E91A8,0xA1E00000
   5849	long		0x40040000,0x9D1462CE,0xAA19D7B9,0x21580000
   5850	long		0x40040000,0xA35CE1A3,0xBB251DCB,0xA1100000
   5851	long		0x40040000,0xA9A56078,0xCC3063DD,0xA1FC0000
   5852	long		0x40040000,0xAFEDDF4D,0xDD3BA9EE,0x21200000
   5853	long		0x40040000,0xB6365E22,0xEE46F000,0xA1480000
   5854	long		0x40040000,0xBC7EDCF7,0xFF523611,0x21E80000
   5855	long		0x40040000,0xC2C75BCD,0x105D7C23,0x20D00000
   5856	long		0x40040000,0xC90FDAA2,0x2168C235,0xA1800000
   5857
   5858	set		INARG,FP_SCR0
   5859
   5860	set		TWOTO63,L_SCR1
   5861	set		INT,L_SCR1
   5862	set		ENDFLAG,L_SCR2
   5863
   5864	global		stan
   5865stan:
   5866	fmov.x		(%a0),%fp0		# LOAD INPUT
   5867
   5868	mov.l		(%a0),%d1		# d1 upper word = sign/exponent word of X
   5869	mov.w		4(%a0),%d1		# d1 lower word = top 16 bits of the mantissa
   5870	and.l		&0x7FFFFFFF,%d1		# drop the sign bit: d1 = compact form of |X|
   5871
   5872	cmp.l		%d1,&0x3FD78000		# |X| >= 2**(-40)?
   5873	bge.b		TANOK1			# yes
   5874	bra.w		TANSM			# no: tiny argument path
   5875TANOK1:
   5876	cmp.l		%d1,&0x4004BC7E		# |X| < 15 PI?
   5877	blt.b		TANMAIN			# yes
   5878	bra.w		REDUCEX			# no: general argument reduction
   5879
   5880TANMAIN:
   5881#--THIS IS THE USUAL CASE, 2**(-40) <= |X| < 15 PI.
   5882#--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
   5883	fmov.x		%fp0,%fp1
   5884	fmul.d		TWOBYPI(%pc),%fp1	# X*2/PI
   5885
   5886	lea.l		PITBL+0x200(%pc),%a1	# TABLE OF N*PI/2, N = -32,...,32; +0x200 IS THE N = 0 ENTRY
   5887
   5888	fmov.l		%fp1,%d1		# CONVERT TO INTEGER: d1 = N
   5889
   5890	asl.l		&4,%d1			# d1 = N*16 (each table entry is 16 bytes)
   5891	add.l		%d1,%a1			# ADDRESS N*PIBY2 IN Y1, Y2
   5892
   5893	fsub.x		(%a1)+,%fp0		# X-Y1 (Y1 = leading extended term)
   5894
   5895	fsub.s		(%a1),%fp0		# FP0 IS R = (X-Y1)-Y2 (Y2 = trailing sgl term)
   5896
   5897	ror.l		&5,%d1			# bit 0 of N (bit 4 of N*16) -> bit 31 of d1
   5898	and.l		&0x80000000,%d1		# d1 < 0 IFF N WAS ODD
   5899
   5900TANCONT:
   5901	fmovm.x		&0x0c,-(%sp)		# save fp2,fp3 (the mask bit order is reversed for -(sp), hence 0x0c here and 0x30 on restore)
   5902
   5903	cmp.l		%d1,&0
   5904	blt.w		NODD			# d1 < 0: N is odd, compute -cot(r) instead
   5905
   5906	fmov.x		%fp0,%fp1
   5907	fmul.x		%fp1,%fp1		# S = R*R
   5908
   5909	fmov.d		TANQ4(%pc),%fp3
   5910	fmov.d		TANP3(%pc),%fp2
   5911
   5912	fmul.x		%fp1,%fp3		# SQ4
   5913	fmul.x		%fp1,%fp2		# SP3
   5914
   5915	fadd.d		TANQ3(%pc),%fp3		# Q3+SQ4
   5916	fadd.x		TANP2(%pc),%fp2		# P2+SP3
   5917
   5918	fmul.x		%fp1,%fp3		# S(Q3+SQ4)
   5919	fmul.x		%fp1,%fp2		# S(P2+SP3)
   5920
   5921	fadd.x		TANQ2(%pc),%fp3		# Q2+S(Q3+SQ4)
   5922	fadd.x		TANP1(%pc),%fp2		# P1+S(P2+SP3)
   5923
   5924	fmul.x		%fp1,%fp3		# S(Q2+S(Q3+SQ4))
   5925	fmul.x		%fp1,%fp2		# S(P1+S(P2+SP3))
   5926
   5927	fadd.x		TANQ1(%pc),%fp3		# Q1+S(Q2+S(Q3+SQ4))
   5928	fmul.x		%fp0,%fp2		# RS(P1+S(P2+SP3))
   5929
   5930	fmul.x		%fp3,%fp1		# S(Q1+S(Q2+S(Q3+SQ4)))
   5931
   5932	fadd.x		%fp2,%fp0		# fp0 = U = R+RS(P1+S(P2+SP3))
   5933
   5934	fadd.s		&0x3F800000,%fp1	# fp1 = V = 1+S(Q1+...)
   5935
   5936	fmovm.x		(%sp)+,&0x30		# restore fp2,fp3
   5937
   5938	fmov.l		%d0,%fpcr		# restore users round mode,prec
   5939	fdiv.x		%fp1,%fp0		# fp0 = U/V; last inst - possible exception set
   5940	bra		t_inx2
   5941
   5942NODD:
   5943	fmov.x		%fp0,%fp1		# fp1 = R
   5944	fmul.x		%fp0,%fp0		# S = R*R
   5945
   5946	fmov.d		TANQ4(%pc),%fp3
   5947	fmov.d		TANP3(%pc),%fp2
   5948
   5949	fmul.x		%fp0,%fp3		# SQ4
   5950	fmul.x		%fp0,%fp2		# SP3
   5951
   5952	fadd.d		TANQ3(%pc),%fp3		# Q3+SQ4
   5953	fadd.x		TANP2(%pc),%fp2		# P2+SP3
   5954
   5955	fmul.x		%fp0,%fp3		# S(Q3+SQ4)
   5956	fmul.x		%fp0,%fp2		# S(P2+SP3)
   5957
   5958	fadd.x		TANQ2(%pc),%fp3		# Q2+S(Q3+SQ4)
   5959	fadd.x		TANP1(%pc),%fp2		# P1+S(P2+SP3)
   5960
   5961	fmul.x		%fp0,%fp3		# S(Q2+S(Q3+SQ4))
   5962	fmul.x		%fp0,%fp2		# S(P1+S(P2+SP3))
   5963
   5964	fadd.x		TANQ1(%pc),%fp3		# Q1+S(Q2+S(Q3+SQ4))
   5965	fmul.x		%fp1,%fp2		# RS(P1+S(P2+SP3))
   5966
   5967	fmul.x		%fp3,%fp0		# S(Q1+S(Q2+S(Q3+SQ4)))
   5968
   5969	fadd.x		%fp2,%fp1		# fp1 = U = R+RS(P1+S(P2+SP3))
   5970	fadd.s		&0x3F800000,%fp0	# fp0 = V = 1+S(Q1+...)
   5971
   5972	fmovm.x		(%sp)+,&0x30		# restore fp2,fp3
   5973
   5974	fmov.x		%fp1,-(%sp)		# push U in extended format
   5975	eor.l		&0x80000000,(%sp)	# flip its sign bit: top of stack = -U
   5976
   5977	fmov.l		%d0,%fpcr		# restore users round mode,prec
   5978	fdiv.x		(%sp)+,%fp0		# fp0 = V/(-U) = -cot(r); last inst - possible exception set
   5979	bra		t_inx2
   5980
   5981TANBORS:
   5982#--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
   5983#--IF |X| < 2**(-40), RETURN X.
   5984	cmp.l		%d1,&0x3FFF8000		# compact |X| vs. compact form of 1.0
   5985	bgt.b		REDUCEX			# |X| > 1: general argument reduction
   5986
   5987TANSM:
   5988	fmov.x		%fp0,-(%sp)		# push X so the final move can be a memory-to-register fmov
   5989	fmov.l		%d0,%fpcr		# restore users round mode,prec
   5990	mov.b		&FMOV_OP,%d1		# last inst is MOVE
   5991	fmov.x		(%sp)+,%fp0		# fp0 = X; last inst - possible exception set
   5992	bra		t_catch
   5993
   5994	global		stand
   5995#--TAN(X) = X FOR DENORMALIZED X
   5996stand:
   5997	bra		t_extdnrm		# NOTE(review): t_extdnrm is defined elsewhere; presumably returns the denormalized input - confirm
   5998
   5999#--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
   6000#--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
   6001#--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
   6002REDUCEX:
   6003	fmovm.x		&0x3c,-(%sp)		# save {fp2-fp5}
   6004	mov.l		%d2,-(%sp)		# save d2
   6005	fmov.s		&0x00000000,%fp1	# fp1 = 0 (initial low part r of the remainder)
   6006
   6007#--If compact form of abs(arg) in d1 = $7ffeffff, argument is so large that
   6008#--there is a danger of unwanted overflow in first LOOP iteration.  In this
   6009#--case, reduce argument by one remainder step to make subsequent reduction
   6010#--safe.
   6011	cmp.l		%d1,&0x7ffeffff		# is arg dangerously large?
   6012	bne.b		LOOP			# no
   6013
   6014# yes; create 2**16383*PI/2 (high part) at FP_SCR0
   6015	mov.w		&0x7ffe,FP_SCR0_EX(%a6)
   6016	mov.l		&0xc90fdaa2,FP_SCR0_HI(%a6)
   6017	clr.l		FP_SCR0_LO(%a6)
   6018
   6019# create low half of 2**16383*PI/2 at FP_SCR1
   6020	mov.w		&0x7fdc,FP_SCR1_EX(%a6)
   6021	mov.l		&0x85a308d3,FP_SCR1_HI(%a6)
   6022	clr.l		FP_SCR1_LO(%a6)
   6023
   6024	ftest.x		%fp0			# test sign of argument
   6025	fblt.w		red_neg			# negative arg: keep both constants positive
   6026
   6027	or.b		&0x80,FP_SCR0_EX(%a6)	# positive arg: set the sign bit so the fadd's below subtract
   6028	or.b		&0x80,FP_SCR1_EX(%a6)	# same for the low half
   6029red_neg:
   6030	fadd.x		FP_SCR0(%a6),%fp0	# high part of reduction is exact
   6031	fmov.x		%fp0,%fp1		# save high result in fp1
   6032	fadd.x		FP_SCR1(%a6),%fp0	# low part of reduction
   6033	fsub.x		%fp0,%fp1		# determine low component of result
   6034	fadd.x		FP_SCR1(%a6),%fp1	# fp0/fp1 are reduced argument.
   6035
   6036#--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
   6037#--the integer quotient N is stored in INT(a6) at RESTORE
   6038#--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1)
   6039LOOP:
   6040	fmov.x		%fp0,INARG(%a6)		# +-2**K * F, 1 <= F < 2
   6041	mov.w		INARG(%a6),%d1		# d1.w = sign/biased-exponent word of R
   6042	mov.l		%d1,%a1			# save a copy of d1 (INARG aliases FP_SCR0, which WORK overwrites)
   6043	and.l		&0x00007FFF,%d1		# keep the 15-bit biased exponent only
   6044	sub.l		&0x00003FFF,%d1		# d1 = K
   6045	cmp.l		%d1,&28
   6046	ble.b		LASTLOOP		# K <= 28: this pass is the last one
   6047CONTLOOP:
   6048	sub.l		&27,%d1			# d1 = L := K-27
   6049	mov.b		&0,ENDFLAG(%a6)		# ENDFLAG = 0: more passes follow
   6050	bra.b		WORK
   6051LASTLOOP:
   6052	clr.l		%d1			# d1 = L := 0
   6053	mov.b		&1,ENDFLAG(%a6)		# ENDFLAG = 1: final pass
   6054
   6055WORK:
   6056#--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN
   6057#--THAT INT( X * (2/PI) / 2**(L) ) < 2**29.
   6058
   6059#--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
   6060#--2**L * (PIby2_1), 2**L * (PIby2_2)
   6061
   6062	mov.l		&0x00003FFE,%d2		# BIASED EXP OF 2/PI
   6063	sub.l		%d1,%d2			# BIASED EXP OF 2**(-L)*(2/PI)
   6064
   6065	mov.l		&0xA2F9836E,FP_SCR0_HI(%a6)
   6066	mov.l		&0x4E44152A,FP_SCR0_LO(%a6)
   6067	mov.w		%d2,FP_SCR0_EX(%a6)	# FP_SCR0 = 2**(-L)*(2/PI)
   6068
   6069	fmov.x		%fp0,%fp2
   6070	fmul.x		FP_SCR0(%a6),%fp2	# fp2 = X * 2**(-L)*(2/PI)
   6071
   6072#--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
   6073#--FLOATING POINT FORMAT, THE TWO FMOVE'S	FMOVE.L FP <--> N
   6074#--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
   6075#--(SIGN(INARG)*2**63	+	FP2) - SIGN(INARG)*2**63 WILL GIVE
   6076#--US THE DESIRED VALUE IN FLOATING POINT.
   6077	mov.l		%a1,%d2			# d2 = sign/exponent word saved in LOOP
   6078	swap		%d2			# move that word to the upper half
   6079	and.l		&0x80000000,%d2		# keep only the sign bit
   6080	or.l		&0x5F000000,%d2		# d2 = SIGN(INARG)*2**63 IN SGL
   6081	mov.l		%d2,TWOTO63(%a6)
   6082	fadd.s		TWOTO63(%a6),%fp2	# THE FRACTIONAL PART OF FP2 IS ROUNDED
   6083	fsub.s		TWOTO63(%a6),%fp2	# fp2 = N
   6084#	fintrz.x	%fp2,%fp2
   6085
   6086#--CREATING 2**(L)*Piby2_1 and 2**(L)*Piby2_2
   6087	mov.l		%d1,%d2			# d2 = L
   6088
   6089	add.l		&0x00003FFF,%d2		# BIASED EXP OF 2**L * (PI/2)
   6090	mov.w		%d2,FP_SCR0_EX(%a6)
   6091	mov.l		&0xC90FDAA2,FP_SCR0_HI(%a6)
   6092	clr.l		FP_SCR0_LO(%a6)		# FP_SCR0 = 2**(L) * Piby2_1
   6093
   6094	add.l		&0x00003FDD,%d1		# BIASED EXP OF 2**L * Piby2_2
   6095	mov.w		%d1,FP_SCR1_EX(%a6)
   6096	mov.l		&0x85A308D3,FP_SCR1_HI(%a6)
   6097	clr.l		FP_SCR1_LO(%a6)		# FP_SCR1 = 2**(L) * Piby2_2
   6098
   6099	mov.b		ENDFLAG(%a6),%d1	# d1.b = ENDFLAG (L is no longer needed)
   6100
   6101#--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
   6102#--P2 = 2**(L) * Piby2_2
   6103	fmov.x		%fp2,%fp4		# fp4 = N
   6104	fmul.x		FP_SCR0(%a6),%fp4	# fp4 = W = N*P1
   6105	fmov.x		%fp2,%fp5		# fp5 = N
   6106	fmul.x		FP_SCR1(%a6),%fp5	# fp5 = w = N*P2
   6107	fmov.x		%fp4,%fp3		# fp3 = W = N*P1
   6108
   6109#--we want P+p = W+w  but  |p| <= half ulp of P
   6110#--Then, we need to compute  A := R-P   and  a := r-p
   6111	fadd.x		%fp5,%fp3		# fp3 = P
   6112	fsub.x		%fp3,%fp4		# fp4 = W-P
   6113
   6114	fsub.x		%fp3,%fp0		# fp0 = A := R - P
   6115	fadd.x		%fp5,%fp4		# fp4 = p = (W-P)+w
   6116
   6117	fmov.x		%fp0,%fp3		# fp3 = A
   6118	fsub.x		%fp4,%fp1		# fp1 = a := r - p
   6119
   6120#--Now we need to normalize (A,a) to  "new (R,r)" where R+r = A+a but
   6121#--|r| <= half ulp of R.
   6122	fadd.x		%fp1,%fp0		# fp0 = R := A+a
   6123#--No need to calculate r if this is the last loop
   6124	cmp.b		%d1,&0
   6125	bgt.w		RESTORE			# ENDFLAG > 0: final pass done
   6126
   6127#--Need to calculate r
   6128	fsub.x		%fp0,%fp3		# fp3 = A-R
   6129	fadd.x		%fp3,%fp1		# fp1 = r := (A-R)+a
   6130	bra.w		LOOP
   6131
   6132RESTORE:
   6133	fmov.l		%fp2,INT(%a6)		# INT(a6) = N as a long integer (INT shares L_SCR1 with TWOTO63, which is no longer needed)
   6134	mov.l		(%sp)+,%d2		# restore d2
   6135	fmovm.x		(%sp)+,&0x3c		# restore {fp2-fp5}
   6136
   6137	mov.l		INT(%a6),%d1		# d1 = N
   6138	ror.l		&1,%d1			# bit 0 of N -> bit 31: d1 < 0 iff N is odd (tested at TANCONT)
   6139
   6140	bra.w		TANCONT
   6141
   6142#########################################################################
   6143# satan():  computes the arctangent of a normalized number		#
   6144# satand(): computes the arctangent of a denormalized number		#
   6145#									#
   6146# INPUT	*************************************************************** #
   6147#	a0 = pointer to extended precision input			#
   6148#	d0 = round precision,mode					#
   6149#									#
   6150# OUTPUT ************************************************************** #
   6151#	fp0 = arctan(X)							#
   6152#									#
   6153# ACCURACY and MONOTONICITY ******************************************* #
   6154#	The returned result is within 2 ulps in	64 significant bit,	#
   6155#	i.e. within 0.5001 ulp to 53 bits if the result is subsequently	#
   6156#	rounded to double precision. The result is provably monotonic	#
   6157#	in double precision.						#
   6158#									#
   6159# ALGORITHM *********************************************************** #
   6160#	Step 1. If |X| >= 16 or |X| < 1/16, go to Step 5.		#
   6161#									#
   6162#	Step 2. Let X = sgn * 2**k * 1.xxxxxxxx...x.			#
   6163#		Note that k = -4, -3,..., or 3.				#
   6164#		Define F = sgn * 2**k * 1.xxxx1, i.e. the first 5	#
   6165#		significant bits of X with a bit-1 attached at the 6-th	#
   6166#		bit position. Define u to be u = (X-F) / (1 + X*F).	#
   6167#									#
   6168#	Step 3. Approximate arctan(u) by a polynomial poly.		#
   6169#									#
   6170#	Step 4. Return arctan(F) + poly, arctan(F) is fetched from a	#
   6171#		table of values calculated beforehand. Exit.		#
   6172#									#
   6173#	Step 5. If |X| >= 16, go to Step 7.				#
   6174#									#
   6175#	Step 6. Approximate arctan(X) by an odd polynomial in X. Exit.	#
   6176#									#
   6177#	Step 7. Define X' = -1/X. Approximate arctan(X') by an odd	#
   6178#		polynomial in X'.					#
   6179#		Arctan(X) = sign(X)*Pi/2 + arctan(X'). Exit.		#
   6180#									#
   6181#########################################################################
   6182
   6183ATANA3:	long		0xBFF6687E,0x314987D8
   6184ATANA2:	long		0x4002AC69,0x34A26DB3
   6185ATANA1:	long		0xBFC2476F,0x4E1DA28E
   6186
   6187ATANB6:	long		0x3FB34444,0x7F876989
   6188ATANB5:	long		0xBFB744EE,0x7FAF45DB
   6189ATANB4:	long		0x3FBC71C6,0x46940220
   6190ATANB3:	long		0xBFC24924,0x921872F9
   6191ATANB2:	long		0x3FC99999,0x99998FA9
   6192ATANB1:	long		0xBFD55555,0x55555555
   6193
   6194ATANC5:	long		0xBFB70BF3,0x98539E6A
   6195ATANC4:	long		0x3FBC7187,0x962D1D7D
   6196ATANC3:	long		0xBFC24924,0x827107B8
   6197ATANC2:	long		0x3FC99999,0x9996263E
   6198ATANC1:	long		0xBFD55555,0x55555536
   6199
   6200PPIBY2:	long		0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000
   6201NPIBY2:	long		0xBFFF0000,0xC90FDAA2,0x2168C235,0x00000000
   6202
   6203PTINY:	long		0x00010000,0x80000000,0x00000000,0x00000000
   6204NTINY:	long		0x80010000,0x80000000,0x00000000,0x00000000
   6205
   6206ATANTBL:
   6207	long		0x3FFB0000,0x83D152C5,0x060B7A51,0x00000000
   6208	long		0x3FFB0000,0x8BC85445,0x65498B8B,0x00000000
   6209	long		0x3FFB0000,0x93BE4060,0x17626B0D,0x00000000
   6210	long		0x3FFB0000,0x9BB3078D,0x35AEC202,0x00000000
   6211	long		0x3FFB0000,0xA3A69A52,0x5DDCE7DE,0x00000000
   6212	long		0x3FFB0000,0xAB98E943,0x62765619,0x00000000
   6213	long		0x3FFB0000,0xB389E502,0xF9C59862,0x00000000
   6214	long		0x3FFB0000,0xBB797E43,0x6B09E6FB,0x00000000
   6215	long		0x3FFB0000,0xC367A5C7,0x39E5F446,0x00000000
   6216	long		0x3FFB0000,0xCB544C61,0xCFF7D5C6,0x00000000
   6217	long		0x3FFB0000,0xD33F62F8,0x2488533E,0x00000000
   6218	long		0x3FFB0000,0xDB28DA81,0x62404C77,0x00000000
   6219	long		0x3FFB0000,0xE310A407,0x8AD34F18,0x00000000
   6220	long		0x3FFB0000,0xEAF6B0A8,0x188EE1EB,0x00000000
   6221	long		0x3FFB0000,0xF2DAF194,0x9DBE79D5,0x00000000
   6222	long		0x3FFB0000,0xFABD5813,0x61D47E3E,0x00000000
   6223	long		0x3FFC0000,0x8346AC21,0x0959ECC4,0x00000000
   6224	long		0x3FFC0000,0x8B232A08,0x304282D8,0x00000000
   6225	long		0x3FFC0000,0x92FB70B8,0xD29AE2F9,0x00000000
   6226	long		0x3FFC0000,0x9ACF476F,0x5CCD1CB4,0x00000000
   6227	long		0x3FFC0000,0xA29E7630,0x4954F23F,0x00000000
   6228	long		0x3FFC0000,0xAA68C5D0,0x8AB85230,0x00000000
   6229	long		0x3FFC0000,0xB22DFFFD,0x9D539F83,0x00000000
   6230	long		0x3FFC0000,0xB9EDEF45,0x3E900EA5,0x00000000
   6231	long		0x3FFC0000,0xC1A85F1C,0xC75E3EA5,0x00000000
   6232	long		0x3FFC0000,0xC95D1BE8,0x28138DE6,0x00000000
   6233	long		0x3FFC0000,0xD10BF300,0x840D2DE4,0x00000000
   6234	long		0x3FFC0000,0xD8B4B2BA,0x6BC05E7A,0x00000000
   6235	long		0x3FFC0000,0xE0572A6B,0xB42335F6,0x00000000
   6236	long		0x3FFC0000,0xE7F32A70,0xEA9CAA8F,0x00000000
   6237	long		0x3FFC0000,0xEF888432,0x64ECEFAA,0x00000000
   6238	long		0x3FFC0000,0xF7170A28,0xECC06666,0x00000000
   6239	long		0x3FFD0000,0x812FD288,0x332DAD32,0x00000000
   6240	long		0x3FFD0000,0x88A8D1B1,0x218E4D64,0x00000000
   6241	long		0x3FFD0000,0x9012AB3F,0x23E4AEE8,0x00000000
   6242	long		0x3FFD0000,0x976CC3D4,0x11E7F1B9,0x00000000
   6243	long		0x3FFD0000,0x9EB68949,0x3889A227,0x00000000
   6244	long		0x3FFD0000,0xA5EF72C3,0x4487361B,0x00000000
   6245	long		0x3FFD0000,0xAD1700BA,0xF07A7227,0x00000000
   6246	long		0x3FFD0000,0xB42CBCFA,0xFD37EFB7,0x00000000
   6247	long		0x3FFD0000,0xBB303A94,0x0BA80F89,0x00000000
   6248	long		0x3FFD0000,0xC22115C6,0xFCAEBBAF,0x00000000
   6249	long		0x3FFD0000,0xC8FEF3E6,0x86331221,0x00000000
   6250	long		0x3FFD0000,0xCFC98330,0xB4000C70,0x00000000
   6251	long		0x3FFD0000,0xD6807AA1,0x102C5BF9,0x00000000
   6252	long		0x3FFD0000,0xDD2399BC,0x31252AA3,0x00000000
   6253	long		0x3FFD0000,0xE3B2A855,0x6B8FC517,0x00000000
   6254	long		0x3FFD0000,0xEA2D764F,0x64315989,0x00000000
   6255	long		0x3FFD0000,0xF3BF5BF8,0xBAD1A21D,0x00000000
   6256	long		0x3FFE0000,0x801CE39E,0x0D205C9A,0x00000000
   6257	long		0x3FFE0000,0x8630A2DA,0xDA1ED066,0x00000000
   6258	long		0x3FFE0000,0x8C1AD445,0xF3E09B8C,0x00000000
   6259	long		0x3FFE0000,0x91DB8F16,0x64F350E2,0x00000000
   6260	long		0x3FFE0000,0x97731420,0x365E538C,0x00000000
   6261	long		0x3FFE0000,0x9CE1C8E6,0xA0B8CDBA,0x00000000
   6262	long		0x3FFE0000,0xA22832DB,0xCADAAE09,0x00000000
   6263	long		0x3FFE0000,0xA746F2DD,0xB7602294,0x00000000
   6264	long		0x3FFE0000,0xAC3EC0FB,0x997DD6A2,0x00000000
   6265	long		0x3FFE0000,0xB110688A,0xEBDC6F6A,0x00000000
   6266	long		0x3FFE0000,0xB5BCC490,0x59ECC4B0,0x00000000
   6267	long		0x3FFE0000,0xBA44BC7D,0xD470782F,0x00000000
   6268	long		0x3FFE0000,0xBEA94144,0xFD049AAC,0x00000000
   6269	long		0x3FFE0000,0xC2EB4ABB,0x661628B6,0x00000000
   6270	long		0x3FFE0000,0xC70BD54C,0xE602EE14,0x00000000
   6271	long		0x3FFE0000,0xCD000549,0xADEC7159,0x00000000
   6272	long		0x3FFE0000,0xD48457D2,0xD8EA4EA3,0x00000000
   6273	long		0x3FFE0000,0xDB948DA7,0x12DECE3B,0x00000000
   6274	long		0x3FFE0000,0xE23855F9,0x69E8096A,0x00000000
   6275	long		0x3FFE0000,0xE8771129,0xC4353259,0x00000000
   6276	long		0x3FFE0000,0xEE57C16E,0x0D379C0D,0x00000000
   6277	long		0x3FFE0000,0xF3E10211,0xA87C3779,0x00000000
   6278	long		0x3FFE0000,0xF919039D,0x758B8D41,0x00000000
   6279	long		0x3FFE0000,0xFE058B8F,0x64935FB3,0x00000000
   6280	long		0x3FFF0000,0x8155FB49,0x7B685D04,0x00000000
   6281	long		0x3FFF0000,0x83889E35,0x49D108E1,0x00000000
   6282	long		0x3FFF0000,0x859CFA76,0x511D724B,0x00000000
   6283	long		0x3FFF0000,0x87952ECF,0xFF8131E7,0x00000000
   6284	long		0x3FFF0000,0x89732FD1,0x9557641B,0x00000000
   6285	long		0x3FFF0000,0x8B38CAD1,0x01932A35,0x00000000
   6286	long		0x3FFF0000,0x8CE7A8D8,0x301EE6B5,0x00000000
   6287	long		0x3FFF0000,0x8F46A39E,0x2EAE5281,0x00000000
   6288	long		0x3FFF0000,0x922DA7D7,0x91888487,0x00000000
   6289	long		0x3FFF0000,0x94D19FCB,0xDEDF5241,0x00000000
   6290	long		0x3FFF0000,0x973AB944,0x19D2A08B,0x00000000
   6291	long		0x3FFF0000,0x996FF00E,0x08E10B96,0x00000000
   6292	long		0x3FFF0000,0x9B773F95,0x12321DA7,0x00000000
   6293	long		0x3FFF0000,0x9D55CC32,0x0F935624,0x00000000
   6294	long		0x3FFF0000,0x9F100575,0x006CC571,0x00000000
   6295	long		0x3FFF0000,0xA0A9C290,0xD97CC06C,0x00000000
   6296	long		0x3FFF0000,0xA22659EB,0xEBC0630A,0x00000000
   6297	long		0x3FFF0000,0xA388B4AF,0xF6EF0EC9,0x00000000
   6298	long		0x3FFF0000,0xA4D35F10,0x61D292C4,0x00000000
   6299	long		0x3FFF0000,0xA60895DC,0xFBE3187E,0x00000000
   6300	long		0x3FFF0000,0xA72A51DC,0x7367BEAC,0x00000000
   6301	long		0x3FFF0000,0xA83A5153,0x0956168F,0x00000000
   6302	long		0x3FFF0000,0xA93A2007,0x7539546E,0x00000000
   6303	long		0x3FFF0000,0xAA9E7245,0x023B2605,0x00000000
   6304	long		0x3FFF0000,0xAC4C84BA,0x6FE4D58F,0x00000000
   6305	long		0x3FFF0000,0xADCE4A4A,0x606B9712,0x00000000
   6306	long		0x3FFF0000,0xAF2A2DCD,0x8D263C9C,0x00000000
   6307	long		0x3FFF0000,0xB0656F81,0xF22265C7,0x00000000
   6308	long		0x3FFF0000,0xB1846515,0x0F71496A,0x00000000
   6309	long		0x3FFF0000,0xB28AAA15,0x6F9ADA35,0x00000000
   6310	long		0x3FFF0000,0xB37B44FF,0x3766B895,0x00000000
   6311	long		0x3FFF0000,0xB458C3DC,0xE9630433,0x00000000
   6312	long		0x3FFF0000,0xB525529D,0x562246BD,0x00000000
   6313	long		0x3FFF0000,0xB5E2CCA9,0x5F9D88CC,0x00000000
   6314	long		0x3FFF0000,0xB692CADA,0x7ACA1ADA,0x00000000
   6315	long		0x3FFF0000,0xB736AEA7,0xA6925838,0x00000000
   6316	long		0x3FFF0000,0xB7CFAB28,0x7E9F7B36,0x00000000
   6317	long		0x3FFF0000,0xB85ECC66,0xCB219835,0x00000000
   6318	long		0x3FFF0000,0xB8E4FD5A,0x20A593DA,0x00000000
   6319	long		0x3FFF0000,0xB99F41F6,0x4AFF9BB5,0x00000000
   6320	long		0x3FFF0000,0xBA7F1E17,0x842BBE7B,0x00000000
   6321	long		0x3FFF0000,0xBB471285,0x7637E17D,0x00000000
   6322	long		0x3FFF0000,0xBBFABE8A,0x4788DF6F,0x00000000
   6323	long		0x3FFF0000,0xBC9D0FAD,0x2B689D79,0x00000000
   6324	long		0x3FFF0000,0xBD306A39,0x471ECD86,0x00000000
   6325	long		0x3FFF0000,0xBDB6C731,0x856AF18A,0x00000000
   6326	long		0x3FFF0000,0xBE31CAC5,0x02E80D70,0x00000000
   6327	long		0x3FFF0000,0xBEA2D55C,0xE33194E2,0x00000000
   6328	long		0x3FFF0000,0xBF0B10B7,0xC03128F0,0x00000000
   6329	long		0x3FFF0000,0xBF6B7A18,0xDACB778D,0x00000000
   6330	long		0x3FFF0000,0xBFC4EA46,0x63FA18F6,0x00000000
   6331	long		0x3FFF0000,0xC0181BDE,0x8B89A454,0x00000000
   6332	long		0x3FFF0000,0xC065B066,0xCFBF6439,0x00000000
   6333	long		0x3FFF0000,0xC0AE345F,0x56340AE6,0x00000000
   6334	long		0x3FFF0000,0xC0F22291,0x9CB9E6A7,0x00000000
   6335
   6336	set		X,FP_SCR0		# scratch slot (used as X(%a6)) where satan keeps X; ATANMAIN rewrites it as F, ATANBIG as X'
   6337	set		XDCARE,X+2		# word at X+2; not referenced in this section (name suggests "don't care" -- confirm)
   6338	set		XFRAC,X+4		# upper 32 fraction bits of the value stored at X
   6339	set		XFRACLO,X+8		# lower 32 fraction bits of the value stored at X
   6340
   6341	set		ATANF,FP_SCR1		# scratch slot receiving the ATANTBL entry (sign/exponent longword)
   6342	set		ATANFHI,ATANF+4		# second longword of the copied table entry
   6343	set		ATANFLO,ATANF+8		# third longword of the copied table entry
   6344
   6345	global		satan
   6346#--ENTRY POINT FOR ATAN(X), HERE X IS FINITE, NON-ZERO, AND NOT A NAN.
       #--a0 POINTS TO THE EXTENDED-PRECISION X. d0 IS WRITTEN TO FPCR RIGHT
       #--BEFORE THE LAST ARITHMETIC STEP OF EVERY PATH. THE RESULT IS LEFT IN FP0.
       #--d1 IS BUILT AS |X| IN COMPACT FORM (EXPONENT IN THE UPPER WORD, TOP 16
       #--FRACTION BITS IN THE LOWER WORD) SO THE RANGE TESTS ARE INTEGER COMPARES.
   6347satan:
   6348	fmov.x		(%a0),%fp0		# LOAD INPUT
   6349
   6350	mov.l		(%a0),%d1		# d1 UPPER WORD = SIGN/EXPONENT OF X
   6351	mov.w		4(%a0),%d1		# d1 LOWER WORD = TOP 16 FRACTION BITS
   6352	fmov.x		%fp0,X(%a6)		# KEEP A COPY OF X IN THE SCRATCH SLOT
   6353	and.l		&0x7FFFFFFF,%d1		# DROP THE SIGN: d1 = COMPACT |X|
   6354
   6355	cmp.l		%d1,&0x3FFB8000		# |X| >= 1/16?
   6356	bge.b		ATANOK1
   6357	bra.w		ATANSM			# NO: SMALL-ARGUMENT POLYNOMIAL
   6358
   6359ATANOK1:
   6360	cmp.l		%d1,&0x4002FFFF		# |X| < 16 ?
   6361	ble.b		ATANMAIN		# YES: TABLE-DRIVEN MAIN PATH
   6362	bra.w		ATANBIG			# NO: LARGE-ARGUMENT PATH
   6363
   6364#--THE MOST LIKELY CASE, |X| IN [1/16, 16). WE USE A TABLE TECHNIQUE.
   6365#--THE IDEA IS ATAN(X) = ATAN(F) + ATAN( [X-F] / [1+XF] ).
   6366#--SO IF F IS CHOSEN TO BE CLOSE TO X AND ATAN(F) IS STORED IN
   6367#--A TABLE, ALL WE NEED IS TO APPROXIMATE ATAN(U) WHERE
   6368#--U = (X-F)/(1+XF) IS SMALL (REMEMBER F IS CLOSE TO X). IT IS
   6369#--TRUE THAT A DIVIDE IS NOW NEEDED, BUT THE APPROXIMATION FOR
   6370#--ATAN(U) IS A VERY SHORT POLYNOMIAL AND THE INDEXING TO
   6371#--FETCH F AND SAVING OF REGISTERS CAN ALL BE HIDDEN UNDER THE
   6372#--DIVIDE. IN THE END THIS METHOD IS MUCH FASTER THAN A TRADITIONAL
   6373#--ONE. NOTE ALSO THAT THE TRADITIONAL SCHEME THAT APPROXIMATES
   6374#--ATAN(X) DIRECTLY WILL NEED TO USE A RATIONAL APPROXIMATION
   6375#--(DIVISION NEEDED) ANYWAY BECAUSE A POLYNOMIAL APPROXIMATION
   6376#--WILL INVOLVE A VERY LONG POLYNOMIAL.
   6377
   6378#--NOW WE SEE X AS +-2^K * 1.BBBBBBB....B <- 1. + 63 BITS
   6379#--WE CHOOSE F TO BE +-2^K * 1.BBBB1
   6380#--THAT IS, IT MATCHES THE EXPONENT AND FIRST 5 BITS OF X, AND THE
   6381#--SIXTH BIT IS SET TO 1. SINCE K = -4, -3, ..., 3, THERE
   6382#--ARE ONLY 8 TIMES 16 = 2^7 = 128 |F|'S. SINCE ATAN(-|F|) IS
   6383#-- -ATAN(|F|), WE NEED TO STORE ONLY ATAN(|F|).
   6384
   6385ATANMAIN:
   6386
   6387	and.l		&0xF8000000,XFRAC(%a6)	# FIRST 5 BITS
   6388	or.l		&0x04000000,XFRAC(%a6)	# SET 6-TH BIT TO 1
   6389	mov.l		&0x00000000,XFRACLO(%a6) # LOCATION OF X IS NOW F
   6390
   6391	fmov.x		%fp0,%fp1		# FP1 IS X
   6392	fmul.x		X(%a6),%fp1		# FP1 IS X*F, NOTE THAT X*F > 0
   6393	fsub.x		X(%a6),%fp0		# FP0 IS X-F
   6394	fadd.s		&0x3F800000,%fp1	# FP1 IS 1 + X*F
   6395	fdiv.x		%fp1,%fp0		# FP0 IS U = (X-F)/(1+X*F)
   6396
   6397#--WHILE THE DIVISION IS TAKING ITS TIME, WE FETCH ATAN(|F|)
   6398#--CREATE ATAN(F) AND STORE IT IN ATANF, AND
   6399#--SAVE REGISTERS FP2.
       #--d1 STILL HOLDS COMPACT |X| FROM THE ENTRY CODE. THE TABLE OFFSET IS
       #--((K+4)*16 + THE 4 FRACTION BITS BELOW THE LEADING 1) * 16, I.E. ONE
       #--16-BYTE TABLE ENTRY PER F.
   6400
   6401	mov.l		%d2,-(%sp)		# SAVE d2 TEMPORARILY
   6402	mov.l		%d1,%d2			# THE EXP AND 16 BITS OF X
   6403	and.l		&0x00007800,%d1		# 4 VARYING BITS OF F'S FRACTION
   6404	and.l		&0x7FFF0000,%d2		# EXPONENT OF F
   6405	sub.l		&0x3FFB0000,%d2		# K+4
   6406	asr.l		&1,%d2			# K+4 NOW SITS DIRECTLY ABOVE THE 4 FRACTION BITS
   6407	add.l		%d2,%d1			# THE 7 BITS IDENTIFYING F
   6408	asr.l		&7,%d1			# BYTE OFFSET INTO TBL OF ATAN(|F|), 16 BYTES PER ENTRY
   6409	lea		ATANTBL(%pc),%a1	# BASE OF THE ATAN(|F|) TABLE
   6410	add.l		%d1,%a1			# ADDRESS OF ATAN(|F|)
   6411	mov.l		(%a1)+,ATANF(%a6)	# COPY SIGN/EXPONENT LONGWORD
   6412	mov.l		(%a1)+,ATANFHI(%a6)	# COPY UPPER FRACTION LONGWORD
   6413	mov.l		(%a1)+,ATANFLO(%a6)	# ATANF IS NOW ATAN(|F|)
   6414	mov.l		X(%a6),%d1		# LOAD SIGN AND EXPO. AGAIN
   6415	and.l		&0x80000000,%d1		# SIGN(F)
   6416	or.l		%d1,ATANF(%a6)		# ATANF IS NOW SIGN(F)*ATAN(|F|)
   6417	mov.l		(%sp)+,%d2		# RESTORE d2
   6418
   6419#--THAT'S ALL I HAVE TO DO FOR NOW,
   6420#--BUT ALAS, THE DIVIDE IS STILL CRANKING!
   6421
   6422#--U IN FP0, WE ARE NOW READY TO COMPUTE ATAN(U) AS
   6423#--U + A1*U*V*(A2 + V*(A3 + V)), V = U*U
   6424#--THE POLYNOMIAL MAY LOOK STRANGE, BUT IS NEVERTHELESS CORRECT.
   6425#--THE NATURAL FORM IS U + U*V*(A1 + V*(A2 + V*A3))
   6426#--WHAT WE HAVE HERE IS MERELY	A1 = A3, A2 = A1/A3, A3 = A2/A3.
   6427#--THE REASON FOR THIS REARRANGEMENT IS TO MAKE THE INDEPENDENT
   6428#--PARTS A1*U*V AND (A2 + ... STUFF) MORE LOAD-BALANCED
   6429
   6430	fmovm.x		&0x04,-(%sp)		# save fp2
   6431
   6432	fmov.x		%fp0,%fp1		# FP1 IS U
   6433	fmul.x		%fp1,%fp1		# FP1 IS V = U*U
   6434	fmov.d		ATANA3(%pc),%fp2	# FP2 IS A3
   6435	fadd.x		%fp1,%fp2		# A3+V
   6436	fmul.x		%fp1,%fp2		# V*(A3+V)
   6437	fmul.x		%fp0,%fp1		# U*V
   6438	fadd.d		ATANA2(%pc),%fp2	# A2+V*(A3+V)
   6439	fmul.d		ATANA1(%pc),%fp1	# A1*U*V
   6440	fmul.x		%fp2,%fp1		# A1*U*V*(A2+V*(A3+V))
   6441	fadd.x		%fp1,%fp0		# ATAN(U), FP1 RELEASED
   6442
   6443	fmovm.x		(%sp)+,&0x20		# restore fp2
   6444
   6445	fmov.l		%d0,%fpcr		# restore users rnd mode,prec
   6446	fadd.x		ATANF(%a6),%fp0		# ATAN(X)
   6447	bra		t_inx2			# EXIT THROUGH t_inx2 (DEFINED ELSEWHERE IN THE FILE)
   6448
   6449ATANBORS:
   6450#--|X| IS IN d1 IN COMPACT FORM (THE COMPARE BELOW READS d1).
   6451#--FP0 IS X AND |X| <= 1/16 OR |X| >= 16.
       #--NOTE(review): NO BRANCH TO ATANBORS IS VISIBLE IN THIS SECTION; satan
       #--DISPATCHES DIRECTLY TO ATANSM/ATANBIG. CONFIRM BEFORE RELYING ON IT.
   6452	cmp.l		%d1,&0x3FFF8000		# COMPARE COMPACT |X| WITH 1.0
   6453	bgt.w		ATANBIG			# I.E. |X| >= 16
   6454
   6455ATANSM:
   6456#--|X| <= 1/16
   6457#--IF |X| < 2^(-40), RETURN X AS ANSWER. OTHERWISE, APPROXIMATE
   6458#--ATAN(X) BY X + X*Y*(B1+Y*(B2+Y*(B3+Y*(B4+Y*(B5+Y*B6)))))
   6459#--WHICH IS X + X*Y*( [B1+Z*(B3+Z*B5)] + [Y*(B2+Z*(B4+Z*B6))] )
   6460#--WHERE Y = X*X, AND Z = Y*Y.
   6461
   6462	cmp.l		%d1,&0x3FD78000		# |X| < 2^(-40) ?
   6463	blt.w		ATANTINY
   6464
   6465#--COMPUTE POLYNOMIAL
   6466	fmovm.x		&0x0c,-(%sp)		# save fp2/fp3
   6467
   6468	fmul.x		%fp0,%fp0		# FP0 IS Y = X*X
   6469
   6470	fmov.x		%fp0,%fp1
   6471	fmul.x		%fp1,%fp1		# FP1 IS Z = Y*Y
   6472
   6473	fmov.d		ATANB6(%pc),%fp2	# FP2 IS B6
   6474	fmov.d		ATANB5(%pc),%fp3	# FP3 IS B5
   6475
   6476	fmul.x		%fp1,%fp2		# Z*B6
   6477	fmul.x		%fp1,%fp3		# Z*B5
   6478
   6479	fadd.d		ATANB4(%pc),%fp2	# B4+Z*B6
   6480	fadd.d		ATANB3(%pc),%fp3	# B3+Z*B5
   6481
   6482	fmul.x		%fp1,%fp2		# Z*(B4+Z*B6)
   6483	fmul.x		%fp3,%fp1		# Z*(B3+Z*B5)
   6484
   6485	fadd.d		ATANB2(%pc),%fp2	# B2+Z*(B4+Z*B6)
   6486	fadd.d		ATANB1(%pc),%fp1	# B1+Z*(B3+Z*B5)
   6487
   6488	fmul.x		%fp0,%fp2		# Y*(B2+Z*(B4+Z*B6))
   6489	fmul.x		X(%a6),%fp0		# X*Y
   6490
   6491	fadd.x		%fp2,%fp1		# [B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]
   6492
   6493	fmul.x		%fp1,%fp0		# X*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))])
   6494
   6495	fmovm.x		(%sp)+,&0x30		# restore fp2/fp3
   6496
   6497	fmov.l		%d0,%fpcr		# restore users rnd mode,prec
   6498	fadd.x		X(%a6),%fp0		# X + CORRECTION TERM = ATAN(X)
   6499	bra		t_inx2			# EXIT THROUGH t_inx2 (DEFINED ELSEWHERE IN THE FILE)
   6500
   6501ATANTINY:
   6502#--|X| < 2^(-40), ATAN(X) = X
       #--THE RESULT IS X ITSELF, MOVED INTO FP0 UNDER THE CALLER'S FPCR (d0).
   6503
   6504	fmov.l		%d0,%fpcr		# restore users rnd mode,prec
   6505	mov.b		&FMOV_OP,%d1		# last inst is MOVE
   6506	fmov.x		X(%a6),%fp0		# last inst - possible exception set
   6507
   6508	bra		t_catch			# EXIT THROUGH t_catch (DEFINED ELSEWHERE) WITH d1 = FMOV_OP
   6509
   6510ATANBIG:
   6511#--IF |X| > 2^(100), RETURN	SIGN(X)*(PI/2 - TINY). OTHERWISE,
   6512#--RETURN SIGN(X)*PI/2 + ATAN(-1/X).
   6513	cmp.l		%d1,&0x40638000		# COMPACT |X| VS 2^(100)
   6514	bgt.w		ATANHUGE
   6515
   6516#--APPROXIMATE ATAN(-1/X) BY
   6517#--X'+X'*Y*(C1+Y*(C2+Y*(C3+Y*(C4+Y*C5)))), X' = -1/X, Y = X'*X'
   6518#--THIS CAN BE RE-WRITTEN AS
   6519#--X'+X'*Y*( [C1+Z*(C3+Z*C5)] + [Y*(C2+Z*C4)] ), Z = Y*Y.
   6520
   6521	fmovm.x		&0x0c,-(%sp)		# save fp2/fp3
   6522
   6523	fmov.s		&0xBF800000,%fp1	# LOAD -1
   6524	fdiv.x		%fp0,%fp1		# FP1 IS -1/X
   6525
   6526#--DIVIDE IS STILL CRANKING
   6527
   6528	fmov.x		%fp1,%fp0		# FP0 IS X'
   6529	fmul.x		%fp0,%fp0		# FP0 IS Y = X'*X'
   6530	fmov.x		%fp1,X(%a6)		# X IS REALLY X'
   6531
   6532	fmov.x		%fp0,%fp1
   6533	fmul.x		%fp1,%fp1		# FP1 IS Z = Y*Y
   6534
   6535	fmov.d		ATANC5(%pc),%fp3	# FP3 IS C5
   6536	fmov.d		ATANC4(%pc),%fp2	# FP2 IS C4
   6537
   6538	fmul.x		%fp1,%fp3		# Z*C5
   6539	fmul.x		%fp1,%fp2		# Z*C4
   6540
   6541	fadd.d		ATANC3(%pc),%fp3	# C3+Z*C5
   6542	fadd.d		ATANC2(%pc),%fp2	# C2+Z*C4
   6543
   6544	fmul.x		%fp3,%fp1		# Z*(C3+Z*C5), FP3 RELEASED
   6545	fmul.x		%fp0,%fp2		# Y*(C2+Z*C4)
   6546
   6547	fadd.d		ATANC1(%pc),%fp1	# C1+Z*(C3+Z*C5)
   6548	fmul.x		X(%a6),%fp0		# X'*Y
   6549
   6550	fadd.x		%fp2,%fp1		# [Y*(C2+Z*C4)]+[C1+Z*(C3+Z*C5)]
   6551
   6552	fmul.x		%fp1,%fp0		# X'*Y*([C1+Z*(C3+Z*C5)]
   6553#					...	+[Y*(C2+Z*C4)])
   6554	fadd.x		X(%a6),%fp0		# X' + CORRECTION TERM = ATAN(-1/X), BEFORE FPCR IS RESTORED
   6555
   6556	fmovm.x		(%sp)+,&0x30		# restore fp2/fp3
   6557
   6558	fmov.l		%d0,%fpcr		# restore users rnd mode,prec
   6559	tst.b		(%a0)			# SIGN OF THE ORIGINAL X (TOP BYTE OF ITS SIGN/EXPONENT WORD)
   6560	bpl.b		pos_big
   6561
   6562neg_big:
   6563	fadd.x		NPIBY2(%pc),%fp0	# X < 0: ADD THE NPIBY2 CONSTANT UNDER THE USER'S FPCR
   6564	bra		t_minx2			# EXIT THROUGH t_minx2 (DEFINED ELSEWHERE IN THE FILE)
   6565
   6566pos_big:
   6567	fadd.x		PPIBY2(%pc),%fp0	# X > 0: ADD THE PPIBY2 CONSTANT UNDER THE USER'S FPCR
   6568	bra		t_pinx2			# EXIT THROUGH t_pinx2 (DEFINED ELSEWHERE IN THE FILE)
   6569
   6570ATANHUGE:
   6571#--RETURN SIGN(X)*(PIBY2 - TINY) = SIGN(X)*PIBY2 - SIGN(X)*TINY
       #--THE CONSTANT IS LOADED FIRST, THEN FPCR IS SET FROM d0, SO ONLY THE
       #--ADD OF THE OPPOSITE-SIGNED TINY VALUE RUNS UNDER THE USER'S FPCR.
   6572	tst.b		(%a0)			# SIGN OF X (TOP BYTE OF ITS SIGN/EXPONENT WORD)
   6573	bpl.b		pos_huge
   6574
   6575neg_huge:
   6576	fmov.x		NPIBY2(%pc),%fp0	# FP0 IS THE NPIBY2 CONSTANT
   6577	fmov.l		%d0,%fpcr		# USER'S ROUNDING MODE/PRECISION FOR THE FINAL ADD
   6578	fadd.x		PTINY(%pc),%fp0		# ADD THE PTINY CONSTANT
   6579	bra		t_minx2			# EXIT THROUGH t_minx2 (DEFINED ELSEWHERE IN THE FILE)
   6580
   6581pos_huge:
   6582	fmov.x		PPIBY2(%pc),%fp0	# FP0 IS THE PPIBY2 CONSTANT
   6583	fmov.l		%d0,%fpcr		# USER'S ROUNDING MODE/PRECISION FOR THE FINAL ADD
   6584	fadd.x		NTINY(%pc),%fp0		# ADD THE NTINY CONSTANT
   6585	bra		t_pinx2			# EXIT THROUGH t_pinx2 (DEFINED ELSEWHERE IN THE FILE)
   6586
   6587	global		satand
   6588#--ENTRY POINT FOR ATAN(X) FOR DENORMALIZED ARGUMENT
   6589satand:
   6590	bra		t_extdnrm		# NO WORK DONE HERE; t_extdnrm (DEFINED ELSEWHERE) PRODUCES THE RESULT
   6591
   6592#########################################################################
   6593# sasin():  computes the inverse sine of a normalized input		#
   6594# sasind(): computes the inverse sine of a denormalized input		#
   6595#									#
   6596# INPUT ***************************************************************	#
   6597#	a0 = pointer to extended precision input			#
   6598#	d0 = round precision,mode					#
   6599#									#
   6600# OUTPUT **************************************************************	#
   6601#	fp0 = arcsin(X)							#
   6602#									#
   6603# ACCURACY and MONOTONICITY *******************************************	#
   6604#	The returned result is within 3 ulps in	64 significant bit,	#
   6605#	i.e. within 0.5001 ulp to 53 bits if the result is subsequently	#
   6606#	rounded to double precision. The result is provably monotonic	#
   6607#	in double precision.						#
   6608#									#
   6609# ALGORITHM ***********************************************************	#
   6610#									#
   6611#	ASIN								#
   6612#	1. If |X| >= 1, go to 3.					#
   6613#									#
   6614#	2. (|X| < 1) Calculate asin(X) by				#
   6615#		z := sqrt( [1-X][1+X] )					#
   6616#		asin(X) = atan( X / z ).				#
   6617#		Exit.							#
   6618#									#
   6619#	3. If |X| > 1, go to 5.						#
   6620#									#
   6621#	4. (|X| = 1) sgn := sign(X), return asin(X) := sgn * Pi/2. Exit.#
   6622#									#
   6623#	5. (|X| > 1) Generate an invalid operation by 0 * infinity.	#
   6624#		Exit.							#
   6625#									#
   6626#########################################################################
   6627
   6628	global		sasin
   6629sasin:
   6630	fmov.x		(%a0),%fp0		# LOAD INPUT
   6631
   6632	mov.l		(%a0),%d1		# d1 UPPER WORD = SIGN/EXPONENT OF X
   6633	mov.w		4(%a0),%d1		# d1 LOWER WORD = TOP 16 FRACTION BITS
   6634	and.l		&0x7FFFFFFF,%d1		# DROP THE SIGN: d1 = COMPACT |X|
   6635	cmp.l		%d1,&0x3FFF8000		# |X| >= 1 ?
   6636	bge.b		ASINBIG
   6637
   6638# This catch is added here for the '060 QSP. Originally, the call to
   6639# satan() would handle this case by causing the exception which would
   6640# not be caught until gen_except(). Now, with the exceptions being
   6641# detected inside of satan(), the exception would have been handled there
   6642# instead of inside sasin() as expected.
   6643	cmp.l		%d1,&0x3FD78000		# |X| < 2^(-40) ?
   6644	blt.w		ASINTINY
   6645
   6646#--THIS IS THE USUAL CASE, |X| < 1
   6647#--ASIN(X) = ATAN( X / SQRT( (1-X)(1+X) ) )
       #--d0 IS NOT MODIFIED BEFORE THE CALL, SO satan RECEIVES THE CALLER'S d0.
   6648
   6649ASINMAIN:
   6650	fmov.s		&0x3F800000,%fp1	# FP1 IS 1.0
   6651	fsub.x		%fp0,%fp1		# 1-X
   6652	fmovm.x		&0x4,-(%sp)		#  {fp2}
   6653	fmov.s		&0x3F800000,%fp2	# FP2 IS 1.0
   6654	fadd.x		%fp0,%fp2		# 1+X
   6655	fmul.x		%fp2,%fp1		# (1+X)(1-X)
   6656	fmovm.x		(%sp)+,&0x20		#  {fp2}
   6657	fsqrt.x		%fp1			# SQRT([1-X][1+X])
   6658	fdiv.x		%fp1,%fp0		# X/SQRT([1-X][1+X])
   6659	fmovm.x		&0x01,-(%sp)		# save X/SQRT(...)
   6660	lea		(%sp),%a0		# pass ptr to X/SQRT(...)
   6661	bsr		satan			# FP0 IS ATAN( X/SQRT(...) )
   6662	add.l		&0xc,%sp		# clear X/SQRT(...) from stack
   6663	bra		t_inx2			# EXIT THROUGH t_inx2 (DEFINED ELSEWHERE IN THE FILE)
   6664
   6665ASINBIG:
   6666	fabs.x		%fp0			# |X|
   6667	fcmp.s		%fp0,&0x3F800000	# |X| VS 1.0
   6668	fbgt		t_operr			# cause an operr exception
   6669
   6670#--|X| = 1, ASIN(X) = +- PI/2.
       #--THE SIGN IS APPLIED BY MULTIPLYING PIBY2 BY +-1.0 (SINGLE PRECISION)
       #--AFTER FPCR HAS BEEN SET FROM d0.
   6671ASINONE:
   6672	fmov.x		PIBY2(%pc),%fp0		# FP0 IS THE PIBY2 CONSTANT
   6673	mov.l		(%a0),%d1		# SIGN/EXPONENT LONGWORD OF X
   6674	and.l		&0x80000000,%d1		# SIGN BIT OF X
   6675	or.l		&0x3F800000,%d1		# +-1 IN SGL FORMAT
   6676	mov.l		%d1,-(%sp)		# push SIGN(X) IN SGL-FMT
   6677	fmov.l		%d0,%fpcr		# USER'S ROUNDING MODE/PRECISION FOR THE FINAL MULTIPLY
   6678	fmul.s		(%sp)+,%fp0		# FP0 IS SIGN(X)*PIBY2; POPS THE TEMPORARY
   6679	bra		t_inx2			# EXIT THROUGH t_inx2 (DEFINED ELSEWHERE IN THE FILE)
   6680
   6681#--|X| < 2^(-40), ASIN(X) = X
   6682ASINTINY:
   6683	fmov.l		%d0,%fpcr		# restore users rnd mode,prec
   6684	mov.b		&FMOV_OP,%d1		# last inst is MOVE
   6685	fmov.x		(%a0),%fp0		# last inst - possible exception
   6686	bra		t_catch			# EXIT THROUGH t_catch (DEFINED ELSEWHERE) WITH d1 = FMOV_OP
   6687
   6688	global		sasind
   6689#--ASIN(X) = X FOR DENORMALIZED X
   6690sasind:
   6691	bra		t_extdnrm		# NO WORK DONE HERE; t_extdnrm (DEFINED ELSEWHERE) PRODUCES THE RESULT
   6692
   6693#########################################################################
   6694# sacos():  computes the inverse cosine of a normalized input		#
   6695# sacosd(): computes the inverse cosine of a denormalized input		#
   6696#									#
   6697# INPUT ***************************************************************	#
   6698#	a0 = pointer to extended precision input			#
   6699#	d0 = round precision,mode					#
   6700#									#
   6701# OUTPUT ************************************************************** #
   6702#	fp0 = arccos(X)							#
   6703#									#
   6704# ACCURACY and MONOTONICITY *******************************************	#
   6705#	The returned result is within 3 ulps in	64 significant bit,	#
   6706#	i.e. within 0.5001 ulp to 53 bits if the result is subsequently	#
   6707#	rounded to double precision. The result is provably monotonic	#
   6708#	in double precision.						#
   6709#									#
   6710# ALGORITHM *********************************************************** #
   6711#									#
   6712#	ACOS								#
   6713#	1. If |X| >= 1, go to 3.					#
   6714#									#
   6715#	2. (|X| < 1) Calculate acos(X) by				#
   6716#		z := (1-X) / (1+X)					#
   6717#		acos(X) = 2 * atan( sqrt(z) ).				#
   6718#		Exit.							#
   6719#									#
   6720#	3. If |X| > 1, go to 5.						#
   6721#									#
   6722#	4. (|X| = 1) If X > 0, return 0. Otherwise, return Pi. Exit.	#
   6723#									#
   6724#	5. (|X| > 1) Generate an invalid operation by 0 * infinity.	#
   6725#		Exit.							#
   6726#									#
   6727#########################################################################
   6728
   6729	global		sacos
   6730sacos:
   6731	fmov.x		(%a0),%fp0		# LOAD INPUT
   6732
   6733	mov.l		(%a0),%d1		# pack exp w/ upper 16 fraction
   6734	mov.w		4(%a0),%d1		# d1 LOWER WORD = TOP 16 FRACTION BITS
   6735	and.l		&0x7FFFFFFF,%d1		# DROP THE SIGN: d1 = COMPACT |X|
   6736	cmp.l		%d1,&0x3FFF8000		# |X| >= 1 ?
   6737	bge.b		ACOSBIG
   6738
   6739#--THIS IS THE USUAL CASE, |X| < 1
   6740#--ACOS(X) = 2 * ATAN( SQRT( (1-X)/(1+X) ) )
       #--satan IS CALLED WITH d0 = 0; THE CALLER'S d0 IS PARKED ON THE STACK
       #--AND LOADED INTO FPCR ONLY FOR THE FINAL DOUBLING.
   6741
   6742ACOSMAIN:
   6743	fmov.s		&0x3F800000,%fp1	# FP1 IS 1.0
   6744	fadd.x		%fp0,%fp1		# 1+X
   6745	fneg.x		%fp0			# -X
   6746	fadd.s		&0x3F800000,%fp0	# 1-X
   6747	fdiv.x		%fp1,%fp0		# (1-X)/(1+X)
   6748	fsqrt.x		%fp0			# SQRT((1-X)/(1+X))
   6749	mov.l		%d0,-(%sp)		# save original users fpcr
   6750	clr.l		%d0			# satan WILL SEE d0 = 0
   6751	fmovm.x		&0x01,-(%sp)		# save SQRT(...) to stack
   6752	lea		(%sp),%a0		# pass ptr to sqrt
   6753	bsr		satan			# ATAN(SQRT([1-X]/[1+X]))
   6754	add.l		&0xc,%sp		# clear SQRT(...) from stack
   6755
   6756	fmov.l		(%sp)+,%fpcr		# restore users round prec,mode
   6757	fadd.x		%fp0,%fp0		# 2 * ATAN( STUFF )
   6758	bra		t_pinx2			# EXIT THROUGH t_pinx2 (DEFINED ELSEWHERE IN THE FILE)
   6759
   6760ACOSBIG:
   6761	fabs.x		%fp0			# |X|
   6762	fcmp.s		%fp0,&0x3F800000	# |X| VS 1.0
   6763	fbgt		t_operr			# cause an operr exception
   6764
   6765#--|X| = 1, ACOS(X) = 0 OR PI
   6766	tst.b		(%a0)			# is X positive or negative?
   6767	bpl.b		ACOSP1
   6768
   6769#--X = -1
   6770#Returns PI and inexact exception
   6771ACOSM1:
   6772	fmov.x		PI(%pc),%fp0		# load PI
   6773	fmov.l		%d0,%fpcr		# load round mode,prec
   6774	fadd.s		&0x00800000,%fp0	# add a small value (0x00800000 = 2^(-126) in single precision)
   6775	bra		t_pinx2			# EXIT THROUGH t_pinx2 (DEFINED ELSEWHERE IN THE FILE)
   6776
   6777ACOSP1:
   6778	bra		ld_pzero		# answer is positive zero
   6779
   6780	global		sacosd
   6781#--ACOS(X) = PI/2 FOR DENORMALIZED X
   6782sacosd:
   6783	fmov.l		%d0,%fpcr		# load user's rnd mode/prec
   6784	fmov.x		PIBY2(%pc),%fp0		# FP0 IS THE PIBY2 CONSTANT, MOVED UNDER THE USER'S FPCR
   6785	bra		t_pinx2			# EXIT THROUGH t_pinx2 (DEFINED ELSEWHERE IN THE FILE)
   6786
   6787#########################################################################
   6788# setox():    computes the exponential for a normalized input		#
   6789# setoxd():   computes the exponential for a denormalized input		#
   6790# setoxm1():  computes the exponential minus 1 for a normalized input	#
   6791# setoxm1d(): computes the exponential minus 1 for a denormalized input	#
   6792#									#
   6793# INPUT	*************************************************************** #
   6794#	a0 = pointer to extended precision input			#
   6795#	d0 = round precision,mode					#
   6796#									#
   6797# OUTPUT ************************************************************** #
   6798#	fp0 = exp(X) or exp(X)-1					#
   6799#									#
   6800# ACCURACY and MONOTONICITY ******************************************* #
   6801#	The returned result is within 0.85 ulps in 64 significant bit,	#
   6802#	i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
   6803#	rounded to double precision. The result is provably monotonic	#
   6804#	in double precision.						#
   6805#									#
   6806# ALGORITHM and IMPLEMENTATION **************************************** #
   6807#									#
   6808#	setoxd								#
   6809#	------								#
   6810#	Step 1.	Set ans := 1.0						#
   6811#									#
   6812#	Step 2.	Return	ans := ans + sign(X)*2^(-126). Exit.		#
   6813#	Notes:	This will always generate one exception -- inexact.	#
   6814#									#
   6815#									#
   6816#	setox								#
   6817#	-----								#
   6818#									#
   6819#	Step 1.	Filter out extreme cases of input argument.		#
   6820#		1.1	If |X| >= 2^(-65), go to Step 1.3.		#
   6821#		1.2	Go to Step 7.					#
   6822#		1.3	If |X| < 16380 log(2), go to Step 2.		#
   6823#		1.4	Go to Step 8.					#
   6824#	Notes:	The usual case should take the branches 1.1 -> 1.3 -> 2.#
   6825#		To avoid the use of floating-point comparisons, a	#
   6826#		compact representation of |X| is used. This format is a	#
   6827#		32-bit integer, the upper (more significant) 16 bits	#
   6828#		are the sign and biased exponent field of |X|; the	#
   6829#		lower 16 bits are the 16 most significant fraction	#
   6830#		(including the explicit bit) bits of |X|. Consequently,	#
   6831#		the comparisons in Steps 1.1 and 1.3 can be performed	#
   6832#		by integer comparison. Note also that the constant	#
   6833#		16380 log(2) used in Step 1.3 is also in the compact	#
   6834#		form. Thus taking the branch to Step 2 guarantees	#
   6835#		|X| < 16380 log(2). There is no harm to have a small	#
   6836#		number of cases where |X| is less than,	but close to,	#
   6837#		16380 log(2) and the branch to Step 8 is taken.		#
   6838#									#
   6839#	Step 2.	Calculate N = round-to-nearest-int( X * 64/log2 ).	#
   6840#		2.1	Set AdjFlag := 0 (indicates the branch 1.3 -> 2 #
   6841#			was taken)					#
   6842#		2.2	N := round-to-nearest-integer( X * 64/log2 ).	#
   6843#		2.3	Calculate	J = N mod 64; so J = 0,1,2,..., #
   6844#			or 63.						#
   6845#		2.4	Calculate	M = (N - J)/64; so N = 64M + J.	#
   6846#		2.5	Calculate the address of the stored value of	#
   6847#			2^(J/64).					#
   6848#		2.6	Create the value Scale = 2^M.			#
   6849#	Notes:	The calculation in 2.2 is really performed by		#
   6850#			Z := X * constant				#
   6851#			N := round-to-nearest-integer(Z)		#
   6852#		where							#
   6853#			constant := single-precision( 64/log 2 ).	#
   6854#									#
   6855#		Using a single-precision constant avoids memory		#
   6856#		access. Another effect of using a single-precision	#
   6857#		"constant" is that the calculated value Z is		#
   6858#									#
   6859#			Z = X*(64/log2)*(1+eps), |eps| <= 2^(-24).	#
   6860#									#
   6861#		This error has to be considered later in Steps 3 and 4.	#
   6862#									#
   6863#	Step 3.	Calculate X - N*log2/64.				#
   6864#		3.1	R := X + N*L1,					#
   6865#				where L1 := single-precision(-log2/64).	#
   6866#		3.2	R := R + N*L2,					#
   6867#				L2 := extended-precision(-log2/64 - L1).#
   6868#	Notes:	a) The way L1 and L2 are chosen ensures L1+L2		#
   6869#		approximate the value -log2/64 to 88 bits of accuracy.	#
   6870#		b) N*L1 is exact because N is no longer than 22 bits	#
   6871#		and L1 is no longer than 24 bits.			#
   6872#		c) The calculation X+N*L1 is also exact due to		#
   6873#		cancellation. Thus, R is practically X+N(L1+L2) to full	#
   6874#		64 bits.						#
   6875#		d) It is important to estimate how large can |R| be	#
   6876#		after Step 3.2.						#
   6877#									#
   6878#		N = rnd-to-int( X*64/log2 (1+eps) ), |eps|<=2^(-24)	#
   6879#		X*64/log2 (1+eps)	=	N + f,	|f| <= 0.5	#
   6880#		X*64/log2 - N	=	f - eps*X 64/log2		#
   6881#		X - N*log2/64	=	f*log2/64 - eps*X		#
   6882#									#
   6883#									#
   6884#		Now |X| <= 16446 log2, thus				#
   6885#									#
   6886#			|X - N*log2/64| <= (0.5 + 16446/2^(18))*log2/64	#
   6887#					<= 0.57 log2/64.		#
   6888#		 This bound will be used in Step 4.			#
   6889#									#
   6890#	Step 4.	Approximate exp(R)-1 by a polynomial			#
   6891#		p = R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5))))	#
   6892#	Notes:	a) In order to reduce memory access, the coefficients	#
   6893#		are made as "short" as possible: A1 (which is 1/2), A4	#
   6894#		and A5 are single precision; A2 and A3 are double	#
   6895#		precision.						#
   6896#		b) Even with the restrictions above,			#
   6897#		   |p - (exp(R)-1)| < 2^(-68.8) for all |R| <= 0.0062.	#
   6898#		Note that 0.0062 is slightly bigger than 0.57 log2/64.	#
   6899#		c) To fully utilize the pipeline, p is separated into	#
   6900#		two independent pieces of roughly equal complexities	#
   6901#			p = [ R + R*S*(A2 + S*A4) ]	+		#
   6902#				[ S*(A1 + S*(A3 + S*A5)) ]		#
   6903#		where S = R*R.						#
   6904#									#
   6905#	Step 5.	Compute 2^(J/64)*exp(R) = 2^(J/64)*(1+p) by		#
   6906#				ans := T + ( T*p + t)			#
   6907#		where T and t are the stored values for 2^(J/64).	#
   6908#	Notes:	2^(J/64) is stored as T and t where T+t approximates	#
   6909#		2^(J/64) to roughly 85 bits; T is in extended precision	#
   6910#		and t is in single precision. Note also that T is	#
   6911#		rounded to 62 bits so that the last two bits of T are	#
   6912#		zero. The reason for such a special form is that T-1,	#
   6913#		T-2, and T-8 will all be exact --- a property that will	#
   6914#		give much more accurate computation of the function	#
   6915#		EXPM1.							#
   6916#									#
   6917#	Step 6.	Reconstruction of exp(X)				#
   6918#			exp(X) = 2^M * 2^(J/64) * exp(R).		#
   6919#		6.1	If AdjFlag = 0, go to 6.3			#
   6920#		6.2	ans := ans * AdjScale				#
   6921#		6.3	Restore the user FPCR				#
   6922#		6.4	Return ans := ans * Scale. Exit.		#
   6923#	Notes:	If AdjFlag = 0, we have X = Mlog2 + Jlog2/64 + R,	#
   6924#		|M| <= 16380, and Scale = 2^M. Moreover, exp(X) will	#
   6925#		neither overflow nor underflow. If AdjFlag = 1, that	#
   6926#		means that						#
   6927#			X = (M1+M)log2 + Jlog2/64 + R, |M1+M| >= 16380.	#
   6928#		Hence, exp(X) may overflow or underflow or neither.	#
   6929#		When that is the case, AdjScale = 2^(M1) where M1 is	#
   6930#		approximately M. Thus 6.2 will never cause		#
   6931#		over/underflow. Possible exception in 6.4 is overflow	#
   6932#		or underflow. The inexact exception is not generated in	#
   6933#		6.4. Although one can argue that the inexact flag	#
   6934#		should always be raised, simulating that exception	#
   6935#		costs more than the flag is worth in practical use.	#
   6936#									#
   6937#	Step 7.	Return 1 + X.						#
   6938#		7.1	ans := X					#
   6939#		7.2	Restore user FPCR.				#
   6940#		7.3	Return ans := 1 + ans. Exit			#
   6941#	Notes:	For non-zero X, the inexact exception will always be	#
   6942#		raised by 7.3. That is the only exception raised by 7.3.#
   6943#		Note also that we use the FMOVEM instruction to move X	#
   6944#		in Step 7.1 to avoid unnecessary trapping. (Although	#
   6945#		the FMOVEM may not seem relevant since X is normalized,	#
   6946#		the precaution will be useful in the library version of	#
   6947#		this code where the separate entry for denormalized	#
   6948#		inputs will be done away with.)				#
   6949#									#
   6950#	Step 8.	Handle exp(X) where |X| >= 16380log2.			#
   6951#		8.1	If |X| > 16480 log2, go to Step 9.		#
   6952#		(mimic 2.2 - 2.6)					#
   6953#		8.2	N := round-to-integer( X * 64/log2 )		#
   6954#		8.3	Calculate J = N mod 64, J = 0,1,...,63		#
   6955#		8.4	K := (N-J)/64, M1 := truncate(K/2), M = K-M1,	#
   6956#			AdjFlag := 1.					#
   6957#		8.5	Calculate the address of the stored value	#
   6958#			2^(J/64).					#
   6959#		8.6	Create the values Scale = 2^M, AdjScale = 2^M1.	#
   6960#		8.7	Go to Step 3.					#
   6961#	Notes:	Refer to notes for 2.2 - 2.6.				#
   6962#									#
   6963#	Step 9.	Handle exp(X), |X| > 16480 log2.			#
   6964#		9.1	If X < 0, go to 9.3				#
   6965#		9.2	ans := Huge, go to 9.4				#
   6966#		9.3	ans := Tiny.					#
   6967#		9.4	Restore user FPCR.				#
   6968#		9.5	Return ans := ans * ans. Exit.			#
   6969#	Notes:	Exp(X) will surely overflow or underflow, depending on	#
   6970#		X's sign. "Huge" and "Tiny" are respectively large/tiny	#
   6971#		extended-precision numbers whose square over/underflow	#
   6972#		with an inexact result. Thus, 9.5 always raises the	#
   6973#		inexact together with either overflow or underflow.	#
   6974#									#
   6975#	setoxm1d							#
   6976#	--------							#
   6977#									#
   6978#	Step 1.	Set ans := 0						#
   6979#									#
   6980#	Step 2.	Return	ans := X + ans. Exit.				#
   6981#	Notes:	This will return X with the appropriate rounding	#
   6982#		 precision prescribed by the user FPCR.			#
   6983#									#
   6984#	setoxm1								#
   6985#	-------								#
   6986#									#
   6987#	Step 1.	Check |X|						#
   6988#		1.1	If |X| >= 1/4, go to Step 1.3.			#
   6989#		1.2	Go to Step 7.					#
   6990#		1.3	If |X| < 70 log(2), go to Step 2.		#
   6991#		1.4	Go to Step 10.					#
   6992#	Notes:	The usual case should take the branches 1.1 -> 1.3 -> 2.#
   6993#		However, it is conceivable |X| can be small very often	#
   6994#		because EXPM1 is intended to evaluate exp(X)-1		#
   6995#		accurately when |X| is small. For further details on	#
   6996#		the comparisons, see the notes on Step 1 of setox.	#
   6997#									#
   6998#	Step 2.	Calculate N = round-to-nearest-int( X * 64/log2 ).	#
   6999#		2.1	N := round-to-nearest-integer( X * 64/log2 ).	#
   7000#		2.2	Calculate	J = N mod 64; so J = 0,1,2,..., #
   7001#			or 63.						#
   7002#		2.3	Calculate	M = (N - J)/64; so N = 64M + J.	#
   7003#		2.4	Calculate the address of the stored value of	#
   7004#			2^(J/64).					#
   7005#		2.5	Create the values Sc = 2^M and			#
   7006#			OnebySc := -2^(-M).				#
   7007#	Notes:	See the notes on Step 2 of setox.			#
   7008#									#
   7009#	Step 3.	Calculate X - N*log2/64.				#
   7010#		3.1	R := X + N*L1,					#
   7011#				where L1 := single-precision(-log2/64).	#
   7012#		3.2	R := R + N*L2,					#
   7013#				L2 := extended-precision(-log2/64 - L1).#
   7014#	Notes:	Applying the analysis of Step 3 of setox in this case	#
   7015#		shows that |R| <= 0.0055 (note that |X| <= 70 log2 in	#
   7016#		this case).						#
   7017#									#
   7018#	Step 4.	Approximate exp(R)-1 by a polynomial			#
   7019#			p = R+R*R*(A1+R*(A2+R*(A3+R*(A4+R*(A5+R*A6)))))	#
   7020#	Notes:	a) In order to reduce memory access, the coefficients	#
   7021#		are made as "short" as possible: A1 (which is 1/2), A5	#
   7022#		and A6 are single precision; A2, A3 and A4 are double	#
   7023#		precision.						#
   7024#		b) Even with the restriction above,			#
   7025#			|p - (exp(R)-1)| <	|R| * 2^(-72.7)		#
   7026#		for all |R| <= 0.0055.					#
   7027#		c) To fully utilize the pipeline, p is separated into	#
   7028#		two independent pieces of roughly equal complexity	#
   7029#			p = [ R*S*(A2 + S*(A4 + S*A6)) ]	+	#
   7030#				[ R + S*(A1 + S*(A3 + S*A5)) ]		#
   7031#		where S = R*R.						#
   7032#									#
   7033#	Step 5.	Compute 2^(J/64)*p by					#
   7034#				p := T*p				#
   7035#		where T and t are the stored values for 2^(J/64).	#
   7036#	Notes:	2^(J/64) is stored as T and t where T+t approximates	#
   7037#		2^(J/64) to roughly 85 bits; T is in extended precision	#
   7038#		and t is in single precision. Note also that T is	#
   7039#		rounded to 62 bits so that the last two bits of T are	#
   7040#		zero. The reason for such a special form is that T-1,	#
   7041#		T-2, and T-8 will all be exact --- a property that will	#
   7042#		be exploited in Step 6 below. The total relative error	#
   7043#		in p is no bigger than 2^(-67.7) compared to the final	#
   7044#		result.							#
   7045#									#
   7046#	Step 6.	Reconstruction of exp(X)-1				#
   7047#			exp(X)-1 = 2^M * ( 2^(J/64) + p - 2^(-M) ).	#
   7048#		6.1	If M <= 63, go to Step 6.3.			#
   7049#		6.2	ans := T + (p + (t + OnebySc)). Go to 6.6	#
   7050#		6.3	If M >= -3, go to 6.5.				#
   7051#		6.4	ans := (T + (p + t)) + OnebySc. Go to 6.6	#
   7052#		6.5	ans := (T + OnebySc) + (p + t).			#
   7053#		6.6	Restore user FPCR.				#
   7054#		6.7	Return ans := Sc * ans. Exit.			#
   7055#	Notes:	The various arrangements of the expressions give	#
   7056#		accurate evaluations.					#
   7057#									#
   7058#	Step 7.	exp(X)-1 for |X| < 1/4.					#
   7059#		7.1	If |X| >= 2^(-65), go to Step 9.		#
   7060#		7.2	Go to Step 8.					#
   7061#									#
   7062#	Step 8.	Calculate exp(X)-1, |X| < 2^(-65).			#
   7063#		8.1	If |X| < 2^(-16312), goto 8.3			#
   7064#		8.2	Restore FPCR; return ans := X - 2^(-16382).	#
   7065#			Exit.						#
   7066#		8.3	X := X * 2^(140).				#
   7067#		8.4	Restore FPCR; ans := ans - 2^(-16382).		#
   7068#		 Return ans := ans*2^(-140). Exit			#
   7069#	Notes:	The idea is to return "X - tiny" under the user		#
   7070#		precision and rounding modes. To avoid unnecessary	#
   7071#		inefficiency, we stay away from denormalized numbers	#
   7072#		the best we can. For |X| >= 2^(-16312), the		#
   7073#		straightforward 8.2 generates the inexact exception as	#
   7074#		the case warrants.					#
   7075#									#
   7076#	Step 9.	Calculate exp(X)-1, |X| < 1/4, by a polynomial		#
   7077#			p = X + X*X*(B1 + X*(B2 + ... + X*B12))		#
   7078#	Notes:	a) In order to reduce memory access, the coefficients	#
   7079#		are made as "short" as possible: B1 (which is 1/2), B9	#
   7080#		to B12 are single precision; B3 to B8 are double	#
   7081#		precision; and B2 is double extended.			#
   7082#		b) Even with the restriction above,			#
   7083#			|p - (exp(X)-1)| < |X| 2^(-70.6)		#
   7084#		for all |X| <= 0.251.					#
   7085#		Note that 0.251 is slightly bigger than 1/4.		#
   7086#		c) To fully preserve accuracy, the polynomial is	#
   7087#		computed as						#
   7088#			X + ( S*B1 +	Q ) where S = X*X and		#
   7089#			Q	=	X*S*(B2 + X*(B3 + ... + X*B12))	#
   7090#		d) To fully utilize the pipeline, Q is separated into	#
   7091#		two independent pieces of roughly equal complexity	#
   7092#			Q = [ X*S*(B2 + S*(B4 + ... + S*B12)) ] +	#
   7093#				[ S*S*(B3 + S*(B5 + ... + S*B11)) ]	#
   7094#									#
   7095#	Step 10. Calculate exp(X)-1 for |X| >= 70 log 2.		#
   7096#		10.1 If X >= 70log2 , exp(X) - 1 = exp(X) for all	#
   7097#		practical purposes. Therefore, go to Step 1 of setox.	#
   7098#		10.2 If X <= -70log2, exp(X) - 1 = -1 for all practical	#
   7099#		purposes.						#
   7100#		ans := -1						#
   7101#		Restore user FPCR					#
   7102#		Return ans := ans + 2^(-126). Exit.			#
   7103#	Notes:	10.2 will always create an inexact and return -1 + tiny	#
   7104#		in the user rounding precision and mode.		#
   7105#									#
   7106#########################################################################
   7107
   7108L2:	long		0x3FDC0000,0x82E30865,0x4361C4C6,0x00000000	# read with fmul.x as L2, where L1+L2 = -log2/64
   7109
   7110EEXPA3:	long		0x3FA55555,0x55554CC1	# setox A3 (double, added with fadd.d)
   7111EEXPA2:	long		0x3FC55555,0x55554A54	# setox A2 (double, added with fadd.d)
   7112
   7113EM1A4:	long		0x3F811111,0x11174385	# setoxm1 A4 (double, added with fadd.d)
   7114EM1A3:	long		0x3FA55555,0x55554F5A	# setoxm1 A3 (double, added with fadd.d)
   7115
   7116EM1A2:	long		0x3FC55555,0x55555555,0x00000000,0x00000000	# setoxm1 A2; the code only reads it as a double (fadd.d)
   7117
   7118EM1B8:	long		0x3EC71DE3,0xA5774682	# EM1POLY B8 (double)
   7119EM1B7:	long		0x3EFA01A0,0x19D7CB68	# EM1POLY B7 (double)
   7120
   7121EM1B6:	long		0x3F2A01A0,0x1A019DF3	# EM1POLY B6 (double)
   7122EM1B5:	long		0x3F56C16C,0x16C170E2	# EM1POLY B5 (double)
   7123
   7124EM1B4:	long		0x3F811111,0x11111111	# EM1POLY B4 (double)
   7125EM1B3:	long		0x3FA55555,0x55555555	# EM1POLY B3 (double)
   7126
   7127EM1B2:	long		0x3FFC0000,0xAAAAAAAA,0xAAAAAAAB	# EM1POLY B2 (extended, added with fadd.x)
   7128	long		0x00000000	# extra longword following the 12-byte extended value
   7129
   7130TWO140:	long		0x48B00000,0x00000000	# double 2^(140): first multiplier in EM12TINY
   7131TWON140:
   7132	long		0x37300000,0x00000000	# double 2^(-140): final multiplier in EM12TINY
   7133
   7134EEXPTBL:			# table of 2^(J/64), J = 0..63, used by setox and setoxm1
# Each of the 64 entries is 16 bytes and is addressed as EEXPTBL + (J << 4).
# The first three longwords are loaded as one extended value (the leading
# part of 2^(J/64)); the fourth longword is read as a single-precision
# value and added in separately by the callers.
   7135	long		0x3FFF0000,0x80000000,0x00000000,0x00000000
   7136	long		0x3FFF0000,0x8164D1F3,0xBC030774,0x9F841A9B
   7137	long		0x3FFF0000,0x82CD8698,0xAC2BA1D8,0x9FC1D5B9
   7138	long		0x3FFF0000,0x843A28C3,0xACDE4048,0xA0728369
   7139	long		0x3FFF0000,0x85AAC367,0xCC487B14,0x1FC5C95C
   7140	long		0x3FFF0000,0x871F6196,0x9E8D1010,0x1EE85C9F
   7141	long		0x3FFF0000,0x88980E80,0x92DA8528,0x9FA20729
   7142	long		0x3FFF0000,0x8A14D575,0x496EFD9C,0xA07BF9AF
   7143	long		0x3FFF0000,0x8B95C1E3,0xEA8BD6E8,0xA0020DCF
   7144	long		0x3FFF0000,0x8D1ADF5B,0x7E5BA9E4,0x205A63DA
   7145	long		0x3FFF0000,0x8EA4398B,0x45CD53C0,0x1EB70051
   7146	long		0x3FFF0000,0x9031DC43,0x1466B1DC,0x1F6EB029
   7147	long		0x3FFF0000,0x91C3D373,0xAB11C338,0xA0781494
   7148	long		0x3FFF0000,0x935A2B2F,0x13E6E92C,0x9EB319B0
   7149	long		0x3FFF0000,0x94F4EFA8,0xFEF70960,0x2017457D
   7150	long		0x3FFF0000,0x96942D37,0x20185A00,0x1F11D537
   7151	long		0x3FFF0000,0x9837F051,0x8DB8A970,0x9FB952DD
   7152	long		0x3FFF0000,0x99E04593,0x20B7FA64,0x1FE43087
   7153	long		0x3FFF0000,0x9B8D39B9,0xD54E5538,0x1FA2A818
   7154	long		0x3FFF0000,0x9D3ED9A7,0x2CFFB750,0x1FDE494D
   7155	long		0x3FFF0000,0x9EF53260,0x91A111AC,0x20504890
   7156	long		0x3FFF0000,0xA0B0510F,0xB9714FC4,0xA073691C
   7157	long		0x3FFF0000,0xA2704303,0x0C496818,0x1F9B7A05
   7158	long		0x3FFF0000,0xA43515AE,0x09E680A0,0xA0797126
   7159	long		0x3FFF0000,0xA5FED6A9,0xB15138EC,0xA071A140
   7160	long		0x3FFF0000,0xA7CD93B4,0xE9653568,0x204F62DA
   7161	long		0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x1F283C4A
   7162	long		0x3FFF0000,0xAB7A39B5,0xA93ED338,0x9F9A7FDC
   7163	long		0x3FFF0000,0xAD583EEA,0x42A14AC8,0xA05B3FAC
   7164	long		0x3FFF0000,0xAF3B78AD,0x690A4374,0x1FDF2610
   7165	long		0x3FFF0000,0xB123F581,0xD2AC2590,0x9F705F90
   7166	long		0x3FFF0000,0xB311C412,0xA9112488,0x201F678A
   7167	long		0x3FFF0000,0xB504F333,0xF9DE6484,0x1F32FB13
   7168	long		0x3FFF0000,0xB6FD91E3,0x28D17790,0x20038B30
   7169	long		0x3FFF0000,0xB8FBAF47,0x62FB9EE8,0x200DC3CC
   7170	long		0x3FFF0000,0xBAFF5AB2,0x133E45FC,0x9F8B2AE6
   7171	long		0x3FFF0000,0xBD08A39F,0x580C36C0,0xA02BBF70
   7172	long		0x3FFF0000,0xBF1799B6,0x7A731084,0xA00BF518
   7173	long		0x3FFF0000,0xC12C4CCA,0x66709458,0xA041DD41
   7174	long		0x3FFF0000,0xC346CCDA,0x24976408,0x9FDF137B
   7175	long		0x3FFF0000,0xC5672A11,0x5506DADC,0x201F1568
   7176	long		0x3FFF0000,0xC78D74C8,0xABB9B15C,0x1FC13A2E
   7177	long		0x3FFF0000,0xC9B9BD86,0x6E2F27A4,0xA03F8F03
   7178	long		0x3FFF0000,0xCBEC14FE,0xF2727C5C,0x1FF4907D
   7179	long		0x3FFF0000,0xCE248C15,0x1F8480E4,0x9E6E53E4
   7180	long		0x3FFF0000,0xD06333DA,0xEF2B2594,0x1FD6D45C
   7181	long		0x3FFF0000,0xD2A81D91,0xF12AE45C,0xA076EDB9
   7182	long		0x3FFF0000,0xD4F35AAB,0xCFEDFA20,0x9FA6DE21
   7183	long		0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x1EE69A2F
   7184	long		0x3FFF0000,0xD99D15C2,0x78AFD7B4,0x207F439F
   7185	long		0x3FFF0000,0xDBFBB797,0xDAF23754,0x201EC207
   7186	long		0x3FFF0000,0xDE60F482,0x5E0E9124,0x9E8BE175
   7187	long		0x3FFF0000,0xE0CCDEEC,0x2A94E110,0x20032C4B
   7188	long		0x3FFF0000,0xE33F8972,0xBE8A5A50,0x2004DFF5
   7189	long		0x3FFF0000,0xE5B906E7,0x7C8348A8,0x1E72F47A
   7190	long		0x3FFF0000,0xE8396A50,0x3C4BDC68,0x1F722F22
   7191	long		0x3FFF0000,0xEAC0C6E7,0xDD243930,0xA017E945
   7192	long		0x3FFF0000,0xED4F301E,0xD9942B84,0x1F401A5B
   7193	long		0x3FFF0000,0xEFE4B99B,0xDCDAF5CC,0x9FB9A9E3
   7194	long		0x3FFF0000,0xF281773C,0x59FFB138,0x20744C05
   7195	long		0x3FFF0000,0xF5257D15,0x2486CC2C,0x1F773A19
   7196	long		0x3FFF0000,0xF7D0DF73,0x0AD13BB8,0x1FFE90D5
   7197	long		0x3FFF0000,0xFA83B2DB,0x722A033C,0xA041ED22
   7198	long		0x3FFF0000,0xFD3E0C0C,0xF486C174,0x1F853F3A
   7199
   7200	set		ADJFLAG,L_SCR2		# nonzero => multiply by ADJSCALE in Step 6
   7201	set		SCALE,FP_SCR0		# setox: 2^(M) in extended format
   7202	set		ADJSCALE,FP_SCR1	# setox: 2^(M1), built only on the EEXPBIG path
   7203	set		SC,FP_SCR0		# setoxm1: scale factor (same scratch slot as SCALE)
   7204	set		ONEBYSC,FP_SCR1		# setoxm1: -2^(-M) (same scratch slot as ADJSCALE)
   7205
   7206	global		setox
   7207setox:
   7208#--entry point for EXP(X), here X is finite, non-zero, and not NaN's
#--a0 points to the extended-precision input X. The value in d0 is
#--written to FPCR just before the final floating-point operation.
#--The result is left in fp0; every path leaves through a branch to
#--t_catch, t_pinx2, t_unfl2 or t_ovfl2 (defined outside this section).
   7209
   7210#--Step 1.
   7211	mov.l		(%a0),%d1		# load part of input X
   7212	and.l		&0x7FFF0000,%d1		# biased expo. of X
   7213	cmp.l		%d1,&0x3FBE0000		# 2^(-65)
   7214	bge.b		EXPC1			# normal case
   7215	bra		EXPSM			# |X| < 2^(-65)
   7216
   7217EXPC1:
   7218#--The case |X| >= 2^(-65)
   7219	mov.w		4(%a0),%d1		# expo. and partial sig. of |X|
   7220	cmp.l		%d1,&0x400CB167		# 16380 log2 trunc. 16 bits
   7221	blt.b		EXPMAIN			# normal case
   7222	bra		EEXPBIG
   7223
   7224EXPMAIN:
   7225#--Step 2.
   7226#--This is the normal branch:	2^(-65) <= |X| < 16380 log2.
   7227	fmov.x		(%a0),%fp0		# load input from (a0)
   7228
   7229	fmov.x		%fp0,%fp1
   7230	fmul.s		&0x42B8AA3B,%fp0	# 64/log2 * X
   7231	fmovm.x		&0xc,-(%sp)		# save fp2/fp3 {%fp2/%fp3}
   7232	mov.l		&0,ADJFLAG(%a6)		# no ADJSCALE multiply on this path
   7233	fmov.l		%fp0,%d1		# N = int( X * 64/log2 )
   7234	lea		EEXPTBL(%pc),%a1
   7235	fmov.l		%d1,%fp0		# convert to floating-format
   7236
   7237	mov.l		%d1,L_SCR1(%a6)		# save N temporarily
   7238	and.l		&0x3F,%d1		# d1 is J = N mod 64
   7239	lsl.l		&4,%d1			# J * 16 = byte offset of table entry
   7240	add.l		%d1,%a1			# address of 2^(J/64)
   7241	mov.l		L_SCR1(%a6),%d1
   7242	asr.l		&6,%d1			# d1 is M
   7243	add.w		&0x3FFF,%d1		# biased expo. of 2^(M)
   7244	mov.w		L2(%pc),L_SCR1(%a6)	# prefetch L2, no need in CB
   7245
   7246EXPCONT1:
   7247#--Step 3.
   7248#--fp2,fp3 saved on the stack. fp0 is N, fp1 is X,
   7249#--a1 points to 2^(J/64), d1 is biased expo. of 2^(M)
   7250	fmov.x		%fp0,%fp2
   7251	fmul.s		&0xBC317218,%fp0	# N * L1, L1 = lead(-log2/64)
   7252	fmul.x		L2(%pc),%fp2		# N * L2, L1+L2 = -log2/64
   7253	fadd.x		%fp1,%fp0		# X + N*L1
   7254	fadd.x		%fp2,%fp0		# fp0 is R, reduced arg.
   7255
   7256#--Step 4.
   7257#--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL
   7258#-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5))))
   7259#--TO FULLY UTILIZE THE PIPELINE, WE COMPUTE S = R*R
   7260#--[R+R*S*(A2+S*A4)] + [S*(A1+S*(A3+S*A5))]
   7261
   7262	fmov.x		%fp0,%fp1
   7263	fmul.x		%fp1,%fp1		# fp1 IS S = R*R
   7264
   7265	fmov.s		&0x3AB60B70,%fp2	# fp2 IS A5
   7266
   7267	fmul.x		%fp1,%fp2		# fp2 IS S*A5
   7268	fmov.x		%fp1,%fp3
   7269	fmul.s		&0x3C088895,%fp3	# fp3 IS S*A4
   7270
   7271	fadd.d		EEXPA3(%pc),%fp2	# fp2 IS A3+S*A5
   7272	fadd.d		EEXPA2(%pc),%fp3	# fp3 IS A2+S*A4
   7273
   7274	fmul.x		%fp1,%fp2		# fp2 IS S*(A3+S*A5)
   7275	mov.w		%d1,SCALE(%a6)		# SCALE is 2^(M) in extended
   7276	mov.l		&0x80000000,SCALE+4(%a6)
   7277	clr.l		SCALE+8(%a6)
   7278
   7279	fmul.x		%fp1,%fp3		# fp3 IS S*(A2+S*A4)
   7280
   7281	fadd.s		&0x3F000000,%fp2	# fp2 IS A1+S*(A3+S*A5)
   7282	fmul.x		%fp0,%fp3		# fp3 IS R*S*(A2+S*A4)
   7283
   7284	fmul.x		%fp1,%fp2		# fp2 IS S*(A1+S*(A3+S*A5))
   7285	fadd.x		%fp3,%fp0		# fp0 IS R+R*S*(A2+S*A4),
   7286
   7287	fmov.x		(%a1)+,%fp1		# fp1 is lead. pt. of 2^(J/64)
   7288	fadd.x		%fp2,%fp0		# fp0 is EXP(R) - 1
   7289
   7290#--Step 5
   7291#--final reconstruction process
   7292#--EXP(X) = 2^M * ( 2^(J/64) + 2^(J/64)*(EXP(R)-1) )
   7293
   7294	fmul.x		%fp1,%fp0		# 2^(J/64)*(Exp(R)-1)
   7295	fmovm.x		(%sp)+,&0x30		# fp2/fp3 restored {%fp2/%fp3}
   7296	fadd.s		(%a1),%fp0		# add 4th table word (single): accurate 2^(J/64)
   7297
   7298	fadd.x		%fp1,%fp0		# 2^(J/64) + 2^(J/64)*...
   7299	mov.l		ADJFLAG(%a6),%d1
   7300
   7301#--Step 6
   7302	tst.l		%d1
   7303	beq.b		NORMAL			# ADJFLAG clear: skip the extra scaling
   7304ADJUST:
   7305	fmul.x		ADJSCALE(%a6),%fp0	# multiply 2^(M1), set up in EEXPBIG
   7306NORMAL:
   7307	fmov.l		%d0,%fpcr		# restore user FPCR
   7308	mov.b		&FMUL_OP,%d1		# last inst is MUL
   7309	fmul.x		SCALE(%a6),%fp0		# multiply 2^(M)
   7310	bra		t_catch
   7311
   7312EXPSM:
   7313#--Step 7
   7314	fmovm.x		(%a0),&0x80		# load X
   7315	fmov.l		%d0,%fpcr
   7316	fadd.s		&0x3F800000,%fp0	# 1+X in user mode
   7317	bra		t_pinx2
   7318
   7319EEXPBIG:
   7320#--Step 8
   7321	cmp.l		%d1,&0x400CB27C		# 16480 log2
   7322	bgt.b		EXP2BIG
   7323#--Steps 8.2 -- 8.6
#--Same reduction as EXPMAIN, but the exponent K is split into M1 and M
#--(K = M1 + M) so that the scaling is applied as two multiplies.
   7324	fmov.x		(%a0),%fp0		# load input from (a0)
   7325
   7326	fmov.x		%fp0,%fp1
   7327	fmul.s		&0x42B8AA3B,%fp0	# 64/log2 * X
   7328	fmovm.x		&0xc,-(%sp)		# save fp2/fp3 {%fp2/%fp3}
   7329	mov.l		&1,ADJFLAG(%a6)		# request the ADJSCALE multiply in Step 6
   7330	fmov.l		%fp0,%d1		# N = int( X * 64/log2 )
   7331	lea		EEXPTBL(%pc),%a1
   7332	fmov.l		%d1,%fp0		# convert to floating-format
   7333	mov.l		%d1,L_SCR1(%a6)		# save N temporarily
   7334	and.l		&0x3F,%d1		# d1 is J = N mod 64
   7335	lsl.l		&4,%d1			# J * 16 = byte offset of table entry
   7336	add.l		%d1,%a1			# address of 2^(J/64)
   7337	mov.l		L_SCR1(%a6),%d1
   7338	asr.l		&6,%d1			# d1 is K
   7339	mov.l		%d1,L_SCR1(%a6)		# save K temporarily
   7340	asr.l		&1,%d1			# d1 is M1
   7341	sub.l		%d1,L_SCR1(%a6)		# L_SCR1 is M = K - M1
   7342	add.w		&0x3FFF,%d1		# biased expo. of 2^(M1)
   7343	mov.w		%d1,ADJSCALE(%a6)	# ADJSCALE := 2^(M1)
   7344	mov.l		&0x80000000,ADJSCALE+4(%a6)
   7345	clr.l		ADJSCALE+8(%a6)
   7346	mov.l		L_SCR1(%a6),%d1		# d1 is M
   7347	add.w		&0x3FFF,%d1		# biased expo. of 2^(M)
   7348	bra.w		EXPCONT1		# go back to Step 3
   7349
   7350EXP2BIG:
   7351#--Step 9
   7352	tst.b		(%a0)			# is X positive or negative?
   7353	bmi		t_unfl2			# negative X: underflow handler
   7354	bra		t_ovfl2			# positive X: overflow handler
   7355
   7356	global		setoxd
   7357setoxd:
   7358#--entry point for EXP(X), X is denormalized
#--The result is computed as 1 + sign(X)*2^(-126) under the FPCR taken
#--from d0; a0 points to X and only its sign bit is used.
   7359	mov.l		(%a0),-(%sp)		# push first longword of X (sign/exponent)
   7360	andi.l		&0x80000000,(%sp)	# keep only the sign bit
   7361	ori.l		&0x00800000,(%sp)	# sign(X)*2^(-126)
   7362
   7363	fmov.s		&0x3F800000,%fp0	# fp0 is 1.0
   7364
   7365	fmov.l		%d0,%fpcr		# switch to the FPCR passed in d0
   7366	fadd.s		(%sp)+,%fp0		# 1 + sign(X)*2^(-126); pops the temporary
   7367	bra		t_pinx2
   7368
   7369	global		setoxm1
   7370setoxm1:
   7371#--entry point for EXPM1(X), here X is finite, non-zero, non-NaN
#--a0 points to the extended-precision input X. The value in d0 is
#--written to FPCR just before the final floating-point operation.
#--The result is left in fp0; paths leave through t_inx2, t_catch or
#--t_minx2 (defined outside this section), or continue in setox at EXPC1.
   7372
   7373#--Step 1.
   7374#--Step 1.1
   7375	mov.l		(%a0),%d1		# load part of input X
   7376	and.l		&0x7FFF0000,%d1		# biased expo. of X
   7377	cmp.l		%d1,&0x3FFD0000		# 1/4
   7378	bge.b		EM1CON1			# |X| >= 1/4
   7379	bra		EM1SM
   7380
   7381EM1CON1:
   7382#--Step 1.3
   7383#--The case |X| >= 1/4
   7384	mov.w		4(%a0),%d1		# expo. and partial sig. of |X|
   7385	cmp.l		%d1,&0x4004C215		# 70log2 rounded up to 16 bits
   7386	ble.b		EM1MAIN			# 1/4 <= |X| <= 70log2
   7387	bra		EM1BIG
   7388
   7389EM1MAIN:
   7390#--Step 2.
   7391#--This is the case:	1/4 <= |X| <= 70 log2.
   7392	fmov.x		(%a0),%fp0		# load input from (a0)
   7393
   7394	fmov.x		%fp0,%fp1
   7395	fmul.s		&0x42B8AA3B,%fp0	# 64/log2 * X
   7396	fmovm.x		&0xc,-(%sp)		# save fp2/fp3 {%fp2/%fp3}
   7397	fmov.l		%fp0,%d1		# N = int( X * 64/log2 )
   7398	lea		EEXPTBL(%pc),%a1
   7399	fmov.l		%d1,%fp0		# convert to floating-format
   7400
   7401	mov.l		%d1,L_SCR1(%a6)		# save N temporarily
   7402	and.l		&0x3F,%d1		# d1 is J = N mod 64
   7403	lsl.l		&4,%d1			# J * 16 = byte offset of table entry
   7404	add.l		%d1,%a1			# address of 2^(J/64)
   7405	mov.l		L_SCR1(%a6),%d1
   7406	asr.l		&6,%d1			# d1 is M
   7407	mov.l		%d1,L_SCR1(%a6)		# save a copy of M
   7408
   7409#--Step 3.
   7410#--fp2,fp3 saved on the stack. fp0 is N, fp1 is X,
   7411#--a1 points to 2^(J/64), d1 and L_SCR1 both contain M
   7412	fmov.x		%fp0,%fp2
   7413	fmul.s		&0xBC317218,%fp0	# N * L1, L1 = lead(-log2/64)
   7414	fmul.x		L2(%pc),%fp2		# N * L2, L1+L2 = -log2/64
   7415	fadd.x		%fp1,%fp0		# X + N*L1
   7416	fadd.x		%fp2,%fp0		# fp0 is R, reduced arg.
   7417	add.w		&0x3FFF,%d1		# d1 is biased expo. of 2^M
   7418
   7419#--Step 4.
   7420#--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL
   7421#-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*(A5 + R*A6)))))
   7422#--TO FULLY UTILIZE THE PIPELINE, WE COMPUTE S = R*R
   7423#--[R*S*(A2+S*(A4+S*A6))] + [R+S*(A1+S*(A3+S*A5))]
   7424
   7425	fmov.x		%fp0,%fp1
   7426	fmul.x		%fp1,%fp1		# fp1 IS S = R*R
   7427
   7428	fmov.s		&0x3950097B,%fp2	# fp2 IS a6
   7429
   7430	fmul.x		%fp1,%fp2		# fp2 IS S*A6
   7431	fmov.x		%fp1,%fp3
   7432	fmul.s		&0x3AB60B6A,%fp3	# fp3 IS S*A5
   7433
   7434	fadd.d		EM1A4(%pc),%fp2		# fp2 IS A4+S*A6
   7435	fadd.d		EM1A3(%pc),%fp3		# fp3 IS A3+S*A5
   7436	mov.w		%d1,SC(%a6)		# SC is 2^(M) in extended
   7437	mov.l		&0x80000000,SC+4(%a6)
   7438	clr.l		SC+8(%a6)
   7439
   7440	fmul.x		%fp1,%fp2		# fp2 IS S*(A4+S*A6)
   7441	mov.l		L_SCR1(%a6),%d1		# d1 is	M
   7442	neg.w		%d1			# d1 is -M
   7443	fmul.x		%fp1,%fp3		# fp3 IS S*(A3+S*A5)
   7444	add.w		&0x3FFF,%d1		# biased expo. of 2^(-M)
   7445	fadd.d		EM1A2(%pc),%fp2		# fp2 IS A2+S*(A4+S*A6)
   7446	fadd.s		&0x3F000000,%fp3	# fp3 IS A1+S*(A3+S*A5)
   7447
   7448	fmul.x		%fp1,%fp2		# fp2 IS S*(A2+S*(A4+S*A6))
   7449	or.w		&0x8000,%d1		# signed/expo. of -2^(-M)
   7450	mov.w		%d1,ONEBYSC(%a6)	# OnebySc is -2^(-M)
   7451	mov.l		&0x80000000,ONEBYSC+4(%a6)
   7452	clr.l		ONEBYSC+8(%a6)
   7453	fmul.x		%fp3,%fp1		# fp1 IS S*(A1+S*(A3+S*A5))
   7454
   7455	fmul.x		%fp0,%fp2		# fp2 IS R*S*(A2+S*(A4+S*A6))
   7456	fadd.x		%fp1,%fp0		# fp0 IS R+S*(A1+S*(A3+S*A5))
   7457
   7458	fadd.x		%fp2,%fp0		# fp0 IS EXP(R)-1
   7459
   7460	fmovm.x		(%sp)+,&0x30		# fp2/fp3 restored {%fp2/%fp3}
   7461
   7462#--Step 5
   7463#--Compute 2^(J/64)*p
   7464
   7465	fmul.x		(%a1),%fp0		# 2^(J/64)*(Exp(R)-1)
   7466
   7467#--Step 6
#--The three branches below add the same four terms (T, t, p, OnebySc)
#--in a different order depending on M.
   7468#--Step 6.1
   7469	mov.l		L_SCR1(%a6),%d1		# retrieve M
   7470	cmp.l		%d1,&63
   7471	ble.b		MLE63
   7472#--Step 6.2	M >= 64
   7473	fmov.s		12(%a1),%fp1		# fp1 is t
   7474	fadd.x		ONEBYSC(%a6),%fp1	# fp1 is t+OnebySc
   7475	fadd.x		%fp1,%fp0		# p+(t+OnebySc), fp1 released
   7476	fadd.x		(%a1),%fp0		# T+(p+(t+OnebySc))
   7477	bra		EM1SCALE
   7478MLE63:
   7479#--Step 6.3	M <= 63
   7480	cmp.l		%d1,&-3
   7481	bge.b		MGEN3
   7482MLTN3:
   7483#--Step 6.4	M <= -4
   7484	fadd.s		12(%a1),%fp0		# p+t
   7485	fadd.x		(%a1),%fp0		# T+(p+t)
   7486	fadd.x		ONEBYSC(%a6),%fp0	# OnebySc + (T+(p+t))
   7487	bra		EM1SCALE
   7488MGEN3:
   7489#--Step 6.5	-3 <= M <= 63
   7490	fmov.x		(%a1)+,%fp1		# fp1 is T
   7491	fadd.s		(%a1),%fp0		# fp0 is p+t
   7492	fadd.x		ONEBYSC(%a6),%fp1	# fp1 is T+OnebySc
   7493	fadd.x		%fp1,%fp0		# (T+OnebySc)+(p+t)
   7494
   7495EM1SCALE:
   7496#--Step 6.6
   7497	fmov.l		%d0,%fpcr		# switch to the FPCR passed in d0
   7498	fmul.x		SC(%a6),%fp0		# ans := Sc * ans
   7499	bra		t_inx2
   7500
   7501EM1SM:
   7502#--Step 7	|X| < 1/4.
#--d1 still holds the masked biased exponent computed in Step 1.1
   7503	cmp.l		%d1,&0x3FBE0000		# 2^(-65)
   7504	bge.b		EM1POLY
   7505
   7506EM1TINY:
   7507#--Step 8	|X| < 2^(-65)
#--NOTE(review): 0x0033 - 0x3FFF = -16332, so this compare is against
#--2^(-16332) rather than the 2^(-16312) quoted in the comments -- confirm
#--which value is intended.
   7508	cmp.l		%d1,&0x00330000		# 2^(-16312)
   7509	blt.b		EM12TINY
   7510#--Step 8.2
   7511	mov.l		&0x80010000,SC(%a6)	# SC is -2^(-16382)
   7512	mov.l		&0x80000000,SC+4(%a6)
   7513	clr.l		SC+8(%a6)
   7514	fmov.x		(%a0),%fp0		# fp0 is X
   7515	fmov.l		%d0,%fpcr		# switch to the FPCR passed in d0
   7516	mov.b		&FADD_OP,%d1		# last inst is ADD
   7517	fadd.x		SC(%a6),%fp0		# X - 2^(-16382)
   7518	bra		t_catch
   7519
   7520EM12TINY:
   7521#--Step 8.3
   7522	fmov.x		(%a0),%fp0		# fp0 is X
   7523	fmul.d		TWO140(%pc),%fp0	# scale up: X * 2^(140)
   7524	mov.l		&0x80010000,SC(%a6)	# SC is -2^(-16382)
   7525	mov.l		&0x80000000,SC+4(%a6)
   7526	clr.l		SC+8(%a6)
   7527	fadd.x		SC(%a6),%fp0		# X*2^(140) - 2^(-16382)
   7528	fmov.l		%d0,%fpcr		# switch to the FPCR passed in d0
   7529	mov.b		&FMUL_OP,%d1		# last inst is MUL
   7530	fmul.d		TWON140(%pc),%fp0	# scale back down by 2^(-140)
   7531	bra		t_catch
   7532
   7533EM1POLY:
   7534#--Step 9	exp(X)-1 by a simple polynomial
   7535	fmov.x		(%a0),%fp0		# fp0 is X
   7536	fmul.x		%fp0,%fp0		# fp0 is S := X*X
   7537	fmovm.x		&0xc,-(%sp)		# save fp2/fp3 {%fp2/%fp3}
   7538	fmov.s		&0x2F30CAA8,%fp1	# fp1 is B12
   7539	fmul.x		%fp0,%fp1		# fp1 is S*B12
   7540	fmov.s		&0x310F8290,%fp2	# fp2 is B11
   7541	fadd.s		&0x32D73220,%fp1	# fp1 is B10+S*B12
   7542
   7543	fmul.x		%fp0,%fp2		# fp2 is S*B11
   7544	fmul.x		%fp0,%fp1		# fp1 is S*(B10 + ...
   7545
   7546	fadd.s		&0x3493F281,%fp2	# fp2 is B9+S*...
   7547	fadd.d		EM1B8(%pc),%fp1		# fp1 is B8+S*...
   7548
   7549	fmul.x		%fp0,%fp2		# fp2 is S*(B9+...
   7550	fmul.x		%fp0,%fp1		# fp1 is S*(B8+...
   7551
   7552	fadd.d		EM1B7(%pc),%fp2		# fp2 is B7+S*...
   7553	fadd.d		EM1B6(%pc),%fp1		# fp1 is B6+S*...
   7554
   7555	fmul.x		%fp0,%fp2		# fp2 is S*(B7+...
   7556	fmul.x		%fp0,%fp1		# fp1 is S*(B6+...
   7557
   7558	fadd.d		EM1B5(%pc),%fp2		# fp2 is B5+S*...
   7559	fadd.d		EM1B4(%pc),%fp1		# fp1 is B4+S*...
   7560
   7561	fmul.x		%fp0,%fp2		# fp2 is S*(B5+...
   7562	fmul.x		%fp0,%fp1		# fp1 is S*(B4+...
   7563
   7564	fadd.d		EM1B3(%pc),%fp2		# fp2 is B3+S*...
   7565	fadd.x		EM1B2(%pc),%fp1		# fp1 is B2+S*...
   7566
   7567	fmul.x		%fp0,%fp2		# fp2 is S*(B3+...
   7568	fmul.x		%fp0,%fp1		# fp1 is S*(B2+...
   7569
   7570	fmul.x		%fp0,%fp2		# fp2 is S*S*(B3+...)
   7571	fmul.x		(%a0),%fp1		# fp1 is X*S*(B2...
   7572
   7573	fmul.s		&0x3F000000,%fp0	# fp0 is S*B1
   7574	fadd.x		%fp2,%fp1		# fp1 is Q
   7575
   7576	fmovm.x		(%sp)+,&0x30		# fp2/fp3 restored {%fp2/%fp3}
   7577
   7578	fadd.x		%fp1,%fp0		# fp0 is S*B1+Q
   7579
   7580	fmov.l		%d0,%fpcr		# switch to the FPCR passed in d0
   7581	fadd.x		(%a0),%fp0		# X + (S*B1+Q)
   7582	bra		t_inx2
   7583
   7584EM1BIG:
   7585#--Step 10	|X| > 70 log2
   7586	mov.l		(%a0),%d1		# sign/exponent longword of X
   7587	cmp.l		%d1,&0
   7588	bgt.w		EXPC1			# X positive: continue in setox at EXPC1
   7589#--Step 10.2
   7590	fmov.s		&0xBF800000,%fp0	# fp0 is -1
   7591	fmov.l		%d0,%fpcr		# switch to the FPCR passed in d0
   7592	fadd.s		&0x00800000,%fp0	# -1 + 2^(-126)
   7593	bra		t_minx2
   7594
   7595	global		setoxm1d
   7596setoxm1d:
   7597#--entry point for EXPM1(X), here X is denormalized
   7598#--Step 0.
   7599	bra		t_extdnrm		# all work is done by t_extdnrm (defined elsewhere)
   7600
   7601#########################################################################
   7602# sgetexp():  returns the exponent portion of the input argument.	#
   7603#	      The exponent bias is removed and the exponent value is	#
   7604#	      returned as an extended precision number in fp0.		#
   7605# sgetexpd(): handles denormalized numbers.				#
   7606#									#
   7607# sgetman():  extracts the mantissa of the input argument. The		#
   7608#	      mantissa is converted to an extended precision number w/	#
   7609#	      an exponent of $3fff and is returned in fp0. The range of #
   7610#	      the result is [1.0 - 2.0).				#
   7611# sgetmand(): handles denormalized numbers.				#
   7612#									#
   7613# INPUT *************************************************************** #
   7614#	a0  = pointer to extended precision input			#
   7615#									#
   7616# OUTPUT ************************************************************** #
   7617#	fp0 = exponent(X) or mantissa(X)				#
   7618#									#
   7619#########################################################################
   7620
   7621	global		sgetexp
   7622sgetexp:
# Return the unbiased exponent of the operand at (a0) as a number in fp0.
# d0 is used as scratch; FPSR_CC(a6) is written only for a negative result.
   7623	mov.w		SRC_EX(%a0),%d0		# get the exponent
   7624	bclr		&0xf,%d0		# clear the sign bit
   7625	subi.w		&0x3fff,%d0		# subtract off the bias
   7626	fmov.w		%d0,%fp0		# return exp in fp0
# NOTE(review): the blt below presumably still sees the flags left by
# subi.w, i.e. the fmov.w in between is assumed not to touch them -- confirm
   7627	blt.b		sgetexpn		# it's negative
   7628	rts
   7629
   7630sgetexpn:
   7631	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
   7632	rts
   7633
   7634	global		sgetexpd
   7635sgetexpd:
# Denormalized variant of sgetexp: the result is always flagged negative.
# NOTE(review): norm is defined elsewhere; this code assumes it leaves the
# shift amount in d0 -- confirm against norm.
   7636	bsr.l		norm			# normalize
   7637	neg.w		%d0			# new exp = -(shft amt)
   7638	subi.w		&0x3fff,%d0		# subtract off the bias
   7639	fmov.w		%d0,%fp0		# return exp in fp0
   7640	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
   7641	rts
   7642
   7643	global		sgetman
   7644sgetman:
# Return the mantissa of the operand at (a0) in fp0, with the original sign
# and an exponent field of $3fff. The input itself is not modified.
   7645	mov.w		SRC_EX(%a0),%d0		# get the exp
   7646	ori.w		&0x7fff,%d0		# set all 15 exponent bits, keep the sign bit
   7647	bclr		&0xe,%d0		# clear bit 14: exponent field is now $3fff
   7648
   7649# here, we build the result in a tmp location so as not to disturb the input
   7650	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6) # copy to tmp loc
   7651	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6) # copy to tmp loc
   7652	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
   7653	fmov.x		FP_SCR0(%a6),%fp0	# put new value back in fp0
# NOTE(review): the bmi below presumably tests the N flag left by the mov.w
# of d0 above (bit 15 = sign of the input) -- confirm fmov.x leaves it intact
   7654	bmi.b		sgetmann		# it's negative
   7655	rts
   7656
   7657sgetmann:
   7658	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
   7659	rts
   7660
   7661#
   7662# For denormalized numbers, shift the mantissa until the j-bit = 1,
   7663# then load the exponent with +/- $3fff.
   7664#
   7665	global		sgetmand
   7666sgetmand:
   7667	bsr.l		norm			# normalize exponent
   7668	bra.b		sgetman			# finish in the normalized-input routine
   7669
   7670#########################################################################
   7671# scosh():  computes the hyperbolic cosine of a normalized input	#
   7672# scoshd(): computes the hyperbolic cosine of a denormalized input	#
   7673#									#
   7674# INPUT ***************************************************************	#
   7675#	a0 = pointer to extended precision input			#
   7676#	d0 = round precision,mode					#
   7677#									#
   7678# OUTPUT **************************************************************	#
   7679#	fp0 = cosh(X)							#
   7680#									#
   7681# ACCURACY and MONOTONICITY *******************************************	#
   7682#	The returned result is within 3 ulps in 64 significant bit,	#
   7683#	i.e. within 0.5001 ulp to 53 bits if the result is subsequently	#
   7684#	rounded to double precision. The result is provably monotonic	#
   7685#	in double precision.						#
   7686#									#
   7687# ALGORITHM ***********************************************************	#
   7688#									#
   7689#	COSH								#
   7690#	1. If |X| > 16380 log2, go to 3.				#
   7691#									#
   7692#	2. (|X| <= 16380 log2) Cosh(X) is obtained by the formulae	#
   7693#		y = |X|, z = exp(Y), and				#
   7694#		cosh(X) = (1/2)*( z + 1/z ).				#
   7695#		Exit.							#
   7696#									#
   7697#	3. (|X| > 16380 log2). If |X| > 16480 log2, go to 5.		#
   7698#									#
   7699#	4. (16380 log2 < |X| <= 16480 log2)				#
   7700#		cosh(X) = exp(|X|)/2.					#
   7701#		However, invoking exp(|X|) may cause premature		#
   7702#		overflow. Thus, we calculate cosh(X) as follows:	#
   7703#		Y	:= |X|						#
   7704#		Fact	:=	2**(16380)				#
   7705#		Y'	:= Y - 16381 log2				#
   7706#		cosh(X) := Fact * exp(Y').				#
   7707#		Exit.							#
   7708#									#
   7709#	5. (|X| > 16480 log2) cosh(X) must overflow. Return		#
   7710#		Huge*Huge to generate overflow and an infinity with	#
   7711#		the appropriate sign. Huge is the largest finite number	#
   7712#		in extended format. Exit.				#
   7713#									#
   7714#########################################################################
   7715
   7716TWO16380:			# extended 2^(16380), the final scale factor in COSHBIG
   7717	long		0x7FFB0000,0x80000000,0x00000000,0x00000000
   7718
   7719	global		scosh
   7720scosh:
   7721	fmov.x		(%a0),%fp0		# LOAD INPUT
   7722
# Build a compact form of |X| in d1: exponent in the upper word, the top
# 16 bits of the significand in the lower word, sign bit masked off.
   7723	mov.l		(%a0),%d1
   7724	mov.w		4(%a0),%d1
   7725	and.l		&0x7FFFFFFF,%d1
   7726	cmp.l		%d1,&0x400CB167
   7727	bgt.b		COSHBIG
   7728
   7729#--THIS IS THE USUAL CASE, |X| < 16380 LOG2
   7730#--COSH(X) = (1/2) * ( EXP(X) + 1/EXP(X) )
   7731
   7732	fabs.x		%fp0			# |X|
   7733
   7734	mov.l		%d0,-(%sp)		# save caller's d0 (rounding info)
   7735	clr.l		%d0			# setox loads d0 into FPCR: pass 0
   7736	fmovm.x		&0x01,-(%sp)		# save |X| to stack
   7737	lea		(%sp),%a0		# pass ptr to |X|
   7738	bsr		setox			# FP0 IS EXP(|X|)
   7739	add.l		&0xc,%sp		# erase |X| from stack
   7740	fmul.s		&0x3F000000,%fp0	# (1/2)EXP(|X|)
   7741	mov.l		(%sp)+,%d0		# restore caller's d0
   7742
   7743	fmov.s		&0x3E800000,%fp1	# (1/4)
   7744	fdiv.x		%fp0,%fp1		# 1/(2 EXP(|X|))
   7745
   7746	fmov.l		%d0,%fpcr		# switch to the FPCR passed in d0
   7747	mov.b		&FADD_OP,%d1		# last inst is ADD
   7748	fadd.x		%fp1,%fp0		# (1/2)EXP(|X|) + 1/(2 EXP(|X|))
   7749	bra		t_catch
   7750
   7751COSHBIG:
   7752	cmp.l		%d1,&0x400CB2B3
   7753	bgt.b		COSHHUGE
   7754
   7755	fabs.x		%fp0
   7756	fsub.d		T1(%pc),%fp0		# (|X|-16381LOG2_LEAD)
   7757	fsub.d		T2(%pc),%fp0		# |X| - 16381 LOG2, ACCURATE
   7758
   7759	mov.l		%d0,-(%sp)		# save caller's d0 (rounding info)
   7760	clr.l		%d0			# setox loads d0 into FPCR: pass 0
   7761	fmovm.x		&0x01,-(%sp)		# save fp0 to stack
   7762	lea		(%sp),%a0		# pass ptr to fp0
   7763	bsr		setox
   7764	add.l		&0xc,%sp		# clear fp0 from stack
   7765	mov.l		(%sp)+,%d0		# restore caller's d0
   7766
   7767	fmov.l		%d0,%fpcr		# switch to the FPCR passed in d0
   7768	mov.b		&FMUL_OP,%d1		# last inst is MUL
   7769	fmul.x		TWO16380(%pc),%fp0	# 2^(16380) * exp(|X| - 16381 log2)
   7770	bra		t_catch
   7771
   7772COSHHUGE:
   7773	bra		t_ovfl2
   7774
   7775	global		scoshd
   7776#--COSH(X) = 1 FOR DENORMALIZED X
   7777scoshd:
   7778	fmov.s		&0x3F800000,%fp0	# fp0 is 1.0
   7779
   7780	fmov.l		%d0,%fpcr		# switch to the FPCR passed in d0
   7781	fadd.s		&0x00800000,%fp0	# 1 + 2^(-126)
   7782	bra		t_pinx2
   7783
   7784#########################################################################
   7785# ssinh():  computes the hyperbolic sine of a normalized input		#
   7786# ssinhd(): computes the hyperbolic sine of a denormalized input	#
   7787#									#
   7788# INPUT *************************************************************** #
   7789#	a0 = pointer to extended precision input			#
   7790#	d0 = round precision,mode					#
   7791#									#
   7792# OUTPUT ************************************************************** #
   7793#	fp0 = sinh(X)							#
   7794#									#
   7795# ACCURACY and MONOTONICITY *******************************************	#
   7796#	The returned result is within 3 ulps in 64 significant bit,	#
   7797#	i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
   7798#	rounded to double precision. The result is provably monotonic	#
   7799#	in double precision.						#
   7800#									#
   7801# ALGORITHM *********************************************************** #
   7802#									#
   7803#       SINH								#
   7804#       1. If |X| > 16380 log2, go to 3.				#
   7805#									#
   7806#       2. (|X| <= 16380 log2) Sinh(X) is obtained by the formula	#
   7807#               y = |X|, sgn = sign(X), and z = expm1(Y),		#
   7808#               sinh(X) = sgn*(1/2)*( z + z/(1+z) ).			#
   7809#          Exit.							#
   7810#									#
   7811#       3. If |X| > 16480 log2, go to 5.				#
   7812#									#
   7813#       4. (16380 log2 < |X| <= 16480 log2)				#
   7814#               sinh(X) = sign(X) * exp(|X|)/2.				#
   7815#          However, invoking exp(|X|) may cause premature overflow.	#
   7816#          Thus, we calculate sinh(X) as follows:			#
   7817#             Y       := |X|						#
   7818#             sgn     := sign(X)					#
   7819#             sgnFact := sgn * 2**(16380)				#
   7820#             Y'      := Y - 16381 log2					#
   7821#             sinh(X) := sgnFact * exp(Y').				#
   7822#          Exit.							#
   7823#									#
   7824#       5. (|X| > 16480 log2) sinh(X) must overflow. Return		#
   7825#          sign(X)*Huge*Huge to generate overflow and an infinity with	#
   7826#          the appropriate sign. Huge is the largest finite number in	#
   7827#          extended format. Exit.					#
   7828#									#
   7829#########################################################################
   7830
   7831	global		ssinh
   7832ssinh:
   7833	fmov.x		(%a0),%fp0		# LOAD INPUT
   7834
# Build a compact form of X in d1: sign and exponent in the upper word,
# the top 16 bits of the significand in the lower word.
   7835	mov.l		(%a0),%d1
   7836	mov.w		4(%a0),%d1
   7837	mov.l		%d1,%a1			# save (compacted) operand
   7838	and.l		&0x7FFFFFFF,%d1		# drop the sign: compact |X|
   7839	cmp.l		%d1,&0x400CB167
   7840	bgt.b		SINHBIG
   7841
   7842#--THIS IS THE USUAL CASE, |X| < 16380 LOG2
   7843#--Y = |X|, Z = EXPM1(Y), SINH(X) = SIGN(X)*(1/2)*( Z + Z/(1+Z) )
   7844
   7845	fabs.x		%fp0			# Y = |X|
   7846
   7847	movm.l		&0x8040,-(%sp)		# {a1/d0}
   7848	fmovm.x		&0x01,-(%sp)		# save Y on stack
   7849	lea		(%sp),%a0		# pass ptr to Y
   7850	clr.l		%d0			# setoxm1 loads d0 into FPCR: pass 0
   7851	bsr		setoxm1			# FP0 IS Z = EXPM1(Y)
   7852	add.l		&0xc,%sp		# clear Y from stack
   7853	fmov.l		&0,%fpcr		# write 0 to FPCR for the intermediate steps
   7854	movm.l		(%sp)+,&0x0201		# {a1/d0}
   7855
   7856	fmov.x		%fp0,%fp1
   7857	fadd.s		&0x3F800000,%fp1	# 1+Z
   7858	fmov.x		%fp0,-(%sp)		# push Z
   7859	fdiv.x		%fp1,%fp0		# Z/(1+Z)
   7860	mov.l		%a1,%d1			# compacted operand (still has sign of X)
   7861	and.l		&0x80000000,%d1		# isolate the sign bit
   7862	or.l		&0x3F000000,%d1		# single-precision sign(X)*(1/2)
   7863	fadd.x		(%sp)+,%fp0		# Z + Z/(1+Z); pops Z
   7864	mov.l		%d1,-(%sp)		# push sign(X)*(1/2)
   7865
   7866	fmov.l		%d0,%fpcr		# switch to the FPCR passed in d0
   7867	mov.b		&FMUL_OP,%d1		# last inst is MUL
   7868	fmul.s		(%sp)+,%fp0		# last fp inst - possible exceptions set
   7869	bra		t_catch
   7870
   7871SINHBIG:
   7872	cmp.l		%d1,&0x400CB2B3
   7873	bgt		t_ovfl
   7874	fabs.x		%fp0
   7875	fsub.d		T1(%pc),%fp0		# (|X|-16381LOG2_LEAD)
# The next three pushes build sgnFact = sign(X) * 2^(16380) on the stack
# as a 12-byte extended value (low longword pushed first).
   7876	mov.l		&0,-(%sp)
   7877	mov.l		&0x80000000,-(%sp)
   7878	mov.l		%a1,%d1
   7879	and.l		&0x80000000,%d1
   7880	or.l		&0x7FFB0000,%d1
   7881	mov.l		%d1,-(%sp)		# EXTENDED FMT
   7882	fsub.d		T2(%pc),%fp0		# |X| - 16381 LOG2, ACCURATE
   7883
   7884	mov.l		%d0,-(%sp)		# save caller's d0 (rounding info)
   7885	clr.l		%d0			# setox loads d0 into FPCR: pass 0
   7886	fmovm.x		&0x01,-(%sp)		# save fp0 on stack
   7887	lea		(%sp),%a0		# pass ptr to fp0
   7888	bsr		setox
   7889	add.l		&0xc,%sp		# clear fp0 from stack
   7890
   7891	mov.l		(%sp)+,%d0		# restore caller's d0
   7892	fmov.l		%d0,%fpcr		# switch to the FPCR passed in d0
   7893	mov.b		&FMUL_OP,%d1		# last inst is MUL
   7894	fmul.x		(%sp)+,%fp0		# possible exception
   7895	bra		t_catch
   7896
   7897	global		ssinhd
   7898#--SINH(X) = X FOR DENORMALIZED X
   7899ssinhd:
   7900	bra		t_extdnrm		# all work is done by t_extdnrm (defined elsewhere)
   7901
   7902#########################################################################
   7903# stanh():  computes the hyperbolic tangent of a normalized input	#
   7904# stanhd(): computes the hyperbolic tangent of a denormalized input	#
   7905#									#
   7906# INPUT ***************************************************************	#
   7907#	a0 = pointer to extended precision input			#
   7908#	d0 = round precision,mode					#
   7909#									#
   7910# OUTPUT **************************************************************	#
   7911#	fp0 = tanh(X)							#
   7912#									#
   7913# ACCURACY and MONOTONICITY *******************************************	#
   7914#	The returned result is within 3 ulps in 64 significant bit,	#
   7915#	i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
   7916#	rounded to double precision. The result is provably monotonic	#
   7917#	in double precision.						#
   7918#									#
   7919# ALGORITHM ***********************************************************	#
   7920#									#
   7921#	TANH								#
   7922#	1. If |X| >= (5/2) log2 or |X| <= 2**(-40), go to 3.		#
   7923#									#
   7924#	2. (2**(-40) < |X| < (5/2) log2) Calculate tanh(X) by		#
   7925#		sgn := sign(X), y := 2|X|, z := expm1(Y), and		#
   7926#		tanh(X) = sgn*( z/(2+z) ).				#
   7927#		Exit.							#
   7928#									#
   7929#	3. (|X| <= 2**(-40) or |X| >= (5/2) log2). If |X| < 1,		#
   7930#		go to 7.						#
   7931#									#
   7932#	4. (|X| >= (5/2) log2) If |X| >= 50 log2, go to 6.		#
   7933#									#
   7934#	5. ((5/2) log2 <= |X| < 50 log2) Calculate tanh(X) by		#
   7935#		sgn := sign(X), y := 2|X|, z := exp(Y),			#
   7936#		tanh(X) = sgn - [ sgn*2/(1+z) ].			#
   7937#		Exit.							#
   7938#									#
   7939#	6. (|X| >= 50 log2) Tanh(X) = +-1 (round to nearest). Thus, we	#
   7940#		calculate Tanh(X) by					#
   7941#		sgn := sign(X), Tiny := 2**(-126),			#
   7942#		tanh(X) := sgn - sgn*Tiny.				#
   7943#		Exit.							#
   7944#									#
   7945#	7. (|X| < 2**(-40)). Tanh(X) = X.	Exit.			#
   7946#									#
   7947#########################################################################
   7948
   7949	set		X,FP_SCR0		# scratch image of the input; later rebuilt as Y = 2|X|
   7950	set		XFRAC,X+4
   7951
   7952	set		SGN,L_SCR3		# receives the isolated sign bit of X
   7953
   7954	set		V,FP_SCR0		# same scratch slot as X; holds Z+2 with the sign of X applied
   7955
   7956	global		stanh
   7957stanh:
       #--Entry (per the routine header): a0 -> extended precision X,
       #--d0 = caller's round precision,mode. The result is left in fp0.
   7958	fmov.x		(%a0),%fp0		# LOAD INPUT
   7959
   7960	fmov.x		%fp0,X(%a6)		# keep a copy of X in the scratch area
   7961	mov.l		(%a0),%d1		# upper word of d1 = sign/exponent of X
   7962	mov.w		4(%a0),%d1		# lower word of d1 = top 16 mantissa bits
   7963	mov.l		%d1,X(%a6)		# first long of X now holds this packed value
   7964	and.l		&0x7FFFFFFF,%d1		# drop the sign: d1 now describes |X|
   7965	cmp.l		%d1, &0x3fd78000	# is |X| < 2^(-40)?
   7966	blt.w		TANHBORS		# yes
   7967	cmp.l		%d1, &0x3fffddce	# is |X| > (5/2)LOG2?
   7968	bgt.w		TANHBORS		# yes
   7969
   7970#--THIS IS THE USUAL CASE
   7971#--Y = 2|X|, Z = EXPM1(Y), TANH(X) = SIGN(X) * Z / (Z+2).
   7972
   7973	mov.l		X(%a6),%d1		# reload packed sign/exponent (sign still present)
   7974	mov.l		%d1,SGN(%a6)
   7975	and.l		&0x7FFF0000,%d1		# keep only the biased exponent
   7976	add.l		&0x00010000,%d1		# EXPONENT OF 2|X|
   7977	mov.l		%d1,X(%a6)		# X now has its sign cleared and exponent+1
   7978	and.l		&0x80000000,SGN(%a6)	# SGN = sign bit of X only
   7979	fmov.x		X(%a6),%fp0		# FP0 IS Y = 2|X|
   7980
   7981	mov.l		%d0,-(%sp)		# save caller's round prec,mode
   7982	clr.l		%d0			# call setoxm1 with d0 = 0
   7983	fmovm.x		&0x1,-(%sp)		# save Y on stack
   7984	lea		(%sp),%a0		# pass ptr to Y
   7985	bsr		setoxm1			# FP0 IS Z = EXPM1(Y)
   7986	add.l		&0xc,%sp		# clear Y from stack
   7987	mov.l		(%sp)+,%d0		# recover caller's round prec,mode
   7988
   7989	fmov.x		%fp0,%fp1
   7990	fadd.s		&0x40000000,%fp1	# Z+2
   7991	mov.l		SGN(%a6),%d1
   7992	fmov.x		%fp1,V(%a6)
   7993	eor.l		%d1,V(%a6)		# flip V's sign bit when X is negative: V = SIGN(X)*(Z+2)
   7994
   7995	fmov.l		%d0,%fpcr		# restore users round prec,mode
   7996	fdiv.x		V(%a6),%fp0		# Z / (SIGN(X)*(Z+2))
   7997	bra		t_inx2
   7998
   7999TANHBORS:
       #--Reached with |X| < 2^(-40) or |X| > (5/2)LOG2; d1 still holds packed |X|.
   8000	cmp.l		%d1,&0x3FFF8000		# is |X| < 1? then it is the tiny case
   8001	blt.w		TANHSM
   8002
   8003	cmp.l		%d1,&0x40048AA1		# is |X| > 50 LOG2?
   8004	bgt.w		TANHHUGE
   8005
   8006#-- (5/2) LOG2 < |X| < 50 LOG2,
   8007#--TANH(X) = 1 - (2/[EXP(2X)+1]). LET Y = 2|X|, SGN = SIGN(X),
   8008#--TANH(X) = SGN -	SGN*2/[EXP(Y)+1].
   8009
   8010	mov.l		X(%a6),%d1		# packed sign/exponent saved at entry
   8011	mov.l		%d1,SGN(%a6)
   8012	and.l		&0x7FFF0000,%d1		# keep only the biased exponent
   8013	add.l		&0x00010000,%d1		# EXPO OF 2|X|
   8014	mov.l		%d1,X(%a6)		# Y = 2|X|
   8015	and.l		&0x80000000,SGN(%a6)	# SGN = sign bit of X only
   8016	mov.l		SGN(%a6),%d1		# (d1 is loaded from SGN again after the setox call)
   8017	fmov.x		X(%a6),%fp0		# Y = 2|X|
   8018
   8019	mov.l		%d0,-(%sp)		# save caller's round prec,mode
   8020	clr.l		%d0			# call setox with d0 = 0
   8021	fmovm.x		&0x01,-(%sp)		# save Y on stack
   8022	lea		(%sp),%a0		# pass ptr to Y
   8023	bsr		setox			# FP0 IS EXP(Y)
   8024	add.l		&0xc,%sp		# clear Y from stack
   8025	mov.l		(%sp)+,%d0		# recover caller's round prec,mode
   8026	mov.l		SGN(%a6),%d1		# d1 = sign bit of X
   8027	fadd.s		&0x3F800000,%fp0	# EXP(Y)+1
   8028
   8029	eor.l		&0xC0000000,%d1		# -SIGN(X)*2
   8030	fmov.s		%d1,%fp1		# -SIGN(X)*2 IN SGL FMT
   8031	fdiv.x		%fp0,%fp1		# -SIGN(X)2 / [EXP(Y)+1 ]
   8032
   8033	mov.l		SGN(%a6),%d1
   8034	or.l		&0x3F800000,%d1		# SGN
   8035	fmov.s		%d1,%fp0		# SGN IN SGL FMT
   8036
   8037	fmov.l		%d0,%fpcr		# restore users round prec,mode
   8038	mov.b		&FADD_OP,%d1		# last inst is ADD
   8039	fadd.x		%fp1,%fp0		# SGN - SGN*2/[EXP(Y)+1]
   8040	bra		t_inx2
   8041
   8042TANHSM:
       #--|X| < 2^(-40): the result is X itself.
   8043	fmov.l		%d0,%fpcr		# restore users round prec,mode
   8044	mov.b		&FMOV_OP,%d1		# last inst is MOVE
   8045	fmov.x		X(%a6),%fp0		# last inst - possible exception set
   8046	bra		t_catch
   8047
   8048#---RETURN SGN(X) - SGN(X)EPS
   8049TANHHUGE:
   8050	mov.l		X(%a6),%d1
   8051	and.l		&0x80000000,%d1		# sign bit of X
   8052	or.l		&0x3F800000,%d1		# single precision SIGN(X)*1.0
   8053	fmov.s		%d1,%fp0
   8054	and.l		&0x80000000,%d1		# back to the sign bit alone
   8055	eor.l		&0x80800000,%d1		# -SIGN(X)*EPS
   8056
   8057	fmov.l		%d0,%fpcr		# restore users round prec,mode
   8058	fadd.s		%d1,%fp0		# SIGN(X) - SIGN(X)*EPS, rounded in the user's mode
   8059	bra		t_inx2
   8060
   8061	global		stanhd
   8062#--TANH(X) = X FOR DENORMALIZED X
   8063stanhd:
   8064	bra		t_extdnrm		# hand off to t_extdnrm (defined outside this section)
   8065
   8066#########################################################################
   8067# slogn():    computes the natural logarithm of a normalized input	#
   8068# slognd():   computes the natural logarithm of a denormalized input	#
   8069# slognp1():  computes the log(1+X) of a normalized input		#
   8070# slognp1d(): computes the log(1+X) of a denormalized input		#
   8071#									#
   8072# INPUT ***************************************************************	#
   8073#	a0 = pointer to extended precision input			#
   8074#	d0 = round precision,mode					#
   8075#									#
   8076# OUTPUT **************************************************************	#
   8077#	fp0 = log(X) or log(1+X)					#
   8078#									#
   8079# ACCURACY and MONOTONICITY *******************************************	#
   8080#	The returned result is within 2 ulps in 64 significant bit,	#
   8081#	i.e. within 0.5001 ulp to 53 bits if the result is subsequently	#
   8082#	rounded to double precision. The result is provably monotonic	#
   8083#	in double precision.						#
   8084#									#
   8085# ALGORITHM ***********************************************************	#
   8086#	LOGN:								#
   8087#	Step 1. If |X-1| < 1/16, approximate log(X) by an odd		#
   8088#		polynomial in u, where u = 2(X-1)/(X+1). Otherwise,	#
   8089#		move on to Step 2.					#
   8090#									#
   8091#	Step 2. X = 2**k * Y where 1 <= Y < 2. Define F to be the first	#
   8092#		seven significant bits of Y plus 2**(-7), i.e.		#
   8093#		F = 1.xxxxxx1 in base 2 where the six "x" match those	#
   8094#		of Y. Note that |Y-F| <= 2**(-7).			#
   8095#									#
   8096#	Step 3. Define u = (Y-F)/F. Approximate log(1+u) by a		#
   8097#		polynomial in u, log(1+u) = poly.			#
   8098#									#
   8099#	Step 4. Reconstruct						#
   8100#		log(X) = log( 2**k * Y ) = k*log(2) + log(F) + log(1+u)	#
   8101#		by k*log(2) + (log(F) + poly). The values of log(F) are	#
   8102#		calculated beforehand and stored in the program.	#
   8103#									#
   8104#	lognp1:								#
   8105#	Step 1: If |X| < 1/16, approximate log(1+X) by an odd		#
   8106#		polynomial in u where u = 2X/(2+X). Otherwise, move on	#
   8107#		to Step 2.						#
   8108#									#
   8109#	Step 2: Let 1+X = 2**k * Y, where 1 <= Y < 2. Define F as done	#
   8110#		in Step 2 of the algorithm for LOGN and compute		#
   8111#		log(1+X) as k*log(2) + log(F) + poly where poly		#
   8112#		approximates log(1+u), u = (Y-F)/F.			#
   8113#									#
   8114#	Implementation Notes:						#
   8115#	Note 1. There are 64 different possible values for F, thus 64	#
   8116#		log(F)'s need to be tabulated. Moreover, the values of	#
   8117#		1/F are also tabulated so that the division in (Y-F)/F	#
   8118#		can be performed by a multiplication.			#
   8119#									#
   8120#	Note 2. In Step 2 of lognp1, in order to preserve accuracy,	#
   8121#		the value Y-F has to be calculated carefully when	#
   8122#		1/2 <= X < 3/2.						#
   8123#									#
   8124#	Note 3. To fully exploit the pipeline, polynomials are usually	#
   8125#		separated into two parts evaluated independently before	#
   8126#		being added up.						#
   8127#									#
   8128#########################################################################
   8129LOGOF2:
       #--LOG(2) in extended precision plus one pad long; multiplied by K via fmul.x.
   8130	long		0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000
   8131
       #--Single precision constants (read with fadd.s/fsub.s/fmov.s).
   8132one:
   8133	long		0x3F800000		# +1.0
   8134zero:
   8135	long		0x00000000		# +0.0
   8136infty:
   8137	long		0x7F800000		# +infinity
   8138negone:
   8139	long		0xBF800000		# -1.0
   8140
       #--LOGA1..LOGA6: double precision coefficients A1..A6 of the LOG(1+U)
       #--polynomial evaluated at LP1CONT1 (read with fmul.d/fadd.d).
   8141LOGA6:
   8142	long		0x3FC2499A,0xB5E4040B
   8143LOGA5:
   8144	long		0xBFC555B5,0x848CB7DB
   8145
   8146LOGA4:
   8147	long		0x3FC99999,0x987D8730
   8148LOGA3:
   8149	long		0xBFCFFFFF,0xFF6F7E97
   8150
   8151LOGA2:
   8152	long		0x3FD55555,0x555555A4
   8153LOGA1:
   8154	long		0xBFE00000,0x00000008
   8155
       #--LOGB1..LOGB5: double precision coefficients B1..B5 of the odd
       #--polynomial evaluated at LP1CONT2 (read with fmov.d/fadd.d).
   8156LOGB5:
   8157	long		0x3F175496,0xADD7DAD6
   8158LOGB4:
   8159	long		0x3F3C71C2,0xFE80C7E0
   8160
   8161LOGB3:
   8162	long		0x3F624924,0x928BCCFF
   8163LOGB2:
   8164	long		0x3F899999,0x999995EC
   8165
   8166LOGB1:
   8167	long		0x3FB55555,0x55555555
   8168TWO:
       #--KISNEG1 reads the first long with fmov.s, i.e. as single precision 2.0.
   8169	long		0x40000000,0x00000000
   8170
   8171LTHOLD:
       #--Extended precision 2^(-102) plus pad long: slognp1 returns its argument
       #--unchanged unless |X| is greater than this threshold.
   8172	long		0x3f990000,0x80000000,0x00000000,0x00000000
   8173
   8174LOGTBL:
       #--64 pairs of extended precision values, each padded to 16 bytes:
       #--pair j holds 1/F at offset 32*j and LOG(F) at offset 32*j+16, where j is
       #--the value of the six mantissa bits of Y directly below the leading 1
       #--(mask 0x7E000000 of the upper mantissa long, shifted right by 20).
   8175	long		0x3FFE0000,0xFE03F80F,0xE03F80FE,0x00000000
   8176	long		0x3FF70000,0xFF015358,0x833C47E2,0x00000000
   8177	long		0x3FFE0000,0xFA232CF2,0x52138AC0,0x00000000
   8178	long		0x3FF90000,0xBDC8D83E,0xAD88D549,0x00000000
   8179	long		0x3FFE0000,0xF6603D98,0x0F6603DA,0x00000000
   8180	long		0x3FFA0000,0x9CF43DCF,0xF5EAFD48,0x00000000
   8181	long		0x3FFE0000,0xF2B9D648,0x0F2B9D65,0x00000000
   8182	long		0x3FFA0000,0xDA16EB88,0xCB8DF614,0x00000000
   8183	long		0x3FFE0000,0xEF2EB71F,0xC4345238,0x00000000
   8184	long		0x3FFB0000,0x8B29B775,0x1BD70743,0x00000000
   8185	long		0x3FFE0000,0xEBBDB2A5,0xC1619C8C,0x00000000
   8186	long		0x3FFB0000,0xA8D839F8,0x30C1FB49,0x00000000
   8187	long		0x3FFE0000,0xE865AC7B,0x7603A197,0x00000000
   8188	long		0x3FFB0000,0xC61A2EB1,0x8CD907AD,0x00000000
   8189	long		0x3FFE0000,0xE525982A,0xF70C880E,0x00000000
   8190	long		0x3FFB0000,0xE2F2A47A,0xDE3A18AF,0x00000000
   8191	long		0x3FFE0000,0xE1FC780E,0x1FC780E2,0x00000000
   8192	long		0x3FFB0000,0xFF64898E,0xDF55D551,0x00000000
   8193	long		0x3FFE0000,0xDEE95C4C,0xA037BA57,0x00000000
   8194	long		0x3FFC0000,0x8DB956A9,0x7B3D0148,0x00000000
   8195	long		0x3FFE0000,0xDBEB61EE,0xD19C5958,0x00000000
   8196	long		0x3FFC0000,0x9B8FE100,0xF47BA1DE,0x00000000
   8197	long		0x3FFE0000,0xD901B203,0x6406C80E,0x00000000
   8198	long		0x3FFC0000,0xA9372F1D,0x0DA1BD17,0x00000000
   8199	long		0x3FFE0000,0xD62B80D6,0x2B80D62C,0x00000000
   8200	long		0x3FFC0000,0xB6B07F38,0xCE90E46B,0x00000000
   8201	long		0x3FFE0000,0xD3680D36,0x80D3680D,0x00000000
   8202	long		0x3FFC0000,0xC3FD0329,0x06488481,0x00000000
   8203	long		0x3FFE0000,0xD0B69FCB,0xD2580D0B,0x00000000
   8204	long		0x3FFC0000,0xD11DE0FF,0x15AB18CA,0x00000000
   8205	long		0x3FFE0000,0xCE168A77,0x25080CE1,0x00000000
   8206	long		0x3FFC0000,0xDE1433A1,0x6C66B150,0x00000000
   8207	long		0x3FFE0000,0xCB8727C0,0x65C393E0,0x00000000
   8208	long		0x3FFC0000,0xEAE10B5A,0x7DDC8ADD,0x00000000
   8209	long		0x3FFE0000,0xC907DA4E,0x871146AD,0x00000000
   8210	long		0x3FFC0000,0xF7856E5E,0xE2C9B291,0x00000000
   8211	long		0x3FFE0000,0xC6980C69,0x80C6980C,0x00000000
   8212	long		0x3FFD0000,0x82012CA5,0xA68206D7,0x00000000
   8213	long		0x3FFE0000,0xC4372F85,0x5D824CA6,0x00000000
   8214	long		0x3FFD0000,0x882C5FCD,0x7256A8C5,0x00000000
   8215	long		0x3FFE0000,0xC1E4BBD5,0x95F6E947,0x00000000
   8216	long		0x3FFD0000,0x8E44C60B,0x4CCFD7DE,0x00000000
   8217	long		0x3FFE0000,0xBFA02FE8,0x0BFA02FF,0x00000000
   8218	long		0x3FFD0000,0x944AD09E,0xF4351AF6,0x00000000
   8219	long		0x3FFE0000,0xBD691047,0x07661AA3,0x00000000
   8220	long		0x3FFD0000,0x9A3EECD4,0xC3EAA6B2,0x00000000
   8221	long		0x3FFE0000,0xBB3EE721,0xA54D880C,0x00000000
   8222	long		0x3FFD0000,0xA0218434,0x353F1DE8,0x00000000
   8223	long		0x3FFE0000,0xB92143FA,0x36F5E02E,0x00000000
   8224	long		0x3FFD0000,0xA5F2FCAB,0xBBC506DA,0x00000000
   8225	long		0x3FFE0000,0xB70FBB5A,0x19BE3659,0x00000000
   8226	long		0x3FFD0000,0xABB3B8BA,0x2AD362A5,0x00000000
   8227	long		0x3FFE0000,0xB509E68A,0x9B94821F,0x00000000
   8228	long		0x3FFD0000,0xB1641795,0xCE3CA97B,0x00000000
   8229	long		0x3FFE0000,0xB30F6352,0x8917C80B,0x00000000
   8230	long		0x3FFD0000,0xB7047551,0x5D0F1C61,0x00000000
   8231	long		0x3FFE0000,0xB11FD3B8,0x0B11FD3C,0x00000000
   8232	long		0x3FFD0000,0xBC952AFE,0xEA3D13E1,0x00000000
   8233	long		0x3FFE0000,0xAF3ADDC6,0x80AF3ADE,0x00000000
   8234	long		0x3FFD0000,0xC2168ED0,0xF458BA4A,0x00000000
   8235	long		0x3FFE0000,0xAD602B58,0x0AD602B6,0x00000000
   8236	long		0x3FFD0000,0xC788F439,0xB3163BF1,0x00000000
   8237	long		0x3FFE0000,0xAB8F69E2,0x8359CD11,0x00000000
   8238	long		0x3FFD0000,0xCCECAC08,0xBF04565D,0x00000000
   8239	long		0x3FFE0000,0xA9C84A47,0xA07F5638,0x00000000
   8240	long		0x3FFD0000,0xD2420487,0x2DD85160,0x00000000
   8241	long		0x3FFE0000,0xA80A80A8,0x0A80A80B,0x00000000
   8242	long		0x3FFD0000,0xD7894992,0x3BC3588A,0x00000000
   8243	long		0x3FFE0000,0xA655C439,0x2D7B73A8,0x00000000
   8244	long		0x3FFD0000,0xDCC2C4B4,0x9887DACC,0x00000000
   8245	long		0x3FFE0000,0xA4A9CF1D,0x96833751,0x00000000
   8246	long		0x3FFD0000,0xE1EEBD3E,0x6D6A6B9E,0x00000000
   8247	long		0x3FFE0000,0xA3065E3F,0xAE7CD0E0,0x00000000
   8248	long		0x3FFD0000,0xE70D785C,0x2F9F5BDC,0x00000000
   8249	long		0x3FFE0000,0xA16B312E,0xA8FC377D,0x00000000
   8250	long		0x3FFD0000,0xEC1F392C,0x5179F283,0x00000000
   8251	long		0x3FFE0000,0x9FD809FD,0x809FD80A,0x00000000
   8252	long		0x3FFD0000,0xF12440D3,0xE36130E6,0x00000000
   8253	long		0x3FFE0000,0x9E4CAD23,0xDD5F3A20,0x00000000
   8254	long		0x3FFD0000,0xF61CCE92,0x346600BB,0x00000000
   8255	long		0x3FFE0000,0x9CC8E160,0xC3FB19B9,0x00000000
   8256	long		0x3FFD0000,0xFB091FD3,0x8145630A,0x00000000
   8257	long		0x3FFE0000,0x9B4C6F9E,0xF03A3CAA,0x00000000
   8258	long		0x3FFD0000,0xFFE97042,0xBFA4C2AD,0x00000000
   8259	long		0x3FFE0000,0x99D722DA,0xBDE58F06,0x00000000
   8260	long		0x3FFE0000,0x825EFCED,0x49369330,0x00000000
   8261	long		0x3FFE0000,0x9868C809,0x868C8098,0x00000000
   8262	long		0x3FFE0000,0x84C37A7A,0xB9A905C9,0x00000000
   8263	long		0x3FFE0000,0x97012E02,0x5C04B809,0x00000000
   8264	long		0x3FFE0000,0x87224C2E,0x8E645FB7,0x00000000
   8265	long		0x3FFE0000,0x95A02568,0x095A0257,0x00000000
   8266	long		0x3FFE0000,0x897B8CAC,0x9F7DE298,0x00000000
   8267	long		0x3FFE0000,0x94458094,0x45809446,0x00000000
   8268	long		0x3FFE0000,0x8BCF55DE,0xC4CD05FE,0x00000000
   8269	long		0x3FFE0000,0x92F11384,0x0497889C,0x00000000
   8270	long		0x3FFE0000,0x8E1DC0FB,0x89E125E5,0x00000000
   8271	long		0x3FFE0000,0x91A2B3C4,0xD5E6F809,0x00000000
   8272	long		0x3FFE0000,0x9066E68C,0x955B6C9B,0x00000000
   8273	long		0x3FFE0000,0x905A3863,0x3E06C43B,0x00000000
   8274	long		0x3FFE0000,0x92AADE74,0xC7BE59E0,0x00000000
   8275	long		0x3FFE0000,0x8F1779D9,0xFDC3A219,0x00000000
   8276	long		0x3FFE0000,0x94E9BFF6,0x15845643,0x00000000
   8277	long		0x3FFE0000,0x8DDA5202,0x37694809,0x00000000
   8278	long		0x3FFE0000,0x9723A1B7,0x20134203,0x00000000
   8279	long		0x3FFE0000,0x8CA29C04,0x6514E023,0x00000000
   8280	long		0x3FFE0000,0x995899C8,0x90EB8990,0x00000000
   8281	long		0x3FFE0000,0x8B70344A,0x139BC75A,0x00000000
   8282	long		0x3FFE0000,0x9B88BDAA,0x3A3DAE2F,0x00000000
   8283	long		0x3FFE0000,0x8A42F870,0x5669DB46,0x00000000
   8284	long		0x3FFE0000,0x9DB4224F,0xFFE1157C,0x00000000
   8285	long		0x3FFE0000,0x891AC73A,0xE9819B50,0x00000000
   8286	long		0x3FFE0000,0x9FDADC26,0x8B7A12DA,0x00000000
   8287	long		0x3FFE0000,0x87F78087,0xF78087F8,0x00000000
   8288	long		0x3FFE0000,0xA1FCFF17,0xCE733BD4,0x00000000
   8289	long		0x3FFE0000,0x86D90544,0x7A34ACC6,0x00000000
   8290	long		0x3FFE0000,0xA41A9E8F,0x5446FB9F,0x00000000
   8291	long		0x3FFE0000,0x85BF3761,0x2CEE3C9B,0x00000000
   8292	long		0x3FFE0000,0xA633CD7E,0x6771CD8B,0x00000000
   8293	long		0x3FFE0000,0x84A9F9C8,0x084A9F9D,0x00000000
   8294	long		0x3FFE0000,0xA8489E60,0x0B435A5E,0x00000000
   8295	long		0x3FFE0000,0x83993052,0x3FBE3368,0x00000000
   8296	long		0x3FFE0000,0xAA59233C,0xCCA4BD49,0x00000000
   8297	long		0x3FFE0000,0x828CBFBE,0xB9A020A3,0x00000000
   8298	long		0x3FFE0000,0xAC656DAE,0x6BCC4985,0x00000000
   8299	long		0x3FFE0000,0x81848DA8,0xFAF0D277,0x00000000
   8300	long		0x3FFE0000,0xAE6D8EE3,0x60BB2468,0x00000000
   8301	long		0x3FFE0000,0x80808080,0x80808081,0x00000000
   8302	long		0x3FFE0000,0xB07197A2,0x3C46C654,0x00000000
   8303
   8304	set		ADJK,L_SCR1		# exponent adjustment: input value is 2^(ADJK)*FP0
   8305
   8306	set		X,FP_SCR0
   8307	set		XDCARE,X+2		# word that follows the sign/exponent word of X
   8308	set		XFRAC,X+4		# upper mantissa long of X
   8309
   8310	set		F,FP_SCR1
   8311	set		FFRAC,F+4		# upper mantissa long of F
   8312
   8313	set		KLOG2,FP_SCR0		# K*LOG2 (same scratch slot as X and SAVEU)
   8314
   8315	set		SAVEU,FP_SCR0		# U in the near-1 path (same scratch slot as X)
   8316
   8317	global		slogn
   8318#--ENTRY POINT FOR LOG(X) FOR X FINITE, NON-ZERO, NOT NAN'S
   8319slogn:
   8320	fmov.x		(%a0),%fp0		# LOAD INPUT
   8321	mov.l		&0x00000000,ADJK(%a6)	# normalized input: no exponent adjustment
   8322
   8323LOGBGN:
   8324#--FPCR SAVED AND CLEARED, INPUT IS 2^(ADJK)*FP0, FP0 CONTAINS
   8325#--A FINITE, NON-ZERO, NORMALIZED NUMBER.
       #--slognd also enters here, with a0 -> its normalized copy in X(%a6).
   8326
   8327	mov.l		(%a0),%d1		# upper word of d1 = sign/exponent
   8328	mov.w		4(%a0),%d1		# lower word of d1 = top 16 mantissa bits
   8329
   8330	mov.l		(%a0),X(%a6)		# copy the 12-byte operand into X
   8331	mov.l		4(%a0),X+4(%a6)
   8332	mov.l		8(%a0),X+8(%a6)
   8333
   8334	cmp.l		%d1,&0			# CHECK IF X IS NEGATIVE
   8335	blt.w		LOGNEG			# LOG OF NEGATIVE ARGUMENT IS INVALID
   8336# X IS POSITIVE, CHECK IF X IS NEAR 1
   8337	cmp.l		%d1,&0x3ffef07d		# IS X < 15/16? (CONSTANT IS ~EXP(-1/16), SEE LP1NEAR1)
   8338	blt.b		LOGMAIN			# YES
   8339	cmp.l		%d1,&0x3fff8841		# IS X > 17/16? (CONSTANT IS ~EXP(1/16), SEE LP1NEAR1)
   8340	ble.w		LOGNEAR1		# NO
   8341
   8342LOGMAIN:
   8343#--THIS SHOULD BE THE USUAL CASE, X NOT VERY CLOSE TO 1
       #--slognp1 also branches here when 1+Z lies outside [1/2,3/2]; it arrives
       #--with d1 = packed sign/exponent of X and X(%a6) already filled in.
   8344
   8345#--X = 2^(K) * Y, 1 <= Y < 2. THUS, Y = 1.XXXXXXXX....XX IN BINARY.
   8346#--WE DEFINE F = 1.XXXXXX1, I.E. FIRST 7 BITS OF Y AND ATTACH A 1.
   8347#--THE IDEA IS THAT LOG(X) = K*LOG2 + LOG(Y)
   8348#--			 = K*LOG2 + LOG(F) + LOG(1 + (Y-F)/F).
   8349#--NOTE THAT U = (Y-F)/F IS VERY SMALL AND THUS APPROXIMATING
   8350#--LOG(1+U) CAN BE VERY EFFICIENT.
   8351#--ALSO NOTE THAT THE VALUE 1/F IS STORED IN A TABLE SO THAT NO
   8352#--DIVISION IS NEEDED TO CALCULATE (Y-F)/F.
   8353
   8354#--GET K, Y, F, AND ADDRESS OF 1/F.
   8355	asr.l		&8,%d1
   8356	asr.l		&8,%d1			# SHIFTED 16 BITS, BIASED EXPO. OF X
   8357	sub.l		&0x3FFF,%d1		# THIS IS K
   8358	add.l		ADJK(%a6),%d1		# ADJUST K, ORIGINAL INPUT MAY BE  DENORM.
   8359	lea		LOGTBL(%pc),%a0		# BASE ADDRESS OF 1/F AND LOG(F)
   8360	fmov.l		%d1,%fp1		# CONVERT K TO FLOATING-POINT FORMAT
   8361
   8362#--WHILE THE CONVERSION IS GOING ON, WE GET F AND ADDRESS OF 1/F
   8363	mov.l		&0x3FFF0000,X(%a6)	# X IS NOW Y, I.E. 2^(-K)*X
   8364	mov.l		XFRAC(%a6),FFRAC(%a6)
   8365	and.l		&0xFE000000,FFRAC(%a6)	# FIRST 7 BITS OF Y
   8366	or.l		&0x01000000,FFRAC(%a6)	# GET F: ATTACH A 1 AT THE EIGHTH BIT
   8367	mov.l		FFRAC(%a6),%d1	# READY TO GET ADDRESS OF 1/F
   8368	and.l		&0x7E000000,%d1		# six bits below the leading 1 select the table pair
   8369	asr.l		&8,%d1
   8370	asr.l		&8,%d1
   8371	asr.l		&4,%d1			# SHIFTED 20, D1 IS THE DISPLACEMENT
   8372	add.l		%d1,%a0			# A0 IS THE ADDRESS FOR 1/F
   8373
   8374	fmov.x		X(%a6),%fp0		# FP0 IS Y
   8375	mov.l		&0x3fff0000,F(%a6)	# F gets the same sign/exponent long as Y
   8376	clr.l		F+8(%a6)		# lower mantissa long of F is zero
   8377	fsub.x		F(%a6),%fp0		# Y-F
   8378	fmovm.x		&0xc,-(%sp)		# SAVE FP2-3 WHILE FP0 IS NOT READY
   8379#--SUMMARY: FP0 IS Y-F, A0 IS ADDRESS OF 1/F, FP1 IS K
   8380#--REGISTERS SAVED: FPCR, FP1, FP2
   8381
   8382LP1CONT1:
   8383#--A RE-ENTRY POINT FOR LOGNP1
   8384	fmul.x		(%a0),%fp0		# FP0 IS U = (Y-F)/F
   8385	fmul.x		LOGOF2(%pc),%fp1	# GET K*LOG2 WHILE FP0 IS NOT READY
   8386	fmov.x		%fp0,%fp2
   8387	fmul.x		%fp2,%fp2		# FP2 IS V=U*U
   8388	fmov.x		%fp1,KLOG2(%a6)		# PUT K*LOG2 IN MEMORY, FREE FP1
   8389
   8390#--LOG(1+U) IS APPROXIMATED BY
   8391#--U + V*(A1+U*(A2+U*(A3+U*(A4+U*(A5+U*A6))))) WHICH IS
   8392#--[U + V*(A1+V*(A3+V*A5))]  +  [U*V*(A2+V*(A4+V*A6))]
   8393
   8394	fmov.x		%fp2,%fp3		# FP3 KEEPS V
   8395	fmov.x		%fp2,%fp1
   8396
   8397	fmul.d		LOGA6(%pc),%fp1		# V*A6
   8398	fmul.d		LOGA5(%pc),%fp2		# V*A5
   8399
   8400	fadd.d		LOGA4(%pc),%fp1		# A4+V*A6
   8401	fadd.d		LOGA3(%pc),%fp2		# A3+V*A5
   8402
   8403	fmul.x		%fp3,%fp1		# V*(A4+V*A6)
   8404	fmul.x		%fp3,%fp2		# V*(A3+V*A5)
   8405
   8406	fadd.d		LOGA2(%pc),%fp1		# A2+V*(A4+V*A6)
   8407	fadd.d		LOGA1(%pc),%fp2		# A1+V*(A3+V*A5)
   8408
   8409	fmul.x		%fp3,%fp1		# V*(A2+V*(A4+V*A6))
   8410	add.l		&16,%a0			# ADDRESS OF LOG(F)
   8411	fmul.x		%fp3,%fp2		# V*(A1+V*(A3+V*A5))
   8412
   8413	fmul.x		%fp0,%fp1		# U*V*(A2+V*(A4+V*A6))
   8414	fadd.x		%fp2,%fp0		# U+V*(A1+V*(A3+V*A5))
   8415
   8416	fadd.x		(%a0),%fp1		# LOG(F)+U*V*(A2+V*(A4+V*A6))
   8417	fmovm.x		(%sp)+,&0x30		# RESTORE FP2-3
   8418	fadd.x		%fp1,%fp0		# FP0 IS LOG(F) + LOG(1+U)
   8419
   8420	fmov.l		%d0,%fpcr		# switch to the round prec,mode passed in d0
   8421	fadd.x		KLOG2(%a6),%fp0		# FINAL ADD
   8422	bra		t_inx2
   8423
   8424
   8425LOGNEAR1:
   8426
   8427# if the input is exactly equal to one, then exit through ld_pzero.
   8428# if these 2 lines weren't here, the correct answer would be returned
   8429# but the INEX2 bit would be set.
   8430	fcmp.b		%fp0,&0x1		# is it equal to one?
   8431	fbeq.l		ld_pzero		# yes
   8432
   8433#--REGISTERS SAVED: FPCR, FP1. FP0 CONTAINS THE INPUT.
   8434	fmov.x		%fp0,%fp1
   8435	fsub.s		one(%pc),%fp1		# FP1 IS X-1
   8436	fadd.s		one(%pc),%fp0		# FP0 IS X+1
   8437	fadd.x		%fp1,%fp1		# FP1 IS 2(X-1)
   8438#--LOG(X) = LOG(1+U/2)-LOG(1-U/2) WHICH IS AN ODD POLYNOMIAL
   8439#--IN U, U = 2(X-1)/(X+1) = FP1/FP0
   8440
   8441LP1CONT2:
   8442#--THIS IS A RE-ENTRY POINT FOR LOGNP1
   8443	fdiv.x		%fp0,%fp1		# FP1 IS U
   8444	fmovm.x		&0xc,-(%sp)		# SAVE FP2-3
   8445#--REGISTERS SAVED ARE NOW FPCR,FP1,FP2,FP3
   8446#--LET V=U*U, W=V*V, CALCULATE
   8447#--U + U*V*(B1 + V*(B2 + V*(B3 + V*(B4 + V*B5)))) BY
   8448#--U + U*V*(  [B1 + W*(B3 + W*B5)]  +  [V*(B2 + W*B4)]  )
   8449	fmov.x		%fp1,%fp0
   8450	fmul.x		%fp0,%fp0		# FP0 IS V
   8451	fmov.x		%fp1,SAVEU(%a6)		# STORE U IN MEMORY, FREE FP1
   8452	fmov.x		%fp0,%fp1
   8453	fmul.x		%fp1,%fp1		# FP1 IS W
   8454
   8455	fmov.d		LOGB5(%pc),%fp3
   8456	fmov.d		LOGB4(%pc),%fp2
   8457
   8458	fmul.x		%fp1,%fp3		# W*B5
   8459	fmul.x		%fp1,%fp2		# W*B4
   8460
   8461	fadd.d		LOGB3(%pc),%fp3		# B3+W*B5
   8462	fadd.d		LOGB2(%pc),%fp2		# B2+W*B4
   8463
   8464	fmul.x		%fp3,%fp1		# W*(B3+W*B5), FP3 RELEASED
   8465
   8466	fmul.x		%fp0,%fp2		# V*(B2+W*B4)
   8467
   8468	fadd.d		LOGB1(%pc),%fp1		# B1+W*(B3+W*B5)
   8469	fmul.x		SAVEU(%a6),%fp0		# FP0 IS U*V
   8470
   8471	fadd.x		%fp2,%fp1		# B1+W*(B3+W*B5) + V*(B2+W*B4), FP2 RELEASED
   8472	fmovm.x		(%sp)+,&0x30		# FP2-3 RESTORED
   8473
   8474	fmul.x		%fp1,%fp0		# U*V*( [B1+W*(B3+W*B5)] + [V*(B2+W*B4)] )
   8475
   8476	fmov.l		%d0,%fpcr		# switch to the round prec,mode passed in d0
   8477	fadd.x		SAVEU(%a6),%fp0		# FINAL ADD: U + U*V*( ... )
   8478	bra		t_inx2
   8479
   8480#--REGISTERS SAVED FPCR. LOG(-VE) IS INVALID
   8481LOGNEG:
   8482	bra		t_operr
   8483
   8484	global		slognd
   8485slognd:
   8486#--ENTRY POINT FOR LOG(X) FOR DENORMALIZED INPUT
   8487
   8488	mov.l		&-100,ADJK(%a6)		# INPUT = 2^(ADJK) * FP0 (replaced by -k on both paths below)
   8489
   8490#----normalize the input value by left shifting k bits (k to be determined
   8491#----below), adjusting exponent and storing -k to  ADJK
   8492#----the value TWOTO100 is no longer needed.
   8493#----Note that this code assumes the denormalized input is NON-ZERO.
   8494
   8495	movm.l		&0x3f00,-(%sp)		# save some registers  {d2-d7}
   8496	mov.l		(%a0),%d3		# D3 is exponent of smallest norm. #
   8497	mov.l		4(%a0),%d4
   8498	mov.l		8(%a0),%d5		# (D4,D5) is (Hi_X,Lo_X)
   8499	clr.l		%d2			# D2 used for holding K
   8500
   8501	tst.l		%d4			# is the upper mantissa long all zero?
   8502	bne.b		Hi_not0
   8503
   8504Hi_0:
       #--Upper mantissa long is zero: moving the lower long up is a 32-bit shift,
       #--then shift further until the first 1 bit reaches bit 31.
   8505	mov.l		%d5,%d4
   8506	clr.l		%d5
   8507	mov.l		&32,%d2
   8508	clr.l		%d6
   8509	bfffo		%d4{&0:&32},%d6		# d6 = offset of the first 1 bit in d4
   8510	lsl.l		%d6,%d4
   8511	add.l		%d6,%d2			# (D3,D4,D5) is normalized
   8512
   8513	mov.l		%d3,X(%a6)
   8514	mov.l		%d4,XFRAC(%a6)
   8515	mov.l		%d5,XFRAC+4(%a6)
   8516	neg.l		%d2
   8517	mov.l		%d2,ADJK(%a6)		# ADJK = -k
   8518	fmov.x		X(%a6),%fp0		# FP0 = normalized copy of the input
   8519	movm.l		(%sp)+,&0xfc		# restore registers {d2-d7}
   8520	lea		X(%a6),%a0		# LOGBGN reads its operand through a0
   8521	bra.w		LOGBGN			# begin regular log(X)
   8522
   8523Hi_not0:
       #--Shift the 64-bit mantissa (D4,D5) left by k bits, carrying the top
       #--k bits of D5 into the bottom of D4.
   8524	clr.l		%d6
   8525	bfffo		%d4{&0:&32},%d6		# find first 1
   8526	mov.l		%d6,%d2			# get k
   8527	lsl.l		%d6,%d4
   8528	mov.l		%d5,%d7			# a copy of D5
   8529	lsl.l		%d6,%d5
   8530	neg.l		%d6
   8531	add.l		&32,%d6			# d6 = 32 - k
   8532	lsr.l		%d6,%d7			# d7 = bits of D5 that move into D4
   8533	or.l		%d7,%d4			# (D3,D4,D5) normalized
   8534
   8535	mov.l		%d3,X(%a6)
   8536	mov.l		%d4,XFRAC(%a6)
   8537	mov.l		%d5,XFRAC+4(%a6)
   8538	neg.l		%d2
   8539	mov.l		%d2,ADJK(%a6)		# ADJK = -k
   8540	fmov.x		X(%a6),%fp0		# FP0 = normalized copy of the input
   8541	movm.l		(%sp)+,&0xfc		# restore registers {d2-d7}
   8542	lea		X(%a6),%a0		# LOGBGN reads its operand through a0
   8543	bra.w		LOGBGN			# begin regular log(X)
   8544
   8545	global		slognp1
   8546#--ENTRY POINT FOR LOG(1+X) FOR X FINITE, NON-ZERO, NOT NAN'S
   8547slognp1:
   8548	fmov.x		(%a0),%fp0		# LOAD INPUT
   8549	fabs.x		%fp0			# test magnitude
   8550	fcmp.x		%fp0,LTHOLD(%pc)	# compare with min threshold
   8551	fbgt.w		LP1REAL			# if greater, continue
   8552	fmov.l		%d0,%fpcr		# |X| <= LTHOLD: the argument itself is returned
   8553	mov.b		&FMOV_OP,%d1		# last inst is MOVE
   8554	fmov.x		(%a0),%fp0		# return signed argument
   8555	bra		t_catch
   8556
   8557LP1REAL:
   8558	fmov.x		(%a0),%fp0		# LOAD INPUT
   8559	mov.l		&0x00000000,ADJK(%a6)	# no exponent adjustment
   8560	fmov.x		%fp0,%fp1		# FP1 IS INPUT Z
   8561	fadd.s		one(%pc),%fp0		# X := ROUND(1+Z)
   8562	fmov.x		%fp0,X(%a6)
   8563	mov.w		XFRAC(%a6),XDCARE(%a6)	# top 16 mantissa bits placed next to sign/exponent
   8564	mov.l		X(%a6),%d1		# d1 = packed sign/exponent : top mantissa bits of X
   8565	cmp.l		%d1,&0
   8566	ble.w		LP1NEG0			# LOG OF ZERO OR -VE
   8567	cmp.l		%d1,&0x3ffe8000		# IS BOUNDS [1/2,3/2]?
   8568	blt.w		LOGMAIN
   8569	cmp.l		%d1,&0x3fffc000
   8570	bgt.w		LOGMAIN
   8571#--IF 1+Z > 3/2 OR 1+Z < 1/2, THEN X, WHICH IS ROUNDING 1+Z,
   8572#--CONTAINS AT LEAST 63 BITS OF INFORMATION OF Z. IN THAT CASE,
   8573#--SIMPLY INVOKE LOG(X) FOR LOG(1+Z).
   8574
   8575LP1NEAR1:
   8576#--NEXT SEE IF EXP(-1/16) < X < EXP(1/16)
   8577	cmp.l		%d1,&0x3ffef07d
   8578	blt.w		LP1CARE
   8579	cmp.l		%d1,&0x3fff8841
   8580	bgt.w		LP1CARE
   8581
   8582LP1ONE16:
   8583#--EXP(-1/16) < X < EXP(1/16). LOG(1+Z) = LOG(1+U/2) - LOG(1-U/2)
   8584#--WHERE U = 2Z/(2+Z) = 2Z/(1+X).
   8585	fadd.x		%fp1,%fp1		# FP1 IS 2Z
   8586	fadd.s		one(%pc),%fp0		# FP0 IS 1+X
   8587#--U = FP1/FP0
   8588	bra.w		LP1CONT2
   8589
   8590LP1CARE:
   8591#--HERE WE USE THE USUAL TABLE DRIVEN APPROACH. CARE HAS TO BE
   8592#--TAKEN BECAUSE 1+Z CAN HAVE 67 BITS OF INFORMATION AND WE MUST
   8593#--PRESERVE ALL THE INFORMATION. BECAUSE 1+Z IS IN [1/2,3/2],
   8594#--THERE ARE ONLY TWO CASES.
   8595#--CASE 1: 1+Z < 1, THEN K = -1 AND Y-F = (2-F) + 2Z
   8596#--CASE 2: 1+Z > 1, THEN K = 0  AND Y-F = (1-F) + Z
   8597#--ON RETURNING TO LP1CONT1, WE MUST HAVE K IN FP1, ADDRESS OF
   8598#--(1/F) IN A0, Y-F IN FP0, AND FP2 SAVED.
   8599
   8600	mov.l		XFRAC(%a6),FFRAC(%a6)
   8601	and.l		&0xFE000000,FFRAC(%a6)
   8602	or.l		&0x01000000,FFRAC(%a6)	# F OBTAINED
   8603	cmp.l		%d1,&0x3FFF8000		# SEE IF 1+Z > 1
   8604	bge.b		KISZERO
   8605
   8606KISNEG1:
   8607	fmov.s		TWO(%pc),%fp0		# FP0 = 2.0
   8608	mov.l		&0x3fff0000,F(%a6)	# sign/exponent long of F
   8609	clr.l		F+8(%a6)		# lower mantissa long of F is zero
   8610	fsub.x		F(%a6),%fp0		# 2-F
   8611	mov.l		FFRAC(%a6),%d1
   8612	and.l		&0x7E000000,%d1
   8613	asr.l		&8,%d1
   8614	asr.l		&8,%d1
   8615	asr.l		&4,%d1			# D1 CONTAINS DISPLACEMENT FOR 1/F
   8616	fadd.x		%fp1,%fp1		# GET 2Z
   8617	fmovm.x		&0xc,-(%sp)		# SAVE FP2  {%fp2/%fp3}
   8618	fadd.x		%fp1,%fp0		# FP0 IS Y-F = (2-F)+2Z
   8619	lea		LOGTBL(%pc),%a0		# A0 IS ADDRESS OF 1/F
   8620	add.l		%d1,%a0
   8621	fmov.s		negone(%pc),%fp1	# FP1 IS K = -1
   8622	bra.w		LP1CONT1
   8623
   8624KISZERO:
   8625	fmov.s		one(%pc),%fp0		# FP0 = 1.0
   8626	mov.l		&0x3fff0000,F(%a6)	# sign/exponent long of F
   8627	clr.l		F+8(%a6)		# lower mantissa long of F is zero
   8628	fsub.x		F(%a6),%fp0		# 1-F
   8629	mov.l		FFRAC(%a6),%d1
   8630	and.l		&0x7E000000,%d1
   8631	asr.l		&8,%d1
   8632	asr.l		&8,%d1
   8633	asr.l		&4,%d1			# D1 CONTAINS DISPLACEMENT FOR 1/F
   8634	fadd.x		%fp1,%fp0		# FP0 IS Y-F
   8635	fmovm.x		&0xc,-(%sp)		# FP2 SAVED {%fp2/%fp3}
   8636	lea		LOGTBL(%pc),%a0
   8637	add.l		%d1,%a0			# A0 IS ADDRESS OF 1/F
   8638	fmov.s		zero(%pc),%fp1		# FP1 IS K = 0
   8639	bra.w		LP1CONT1
   8640
   8641LP1NEG0:
   8642#--FPCR SAVED. D1 IS X IN COMPACT FORM.
   8643	cmp.l		%d1,&0
   8644	blt.b		LP1NEG
   8645LP1ZERO:
       #--1+Z rounded to zero: load -1.0 and leave through t_dz.
   8646	fmov.s		negone(%pc),%fp0
   8647
   8648	fmov.l		%d0,%fpcr
   8649	bra		t_dz
   8650
   8651LP1NEG:
       #--1+Z is negative: load 0.0 and leave through t_operr.
   8652	fmov.s		zero(%pc),%fp0
   8653
   8654	fmov.l		%d0,%fpcr
   8655	bra		t_operr
   8656
   8657	global		slognp1d
   8658#--ENTRY POINT FOR LOG(1+Z) FOR DENORMALIZED INPUT
   8659# Simply return the denorm
   8660slognp1d:
   8661	bra		t_extdnrm		# hand off to t_extdnrm (defined outside this section)
   8662
   8663#########################################################################
   8664# satanh():  computes the inverse hyperbolic tangent of a norm input	#
   8665# satanhd(): computes the inverse hyperbolic tangent of a denorm input	#
   8666#									#
   8667# INPUT ***************************************************************	#
   8668#	a0 = pointer to extended precision input			#
   8669#	d0 = round precision,mode					#
   8670#									#
   8671# OUTPUT **************************************************************	#
   8672#	fp0 = arctanh(X)						#
   8673#									#
   8674# ACCURACY and MONOTONICITY *******************************************	#
   8675#	The returned result is within 3 ulps in	64 significant bit,	#
   8676#	i.e. within 0.5001 ulp to 53 bits if the result is subsequently	#
   8677#	rounded to double precision. The result is provably monotonic	#
   8678#	in double precision.						#
   8679#									#
   8680# ALGORITHM ***********************************************************	#
   8681#									#
   8682#	ATANH								#
   8683#	1. If |X| >= 1, go to 3.					#
   8684#									#
   8685#	2. (|X| < 1) Calculate atanh(X) by				#
   8686#		sgn := sign(X)						#
   8687#		y := |X|						#
   8688#		z := 2y/(1-y)						#
   8689#		atanh(X) := sgn * (1/2) * logp1(z)			#
   8690#		Exit.							#
   8691#									#
   8692#	3. If |X| > 1, go to 5.						#
   8693#									#
   8694#	4. (|X| = 1) Generate infinity with an appropriate sign and	#
   8695#		divide-by-zero by					#
   8696#		sgn := sign(X)						#
   8697#		atanh(X) := sgn / (+0).					#
   8698#		Exit.							#
   8699#									#
   8700#	5. (|X| > 1) Generate an invalid operation by 0 * infinity.	#
   8701#		Exit.							#
   8702#									#
   8703#########################################################################
   8704
   8705	global		satanh
   8706satanh:
       #--Entry (per the routine header): a0 -> extended precision X,
       #--d0 = caller's round precision,mode. The result is left in fp0.
   8707	mov.l		(%a0),%d1		# upper word of d1 = sign/exponent of X
   8708	mov.w		4(%a0),%d1		# lower word of d1 = top 16 mantissa bits
   8709	and.l		&0x7FFFFFFF,%d1		# drop the sign: d1 now describes |X|
   8710	cmp.l		%d1,&0x3FFF8000		# is |X| >= 1?
   8711	bge.b		ATANHBIG
   8712
   8713#--THIS IS THE USUAL CASE, |X| < 1
   8714#--Y = |X|, Z = 2Y/(1-Y), ATANH(X) = SIGN(X) * (1/2) * LOG1P(Z).
   8715
   8716	fabs.x		(%a0),%fp0		# Y = |X|
   8717	fmov.x		%fp0,%fp1
   8718	fneg.x		%fp1			# -Y
   8719	fadd.x		%fp0,%fp0		# 2Y
   8720	fadd.s		&0x3F800000,%fp1	# 1-Y
   8721	fdiv.x		%fp1,%fp0		# 2Y/(1-Y)
   8722	mov.l		(%a0),%d1
   8723	and.l		&0x80000000,%d1		# sign bit of X
   8724	or.l		&0x3F000000,%d1		# SIGN(X)*HALF
   8725	mov.l		%d1,-(%sp)		# keep it on the stack for the final multiply
   8726
   8727	mov.l		%d0,-(%sp)		# save rnd prec,mode
   8728	clr.l		%d0			# pass ext prec,RN
   8729	fmovm.x		&0x01,-(%sp)		# save Z on stack
   8730	lea		(%sp),%a0		# pass ptr to Z
   8731	bsr		slognp1			# LOG1P(Z)
   8732	add.l		&0xc,%sp		# clear Z from stack
   8733
   8734	mov.l		(%sp)+,%d0		# fetch old prec,mode
   8735	fmov.l		%d0,%fpcr		# load it
   8736	mov.b		&FMUL_OP,%d1		# last inst is MUL
   8737	fmul.s		(%sp)+,%fp0		# LOG1P(Z) * SIGN(X)*HALF; pops the saved factor
   8738	bra		t_catch
   8739
   8740ATANHBIG:
   8741	fabs.x		(%a0),%fp0		# |X|
   8742	fcmp.s		%fp0,&0x3F800000	# compare |X| with 1.0
   8743	fbgt		t_operr			# |X| > 1
   8744	bra		t_dz			# |X| = 1
   8745
   8746	global		satanhd
   8747#--ATANH(X) = X FOR DENORMALIZED X
   8748satanhd:
   8749	bra		t_extdnrm		# hand off to t_extdnrm (defined outside this section)
   8750
   8751#########################################################################
   8752# slog10():  computes the base-10 logarithm of a normalized input	#
   8753# slog10d(): computes the base-10 logarithm of a denormalized input	#
   8754# slog2():   computes the base-2 logarithm of a normalized input	#
   8755# slog2d():  computes the base-2 logarithm of a denormalized input	#
   8756#									#
   8757# INPUT *************************************************************** #
   8758#	a0 = pointer to extended precision input			#
   8759#	d0 = round precision,mode					#
   8760#									#
   8761# OUTPUT **************************************************************	#
   8762#	fp0 = log_10(X) or log_2(X)					#
   8763#									#
   8764# ACCURACY and MONOTONICITY *******************************************	#
   8765#	The returned result is within 1.7 ulps in 64 significant bit,	#
   8766#	i.e. within 0.5003 ulp to 53 bits if the result is subsequently	#
   8767#	rounded to double precision. The result is provably monotonic	#
   8768#	in double precision.						#
   8769#									#
   8770# ALGORITHM ***********************************************************	#
   8771#									#
   8772#       slog10d:							#
   8773#									#
   8774#       Step 0.	If X < 0, create a NaN and raise the invalid operation	#
   8775#               flag. Otherwise, save FPCR in D1; set FpCR to default.	#
   8776#       Notes:  Default means round-to-nearest mode, no floating-point	#
   8777#               traps, and precision control = double extended.		#
   8778#									#
   8779#       Step 1. Call slognd to obtain Y = log(X), the natural log of X.	#
   8780#       Notes:  Even if X is denormalized, log(X) is always normalized.	#
   8781#									#
   8782#       Step 2.  Compute log_10(X) = log(X) * (1/log(10)).		#
   8783#            2.1 Restore the user FPCR					#
   8784#            2.2 Return ans := Y * INV_L10.				#
   8785#									#
   8786#       slog10:								#
   8787#									#
   8788#       Step 0. If X < 0, create a NaN and raise the invalid operation	#
   8789#               flag. Otherwise, save FPCR in D1; set FpCR to default.	#
   8790#       Notes:  Default means round-to-nearest mode, no floating-point	#
   8791#               traps, and precision control = double extended.		#
   8792#									#
   8793#       Step 1. Call sLogN to obtain Y = log(X), the natural log of X.	#
   8794#									#
   8795#       Step 2.   Compute log_10(X) = log(X) * (1/log(10)).		#
   8796#            2.1  Restore the user FPCR					#
   8797#            2.2  Return ans := Y * INV_L10.				#
   8798#									#
   8799#       sLog2d:								#
   8800#									#
   8801#       Step 0. If X < 0, create a NaN and raise the invalid operation	#
   8802#               flag. Otherwise, save FPCR in D1; set FpCR to default.	#
   8803#       Notes:  Default means round-to-nearest mode, no floating-point	#
   8804#               traps, and precision control = double extended.		#
   8805#									#
   8806#       Step 1. Call slognd to obtain Y = log(X), the natural log of X.	#
   8807#       Notes:  Even if X is denormalized, log(X) is always normalized.	#
   8808#									#
   8809#       Step 2.   Compute log_2(X) = log(X) * (1/log(2)).		#
   8810#            2.1  Restore the user FPCR					#
   8811#            2.2  Return ans := Y * INV_L2.				#
   8812#									#
   8813#       sLog2:								#
   8814#									#
   8815#       Step 0. If X < 0, create a NaN and raise the invalid operation	#
   8816#               flag. Otherwise, save FPCR in D1; set FpCR to default.	#
   8817#       Notes:  Default means round-to-nearest mode, no floating-point	#
   8818#               traps, and precision control = double extended.		#
   8819#									#
   8820#       Step 1. If X is not an integer power of two, i.e., X != 2^k,	#
   8821#               go to Step 3.						#
   8822#									#
   8823#       Step 2.   Return k.						#
   8824#            2.1  Get integer k, X = 2^k.				#
   8825#            2.2  Restore the user FPCR.				#
   8826#            2.3  Return ans := convert-to-double-extended(k).		#
   8827#									#
   8828#       Step 3. Call sLogN to obtain Y = log(X), the natural log of X.	#
   8829#									#
   8830#       Step 4.   Compute log_2(X) = log(X) * (1/log(2)).		#
   8831#            4.1  Restore the user FPCR					#
   8832#            4.2  Return ans := Y * INV_L2.				#
   8833#									#
   8834#########################################################################
   8835
   8836INV_L10:
   8837	long		0x3FFD0000,0xDE5BD8A9,0x37287195,0x00000000	# 1/log(10), extended; fmul.x operand in slog10/slog10d
   8838
   8839INV_L2:
   8840	long		0x3FFF0000,0xB8AA3B29,0x5C17F0BC,0x00000000	# 1/log(2), extended; fmul.x operand in slog2/slog2d
   8841
   8842	global		slog10
   8843#--entry point for Log10(X), X is normalized; a0 -> X, d0 = rnd prec,mode, result in fp0
   8844slog10:
   8845	fmov.b		&0x1,%fp0		# fp0 = 1.0
   8846	fcmp.x		%fp0,(%a0)		# if operand == 1,
   8847	fbeq.l		ld_pzero		# return an EXACT zero
   8848
   8849	mov.l		(%a0),%d1		# sign/exponent longword of X; N flag = sign bit
   8850	blt.w		invalid			# X < 0: operand error
   8851	mov.l		%d0,-(%sp)		# stash caller's rnd prec,mode
   8852	clr.l		%d0			# call slogn with d0 = 0
   8853	bsr		slogn			# log(X), X normal.
   8854	fmov.l		(%sp)+,%fpcr		# pop saved prec,mode straight into FPCR
   8855	fmul.x		INV_L10(%pc),%fp0	# fp0 = log(X) * (1/log(10))
   8856	bra		t_inx2			# exit through t_inx2 (defined elsewhere)
   8857
   8858	global		slog10d
   8859#--entry point for Log10(X), X is denormalized; a0 -> X, d0 = rnd prec,mode, result in fp0
   8860slog10d:
   8861	mov.l		(%a0),%d1		# sign/exponent longword of X; N flag = sign bit
   8862	blt.w		invalid			# X < 0: operand error
   8863	mov.l		%d0,-(%sp)		# stash caller's rnd prec,mode
   8864	clr.l		%d0			# call slognd with d0 = 0
   8865	bsr		slognd			# log(X), X denorm.
   8866	fmov.l		(%sp)+,%fpcr		# pop saved prec,mode straight into FPCR
   8867	fmul.x		INV_L10(%pc),%fp0	# fp0 = log(X) * (1/log(10))
   8868	bra		t_minx2			# exit through t_minx2 (defined elsewhere)
   8869
   8870	global		slog2
   8871#--entry point for Log2(X), X is normalized; a0 -> X, d0 = rnd prec,mode, result in fp0
   8872slog2:
   8873	mov.l		(%a0),%d1		# sign/exponent longword of X; N flag = sign bit
   8874	blt.w		invalid			# X < 0: operand error
   8875
   8876	mov.l		8(%a0),%d1		# low mantissa longword
   8877	bne.b		continue		# X is not 2^k
   8878
   8879	mov.l		4(%a0),%d1		# high mantissa longword
   8880	and.l		&0x7FFFFFFF,%d1		# ignore the top (integer) bit
   8881	bne.b		continue		# other mantissa bits set: X is not 2^k
   8882
   8883#--X = 2^k.
   8884	mov.w		(%a0),%d1		# sign/exponent word
   8885	and.l		&0x00007FFF,%d1		# keep the 15-bit biased exponent only
   8886	sub.l		&0x3FFF,%d1		# remove bias: d1 = k
   8887	beq.l		ld_pzero		# k == 0 (X == 1): return an exact zero
   8888	fmov.l		%d0,%fpcr		# install user's rnd prec,mode
   8889	fmov.l		%d1,%fp0		# fp0 = k converted from integer
   8890	bra		t_inx2			# exit through t_inx2 (defined elsewhere)
   8891
   8892continue:
   8893	mov.l		%d0,-(%sp)		# stash caller's rnd prec,mode
   8894	clr.l		%d0			# call slogn with d0 = 0
   8895	bsr		slogn			# log(X), X normal.
   8896	fmov.l		(%sp)+,%fpcr		# pop saved prec,mode straight into FPCR
   8897	fmul.x		INV_L2(%pc),%fp0	# fp0 = log(X) * (1/log(2))
   8898	bra		t_inx2			# exit through t_inx2 (defined elsewhere)
   8899
   8900invalid:					# shared negative-operand exit for slog10, slog10d, slog2, slog2d
   8901	bra		t_operr
   8902
   8903	global		slog2d
   8904#--entry point for Log2(X), X is denormalized; a0 -> X, d0 = rnd prec,mode, result in fp0
   8905slog2d:
   8906	mov.l		(%a0),%d1		# sign/exponent longword of X; N flag = sign bit
   8907	blt.w		invalid			# X < 0: operand error
   8908	mov.l		%d0,-(%sp)		# stash caller's rnd prec,mode
   8909	clr.l		%d0			# call slognd with d0 = 0
   8910	bsr		slognd			# log(X), X denorm.
   8911	fmov.l		(%sp)+,%fpcr		# pop saved prec,mode straight into FPCR
   8912	fmul.x		INV_L2(%pc),%fp0	# fp0 = log(X) * (1/log(2))
   8913	bra		t_minx2			# exit through t_minx2 (defined elsewhere)
   8914
   8915#########################################################################
   8916# stwotox():  computes 2**X for a normalized input			#
   8917# stwotoxd(): computes 2**X for a denormalized input			#
   8918# stentox():  computes 10**X for a normalized input			#
   8919# stentoxd(): computes 10**X for a denormalized input			#
   8920#									#
   8921# INPUT ***************************************************************	#
   8922#	a0 = pointer to extended precision input			#
   8923#	d0 = round precision,mode					#
   8924#									#
   8925# OUTPUT **************************************************************	#
   8926#	fp0 = 2**X or 10**X						#
   8927#									#
   8928# ACCURACY and MONOTONICITY *******************************************	#
   8929#	The returned result is within 2 ulps in 64 significant bit,	#
   8930#	i.e. within 0.5001 ulp to 53 bits if the result is subsequently	#
   8931#	rounded to double precision. The result is provably monotonic	#
   8932#	in double precision.						#
   8933#									#
   8934# ALGORITHM ***********************************************************	#
   8935#									#
   8936#	twotox								#
   8937#	1. If |X| > 16480, go to ExpBig.				#
   8938#									#
   8939#	2. If |X| < 2**(-70), go to ExpSm.				#
   8940#									#
   8941#	3. Decompose X as X = N/64 + r where |r| <= 1/128. Furthermore	#
   8942#		decompose N as						#
   8943#		 N = 64(M + M') + j,  j = 0,1,2,...,63.			#
   8944#									#
   8945#	4. Overwrite r := r * log2. Then				#
   8946#		2**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r).		#
   8947#		Go to expr to compute that expression.			#
   8948#									#
   8949#	tentox								#
   8950#	1. If |X| > 16480*log_10(2) (base 10 log of 2), go to ExpBig.	#
   8951#									#
   8952#	2. If |X| < 2**(-70), go to ExpSm.				#
   8953#									#
   8954#	3. Set y := X*log_2(10)*64 (base 2 log of 10). Set		#
   8955#		N := round-to-int(y). Decompose N as			#
   8956#		 N = 64(M + M') + j,  j = 0,1,2,...,63.			#
   8957#									#
   8958#	4. Define r as							#
   8959#		r := ((X - N*L1)-N*L2) * L10				#
   8960#		where L1, L2 are the leading and trailing parts of	#
   8961#		log_10(2)/64 and L10 is the natural log of 10. Then	#
   8962#		10**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r).		#
   8963#		Go to expr to compute that expression.			#
   8964#									#
   8965#	expr								#
   8966#	1. Fetch 2**(j/64) from table as Fact1 and Fact2.		#
   8967#									#
   8968#	2. Overwrite Fact1 and Fact2 by					#
   8969#		Fact1 := 2**(M) * Fact1					#
   8970#		Fact2 := 2**(M) * Fact2					#
   8971#		Thus Fact1 + Fact2 = 2**(M) * 2**(j/64).		#
   8972#									#
   8973#	3. Calculate P where 1 + P approximates exp(r):			#
   8974#		P = r + r*r*(A1+r*(A2+...+r*A5)).			#
   8975#									#
   8976#	4. Let AdjFact := 2**(M'). Return				#
   8977#		AdjFact * ( Fact1 + ((Fact1*P) + Fact2) ).		#
   8978#		Exit.							#
   8979#									#
   8980#	ExpBig								#
   8981#	1. Generate overflow by Huge * Huge if X > 0; otherwise,	#
   8982#	        generate underflow by Tiny * Tiny.			#
   8983#									#
   8984#	ExpSm								#
   8985#	1. Return 1 + X.						#
   8986#									#
   8987#########################################################################
   8988
   8989L2TEN64:
   8990	long		0x406A934F,0x0979A371	# 64*LOG10/LOG2, double (fmul.d operand in stentox)
   8991L10TWO1:
   8992	long		0x3F734413,0x509F8000	# LOG2/(64*LOG10), leading part, double
   8993
   8994L10TWO2:
   8995	long		0xBFCD0000,0xC0219DC1,0xDA994FD2,0x00000000	# LOG2/(64*LOG10), trailing part, extended
   8996
   8997LOG10:	long		0x40000000,0x935D8DDD,0xAAA8AC17,0x00000000	# natural log of 10, extended
   8998
   8999LOG2:	long		0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000	# natural log of 2, extended
   9000
   9001EXPA5:	long		0x3F56C16D,0x6F7BD0B2	# A5, double, approx. 1/720
   9002EXPA4:	long		0x3F811112,0x302C712C	# A4, double, approx. 1/120
   9003EXPA3:	long		0x3FA55555,0x55554CC1	# A3, double, approx. 1/24
   9004EXPA2:	long		0x3FC55555,0x55554A54	# A2, double, approx. 1/6
   9005EXPA1:	long		0x3FE00000,0x00000000,0x00000000,0x00000000	# A1 = 0.5; expr reads it with fadd.d (first two longs)
   9006
   9007TEXPTBL:	# 64 entries x 16 bytes, entry J ~ 2^(J/64): longs 0-2 are copied to FACT1; long 3 = FACT2 exponent word : FACT2 top mantissa word
   9008	long		0x3FFF0000,0x80000000,0x00000000,0x3F738000
   9009	long		0x3FFF0000,0x8164D1F3,0xBC030773,0x3FBEF7CA
   9010	long		0x3FFF0000,0x82CD8698,0xAC2BA1D7,0x3FBDF8A9
   9011	long		0x3FFF0000,0x843A28C3,0xACDE4046,0x3FBCD7C9
   9012	long		0x3FFF0000,0x85AAC367,0xCC487B15,0xBFBDE8DA
   9013	long		0x3FFF0000,0x871F6196,0x9E8D1010,0x3FBDE85C
   9014	long		0x3FFF0000,0x88980E80,0x92DA8527,0x3FBEBBF1
   9015	long		0x3FFF0000,0x8A14D575,0x496EFD9A,0x3FBB80CA
   9016	long		0x3FFF0000,0x8B95C1E3,0xEA8BD6E7,0xBFBA8373
   9017	long		0x3FFF0000,0x8D1ADF5B,0x7E5BA9E6,0xBFBE9670
   9018	long		0x3FFF0000,0x8EA4398B,0x45CD53C0,0x3FBDB700
   9019	long		0x3FFF0000,0x9031DC43,0x1466B1DC,0x3FBEEEB0
   9020	long		0x3FFF0000,0x91C3D373,0xAB11C336,0x3FBBFD6D
   9021	long		0x3FFF0000,0x935A2B2F,0x13E6E92C,0xBFBDB319
   9022	long		0x3FFF0000,0x94F4EFA8,0xFEF70961,0x3FBDBA2B
   9023	long		0x3FFF0000,0x96942D37,0x20185A00,0x3FBE91D5
   9024	long		0x3FFF0000,0x9837F051,0x8DB8A96F,0x3FBE8D5A
   9025	long		0x3FFF0000,0x99E04593,0x20B7FA65,0xBFBCDE7B
   9026	long		0x3FFF0000,0x9B8D39B9,0xD54E5539,0xBFBEBAAF
   9027	long		0x3FFF0000,0x9D3ED9A7,0x2CFFB751,0xBFBD86DA
   9028	long		0x3FFF0000,0x9EF53260,0x91A111AE,0xBFBEBEDD
   9029	long		0x3FFF0000,0xA0B0510F,0xB9714FC2,0x3FBCC96E
   9030	long		0x3FFF0000,0xA2704303,0x0C496819,0xBFBEC90B
   9031	long		0x3FFF0000,0xA43515AE,0x09E6809E,0x3FBBD1DB
   9032	long		0x3FFF0000,0xA5FED6A9,0xB15138EA,0x3FBCE5EB
   9033	long		0x3FFF0000,0xA7CD93B4,0xE965356A,0xBFBEC274
   9034	long		0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x3FBEA83C
   9035	long		0x3FFF0000,0xAB7A39B5,0xA93ED337,0x3FBECB00
   9036	long		0x3FFF0000,0xAD583EEA,0x42A14AC6,0x3FBE9301
   9037	long		0x3FFF0000,0xAF3B78AD,0x690A4375,0xBFBD8367
   9038	long		0x3FFF0000,0xB123F581,0xD2AC2590,0xBFBEF05F
   9039	long		0x3FFF0000,0xB311C412,0xA9112489,0x3FBDFB3C
   9040	long		0x3FFF0000,0xB504F333,0xF9DE6484,0x3FBEB2FB
   9041	long		0x3FFF0000,0xB6FD91E3,0x28D17791,0x3FBAE2CB
   9042	long		0x3FFF0000,0xB8FBAF47,0x62FB9EE9,0x3FBCDC3C
   9043	long		0x3FFF0000,0xBAFF5AB2,0x133E45FB,0x3FBEE9AA
   9044	long		0x3FFF0000,0xBD08A39F,0x580C36BF,0xBFBEAEFD
   9045	long		0x3FFF0000,0xBF1799B6,0x7A731083,0xBFBCBF51
   9046	long		0x3FFF0000,0xC12C4CCA,0x66709456,0x3FBEF88A
   9047	long		0x3FFF0000,0xC346CCDA,0x24976407,0x3FBD83B2
   9048	long		0x3FFF0000,0xC5672A11,0x5506DADD,0x3FBDF8AB
   9049	long		0x3FFF0000,0xC78D74C8,0xABB9B15D,0xBFBDFB17
   9050	long		0x3FFF0000,0xC9B9BD86,0x6E2F27A3,0xBFBEFE3C
   9051	long		0x3FFF0000,0xCBEC14FE,0xF2727C5D,0xBFBBB6F8
   9052	long		0x3FFF0000,0xCE248C15,0x1F8480E4,0xBFBCEE53
   9053	long		0x3FFF0000,0xD06333DA,0xEF2B2595,0xBFBDA4AE
   9054	long		0x3FFF0000,0xD2A81D91,0xF12AE45A,0x3FBC9124
   9055	long		0x3FFF0000,0xD4F35AAB,0xCFEDFA1F,0x3FBEB243
   9056	long		0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x3FBDE69A
   9057	long		0x3FFF0000,0xD99D15C2,0x78AFD7B6,0xBFB8BC61
   9058	long		0x3FFF0000,0xDBFBB797,0xDAF23755,0x3FBDF610
   9059	long		0x3FFF0000,0xDE60F482,0x5E0E9124,0xBFBD8BE1
   9060	long		0x3FFF0000,0xE0CCDEEC,0x2A94E111,0x3FBACB12
   9061	long		0x3FFF0000,0xE33F8972,0xBE8A5A51,0x3FBB9BFE
   9062	long		0x3FFF0000,0xE5B906E7,0x7C8348A8,0x3FBCF2F4
   9063	long		0x3FFF0000,0xE8396A50,0x3C4BDC68,0x3FBEF22F
   9064	long		0x3FFF0000,0xEAC0C6E7,0xDD24392F,0xBFBDBF4A
   9065	long		0x3FFF0000,0xED4F301E,0xD9942B84,0x3FBEC01A
   9066	long		0x3FFF0000,0xEFE4B99B,0xDCDAF5CB,0x3FBE8CAC
   9067	long		0x3FFF0000,0xF281773C,0x59FFB13A,0xBFBCBB3F
   9068	long		0x3FFF0000,0xF5257D15,0x2486CC2C,0x3FBEF73A
   9069	long		0x3FFF0000,0xF7D0DF73,0x0AD13BB9,0xBFB8B795
   9070	long		0x3FFF0000,0xFA83B2DB,0x722A033A,0x3FBEF84B
   9071	long		0x3FFF0000,0xFD3E0C0C,0xF486C175,0xBFBEF581
   9072
   9073	set		INT,L_SCR1		# longword scratch holding N
   9074
   9075	set		X,FP_SCR0		# saved copy of the input X
   9076	set		XDCARE,X+2		# (not referenced by the stwotox/stentox code that follows)
   9077	set		XFRAC,X+4		# (not referenced by the stwotox/stentox code that follows)
   9078
   9079	set		ADJFACT,FP_SCR0		# 2^(M'), built in expr; same scratch area as X and FACT1
   9080
   9081	set		FACT1,FP_SCR0		# leading part of 2^M * 2^(J/64); overwrites X
   9082	set		FACT1HI,FACT1+4
   9083	set		FACT1LOW,FACT1+8
   9084
   9085	set		FACT2,FP_SCR1		# trailing part of 2^M * 2^(J/64)
   9086	set		FACT2HI,FACT2+4
   9087	set		FACT2LOW,FACT2+8
   9088
   9089	global		stwotox
   9090#--ENTRY POINT FOR 2**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
   9091stwotox:
   9092	fmovm.x		(%a0),&0x80		# LOAD INPUT
   9093
   9094	mov.l		(%a0),%d1		# d1 hi word = sign/exponent of X
   9095	mov.w		4(%a0),%d1		# d1 lo word = top 16 mantissa bits
   9096	fmov.x		%fp0,X(%a6)		# keep a copy of X (sign is re-read at TEXPBIG)
   9097	and.l		&0x7FFFFFFF,%d1		# drop sign: d1 = compact form of |X|
   9098
   9099	cmp.l		%d1,&0x3FB98000		# |X| >= 2**(-70)?
   9100	bge.b		TWOOK1
   9101	bra.w		EXPBORS
   9102
   9103TWOOK1:
   9104	cmp.l		%d1,&0x400D80C0		# |X| > 16480?
   9105	ble.b		TWOMAIN
   9106	bra.w		EXPBORS
   9107
   9108TWOMAIN:
   9109#--USUAL CASE, 2^(-70) <= |X| <= 16480
   9110
   9111	fmov.x		%fp0,%fp1
   9112	fmul.s		&0x42800000,%fp1	# 64 * X
   9113	fmov.l		%fp1,INT(%a6)		# N = ROUND-TO-INT(64 X)
   9114	mov.l		%d2,-(%sp)		# save d2; restored in expr
   9115	lea		TEXPTBL(%pc),%a1	# LOAD ADDRESS OF TABLE OF 2^(J/64)
   9116	fmov.l		INT(%a6),%fp1		# N --> FLOATING FMT
   9117	mov.l		INT(%a6),%d1
   9118	mov.l		%d1,%d2
   9119	and.l		&0x3F,%d1		# d1 IS J
   9120	asl.l		&4,%d1			# DISPLACEMENT FOR 2^(J/64)
   9121	add.l		%d1,%a1			# ADDRESS FOR 2^(J/64)
   9122	asr.l		&6,%d2			# d2 IS L, N = 64L + J
   9123	mov.l		%d2,%d1
   9124	asr.l		&1,%d1			# d1 IS M
   9125	sub.l		%d1,%d2			# d2 IS M', N = 64(M+M') + J
   9126	add.l		&0x3FFF,%d2		# d2 = M' + 0x3FFF; stored as ADJFACT's exponent word in expr
   9127
   9128#--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
   9129#--d1 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
   9130#--ADJFACT = 2^(M').
   9131#--ON THE STACK SO FAR: d2 (PUSHED ABOVE); fp2/fp3 ARE PUSHED NEXT.
   9132
   9133	fmovm.x		&0x0c,-(%sp)		# save fp2/fp3
   9134
   9135	fmul.s		&0x3C800000,%fp1	# (1/64)*N
   9136	mov.l		(%a1)+,FACT1(%a6)	# copy table entry longs 0-2 into FACT1 (overwrites X)
   9137	mov.l		(%a1)+,FACT1HI(%a6)
   9138	mov.l		(%a1)+,FACT1LOW(%a6)
   9139	mov.w		(%a1)+,FACT2(%a6)	# FACT2 exponent word from table long 3
   9140
   9141	fsub.x		%fp1,%fp0		# X - (1/64)*INT(64 X)
   9142
   9143	mov.w		(%a1)+,FACT2HI(%a6)	# FACT2 top 16 mantissa bits from table long 3
   9144	clr.w		FACT2HI+2(%a6)		# rest of the FACT2 mantissa is zero
   9145	clr.l		FACT2LOW(%a6)
   9146	add.w		%d1,FACT1(%a6)		# add M to FACT1's exponent word
   9147	fmul.x		LOG2(%pc),%fp0		# FP0 IS R
   9148	add.w		%d1,FACT2(%a6)		# add M to FACT2's exponent word
   9149
   9150	bra.w		expr
   9151
   9152EXPBORS:
   9153#--REACHED FROM stwotox AND stentox BEFORE ANYTHING IS PUSHED; d1 = COMPACT |X|, fp0 = X
   9154	cmp.l		%d1,&0x3FFF8000		# |X| > 1 ?
   9155	bgt.b		TEXPBIG			# yes: the out-of-range test failed on the large side
   9156
   9157#--|X| IS SMALL, RETURN 1 + X
   9158
   9159	fmov.l		%d0,%fpcr		# restore users round prec,mode
   9160	fadd.s		&0x3F800000,%fp0	# RETURN 1 + X
   9161	bra		t_pinx2
   9162
   9163TEXPBIG:
   9164#--|X| IS LARGE, GENERATE OVERFLOW IF X > 0; ELSE GENERATE UNDERFLOW
   9165#--NOTHING HAS BEEN PUSHED ON THE STACK ON THIS PATH
   9166	mov.l		X(%a6),%d1		# sign/exponent longword of the saved X
   9167	cmp.l		%d1,&0
   9168	blt.b		EXPNEG			# X < 0
   9169
   9170	bra		t_ovfl2			# t_ovfl expects positive value
   9171
   9172EXPNEG:
   9173	bra		t_unfl2			# t_unfl expects positive value
   9174
   9175	global		stwotoxd
   9176stwotoxd:
   9177#--ENTRY POINT FOR 2**(X) FOR DENORMALIZED ARGUMENT
   9178
   9179	fmov.l		%d0,%fpcr		# set user's rounding mode/precision
   9180	fmov.s		&0x3F800000,%fp0	# RETURN 1 + X
   9181	mov.l		(%a0),%d1		# first longword of X (sign/exponent)
   9182	or.l		&0x00800001,%d1		# make a tiny nonzero single-precision pattern; presumably only X's sign bit survives for a denorm -- confirm
   9183	fadd.s		%d1,%fp0		# fp0 = 1 + that tiny value, under the user's FPCR
   9184	bra		t_pinx2
   9185
   9186	global		stentox
   9187#--ENTRY POINT FOR 10**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
   9188stentox:
   9189	fmovm.x		(%a0),&0x80		# LOAD INPUT
   9190
   9191	mov.l		(%a0),%d1		# d1 hi word = sign/exponent of X
   9192	mov.w		4(%a0),%d1		# d1 lo word = top 16 mantissa bits
   9193	fmov.x		%fp0,X(%a6)		# keep a copy of X (sign is re-read at TEXPBIG)
   9194	and.l		&0x7FFFFFFF,%d1		# drop sign: d1 = compact form of |X|
   9195
   9196	cmp.l		%d1,&0x3FB98000		# |X| >= 2**(-70)?
   9197	bge.b		TENOK1
   9198	bra.w		EXPBORS
   9199
   9200TENOK1:
   9201	cmp.l		%d1,&0x400B9B07		# |X| <= 16480*log2/log10 ?
   9202	ble.b		TENMAIN
   9203	bra.w		EXPBORS
   9204
   9205TENMAIN:
   9206#--USUAL CASE, 2^(-70) <= |X| <= 16480 LOG 2 / LOG 10
   9207
   9208	fmov.x		%fp0,%fp1
   9209	fmul.d		L2TEN64(%pc),%fp1	# X*64*LOG10/LOG2
   9210	fmov.l		%fp1,INT(%a6)		# N=INT(X*64*LOG10/LOG2)
   9211	mov.l		%d2,-(%sp)		# save d2; restored in expr
   9212	lea		TEXPTBL(%pc),%a1	# LOAD ADDRESS OF TABLE OF 2^(J/64)
   9213	fmov.l		INT(%a6),%fp1		# N --> FLOATING FMT
   9214	mov.l		INT(%a6),%d1
   9215	mov.l		%d1,%d2
   9216	and.l		&0x3F,%d1		# d1 IS J
   9217	asl.l		&4,%d1			# DISPLACEMENT FOR 2^(J/64)
   9218	add.l		%d1,%a1			# ADDRESS FOR 2^(J/64)
   9219	asr.l		&6,%d2			# d2 IS L, N = 64L + J
   9220	mov.l		%d2,%d1
   9221	asr.l		&1,%d1			# d1 IS M
   9222	sub.l		%d1,%d2			# d2 IS M', N = 64(M+M') + J
   9223	add.l		&0x3FFF,%d2		# d2 = M' + 0x3FFF; stored as ADJFACT's exponent word in expr
   9224
   9225#--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
   9226#--d1 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
   9227#--ADJFACT = 2^(M').
   9228#--ON THE STACK SO FAR: d2 (PUSHED ABOVE); fp2/fp3 ARE PUSHED NEXT.
   9229	fmovm.x		&0x0c,-(%sp)		# save fp2/fp3
   9230
   9231	fmov.x		%fp1,%fp2		# fp2 = N as well
   9232
   9233	fmul.d		L10TWO1(%pc),%fp1	# N*(LOG2/64LOG10)_LEAD
   9234	mov.l		(%a1)+,FACT1(%a6)	# copy table entry longs 0-2 into FACT1 (overwrites X)
   9235
   9236	fmul.x		L10TWO2(%pc),%fp2	# N*(LOG2/64LOG10)_TRAIL
   9237
   9238	mov.l		(%a1)+,FACT1HI(%a6)
   9239	mov.l		(%a1)+,FACT1LOW(%a6)
   9240	fsub.x		%fp1,%fp0		# X - N L_LEAD
   9241	mov.w		(%a1)+,FACT2(%a6)	# FACT2 exponent word from table long 3
   9242
   9243	fsub.x		%fp2,%fp0		# (X - N L_LEAD) - N L_TRAIL
   9244
   9245	mov.w		(%a1)+,FACT2HI(%a6)	# FACT2 top 16 mantissa bits from table long 3
   9246	clr.w		FACT2HI+2(%a6)		# rest of the FACT2 mantissa is zero
   9247	clr.l		FACT2LOW(%a6)
   9248
   9249	fmul.x		LOG10(%pc),%fp0		# FP0 IS R
   9250	add.w		%d1,FACT1(%a6)		# add M to FACT1's exponent word
   9251	add.w		%d1,FACT2(%a6)		# add M to FACT2's exponent word
   9252
   9253expr:
   9254#--SHARED TAIL OF stwotox AND stentox. ON THE STACK: fp2/fp3 (TOP), THEN d2.
   9255#--d2 HOLDS M'+0x3FFF (EXPONENT OF ADJFACT), FACT1 + FACT2 = 2**(M) * 2**(J/64).
   9256#--FP0 IS R. THE FOLLOWING CODE COMPUTES
   9257#--	2**(M'+M) * 2**(J/64) * EXP(R)
   9258
   9259	fmov.x		%fp0,%fp1
   9260	fmul.x		%fp1,%fp1		# FP1 IS S = R*R
   9261
   9262	fmov.d		EXPA5(%pc),%fp2		# FP2 IS A5
   9263	fmov.d		EXPA4(%pc),%fp3		# FP3 IS A4
   9264
   9265	fmul.x		%fp1,%fp2		# FP2 IS S*A5
   9266	fmul.x		%fp1,%fp3		# FP3 IS S*A4
   9267
   9268	fadd.d		EXPA3(%pc),%fp2		# FP2 IS A3+S*A5
   9269	fadd.d		EXPA2(%pc),%fp3		# FP3 IS A2+S*A4
   9270
   9271	fmul.x		%fp1,%fp2		# FP2 IS S*(A3+S*A5)
   9272	fmul.x		%fp1,%fp3		# FP3 IS S*(A2+S*A4)
   9273
   9274	fadd.d		EXPA1(%pc),%fp2		# FP2 IS A1+S*(A3+S*A5)
   9275	fmul.x		%fp0,%fp3		# FP3 IS R*S*(A2+S*A4)
   9276
   9277	fmul.x		%fp1,%fp2		# FP2 IS S*(A1+S*(A3+S*A5))
   9278	fadd.x		%fp3,%fp0		# FP0 IS R+R*S*(A2+S*A4)
   9279	fadd.x		%fp2,%fp0		# FP0 IS EXP(R) - 1
   9280
   9281	fmovm.x		(%sp)+,&0x30		# restore fp2/fp3
   9282
   9283#--FINAL RECONSTRUCTION PROCESS
   9284#--EXP(X) = 2^M*2^(J/64) + 2^M*2^(J/64)*(EXP(R)-1)  -  (1 OR 0)
   9285
   9286	fmul.x		FACT1(%a6),%fp0		# FACT1 * (EXP(R)-1)
   9287	fadd.x		FACT2(%a6),%fp0		# ... + FACT2
   9288	fadd.x		FACT1(%a6),%fp0		# ... + FACT1
   9289
   9290	fmov.l		%d0,%fpcr		# restore users round prec,mode
   9291	mov.w		%d2,ADJFACT(%a6)	# INSERT EXPONENT
   9292	mov.l		(%sp)+,%d2		# restore d2 pushed in TWOMAIN/TENMAIN
   9293	mov.l		&0x80000000,ADJFACT+4(%a6)	# ADJFACT mantissa = 1.0 (FACT1 is no longer needed)
   9294	clr.l		ADJFACT+8(%a6)
   9295	mov.b		&FMUL_OP,%d1		# last inst is MUL
   9296	fmul.x		ADJFACT(%a6),%fp0	# FINAL ADJUSTMENT
   9297	bra		t_catch
   9298
   9299	global		stentoxd
   9300stentoxd:
   9301#--ENTRY POINT FOR 10**(X) FOR DENORMALIZED ARGUMENT
   9302
   9303	fmov.l		%d0,%fpcr		# set user's rounding mode/precision
   9304	fmov.s		&0x3F800000,%fp0	# RETURN 1 + X
   9305	mov.l		(%a0),%d1		# first longword of X (sign/exponent)
   9306	or.l		&0x00800001,%d1		# make a tiny nonzero single-precision pattern; presumably only X's sign bit survives for a denorm -- confirm
   9307	fadd.s		%d1,%fp0		# fp0 = 1 + that tiny value, under the user's FPCR
   9308	bra		t_pinx2
   9309
   9310#########################################################################
   9311# smovcr(): returns the ROM constant at the offset specified in d1	#
   9312#	    rounded to the mode and precision specified in d0.		#
   9313#									#
   9314# INPUT	***************************************************************	#
   9315#	d0 = rnd prec,mode						#
   9316#	d1 = ROM offset							#
   9317#									#
   9318# OUTPUT **************************************************************	#
   9319#	fp0 = the ROM constant rounded to the user's rounding mode,prec	#
   9320#									#
   9321#########################################################################
   9322
   9323	global		smovcr
   9324smovcr:
   9325	mov.l		%d1,-(%sp)		# save rom offset for a sec
   9326
   9327	lsr.b		&0x4,%d0		# shift ctrl bits to lo
   9328	mov.l		%d0,%d1			# make a copy
   9329	andi.w		&0x3,%d1		# extract rnd mode
   9330	andi.w		&0xc,%d0		# extract rnd prec
   9331	swap		%d0			# put rnd prec in hi
   9332	mov.w		%d1,%d0			# put rnd mode in lo
   9333
   9334	mov.l		(%sp)+,%d1		# get rom offset
   9335
   9336#
   9337# check range of offset (byte compares with ble, i.e. signed: $80-$ff take the first z_val branch)
   9338#
   9339	tst.b		%d1			# if zero, offset is to pi
   9340	beq.b		pi_tbl			# it is pi
   9341	cmpi.b		%d1,&0x0a		# check range $01 - $0a
   9342	ble.b		z_val			# if in this range, return zero
   9343	cmpi.b		%d1,&0x0e		# check range $0b - $0e
   9344	ble.b		sm_tbl			# valid constants in this range
   9345	cmpi.b		%d1,&0x2f		# check range $0f - $2f
   9346	ble.b		z_val			# if in this range, return zero
   9347	cmpi.b		%d1,&0x3f		# check range $30 - $3f
   9348	ble.b		bg_tbl			# valid constants in this range
   9349
   9350z_val:						# also reached by fall-through for $40 - $7f
   9351	bra.l		ld_pzero		# return a zero
   9352
   9353#
   9354# the answer is PI rounded to the proper precision.
   9355#
   9356# fetch a pointer to the answer table relating to the proper rounding
   9357# mode (d1 is zero here, so the table index computed at no_finx is zero).
   9358#
   9359pi_tbl:
   9360	tst.b		%d0			# is rmode RN?
   9361	bne.b		pi_not_rn		# no
   9362pi_rn:
   9363	lea.l		PIRN(%pc),%a0		# yes; load PI RN table addr
   9364	bra.w		set_finx
   9365pi_not_rn:
   9366	cmpi.b		%d0,&rp_mode		# is rmode RP?
   9367	beq.b		pi_rp			# yes
   9368pi_rzrm:
   9369	lea.l		PIRZRM(%pc),%a0		# no; load PI RZ,RM table addr
   9370	bra.b		set_finx
   9371pi_rp:
   9372	lea.l		PIRP(%pc),%a0		# load PI RP table addr
   9373	bra.b		set_finx
   9374
   9375#
   9376# the answer is one of:
   9377#	$0B	log10(2)	(inexact)
   9378#	$0C	e		(inexact)
   9379#	$0D	log2(e)		(inexact)
   9380#	$0E	log10(e)	(exact)
   9381#
   9382# fetch a pointer to the answer table relating to the proper rounding
   9383# mode.
   9384#
   9385sm_tbl:
   9386	subi.b		&0xb,%d1		# make offset in 0-3 range
   9387	tst.b		%d0			# is rmode RN?
   9388	bne.b		sm_not_rn		# no
   9389sm_rn:
   9390	lea.l		SMALRN(%pc),%a0		# yes; load RN table addr
   9391sm_tbl_cont:
   9392	cmpi.b		%d1,&0x2		# is result log10(e)?
   9393	ble.b		set_finx		# no; answer is inexact
   9394	bra.b		no_finx			# yes; answer is exact
   9395sm_not_rn:
   9396	cmpi.b		%d0,&rp_mode		# is rmode RP?
   9397	beq.b		sm_rp			# yes
   9398sm_rzrm:
   9399	lea.l		SMALRZRM(%pc),%a0	# no; load RZ,RM table addr
   9400	bra.b		sm_tbl_cont
   9401sm_rp:
   9402	lea.l		SMALRP(%pc),%a0		# load RP table addr
   9403	bra.b		sm_tbl_cont
   9404
   9405#
   9406# the answer is one of:
   9407#	$30	ln(2)		(inexact)
   9408#	$31	ln(10)		(inexact)
   9409#	$32	10^0		(exact)
   9410#	$33	10^1		(exact)
   9411#	$34	10^2		(exact)
   9412#	$35	10^4		(exact)
   9413#	$36	10^8		(exact)
   9414#	$37	10^16		(exact)
   9415#	$38	10^32		(inexact)
   9416#	$39	10^64		(inexact)
   9417#	$3A	10^128		(inexact)
   9418#	$3B	10^256		(inexact)
   9419#	$3C	10^512		(inexact)
   9420#	$3D	10^1024		(inexact)
   9421#	$3E	10^2048		(inexact)
   9422#	$3F	10^4096		(inexact)
   9423#
   9424# fetch a pointer to the answer table relating to the proper rounding
   9425# mode.
   9426#
   9427bg_tbl:
   9428	subi.b		&0x30,%d1		# make offset in 0-f range
   9429	tst.b		%d0			# is rmode RN?
   9430	bne.b		bg_not_rn		# no
   9431bg_rn:
   9432	lea.l		BIGRN(%pc),%a0		# yes; load RN table addr
   9433bg_tbl_cont:
   9434	cmpi.b		%d1,&0x1		# is offset <= $31?
   9435	ble.b		set_finx		# yes; answer is inexact
   9436	cmpi.b		%d1,&0x7		# is $32 <= offset <= $37?
   9437	ble.b		no_finx			# yes; answer is exact
   9438	bra.b		set_finx		# no; answer is inexact
   9439bg_not_rn:
   9440	cmpi.b		%d0,&rp_mode		# is rmode RP?
   9441	beq.b		bg_rp			# yes
   9442bg_rzrm:
   9443	lea.l		BIGRZRM(%pc),%a0	# no; load RZ,RM table addr
   9444	bra.b		bg_tbl_cont
   9445bg_rp:
   9446	lea.l		BIGRP(%pc),%a0		# load RP table addr
   9447	bra.b		bg_tbl_cont
   9448
   9449# answer is inexact, so set INEX2 and AINEX in the user's FPSR.
   9450set_finx:
   9451	ori.l		&inx2a_mask,USER_FPSR(%a6) # set INEX2/AINEX
   9452no_finx:
   9453	mulu.w		&0xc,%d1		# index * 12 bytes per entry; NOTE(review): uses d1.w, assumes bits 15-8 of the offset are zero -- confirm in caller
   9454	swap		%d0			# put rnd prec in lo word
   9455	tst.b		%d0			# is precision extended?
   9456
   9457	bne.b		not_ext			# nonzero = single/double: go round; zero = xprec, no round
   9458
   9459# Precision is extended
   9460	fmovm.x		(%a0,%d1.w),&0x80	# return result in fp0
   9461	rts
   9462
   9463# Precision is single or double
   9464not_ext:
   9465	swap		%d0			# rnd prec in upper word
   9466
   9467# call round() to round the answer to the proper precision.
   9468# exponents out of range for single or double DO NOT cause underflow
   9469# or overflow.
   9470	mov.w		0x0(%a0,%d1.w),FP_SCR1_EX(%a6) # load first word
   9471	mov.l		0x4(%a0,%d1.w),FP_SCR1_HI(%a6) # load second word
   9472	mov.l		0x8(%a0,%d1.w),FP_SCR1_LO(%a6) # load third word
   9473	mov.l		%d0,%d1			# d1 = rnd prec (hi word), rnd mode (lo word) for _round
   9474	clr.l		%d0			# clear g,r,s
   9475	lea		FP_SCR1(%a6),%a0	# pass ptr to answer
   9476	clr.w		LOCAL_SGN(%a0)		# sign always positive
   9477	bsr.l		_round			# round the mantissa
   9478
   9479	fmovm.x		(%a0),&0x80		# return rounded result in fp0
   9480	rts
   9481
   9482	align		0x4
   9483
   9484PIRN:	long		0x40000000,0xc90fdaa2,0x2168c235	# pi
   9485PIRZRM:	long		0x40000000,0xc90fdaa2,0x2168c234	# pi
   9486PIRP:	long		0x40000000,0xc90fdaa2,0x2168c235	# pi
   9487
   9488SMALRN:	long		0x3ffd0000,0x9a209a84,0xfbcff798	# log10(2)
   9489	long		0x40000000,0xadf85458,0xa2bb4a9a	# e
   9490	long		0x3fff0000,0xb8aa3b29,0x5c17f0bc	# log2(e)
   9491	long		0x3ffd0000,0xde5bd8a9,0x37287195	# log10(e)
   9492	long		0x00000000,0x00000000,0x00000000	# 0.0 (index 4; smovcr sends offset $0f to z_val, so never selected)
   9493
   9494SMALRZRM:					# 12-byte entries, same order as SMALRN
   9495	long		0x3ffd0000,0x9a209a84,0xfbcff798	# log10(2)
   9496	long		0x40000000,0xadf85458,0xa2bb4a9a	# e
   9497	long		0x3fff0000,0xb8aa3b29,0x5c17f0bb	# log2(e)
   9498	long		0x3ffd0000,0xde5bd8a9,0x37287195	# log10(e)
   9499	long		0x00000000,0x00000000,0x00000000	# 0.0 (index 4; smovcr sends offset $0f to z_val, so never selected)
   9500
   9501SMALRP:	long		0x3ffd0000,0x9a209a84,0xfbcff799	# log10(2)
   9502	long		0x40000000,0xadf85458,0xa2bb4a9b	# e
   9503	long		0x3fff0000,0xb8aa3b29,0x5c17f0bc	# log2(e)
   9504	long		0x3ffd0000,0xde5bd8a9,0x37287195	# log10(e)
   9505	long		0x00000000,0x00000000,0x00000000	# 0.0 (index 4; smovcr sends offset $0f to z_val, so never selected)
   9506
   9507BIGRN:	long		0x3ffe0000,0xb17217f7,0xd1cf79ac	# ln(2)
   9508	long		0x40000000,0x935d8ddd,0xaaa8ac17	# ln(10)
   9509
   9510	long		0x3fff0000,0x80000000,0x00000000	# 10 ^ 0
   9511	long		0x40020000,0xA0000000,0x00000000	# 10 ^ 1
   9512	long		0x40050000,0xC8000000,0x00000000	# 10 ^ 2
   9513	long		0x400C0000,0x9C400000,0x00000000	# 10 ^ 4
   9514	long		0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8
   9515	long		0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16
   9516	long		0x40690000,0x9DC5ADA8,0x2B70B59E	# 10 ^ 32
   9517	long		0x40D30000,0xC2781F49,0xFFCFA6D5	# 10 ^ 64
   9518	long		0x41A80000,0x93BA47C9,0x80E98CE0	# 10 ^ 128
   9519	long		0x43510000,0xAA7EEBFB,0x9DF9DE8E	# 10 ^ 256
   9520	long		0x46A30000,0xE319A0AE,0xA60E91C7	# 10 ^ 512
   9521	long		0x4D480000,0xC9767586,0x81750C17	# 10 ^ 1024
   9522	long		0x5A920000,0x9E8B3B5D,0xC53D5DE5	# 10 ^ 2048
   9523	long		0x75250000,0xC4605202,0x8A20979B	# 10 ^ 4096
   9524
   9525BIGRZRM:					# 12-byte entries, same order as BIGRN
   9526	long		0x3ffe0000,0xb17217f7,0xd1cf79ab	# ln(2)
   9527	long		0x40000000,0x935d8ddd,0xaaa8ac16	# ln(10)
   9528
   9529	long		0x3fff0000,0x80000000,0x00000000	# 10 ^ 0
   9530	long		0x40020000,0xA0000000,0x00000000	# 10 ^ 1
   9531	long		0x40050000,0xC8000000,0x00000000	# 10 ^ 2
   9532	long		0x400C0000,0x9C400000,0x00000000	# 10 ^ 4
   9533	long		0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8
   9534	long		0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16
   9535	long		0x40690000,0x9DC5ADA8,0x2B70B59D	# 10 ^ 32
   9536	long		0x40D30000,0xC2781F49,0xFFCFA6D5	# 10 ^ 64
   9537	long		0x41A80000,0x93BA47C9,0x80E98CDF	# 10 ^ 128
   9538	long		0x43510000,0xAA7EEBFB,0x9DF9DE8D	# 10 ^ 256
   9539	long		0x46A30000,0xE319A0AE,0xA60E91C6	# 10 ^ 512
   9540	long		0x4D480000,0xC9767586,0x81750C17	# 10 ^ 1024
   9541	long		0x5A920000,0x9E8B3B5D,0xC53D5DE4	# 10 ^ 2048
   9542	long		0x75250000,0xC4605202,0x8A20979A	# 10 ^ 4096
   9543
   9544BIGRP:						# 12-byte entries, same order as BIGRN
   9545	long		0x3ffe0000,0xb17217f7,0xd1cf79ac	# ln(2)
   9546	long		0x40000000,0x935d8ddd,0xaaa8ac17	# ln(10)
   9547
   9548	long		0x3fff0000,0x80000000,0x00000000	# 10 ^ 0
   9549	long		0x40020000,0xA0000000,0x00000000	# 10 ^ 1
   9550	long		0x40050000,0xC8000000,0x00000000	# 10 ^ 2
   9551	long		0x400C0000,0x9C400000,0x00000000	# 10 ^ 4
   9552	long		0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8
   9553	long		0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16
   9554	long		0x40690000,0x9DC5ADA8,0x2B70B59E	# 10 ^ 32
   9555	long		0x40D30000,0xC2781F49,0xFFCFA6D6	# 10 ^ 64
   9556	long		0x41A80000,0x93BA47C9,0x80E98CE0	# 10 ^ 128
   9557	long		0x43510000,0xAA7EEBFB,0x9DF9DE8E	# 10 ^ 256
   9558	long		0x46A30000,0xE319A0AE,0xA60E91C7	# 10 ^ 512
   9559	long		0x4D480000,0xC9767586,0x81750C18	# 10 ^ 1024
   9560	long		0x5A920000,0x9E8B3B5D,0xC53D5DE5	# 10 ^ 2048
   9561	long		0x75250000,0xC4605202,0x8A20979B	# 10 ^ 4096
   9562
   9563#########################################################################
   9564# sscale(): computes the destination operand scaled by the source	#
   9565#	    operand. If the absolute value of the source operand is	#
   9566#	    >= 2^14, an overflow or underflow is returned.		#
   9567#									#
   9568# INPUT *************************************************************** #
   9569#	a0  = pointer to double-extended source operand X		#
   9570#	a1  = pointer to double-extended destination operand Y		#
   9571#									#
   9572# OUTPUT ************************************************************** #
   9573#	fp0 =  scale(X,Y)						#
   9574#									#
   9575#########################################################################
   9576
   9577set	SIGN,		L_SCR1
   9578
   9579	global		sscale
   9580sscale:
   9581	mov.l		%d0,-(%sp)		# store off ctrl bits for now
   9582
   9583	mov.w		DST_EX(%a1),%d1		# get dst exponent
   9584	smi.b		SIGN(%a6)		# use SIGN to hold dst sign
   9585	andi.l		&0x00007fff,%d1		# strip sign from dst exp
   9586
   9587	mov.w		SRC_EX(%a0),%d0		# check src bounds
   9588	andi.w		&0x7fff,%d0		# clr src sign bit
   9589	cmpi.w		%d0,&0x3fff		# is src ~ ZERO?
   9590	blt.w		src_small		# yes
   9591	cmpi.w		%d0,&0x400c		# no; is src too big?
   9592	bgt.w		src_out			# yes
   9593
   9594#
   9595# Source is within 2^14 range.
   9596#
   9597src_ok:
   9598	fintrz.x	SRC(%a0),%fp0		# calc int of src
   9599	fmov.l		%fp0,%d0		# int src to d0
   9600# don't want any accrued bits from the fintrz showing up later since
   9601# we may need to read the fpsr for the last fp op in t_catch2().
   9602	fmov.l		&0x0,%fpsr		# clear the fpsr
   9603
   9604	tst.b		DST_HI(%a1)		# is dst denormalized?
   9605	bmi.b		sok_norm		# no; msb of the mantissa is set
   9606
   9607# the dst is a DENORM. normalize the DENORM and add the adjustment to
   9608# the src value. then, jump to the norm part of the routine.
   9609sok_dnrm:
   9610	mov.l		%d0,-(%sp)		# save src for now
   9611
   9612	mov.w		DST_EX(%a1),FP_SCR0_EX(%a6) # make a copy
   9613	mov.l		DST_HI(%a1),FP_SCR0_HI(%a6)
   9614	mov.l		DST_LO(%a1),FP_SCR0_LO(%a6)
   9615
   9616	lea		FP_SCR0(%a6),%a0	# pass ptr to DENORM
   9617	bsr.l		norm			# normalize the DENORM
   9618	neg.l		%d0			# presumably d0 = shift count from norm(); negate it - TODO confirm
   9619	add.l		(%sp)+,%d0		# add adjustment to src
   9620
   9621	fmovm.x		FP_SCR0(%a6),&0x80	# load normalized DENORM
   9622
   9623	cmpi.w		%d0,&-0x3fff		# is the shft amt really low?
   9624	bge.b		sok_norm2		# thank goodness no
   9625
   9626# the multiply factor that we're trying to create should be a denorm
   9627# for the multiply to work. Therefore, we're going to actually do a
   9628# multiply with a denorm which will cause an unimplemented data type
   9629# exception to be put into the machine which will be caught and corrected
   9630# later. we don't do this with the DENORMs above because this method
   9631# is slower. but, don't fret, I don't see it being used much either.
   9632	fmov.l		(%sp)+,%fpcr		# restore user fpcr
   9633	mov.l		&0x80000000,%d1		# load normalized mantissa
   9634	subi.l		&-0x3fff,%d0		# how many should we shift?
   9635	neg.l		%d0			# make it positive
   9636	cmpi.b		%d0,&0x20		# is it >= 32?
   9637	bge.b		sok_dnrm_32		# yes
   9638	lsr.l		%d0,%d1			# no; bit stays in upper lw
   9639	clr.l		-(%sp)			# insert zero low mantissa
   9640	mov.l		%d1,-(%sp)		# insert new high mantissa
   9641	clr.l		-(%sp)			# make zero exponent
   9642	bra.b		sok_norm_cont
   9643sok_dnrm_32:
   9644	subi.b		&0x20,%d0		# get shift count
   9645	lsr.l		%d0,%d1			# make low mantissa longword
   9646	mov.l		%d1,-(%sp)		# insert new low mantissa
   9647	clr.l		-(%sp)			# insert zero high mantissa
   9648	clr.l		-(%sp)			# make zero exponent
   9649	bra.b		sok_norm_cont
   9650
   9651# the src will force the dst to a DENORM value or worse. so, let's
   9652# create an fp multiply that will create the result.
   9653sok_norm:
   9654	fmovm.x		DST(%a1),&0x80		# load fp0 with normalized dst
   9655sok_norm2:
   9656	fmov.l		(%sp)+,%fpcr		# restore user fpcr
   9657
   9658	addi.w		&0x3fff,%d0		# turn src amt into exp value
   9659	swap		%d0			# put exponent in high word
   9660	clr.l		-(%sp)			# insert zero lo mantissa
   9661	mov.l		&0x80000000,-(%sp)	# insert new high mantissa
   9662	mov.l		%d0,-(%sp)		# insert new exponent (in high word)
   9663
   9664sok_norm_cont:
   9665	fmov.l		%fpcr,%d0		# d0 needs fpcr for t_catch2
   9666	mov.b		&FMUL_OP,%d1		# last inst is MUL
   9667	fmul.x		(%sp)+,%fp0		# do the multiply; pops the 12-byte factor
   9668	bra		t_catch2		# catch any exceptions
   9669
   9670#
   9671# Source is outside of 2^14 range.  Test the sign and branch
   9672# to the appropriate exception handler.
   9673#
   9674src_out:
   9675	mov.l		(%sp)+,%d0		# restore ctrl bits
   9676	exg		%a0,%a1			# swap src,dst ptrs: a0 = dst, a1 = src
   9677	tst.b		SRC_EX(%a1)		# is src negative?
   9678	bmi		t_unfl			# yes; underflow (handler reads sign via a0 = dst)
   9679	bra		t_ovfl_sc		# no; overflow
   9680
   9681#
   9682# The source input is below 1, so we check for denormalized numbers
   9683# and set unfl.
   9684#
   9685src_small:
   9686	tst.b		DST_HI(%a1)		# is dst denormalized?
   9687	bpl.b		ssmall_done		# yes
   9688
   9689	mov.l		(%sp)+,%d0		# pop ctrl bits into d0
   9690	fmov.l		%d0,%fpcr		# no; load control bits
   9691	mov.b		&FMOV_OP,%d1		# last inst is MOVE
   9692	fmov.x		DST(%a1),%fp0		# simply return dest
   9693	bra		t_catch2
   9694ssmall_done:
   9695	mov.l		(%sp)+,%d0		# load control bits into d0
   9696	mov.l		%a1,%a0			# pass ptr to dst
   9697	bra		t_resdnrm
   9698
   9699#########################################################################
   9700# smod(): computes the fp MOD of the input values X,Y.			#
   9701# srem(): computes the fp (IEEE) REM of the input values X,Y.		#
   9702#									#
   9703# INPUT *************************************************************** #
   9704#	a0 = pointer to extended precision source operand Y		#
   9705#	a1 = pointer to extended precision destination operand X	#
   9706#	d0 = round precision,mode					#
   9707#									#
   9708#	The input operands X and Y can be either normalized or		#
   9709#	denormalized.							#
   9710#									#
   9711# OUTPUT ************************************************************** #
   9712#      fp0 = FREM(X,Y) or FMOD(X,Y)					#
   9713#									#
   9714# ALGORITHM *********************************************************** #
   9715#									#
   9716#       Step 1.  Save and strip signs of X and Y: signX := sign(X),	#
   9717#                signY := sign(Y), X := |X|, Y := |Y|,			#
   9718#                signQ := signX EOR signY. Record whether MOD or REM	#
   9719#                is requested.						#
   9720#									#
   9721#       Step 2.  Set L := expo(X)-expo(Y), k := 0, Q := 0.		#
   9722#                If (L < 0) then					#
   9723#                   R := X, go to Step 4.				#
   9724#                else							#
   9725#                   R := 2^(-L)X, j := L.				#
   9726#                endif							#
   9727#									#
   9728#       Step 3.  Perform MOD(X,Y)					#
   9729#            3.1 If R = Y, go to Step 9.				#
   9730#            3.2 If R > Y, then { R := R - Y, Q := Q + 1}		#
   9731#            3.3 If j = 0, go to Step 4.				#
   9732#            3.4 k := k + 1, j := j - 1, Q := 2Q, R := 2R. Go to	#
   9733#                Step 3.1.						#
   9734#									#
   9735#       Step 4.  At this point, R = X - QY = MOD(X,Y). Set		#
   9736#                Last_Subtract := false (used in Step 7 below). If	#
   9737#                MOD is requested, go to Step 6.			#
   9738#									#
   9739#       Step 5.  R = MOD(X,Y), but REM(X,Y) is requested.		#
   9740#            5.1 If R < Y/2, then R = MOD(X,Y) = REM(X,Y). Go to	#
   9741#                Step 6.						#
   9742#            5.2 If R > Y/2, then { set Last_Subtract := true,		#
   9743#                Q := Q + 1, Y := signY*Y }. Go to Step 6.		#
   9744#            5.3 This is the tricky case of R = Y/2. If Q is odd,	#
   9745#                then { Q := Q + 1, signX := -signX }.			#
   9746#									#
   9747#       Step 6.  R := signX*R.						#
   9748#									#
   9749#       Step 7.  If Last_Subtract = true, R := R - Y.			#
   9750#									#
   9751#       Step 8.  Return signQ, last 7 bits of Q, and R as required.	#
   9752#									#
   9753#       Step 9.  At this point, R = 2^(-j)*X - Q Y = Y. Thus,		#
   9754#                X = 2^(j)*(Q+1)Y. set Q := 2^(j)*(Q+1),		#
   9755#                R := 0. Return signQ, last 7 bits of Q, and R.		#
   9756#									#
   9757#########################################################################
   9758
   9759	set		Mod_Flag,L_SCR3		# byte: 0 = MOD requested (smod), 1 = REM requested (srem)
   9760	set		Sc_Flag,L_SCR3+1	# byte: nonzero => Restore multiplies fp0 by Scale
   9761
   9762	set		SignY,L_SCR2		# word: sign/exponent word of Y
   9763	set		SignX,L_SCR2+2		# word: sign/exponent word of X
   9764	set		SignQ,L_SCR3+2		# word: sign of the quotient in bit 15
   9765
   9766	set		Y,FP_SCR0		# extended-precision scratch copy of |Y|
   9767	set		Y_Hi,Y+4
   9768	set		Y_Lo,Y+8
   9769
   9770	set		R,FP_SCR1		# extended-precision scratch copy of R
   9771	set		R_Hi,R+4
   9772	set		R_Lo,R+8
   9773
   9774Scale:
   9775	long		0x00010000,0x80000000,0x00000000,0x00000000	# ext constant: exponent 0x0001, mantissa 0x80000000_00000000
   9776
   9777	global		smod
   9778smod:
   9779	clr.b		FPSR_QBYTE(%a6)		# clear the quotient byte
   9780	mov.l		%d0,-(%sp)		# save ctrl bits
   9781	clr.b		Mod_Flag(%a6)		# Mod_Flag = 0: MOD requested
   9782	bra.b		Mod_Rem
   9783
   9784	global		srem
   9785srem:
   9786	clr.b		FPSR_QBYTE(%a6)		# clear the quotient byte
   9787	mov.l		%d0,-(%sp)		# save ctrl bits
   9788	mov.b		&0x1,Mod_Flag(%a6)	# Mod_Flag = 1: REM requested; falls into Mod_Rem
   9789
   9790Mod_Rem:
   9791#..Save sign of X and Y
   9792	movm.l		&0x3f00,-(%sp)		# save data registers d2-d7
   9793	mov.w		SRC_EX(%a0),%d3		# Y is read from the source operand
   9794	mov.w		%d3,SignY(%a6)		# keep sign/exponent word of Y
   9795	and.l		&0x00007FFF,%d3		# Y := |Y|
   9796
   9797#
   9798	mov.l		SRC_HI(%a0),%d4
   9799	mov.l		SRC_LO(%a0),%d5		# (D3,D4,D5) is |Y|
   9800
   9801	tst.l		%d3			# is exponent of Y zero?
   9802	bne.b		Y_Normal		# no
   9803
   9804	mov.l		&0x00003FFE,%d3		# $3FFD + 1
   9805	tst.l		%d4			# is hi(Y) zero?
   9806	bne.b		HiY_not0		# no
   9807
   9808HiY_0:
   9809	mov.l		%d5,%d4			# shift mantissa up 32 bits: lo(Y) becomes hi(Y)
   9810	clr.l		%d5
   9811	sub.l		&32,%d3			# account for the 32-bit shift
   9812	clr.l		%d6
   9813	bfffo		%d4{&0:&32},%d6		# d6 = offset of first set bit in d4
   9814	lsl.l		%d6,%d4			# move leading one up to bit 31
   9815	sub.l		%d6,%d3			# (D3,D4,D5) is normalized
   9816#	                                        ...with bias $7FFD
   9817	bra.b		Chk_X
   9818
   9819HiY_not0:
   9820	clr.l		%d6
   9821	bfffo		%d4{&0:&32},%d6		# d6 = offset of first set bit in hi(Y)
   9822	sub.l		%d6,%d3			# adjust exponent by the shift amount
   9823	lsl.l		%d6,%d4
   9824	mov.l		%d5,%d7			# a copy of D5
   9825	lsl.l		%d6,%d5
   9826	neg.l		%d6
   9827	add.l		&32,%d6			# d6 = 32 - shift amount
   9828	lsr.l		%d6,%d7			# bits of lo(Y) that cross into hi(Y)
   9829	or.l		%d7,%d4			# (D3,D4,D5) normalized
   9830#                                       ...with bias $7FFD
   9831	bra.b		Chk_X
   9832
   9833Y_Normal:
   9834	add.l		&0x00003FFE,%d3		# (D3,D4,D5) normalized
   9835#                                       ...with bias $7FFD
   9836
   9837Chk_X:
   9838	mov.w		DST_EX(%a1),%d0		# X is read from the destination operand
   9839	mov.w		%d0,SignX(%a6)		# keep sign/exponent word of X
   9840	mov.w		SignY(%a6),%d1
   9841	eor.l		%d0,%d1			# bit 15 = signX EOR signY
   9842	and.l		&0x00008000,%d1		# isolate the sign bit
   9843	mov.w		%d1,SignQ(%a6)		# sign(Q) obtained
   9844	and.l		&0x00007FFF,%d0		# X := |X|
   9845	mov.l		DST_HI(%a1),%d1
   9846	mov.l		DST_LO(%a1),%d2		# (D0,D1,D2) is |X|
   9847	tst.l		%d0			# is exponent of X zero?
   9848	bne.b		X_Normal		# no
   9849	mov.l		&0x00003FFE,%d0		# same starting exponent as used for a zero-exponent Y
   9850	tst.l		%d1			# is hi(X) zero?
   9851	bne.b		HiX_not0		# no
   9852
   9853HiX_0:
   9854	mov.l		%d2,%d1			# shift mantissa up 32 bits: lo(X) becomes hi(X)
   9855	clr.l		%d2
   9856	sub.l		&32,%d0			# account for the 32-bit shift
   9857	clr.l		%d6
   9858	bfffo		%d1{&0:&32},%d6		# d6 = offset of first set bit in d1
   9859	lsl.l		%d6,%d1			# move leading one up to bit 31
   9860	sub.l		%d6,%d0			# (D0,D1,D2) is normalized
   9861#                                       ...with bias $7FFD
   9862	bra.b		Init
   9863
   9864HiX_not0:
   9865	clr.l		%d6
   9866	bfffo		%d1{&0:&32},%d6		# d6 = offset of first set bit in hi(X)
   9867	sub.l		%d6,%d0			# adjust exponent by the shift amount
   9868	lsl.l		%d6,%d1
   9869	mov.l		%d2,%d7			# a copy of D2
   9870	lsl.l		%d6,%d2
   9871	neg.l		%d6
   9872	add.l		&32,%d6			# d6 = 32 - shift amount
   9873	lsr.l		%d6,%d7			# bits of lo(X) that cross into hi(X)
   9874	or.l		%d7,%d1			# (D0,D1,D2) normalized
   9875#                                       ...with bias $7FFD
   9876	bra.b		Init
   9877
   9878X_Normal:
   9879	add.l		&0x00003FFE,%d0		# (D0,D1,D2) normalized
   9880#                                       ...with bias $7FFD
   9881
   9882Init:
   9883#
   9884	mov.l		%d3,L_SCR1(%a6)		# save biased exp(Y)
   9885	mov.l		%d0,-(%sp)		# save biased exp(X)
   9886	sub.l		%d3,%d0			# L := expo(X)-expo(Y)
   9887
   9888	clr.l		%d6			# D6 := carry <- 0
   9889	clr.l		%d3			# D3 is Q
   9890	mov.l		&0,%a1			# A1 is k; j+k=L, Q=0
   9891
   9892#..(Carry,D1,D2) is R
   9893	tst.l		%d0			# is L >= 0?
   9894	bge.b		Mod_Loop_pre		# yes; run the subtract/shift loop with j = L
   9895
   9896#..expo(X) < expo(Y). Thus X = mod(X,Y)
   9897#
   9898	mov.l		(%sp)+,%d0		# restore d0 = biased exp(X)
   9899	bra.w		Get_Mod
   9900
   9901Mod_Loop_pre:
   9902	addq.l		&0x4,%sp		# erase exp(X)
   9903#..At this point  R = 2^(-L)X; Q = 0; k = 0; and  k+j = L
   9904Mod_Loop:
   9905	tst.l		%d6			# test carry bit
   9906	bgt.b		R_GT_Y			# carry set: R exceeds 64 bits, so R > Y
   9907
   9908#..At this point carry = 0, R = (D1,D2), Y = (D4,D5)
   9909	cmp.l		%d1,%d4			# compare hi(R) and hi(Y)
   9910	bne.b		R_NE_Y
   9911	cmp.l		%d2,%d5			# compare lo(R) and lo(Y)
   9912	bne.b		R_NE_Y
   9913
   9914#..At this point, R = Y
   9915	bra.w		Rem_is_0
   9916
   9917R_NE_Y:
   9918#..use the borrow of the previous compare
   9919	bcs.b		R_LT_Y			# borrow is set iff R < Y
   9920
   9921R_GT_Y:
   9922#..If Carry is set, then Y < (Carry,D1,D2) < 2Y. Otherwise, Carry = 0
   9923#..and Y < (D1,D2) < 2Y. Either way, perform R - Y
   9924	sub.l		%d5,%d2			# lo(R) - lo(Y)
   9925	subx.l		%d4,%d1			# hi(R) - hi(Y)
   9926	clr.l		%d6			# clear carry
   9927	addq.l		&1,%d3			# Q := Q + 1
   9928
   9929R_LT_Y:
   9930#..At this point, Carry=0, R < Y. R = 2^(k-L)X - QY; k+j = L; j >= 0.
   9931	tst.l		%d0			# see if j = 0.
   9932	beq.b		PostLoop		# yes; loop is finished
   9933
   9934	add.l		%d3,%d3			# Q := 2Q
   9935	add.l		%d2,%d2			# lo(R) = 2lo(R)
   9936	roxl.l		&1,%d1			# hi(R) = 2hi(R) + carry
   9937	scs		%d6			# set Carry if 2(R) overflows
   9938	addq.l		&1,%a1			# k := k+1
   9939	subq.l		&1,%d0			# j := j - 1
   9940#..At this point, R=(Carry,D1,D2) = 2^(k-L)X - QY, j+k=L, j >= 0, R < 2Y.
   9941
   9942	bra.b		Mod_Loop
   9943
   9944PostLoop:
   9945#..k = L, j = 0, Carry = 0, R = (D1,D2) = X - QY, R < Y.
   9946
   9947#..normalize R.
   9948	mov.l		L_SCR1(%a6),%d0		# new biased expo of R
   9949	tst.l		%d1			# is hi(R) zero?
   9950	bne.b		HiR_not0		# no
   9951
   9952HiR_0:
   9953	mov.l		%d2,%d1			# shift mantissa up 32 bits: lo(R) becomes hi(R)
   9954	clr.l		%d2
   9955	sub.l		&32,%d0			# account for the 32-bit shift
   9956	clr.l		%d6
   9957	bfffo		%d1{&0:&32},%d6		# d6 = offset of first set bit in d1
   9958	lsl.l		%d6,%d1			# move leading one up to bit 31
   9959	sub.l		%d6,%d0			# (D0,D1,D2) is normalized
   9960#                                       ...with bias $7FFD
   9961	bra.b		Get_Mod
   9962
   9963HiR_not0:
   9964	clr.l		%d6
   9965	bfffo		%d1{&0:&32},%d6		# d6 = offset of first set bit in hi(R)
   9966	bmi.b		Get_Mod			# already normalized
   9967	sub.l		%d6,%d0			# adjust exponent by the shift amount
   9968	lsl.l		%d6,%d1
   9969	mov.l		%d2,%d7			# a copy of D2
   9970	lsl.l		%d6,%d2
   9971	neg.l		%d6
   9972	add.l		&32,%d6			# d6 = 32 - shift amount
   9973	lsr.l		%d6,%d7			# bits of lo(R) that cross into hi(R)
   9974	or.l		%d7,%d1			# (D0,D1,D2) normalized
   9975
   9976#
   9977Get_Mod:
   9978	cmp.l		%d0,&0x000041FE		# is expo(R) (bias $7FFD) >= $41FE?
   9979	bge.b		No_Scale		# yes; the extra $3FFE can be removed directly
   9980Do_Scale:
   9981	mov.w		%d0,R(%a6)		# store expo(R) still carrying the extra $3FFE
   9982	mov.l		%d1,R_Hi(%a6)
   9983	mov.l		%d2,R_Lo(%a6)
   9984	mov.l		L_SCR1(%a6),%d6		# biased exp(Y), same $7FFD bias
   9985	mov.w		%d6,Y(%a6)
   9986	mov.l		%d4,Y_Hi(%a6)
   9987	mov.l		%d5,Y_Lo(%a6)
   9988	fmov.x		R(%a6),%fp0		# no exception
   9989	mov.b		&1,Sc_Flag(%a6)		# Restore must multiply the result by Scale
   9990	bra.b		ModOrRem
   9991No_Scale:
   9992	mov.l		%d1,R_Hi(%a6)
   9993	mov.l		%d2,R_Lo(%a6)
   9994	sub.l		&0x3FFE,%d0		# remove the extra $3FFE from expo(R)
   9995	mov.w		%d0,R(%a6)
   9996	mov.l		L_SCR1(%a6),%d6
   9997	sub.l		&0x3FFE,%d6		# remove the extra $3FFE from expo(Y)
   9998	mov.l		%d6,L_SCR1(%a6)		# ModOrRem reloads expo(Y) from here
   9999	fmov.x		R(%a6),%fp0		# fp0 = R
  10000	mov.w		%d6,Y(%a6)
  10001	mov.l		%d4,Y_Hi(%a6)
  10002	mov.l		%d5,Y_Lo(%a6)
  10003	clr.b		Sc_Flag(%a6)		# no rescale needed in Restore
  10004
  10005#
  10006ModOrRem:
  10007	tst.b		Mod_Flag(%a6)		# is REM requested?
  10008	beq.b		Fix_Sign		# no; MOD result is already in fp0
  10009
  10010	mov.l		L_SCR1(%a6),%d6		# new biased expo(Y)
  10011	subq.l		&1,%d6			# biased expo(Y/2)
  10012	cmp.l		%d0,%d6			# compare expo(R) with expo(Y/2)
  10013	blt.b		Fix_Sign		# expo(R) smaller: R < Y/2
  10014	bgt.b		Last_Sub		# expo(R) larger: R > Y/2
  10015
  10016	cmp.l		%d1,%d4			# exponents equal: compare hi mantissas
  10017	bne.b		Not_EQ
  10018	cmp.l		%d2,%d5			# compare lo mantissas
  10019	bne.b		Not_EQ
  10020	bra.w		Tie_Case		# R = Y/2 exactly
  10021
  10022Not_EQ:
  10023	bcs.b		Fix_Sign		# borrow from the last compare: R < Y/2
  10024
  10025Last_Sub:
  10026#
  10027	fsub.x		Y(%a6),%fp0		# no exceptions
  10028	addq.l		&1,%d3			# Q := Q + 1
  10029
  10030#
  10031Fix_Sign:
  10032#..Get sign of X
  10033	mov.w		SignX(%a6),%d6
  10034	bge.b		Get_Q			# sign bit clear: leave fp0 as is
  10035	fneg.x		%fp0			# sign bit set: negate the result
  10036
  10037#..Get Q
  10038#
  10039Get_Q:
  10040	clr.l		%d6
  10041	mov.w		SignQ(%a6),%d6		# D6 is sign(Q)
  10042	mov.l		&8,%d7
  10043	lsr.l		%d7,%d6			# move sign from bit 15 down to bit 7
  10044	and.l		&0x0000007F,%d3		# 7 bits of Q
  10045	or.l		%d6,%d3			# sign and bits of Q
  10046#	swap		%d3
  10047#	fmov.l		%fpsr,%d6
  10048#	and.l		&0xFF00FFFF,%d6
  10049#	or.l		%d3,%d6
  10050#	fmov.l		%d6,%fpsr		# put Q in fpsr
  10051	mov.b		%d3,FPSR_QBYTE(%a6)	# put Q in fpsr
  10052
  10053#
  10054Restore:
  10055	movm.l		(%sp)+,&0xfc		#  {%d2-%d7}
  10056	mov.l		(%sp)+,%d0		# pop ctrl bits saved at entry
  10057	fmov.l		%d0,%fpcr		# load them into the fpcr
  10058	tst.b		Sc_Flag(%a6)		# does the result need rescaling?
  10059	beq.b		Finish			# no
  10060	mov.b		&FMUL_OP,%d1		# last inst is MUL
  10061	fmul.x		Scale(%pc),%fp0		# may cause underflow
  10062	bra		t_catch2
  10063# the '040 package did this apparently to see if the dst operand for the
  10064# preceding fmul was a denorm. but, it better not have been since the
  10065# algorithm just got done playing with fp0 and expected no exceptions
  10066# as a result. trust me...
  10067#	bra		t_avoid_unsupp		# check for denorm as a
  10068#						;result of the scaling
  10069
  10070Finish:
  10071	mov.b		&FMOV_OP,%d1		# last inst is MOVE
  10072	fmov.x		%fp0,%fp0		# capture exceptions & round
  10073	bra		t_catch2
  10074
  10075Rem_is_0:
  10076#..R = 2^(-j)X - Q Y = Y, thus R = 0 and quotient = 2^j (Q+1)
  10077	addq.l		&1,%d3			# Q := Q + 1
  10078	cmp.l		%d0,&8			# D0 is j
  10079	bge.b		Q_Big			# j >= 8: low 7 bits of 2^j (Q+1) are all zero
  10080
  10081	lsl.l		%d0,%d3			# Q := 2^j (Q+1)
  10082	bra.b		Set_R_0
  10083
  10084Q_Big:
  10085	clr.l		%d3			# only the low 7 bits of Q are reported
  10086
  10087Set_R_0:
  10088	fmov.s		&0x00000000,%fp0	# R := +0
  10089	clr.b		Sc_Flag(%a6)		# no rescale needed in Restore
  10090	bra.w		Fix_Sign
  10091
  10092Tie_Case:
  10093#..Check parity of Q
  10094	mov.l		%d3,%d6
  10095	and.l		&0x00000001,%d6		# isolate the low bit of Q
  10096	tst.l		%d6
  10097	beq.w		Fix_Sign		# Q is even
  10098
  10099#..Q is odd, Q := Q + 1, signX := -signX
  10100	addq.l		&1,%d3			# Q := Q + 1
  10101	mov.w		SignX(%a6),%d6
  10102	eor.l		&0x00008000,%d6		# flip the sign bit
  10103	mov.w		%d6,SignX(%a6)		# Fix_Sign re-reads SignX
  10104	bra.w		Fix_Sign
  10105
  10106qnan:	long		0x7fff0000, 0xffffffff, 0xffffffff	# default NAN loaded into fp0 by t_operr
  10107
  10108#########################################################################
  10109# XDEF ****************************************************************	#
  10110#	t_dz(): Handle DZ exception during transcendental emulation.	#
  10111#	        Sets N bit according to sign of source operand.		#
  10112#	t_dz2(): Handle DZ exception during transcendental emulation.	#
  10113#		 Sets N bit always.					#
  10114#									#
  10115# XREF ****************************************************************	#
  10116#	None								#
  10117#									#
  10118# INPUT ***************************************************************	#
  10119#	a0 = pointer to source operand					#
  10120#									#
  10121# OUTPUT **************************************************************	#
  10122#	fp0 = default result						#
  10123#									#
  10124# ALGORITHM ***********************************************************	#
  10125#	- Store properly signed INF into fp0.				#
  10126#	- Set FPSR exception status dz bit, ccode inf bit, and		#
  10127#	  accrued dz bit.						#
  10128#									#
  10129#########################################################################
  10130
  10131	global		t_dz
  10132t_dz:
  10133	tst.b		SRC_EX(%a0)		# is src negative?
  10134	bmi.b		t_dz2			# yes
  10135
  10136dz_pinf:
  10137	fmov.s		&0x7f800000,%fp0	# return +INF in fp0
  10138	ori.l		&dzinf_mask,USER_FPSR(%a6) # set I/DZ/ADZ
  10139	rts
  10140
  10141	global		t_dz2
  10142t_dz2:
  10143	fmov.s		&0xff800000,%fp0	# return -INF in fp0 (no sign test here)
  10144	ori.l		&dzinf_mask+neg_mask,USER_FPSR(%a6) # set N/I/DZ/ADZ
  10145	rts
  10146
  10147#################################################################
  10148# OPERR exception:						#
  10149#	- set FPSR exception status operr bit, condition code	#
  10150#	  nan bit; Store default NAN into fp0			#
  10151#################################################################
  10152	global		t_operr
  10153t_operr:
  10154	ori.l		&opnan_mask,USER_FPSR(%a6) # set NaN/OPERR/AIOP
  10155	fmovm.x		qnan(%pc),&0x80		# return default NAN (qnan constant) in fp0
  10156	rts
  10157
  10158#################################################################
  10159# Extended DENORM:						#
  10160#	- For all functions that have a denormalized input and	#
  10161#	  that f(x)=x, this is the entry point.			#
  10162#	- we only return the EXOP here if either underflow or	#
  10163#	  inexact is enabled.					#
  10164#################################################################
  10165
  10166# Entry point for scale w/ extended denorm. The function does
  10167# NOT set INEX2/AUNFL/AINEX.
  10168	global		t_resdnrm
  10169t_resdnrm:
  10170	ori.l		&unfl_mask,USER_FPSR(%a6) # set UNFL
  10171	bra.b		xdnrm_con		# join the common denorm path shared with t_extdnrm
  10172
  10173	global		t_extdnrm
  10174t_extdnrm:
  10175	ori.l		&unfinx_mask,USER_FPSR(%a6) # set UNFL/INEX2/AUNFL/AINEX
  10176
  10177xdnrm_con:
  10178	mov.l		%a0,%a1			# make copy of src ptr
  10179	mov.l		%d0,%d1			# make copy of rnd prec,mode
  10180	andi.b		&0xc0,%d1		# extended precision?
  10181	bne.b		xdnrm_sd		# no
  10182
  10183# result precision is extended.
  10184	tst.b		LOCAL_EX(%a0)		# is denorm negative?
  10185	bpl.b		xdnrm_exit		# no
  10186
  10187	bset		&neg_bit,FPSR_CC(%a6)	# yes; set 'N' ccode bit
  10188	bra.b		xdnrm_exit
  10189
  10190# result precision is single or double
  10191xdnrm_sd:
  10192	mov.l		%a1,-(%sp)		# save operand ptr across the unf_sub call
  10193	tst.b		LOCAL_EX(%a0)		# is denorm pos or neg?
  10194	smi.b		%d1			# set d1 accordingly
  10195	bsr.l		unf_sub			# a0 = default result; NOTE(review): unf_sub also ORs unfinx_mask into USER_FPSR, even when entered via t_resdnrm - confirm intended
  10196	mov.l		(%sp)+,%a1		# restore operand ptr
  10197xdnrm_exit:
  10198	fmovm.x		(%a0),&0x80		# return default result in fp0
  10199
  10200	mov.b		FPCR_ENABLE(%a6),%d0
  10201	andi.b		&0x0a,%d0		# is UNFL or INEX enabled?
  10202	bne.b		xdnrm_ena		# yes
  10203	rts
  10204
  10205################
  10206# unfl enabled #
  10207################
  10208# we have a DENORM that needs to be converted into an EXOP.
  10209# so, normalize the mantissa, add 0x6000 to the new exponent,
  10210# and return the result in fp1.
  10211xdnrm_ena:
  10212	mov.w		LOCAL_EX(%a1),FP_SCR0_EX(%a6)	# copy the denorm (a1 = saved operand ptr) to FP_SCR0
  10213	mov.l		LOCAL_HI(%a1),FP_SCR0_HI(%a6)
  10214	mov.l		LOCAL_LO(%a1),FP_SCR0_LO(%a6)
  10215
  10216	lea		FP_SCR0(%a6),%a0	# pass ptr to the copy
  10217	bsr.l		norm			# normalize mantissa
  10218	addi.l		&0x6000,%d0		# add extra bias (presumably to the value norm() returns in d0 - TODO confirm)
  10219	andi.w		&0x8000,FP_SCR0_EX(%a6)	# keep old sign
  10220	or.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
  10221
  10222	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
  10223	rts
  10224
  10225#################################################################
  10226# UNFL exception:						#
  10227#	- This routine is for cases where even an EXOP isn't	#
  10228#	  large enough to hold the range of this result.	#
  10229#	  In such a case, the EXOP equals zero.			#
  10230#	- Return the default result to the proper precision	#
  10231#	  with the sign of this result being the same as that	#
  10232#	  of the src operand.					#
  10233#	- t_unfl2() is provided to force the result sign to	#
  10234#	  positive which is the desired result for fetox().	#
  10235#################################################################
  10236	global		t_unfl
  10237t_unfl:
  10238	ori.l		&unfinx_mask,USER_FPSR(%a6) # set UNFL/INEX2/AUNFL/AINEX
  10239
  10240	tst.b		(%a0)			# is result pos or neg?
  10241	smi.b		%d1			# set d1 accordingly
  10242	bsr.l		unf_sub			# calc default unfl result; a0 = ptr to it
  10243	fmovm.x		(%a0),&0x80		# return default result in fp0
  10244
  10245	fmov.s		&0x00000000,%fp1	# return EXOP in fp1
  10246	rts
  10247
  10248# t_unfl2 ALWAYS tells unf_sub to create a positive result
  10249	global		t_unfl2
  10250t_unfl2:
  10251	ori.l		&unfinx_mask,USER_FPSR(%a6) # set UNFL/INEX2/AUNFL/AINEX
  10252
  10253	sf.b		%d1			# set d1 to represent positive
  10254	bsr.l		unf_sub			# calc default unfl result; a0 = ptr to it
  10255	fmovm.x		(%a0),&0x80		# return default result in fp0
  10256
  10257	fmov.s		&0x0000000,%fp1		# return EXOP in fp1
  10258	rts
  10259
  10260#################################################################
  10261# OVFL exception:						#
  10262#	- This routine is for cases where even an EXOP isn't	#
  10263#	  large enough to hold the range of this result.	#
  10264#	- Return the default result to the proper precision	#
  10265#	  with the sign of this result being the same as that	#
  10266#	  of the src operand.					#
  10267#	- t_ovfl2() is provided to force the result sign to	#
  10268#	  positive which is the desired result for fcosh().	#
  10269#	- t_ovfl_sc() is provided for scale() which only sets	#
  10270#	  the inexact bits if the number is inexact for the	#
  10271#	  precision indicated.					#
  10272#################################################################
  10273
  10274	global		t_ovfl_sc
  10275t_ovfl_sc:
  10276	ori.l		&ovfl_inx_mask,USER_FPSR(%a6) # set OVFL/AOVFL/AINEX
  10277
  10278	mov.b		%d0,%d1			# fetch rnd mode/prec
  10279	andi.b		&0xc0,%d1		# extract rnd prec
  10280	beq.b		ovfl_work		# prec is extended
  10281
  10282	tst.b		LOCAL_HI(%a0)		# is dst a DENORM?
  10283	bmi.b		ovfl_sc_norm		# no
  10284
  10285# dst op is a DENORM. we have to normalize the mantissa to see if the
  10286# result would be inexact for the given precision. make a copy of the
  10287# dst so we don't screw up the version passed to us.
  10288	mov.w		LOCAL_EX(%a0),FP_SCR0_EX(%a6)
  10289	mov.l		LOCAL_HI(%a0),FP_SCR0_HI(%a6)
  10290	mov.l		LOCAL_LO(%a0),FP_SCR0_LO(%a6)
  10291	lea		FP_SCR0(%a6),%a0	# pass ptr to FP_SCR0
  10292	movm.l		&0xc080,-(%sp)		# save d0-d1/a0
  10293	bsr.l		norm			# normalize mantissa
  10294	movm.l		(%sp)+,&0x0103		# restore d0-d1/a0
  10295
  10296ovfl_sc_norm:
  10297	cmpi.b		%d1,&0x40		# is prec sgl?
  10298	bne.b		ovfl_sc_dbl		# no; dbl
  10299ovfl_sc_sgl:
  10300	tst.l		LOCAL_LO(%a0)		# is lo lw of sgl set?
  10301	bne.b		ovfl_sc_inx		# yes
  10302	tst.b		3+LOCAL_HI(%a0)		# is lo byte of hi lw set?
  10303	bne.b		ovfl_sc_inx		# yes
  10304	bra.b		ovfl_work		# don't set INEX2
  10305ovfl_sc_dbl:
  10306	mov.l		LOCAL_LO(%a0),%d1	# are any of lo 11 bits of
  10307	andi.l		&0x7ff,%d1		# dbl mantissa set?
  10308	beq.b		ovfl_work		# no; don't set INEX2
  10309ovfl_sc_inx:
  10310	ori.l		&inex2_mask,USER_FPSR(%a6) # set INEX2
  10311	bra.b		ovfl_work		# continue
  10312
  10313	global		t_ovfl
  10314t_ovfl:
  10315	ori.l		&ovfinx_mask,USER_FPSR(%a6) # set OVFL/INEX2/AOVFL/AINEX
  10316
  10317ovfl_work:
  10318	tst.b		LOCAL_EX(%a0)		# what is the sign?
  10319	smi.b		%d1			# set d1 accordingly
  10320	bsr.l		ovf_res			# calc default ovfl result
  10321	mov.b		%d0,FPSR_CC(%a6)	# insert new ccodes
  10322	fmovm.x		(%a0),&0x80		# return default result in fp0
  10323
  10324	fmov.s		&0x00000000,%fp1	# return EXOP (zero) in fp1
  10325	rts
  10326
  10327# t_ovfl2 ALWAYS tells ovf_res to create a positive result
  10328	global		t_ovfl2
  10329t_ovfl2:
  10330	ori.l		&ovfinx_mask,USER_FPSR(%a6) # set OVFL/INEX2/AOVFL/AINEX
  10331
  10332	sf.b		%d1			# clear sign flag in d1 for positive
  10333	bsr.l		ovf_res			# calc default ovfl result
  10334	mov.b		%d0,FPSR_CC(%a6)	# insert new ccodes
  10335	fmovm.x		(%a0),&0x80		# return default result in fp0
  10336
  10337	fmov.s		&0x00000000,%fp1	# return EXOP (zero) in fp1
  10338	rts
  10339
  10340#################################################################
  10341# t_catch():							#
  10342#	- the last operation of a transcendental emulation	#
  10343#	  routine may have caused an underflow or overflow.	#
  10344#	  we find out if this occurred by doing an fsave and	#
  10345#	  checking the exception bit. if one did occur, then we	#
  10346#	  jump to fgen_except() which creates the default	#
  10347#	  result and EXOP for us.				#
  10348#################################################################
  10349	global		t_catch
  10350t_catch:
  10351
  10352	fsave		-(%sp)			# push the fp state frame
  10353	tst.b		0x2(%sp)		# exception bit set in the frame?
  10354	bmi.b		catch			# yes
  10355	add.l		&0xc,%sp		# no; drop 0xc bytes of frame and fall into t_inx2
  10356
  10357#################################################################
  10358# INEX2 exception:						#
  10359#	- The inex2 and ainex bits are set.			#
  10360#################################################################
  10361	global		t_inx2
  10362t_inx2:
  10363	fblt.w		t_minx2			# fp condition "less than": use the negative variant
  10364	fbeq.w		inx2_zero		# fp condition "equal": use the zero variant
  10365
  10366	global		t_pinx2
  10367t_pinx2:
  10368	ori.w		&inx2a_mask,2+USER_FPSR(%a6) # set INEX2/AINEX
  10369	rts
  10370
  10371	global		t_minx2
  10372t_minx2:
  10373	ori.l		&inx2a_mask+neg_mask,USER_FPSR(%a6) # set N/INEX2/AINEX
  10374	rts
  10375
  10376inx2_zero:
  10377	mov.b		&z_bmask,FPSR_CC(%a6)	# ccodes = z_bmask only
  10378	ori.w		&inx2a_mask,2+USER_FPSR(%a6) # set INEX2/AINEX
  10379	rts
  10380
  10381# an underflow or overflow exception occurred.
  10382# we must set INEX/AINEX since the fmul/fdiv/fmov emulation may not!
  10383catch:
  10384	ori.w		&inx2a_mask,FPSR_EXCEPT(%a6)	# set INEX2/AINEX
  10385catch2:
  10386	bsr.l		fgen_except		# handle the exception (t_catch header: creates default result and EXOP)
  10387	add.l		&0xc,%sp		# drop the 0xc-byte fsave frame
  10388	rts
  10389
  10390	global		t_catch2
  10391t_catch2:
  10392
  10393	fsave		-(%sp)			# push the fp state frame
  10394
  10395	tst.b		0x2(%sp)		# exception bit set in the frame?
  10396	bmi.b		catch2			# yes; unlike catch, INEX2/AINEX are not forced
  10397	add.l		&0xc,%sp		# no; drop 0xc bytes of frame
  10398
  10399	fmov.l		%fpsr,%d0		# read status of the last fp op
  10400	or.l		%d0,USER_FPSR(%a6)	# merge it into the user FPSR
  10401
  10402	rts
  10403
  10404#########################################################################
  10405
  10406#########################################################################
  10407# unf_sub(): underflow default result calculation for transcendentals	#
  10408#									#
  10409# INPUT:								#
  10410#	d0   : rnd mode,precision					#
  10411#	d1.b : sign bit of result ('11111111 = (-) ; '00000000 = (+))	#
  10412# OUTPUT:								#
  10413#	a0   : points to result (in instruction memory); d0/d1 modified	#
  10414#########################################################################
  10415unf_sub:
  10416	ori.l		&unfinx_mask,USER_FPSR(%a6)	# set UNFL/INEX2/AUNFL/AINEX
  10417
  10418	andi.w		&0x10,%d1		# keep sign bit in 4th spot
  10419
  10420	lsr.b		&0x4,%d0		# shift rnd prec,mode to lo bits
  10421	andi.b		&0xf,%d0		# keep only the 4 prec/mode bits
  10422	or.b		%d1,%d0			# concat {sgn,prec,mode}
  10423
  10424	mov.l		%d0,%d1			# make a copy
  10425	lsl.b		&0x1,%d1		# index*2; the *8 scale below gives 16-byte entries
  10426
  10427	mov.b		(tbl_unf_cc.b,%pc,%d0.w*1),FPSR_CC(%a6) # insert ccode bits
  10428	lea		(tbl_unf_result.b,%pc,%d1.w*8),%a0 # grab result ptr
  10429	rts
  10430
  10431tbl_unf_cc:
  10432	byte		0x4, 0x4, 0x4, 0x0		# (+) ext; one byte per rounding mode
  10433	byte		0x4, 0x4, 0x4, 0x0		# (+) sgl
  10434	byte		0x4, 0x4, 0x4, 0x0		# (+) dbl
  10435	byte		0x0, 0x0, 0x0, 0x0		# (+) prec field = 3; matches the all-zero rows in tbl_unf_result
  10436	byte		0x8+0x4, 0x8+0x4, 0x8, 0x8+0x4	# (-) ext
  10437	byte		0x8+0x4, 0x8+0x4, 0x8, 0x8+0x4	# (-) sgl
  10438	byte		0x8+0x4, 0x8+0x4, 0x8, 0x8+0x4	# (-) dbl; no row follows for (-) with prec field = 3
  10439
  10440tbl_unf_result:
  10441	long		0x00000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
  10442	long		0x00000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
  10443	long		0x00000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
  10444	long		0x00000000, 0x00000000, 0x00000001, 0x0 # MIN; ext
  10445
  10446	long		0x3f810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
  10447	long		0x3f810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
  10448	long		0x3f810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
  10449	long		0x3f810000, 0x00000100, 0x00000000, 0x0 # MIN; sgl
  10450
  10451	long		0x3c010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
  10452	long		0x3c010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
  10453	long		0x3c010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
  10454	long		0x3c010000, 0x00000000, 0x00000800, 0x0 # MIN; dbl
  10455
  10456	long		0x0,0x0,0x0,0x0				# prec field = 3; presumably never selected
  10457	long		0x0,0x0,0x0,0x0				# (keeps the negative entries at index 16)
  10458	long		0x0,0x0,0x0,0x0
  10459	long		0x0,0x0,0x0,0x0
  10460
  10461	long		0x80000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
  10462	long		0x80000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
  10463	long		0x80000000, 0x00000000, 0x00000001, 0x0 # MIN; ext
  10464	long		0x80000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
  10465
  10466	long		0xbf810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
  10467	long		0xbf810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
  10468	long		0xbf810000, 0x00000100, 0x00000000, 0x0 # MIN; sgl
  10469	long		0xbf810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
  10470
  10471	long		0xbc010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
  10472	long		0xbc010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
  10473	long		0xbc010000, 0x00000000, 0x00000800, 0x0 # MIN; dbl
  10474	long		0xbc010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
  10475
  10476############################################################
  10477
  10478#########################################################################
  10479# src_zero(): Return signed zero according to sign of src operand.	#
  10480#########################################################################
  10481	global		src_zero
  10482src_zero:
  10483	tst.b		SRC_EX(%a0)		# get sign of src operand
  10484	bmi.b		ld_mzero		# if neg, load neg zero; else fall into ld_pzero
  10485
  10486#
  10487# ld_pzero(): return a positive zero.
  10488#
  10489	global		ld_pzero
  10490ld_pzero:
  10491	fmov.s		&0x00000000,%fp0	# load +0
  10492	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
  10493	rts
  10494
  10495# ld_mzero(): return a negative zero.
  10496	global		ld_mzero
  10497ld_mzero:
  10498	fmov.s		&0x80000000,%fp0	# load -0
  10499	mov.b		&neg_bmask+z_bmask,FPSR_CC(%a6) # set 'N','Z' ccode bits
  10500	rts
  10501
  10502#########################################################################
  10503# dst_zero(): Return signed zero according to sign of dst operand.	#
  10504#########################################################################
  10505	global		dst_zero
  10506dst_zero:
  10507	tst.b		DST_EX(%a1)		# get sign of dst operand (a1 = dst ptr)
  10508	bmi.b		ld_mzero		# if neg, load neg zero
  10509	bra.b		ld_pzero		# load positive zero
  10510
  10511#########################################################################
  10512# src_inf(): Return signed inf according to sign of src operand.	#
  10513#########################################################################
  10514	global		src_inf
  10515src_inf:
  10516	tst.b		SRC_EX(%a0)		# get sign of src operand
  10517	bmi.b		ld_minf			# if negative branch; else fall into ld_pinf
  10518
  10519#
  10520# ld_pinf(): return a positive infinity.
  10521#
  10522	global		ld_pinf
  10523ld_pinf:
  10524	fmov.s		&0x7f800000,%fp0	# load +INF
  10525	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'INF' ccode bit
  10526	rts
  10527
  10528#
  10529# ld_minf():return a negative infinity.
  10530#
  10531	global		ld_minf
  10532ld_minf:
  10533	fmov.s		&0xff800000,%fp0	# load -INF
  10534	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
  10535	rts
  10536
  10537#########################################################################
  10538# dst_inf(): Return signed inf according to sign of dst operand.	#
  10539#########################################################################
  10540	global		dst_inf
  10541dst_inf:
  10542	tst.b		DST_EX(%a1)		# examine sign byte of dst operand
  10543	bpl.b		ld_pinf			# non-negative dst: return +INF
  10544	bra.b		ld_minf			# negative dst: return -INF
  10545
  10546	global		szr_inf
  10547#################################################################
  10548# szr_inf(): Return +ZERO for a negative src operand or		#
  10549#	            +INF for a positive src operand.		#
  10550#	     Routine used for fetox, ftwotox, and ftentox.	#
  10551#################################################################
  10552szr_inf:
  10553	tst.b		SRC_EX(%a0)		# examine sign byte of src operand
  10554	bpl.b		ld_pinf			# non-negative src: return +INF
  10555	bra.b		ld_pzero		# negative src: return +0
  10556
  10557#########################################################################
  10558# sopr_inf(): Return +INF for a positive src operand or			#
  10559#	      jump to operand error routine for a negative src operand.	#
  10560#	      Routine used for flogn, flognp1, flog10, and flog2.	#
  10561#########################################################################
  10562	global		sopr_inf
  10563sopr_inf:
  10564	tst.b		SRC_EX(%a0)		# check sign of source
  10565	bmi.w		t_operr			# negative src: operand error routine
  10566	bra.b		ld_pinf			# otherwise return +INF
  10567
  10568#################################################################
  10569# setoxm1i(): Return minus one for a negative src operand or	#
  10570#	      positive infinity for a positive src operand.	#
  10571#	      Routine used for fetoxm1.				#
  10572#################################################################
  10573	global		setoxm1i
  10574setoxm1i:
  10575	tst.b		SRC_EX(%a0)		# examine sign byte of src operand
  10576	bpl.b		ld_pinf			# non-negative src: return +INF
  10577	bra.b		ld_mone			# negative src: return -1
  10578
  10579#########################################################################
  10580# src_one(): Return signed one according to sign of src operand.	#
  10581#########################################################################
  10582	global		src_one
  10583src_one:
  10584	tst.b		SRC_EX(%a0)		# check sign of source
  10585	bmi.b		ld_mone			# negative: load -1; else fall into ld_pone
  10586
  10587#
  10588# ld_pone(): return +1 in fp0 and clear the FPSR_CC byte.
  10589#
  10590	global		ld_pone
  10591ld_pone:
  10592	fmov.s		&0x3f800000,%fp0	# load +1
  10593	clr.b		FPSR_CC(%a6)		# no ccode bits set
  10594	rts
  10595
  10596#
  10597# ld_mone(): return -1 in fp0 and overwrite FPSR_CC with 'N' only.
  10598#
  10599	global		ld_mone
  10600ld_mone:
  10601	fmov.s		&0xbf800000,%fp0	# load -1
  10602	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
  10603	rts
  10604
  10605ppiby2:	long		0x3fff0000, 0xc90fdaa2, 0x2168c235	# +pi/2, loaded with fmov.x by ld_ppi2
  10606mpiby2:	long		0xbfff0000, 0xc90fdaa2, 0x2168c235	# -pi/2, loaded with fmov.x by ld_mpi2
  10607
  10608#################################################################
  10609# spi_2(): Return signed PI/2 according to sign of src operand.	#
  10610#################################################################
  10611	global		spi_2
  10612spi_2:
  10613	tst.b		SRC_EX(%a0)		# check sign of source
  10614	bmi.b		ld_mpi2			# negative: -pi/2; else fall into ld_ppi2
  10615
  10616#
  10617# ld_ppi2(): return positive PI/2 and exit through t_pinx2.
  10618#
  10619	global		ld_ppi2
  10620ld_ppi2:
  10621	fmov.l		%d0,%fpcr		# fpcr = d0 (presumably caller's rnd prec/mode -- confirm)
  10622	fmov.x		ppiby2(%pc),%fp0	# load +pi/2
  10623	bra.w		t_pinx2			# set INEX2
  10624
  10625#
  10626# ld_mpi2(): return negative PI/2 and exit through t_minx2.
  10627#
  10628	global		ld_mpi2
  10629ld_mpi2:
  10630	fmov.l		%d0,%fpcr		# fpcr = d0 (presumably caller's rnd prec/mode -- confirm)
  10631	fmov.x		mpiby2(%pc),%fp0	# load -pi/2
  10632	bra.w		t_minx2			# set INEX2
  10633
  10634####################################################
  10635# The following routines give support for fsincos. #
  10636####################################################
  10637
  10638#
  10639# ssincosz(): When the src operand is ZERO, store a one in the
  10640#	      cosine register and return a ZERO in fp0 w/ the same sign
  10641#	      as the src operand.
  10642#
  10643	global		ssincosz
  10644ssincosz:
  10645	fmov.s		&0x3f800000,%fp1	# cosine result: fp1 = +1
  10646	tst.b		SRC_EX(%a0)		# test sign
  10647	bpl.b		sincoszp		# non-negative src: return +0
  10648	fmov.s		&0x80000000,%fp0	# return sin result in fp0 (-0)
  10649	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6)	# set 'N','Z' ccode bits
  10650	bra.b		sto_cos			# store cosine result; sto_cos's rts returns to our caller
  10651sincoszp:
  10652	fmov.s		&0x00000000,%fp0	# return sin result in fp0 (+0)
  10653	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
  10654	bra.b		sto_cos			# store cosine result; sto_cos's rts returns to our caller
  10655
  10656#
  10657# ssincosi(): When the src operand is INF, store a QNAN in the cosine
  10658#	      register and jump to the operand error routine. No sign test
  10659#	      is made here, so +INF and -INF are handled identically.
  10660#
  10661	global		ssincosi
  10662ssincosi:
  10663	fmov.x		qnan(%pc),%fp1		# load NAN
  10664	bsr.l		sto_cos			# store cosine result
  10665	bra.w		t_operr			# sine result comes from the operand error routine
  10666
  10667#
  10668# ssincosqnan(): When the src operand is a QNAN, store the QNAN in the cosine
  10669#		 register and branch to the src QNAN routine.
  10670#
  10671	global		ssincosqnan
  10672ssincosqnan:
  10673	fmov.x		LOCAL_EX(%a0),%fp1	# fp1 = src operand (the QNAN)
  10674	bsr.l		sto_cos			# store it as the cosine result
  10675	bra.w		src_qnan		# sine result: return the src QNAN in fp0
  10676
  10677#
  10678# ssincossnan(): When the src operand is an SNAN, store the SNAN w/ the SNAN bit set
  10679#		 in the cosine register and branch to the src SNAN routine.
  10680#
  10681	global		ssincossnan
  10682ssincossnan:
  10683	fmov.x		LOCAL_EX(%a0),%fp1	# fp1 = src operand (the SNAN)
  10684	bsr.l		sto_cos			# store it as the cosine result
  10685	bra.w		src_snan		# sine result: return the src SNAN in fp0
  10686
  10687########################################################################
  10688
  10689#########################################################################
  10690# sto_cos(): store fp1 to the fpreg designated by the CMDREG dst field.	#
  10691#	     fp1 holds the result of the cosine portion of ssincos().	#
  10692#	     the value in fp1 will not take any exceptions when moved.	#
  10693# INPUT:								#
  10694#	fp1 : fp value to store						#
  10695# MODIFIED:								#
  10696#	d0								#
  10697#########################################################################
  10698	global		sto_cos
  10699sto_cos:
  10700	mov.b		1+EXC_CMDREG(%a6),%d0	# fetch byte at EXC_CMDREG+1
  10701	andi.w		&0x7,%d0		# keep low 3 bits: index 0-7 into tbl_sto_cos
  10702	mov.w		(tbl_sto_cos.b,%pc,%d0.w*2),%d0	# fetch 16-bit offset of the store stub
  10703	jmp		(tbl_sto_cos.b,%pc,%d0.w*1)	# jump to tbl_sto_cos + offset
  10704
  10705tbl_sto_cos:
  10706	short		sto_cos_0 - tbl_sto_cos
  10707	short		sto_cos_1 - tbl_sto_cos
  10708	short		sto_cos_2 - tbl_sto_cos
  10709	short		sto_cos_3 - tbl_sto_cos
  10710	short		sto_cos_4 - tbl_sto_cos
  10711	short		sto_cos_5 - tbl_sto_cos
  10712	short		sto_cos_6 - tbl_sto_cos
  10713	short		sto_cos_7 - tbl_sto_cos
  10714
  10715sto_cos_0:
  10716	fmovm.x		&0x40,EXC_FP0(%a6)	# index 0: write fp1 to the EXC_FP0 slot off a6
  10717	rts
  10718sto_cos_1:
  10719	fmovm.x		&0x40,EXC_FP1(%a6)	# index 1: write fp1 to the EXC_FP1 slot off a6
  10720	rts
  10721sto_cos_2:
  10722	fmov.x		%fp1,%fp2		# indexes 2-7: copy fp1 straight into the register
  10723	rts
  10724sto_cos_3:
  10725	fmov.x		%fp1,%fp3
  10726	rts
  10727sto_cos_4:
  10728	fmov.x		%fp1,%fp4
  10729	rts
  10730sto_cos_5:
  10731	fmov.x		%fp1,%fp5
  10732	rts
  10733sto_cos_6:
  10734	fmov.x		%fp1,%fp6
  10735	rts
  10736sto_cos_7:
  10737	fmov.x		%fp1,%fp7
  10738	rts
  10739
  10740##################################################################
  10741	global		smod_sdnrm
  10742	global		smod_snorm
  10743smod_sdnrm:
  10744smod_snorm:
  10745	mov.b		DTAG(%a6),%d1		# d1 = dst operand tag
  10746	beq.l		smod			# tag == 0 (norm): smod
  10747	cmpi.b		%d1,&ZERO
  10748	beq.w		smod_zro		# dst ZERO: zero w/ sign of dst, qbyte sign set
  10749	cmpi.b		%d1,&INF
  10750	beq.l		t_operr			# dst INF: operand error
  10751	cmpi.b		%d1,&DENORM
  10752	beq.l		smod			# dst DENORM: smod
  10753	cmpi.b		%d1,&SNAN
  10754	beq.l		dst_snan		# dst SNAN: return dst SNAN
  10755	bra.l		dst_qnan		# remaining tag: return dst as QNAN
  10756
  10757	global		smod_szero
  10758smod_szero:
  10759	mov.b		DTAG(%a6),%d1		# d1 = dst operand tag
  10760	beq.l		t_operr			# tag == 0 (norm): operand error
  10761	cmpi.b		%d1,&ZERO
  10762	beq.l		t_operr			# dst ZERO: operand error
  10763	cmpi.b		%d1,&INF
  10764	beq.l		t_operr			# dst INF: operand error
  10765	cmpi.b		%d1,&DENORM
  10766	beq.l		t_operr			# dst DENORM: operand error
  10767	cmpi.b		%d1,&QNAN
  10768	beq.l		dst_qnan		# dst QNAN: return dst QNAN
  10769	bra.l		dst_snan		# remaining tag: return dst as SNAN
  10770
  10771	global		smod_sinf
  10772smod_sinf:
  10773	mov.b		DTAG(%a6),%d1		# d1 = dst operand tag
  10774	beq.l		smod_fpn		# tag == 0 (norm): result is the dst itself
  10775	cmpi.b		%d1,&ZERO
  10776	beq.l		smod_zro		# dst ZERO: zero w/ sign of dst
  10777	cmpi.b		%d1,&INF
  10778	beq.l		t_operr			# dst INF: operand error
  10779	cmpi.b		%d1,&DENORM
  10780	beq.l		smod_fpn		# dst DENORM: smod_fpn (denorm path inside)
  10781	cmpi.b		%d1,&QNAN
  10782	beq.l		dst_qnan		# dst QNAN: return dst QNAN
  10783	bra.l		dst_snan		# remaining tag: return dst as SNAN
  10784
  10785smod_zro:
  10786srem_zro:
  10787	mov.b		SRC_EX(%a0),%d1		# get src sign
  10788	mov.b		DST_EX(%a1),%d0		# get dst sign
  10789	eor.b		%d0,%d1			# get qbyte sign
  10790	andi.b		&0x80,%d1		# keep only the sign bit
  10791	mov.b		%d1,FPSR_QBYTE(%a6)	# quotient byte = sign bit, other bits zero
  10792	tst.b		%d0			# result zero takes the sign of dst
  10793	bpl.w		ld_pzero		# dst non-negative: return +0
  10794	bra.w		ld_mzero		# dst negative: return -0
  10795
  10796smod_fpn:
  10797srem_fpn:
  10798	clr.b		FPSR_QBYTE(%a6)		# NOTE(review): redundant, overwritten below
  10799	mov.l		%d0,-(%sp)		# save d0; popped into d0 or fpcr below
  10800	mov.b		SRC_EX(%a0),%d1		# get src sign
  10801	mov.b		DST_EX(%a1),%d0		# get dst sign
  10802	eor.b		%d0,%d1			# get qbyte sign
  10803	andi.b		&0x80,%d1		# keep only the sign bit
  10804	mov.b		%d1,FPSR_QBYTE(%a6)	# quotient byte = sign bit, other bits zero
  10805	cmpi.b		DTAG(%a6),&DENORM	# is the dst a denorm?
  10806	bne.b		smod_nrm		# no: return the dst directly
  10807	lea		DST(%a1),%a0		# a0 = ptr to dst operand
  10808	mov.l		(%sp)+,%d0		# restore saved d0
  10809	bra		t_resdnrm		# denorm result handled by t_resdnrm
  10810smod_nrm:
  10811	fmov.l		(%sp)+,%fpcr		# pop saved d0 into fpcr
  10812	fmov.x		DST(%a1),%fp0		# result = dst operand
  10813	tst.b		DST_EX(%a1)		# check sign of dst
  10814	bmi.b		smod_nrm_neg		# negative: set 'N'
  10815	rts					# non-negative: FPSR_CC is not written here
  10816
  10817smod_nrm_neg:
  10818	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode
  10819	rts
  10820
  10821#########################################################################
  10822	global		srem_snorm
  10823	global		srem_sdnrm
  10824srem_sdnrm:
  10825srem_snorm:
  10826	mov.b		DTAG(%a6),%d1		# d1 = dst operand tag
  10827	beq.l		srem			# tag == 0 (norm): srem
  10828	cmpi.b		%d1,&ZERO
  10829	beq.w		srem_zro		# dst ZERO: zero w/ sign of dst, qbyte sign set
  10830	cmpi.b		%d1,&INF
  10831	beq.l		t_operr			# dst INF: operand error
  10832	cmpi.b		%d1,&DENORM
  10833	beq.l		srem			# dst DENORM: srem
  10834	cmpi.b		%d1,&QNAN
  10835	beq.l		dst_qnan		# dst QNAN: return dst QNAN
  10836	bra.l		dst_snan		# remaining tag: return dst as SNAN
  10837
  10838	global		srem_szero
  10839srem_szero:
  10840	mov.b		DTAG(%a6),%d1		# d1 = dst operand tag
  10841	beq.l		t_operr			# tag == 0 (norm): operand error
  10842	cmpi.b		%d1,&ZERO
  10843	beq.l		t_operr			# dst ZERO: operand error
  10844	cmpi.b		%d1,&INF
  10845	beq.l		t_operr			# dst INF: operand error
  10846	cmpi.b		%d1,&DENORM
  10847	beq.l		t_operr			# dst DENORM: operand error
  10848	cmpi.b		%d1,&QNAN
  10849	beq.l		dst_qnan		# dst QNAN: return dst QNAN
  10850	bra.l		dst_snan		# remaining tag: return dst as SNAN
  10851
  10852	global		srem_sinf
  10853srem_sinf:
  10854	mov.b		DTAG(%a6),%d1		# d1 = dst operand tag
  10855	beq.w		srem_fpn		# tag == 0 (norm): result is the dst itself
  10856	cmpi.b		%d1,&ZERO
  10857	beq.w		srem_zro		# dst ZERO: zero w/ sign of dst
  10858	cmpi.b		%d1,&INF
  10859	beq.l		t_operr			# dst INF: operand error
  10860	cmpi.b		%d1,&DENORM
  10861	beq.l		srem_fpn		# dst DENORM: srem_fpn (denorm path inside)
  10862	cmpi.b		%d1,&QNAN
  10863	beq.l		dst_qnan		# dst QNAN: return dst QNAN
  10864	bra.l		dst_snan		# remaining tag: return dst as SNAN
  10865
  10866#########################################################################
  10867	global		sscale_snorm
  10868	global		sscale_sdnrm
  10869sscale_snorm:
  10870sscale_sdnrm:
  10871	mov.b		DTAG(%a6),%d1		# d1 = dst operand tag
  10872	beq.l		sscale			# tag == 0 (norm): sscale
  10873	cmpi.b		%d1,&ZERO
  10874	beq.l		dst_zero		# dst ZERO: zero w/ sign of dst
  10875	cmpi.b		%d1,&INF
  10876	beq.l		dst_inf			# dst INF: inf w/ sign of dst
  10877	cmpi.b		%d1,&DENORM
  10878	beq.l		sscale			# dst DENORM: sscale
  10879	cmpi.b		%d1,&QNAN
  10880	beq.l		dst_qnan		# dst QNAN: return dst QNAN
  10881	bra.l		dst_snan		# remaining tag: return dst as SNAN
  10882
  10883	global		sscale_szero
  10884sscale_szero:
  10885	mov.b		DTAG(%a6),%d1		# d1 = dst operand tag
  10886	beq.l		sscale			# tag == 0 (norm): sscale
  10887	cmpi.b		%d1,&ZERO
  10888	beq.l		dst_zero		# dst ZERO: zero w/ sign of dst
  10889	cmpi.b		%d1,&INF
  10890	beq.l		dst_inf			# dst INF: inf w/ sign of dst
  10891	cmpi.b		%d1,&DENORM
  10892	beq.l		sscale			# dst DENORM: sscale
  10893	cmpi.b		%d1,&QNAN
  10894	beq.l		dst_qnan		# dst QNAN: return dst QNAN
  10895	bra.l		dst_snan		# remaining tag: return dst as SNAN
  10896
  10897	global		sscale_sinf
  10898sscale_sinf:
  10899	mov.b		DTAG(%a6),%d1		# d1 = dst operand tag
  10900	beq.l		t_operr			# tag == 0 (norm): operand error
  10901	cmpi.b		%d1,&QNAN
  10902	beq.l		dst_qnan		# dst QNAN: return dst QNAN
  10903	cmpi.b		%d1,&SNAN
  10904	beq.l		dst_snan		# dst SNAN: return dst SNAN
  10905	bra.l		t_operr			# every other dst tag: operand error
  10906
  10907########################################################################
  10908
  10909#
  10910# sop_sqnan(): The src op for frem/fmod/fscale was a QNAN.
  10911#
  10912	global		sop_sqnan
  10913sop_sqnan:
  10914	mov.b		DTAG(%a6),%d1		# d1 = dst operand tag
  10915	cmpi.b		%d1,&QNAN
  10916	beq.b		dst_qnan		# dst is a QNAN too: the dst NAN is returned
  10917	cmpi.b		%d1,&SNAN
  10918	beq.b		dst_snan		# dst is an SNAN: the dst NAN is returned
  10919	bra.b		src_qnan		# dst is not a NAN: return the src QNAN
  10920
  10921#
  10922# sop_ssnan(): The src op for frem/fmod/fscale was an SNAN.
  10923#
  10924	global		sop_ssnan
  10925sop_ssnan:
  10926	mov.b		DTAG(%a6),%d1		# d1 = dst operand tag
  10927	cmpi.b		%d1,&QNAN
  10928	beq.b		dst_qnan_src_snan	# dst QNAN + src SNAN: flag SNAN, return dst
  10929	cmpi.b		%d1,&SNAN
  10930	beq.b		dst_snan		# dst is an SNAN: the dst NAN is returned
  10931	bra.b		src_snan		# dst is not a NAN: return the src SNAN
  10932
  10933dst_qnan_src_snan:
  10934	ori.l		&snaniop_mask,USER_FPSR(%a6) # set NAN/SNAN/AIOP
  10935	bra.b		dst_qnan		# then return the dst QNAN
  10936
  10937#
  10938# dst_snan(): Return the dst SNAN w/ the SNAN bit set.
  10939#
  10940	global		dst_snan
  10941dst_snan:
  10942	fmov.x		DST(%a1),%fp0		# the fmove sets the SNAN bit
  10943	fmov.l		%fpsr,%d0		# catch resulting status
  10944	or.l		%d0,USER_FPSR(%a6)	# store status (OR-ed into USER_FPSR)
  10945	rts
  10946
  10947#
  10948# dst_qnan(): Return the dst QNAN.
  10949#
  10950	global		dst_qnan
  10951dst_qnan:
  10952	fmov.x		DST(%a1),%fp0		# return the non-signalling nan
  10953	tst.b		DST_EX(%a1)		# set ccodes according to QNAN sign
  10954	bmi.b		dst_qnan_m		# negative QNAN
  10955dst_qnan_p:
  10956	mov.b		&nan_bmask,FPSR_CC(%a6)	# set 'NAN' ccode bit
  10957	rts
  10958dst_qnan_m:
  10959	mov.b		&neg_bmask+nan_bmask,FPSR_CC(%a6)	# set 'N','NAN' ccode bits
  10960	rts
  10961
  10962#
  10963# src_snan(): Return the src SNAN w/ the SNAN bit set.
  10964#
  10965	global		src_snan
  10966src_snan:
  10967	fmov.x		SRC(%a0),%fp0		# the fmove sets the SNAN bit
  10968	fmov.l		%fpsr,%d0		# catch resulting status
  10969	or.l		%d0,USER_FPSR(%a6)	# store status (OR-ed into USER_FPSR)
  10970	rts
  10971
  10972#
  10973# src_qnan(): Return the src QNAN.
  10974#
  10975	global		src_qnan
  10976src_qnan:
  10977	fmov.x		SRC(%a0),%fp0		# return the non-signalling nan
  10978	tst.b		SRC_EX(%a0)		# set ccodes according to QNAN sign
  10979	bmi.b		src_qnan_m		# negative QNAN: use this routine's own 'N','NAN' exit
  10980src_qnan_p:
  10981	mov.b		&nan_bmask,FPSR_CC(%a6)	# set 'NAN' ccode bit
  10982	rts
  10983src_qnan_m:
  10984	mov.b		&neg_bmask+nan_bmask,FPSR_CC(%a6)	# set 'N','NAN' ccode bits
  10985	rts
  10986
  10987#
  10988# fkern2.s:
  10989#	These entry points are used by the exception handler
  10990# routines where an instruction is selected by an index into
  10991# a large jump table corresponding to a given instruction which
  10992# has been decoded. Flow continues here where we now decode
  10993# further according to the source operand type.
  10994#
  10995
  10996	global		fsinh
  10997fsinh:
  10998	mov.b		STAG(%a6),%d1		# d1 = src operand tag
  10999	beq.l		ssinh			# tag == 0 (norm): ssinh
  11000	cmpi.b		%d1,&ZERO
  11001	beq.l		src_zero		# ZERO: zero w/ sign of src
  11002	cmpi.b		%d1,&INF
  11003	beq.l		src_inf			# INF: inf w/ sign of src
  11004	cmpi.b		%d1,&DENORM
  11005	beq.l		ssinhd			# DENORM: ssinhd
  11006	cmpi.b		%d1,&QNAN
  11007	beq.l		src_qnan		# QNAN: return src QNAN
  11008	bra.l		src_snan		# remaining tag: return src as SNAN
  11009
  11010	global		flognp1
  11011flognp1:
  11012	mov.b		STAG(%a6),%d1		# d1 = src operand tag
  11013	beq.l		slognp1			# tag == 0 (norm): slognp1
  11014	cmpi.b		%d1,&ZERO
  11015	beq.l		src_zero		# ZERO: zero w/ sign of src
  11016	cmpi.b		%d1,&INF
  11017	beq.l		sopr_inf		# INF: +INF, or operr if src is negative
  11018	cmpi.b		%d1,&DENORM
  11019	beq.l		slognp1d		# DENORM: slognp1d
  11020	cmpi.b		%d1,&QNAN
  11021	beq.l		src_qnan		# QNAN: return src QNAN
  11022	bra.l		src_snan		# remaining tag: return src as SNAN
  11023
  11024	global		fetoxm1
  11025fetoxm1:
  11026	mov.b		STAG(%a6),%d1		# d1 = src operand tag
  11027	beq.l		setoxm1			# tag == 0 (norm): setoxm1
  11028	cmpi.b		%d1,&ZERO
  11029	beq.l		src_zero		# ZERO: zero w/ sign of src
  11030	cmpi.b		%d1,&INF
  11031	beq.l		setoxm1i		# INF: +INF, or -1 if src is negative
  11032	cmpi.b		%d1,&DENORM
  11033	beq.l		setoxm1d		# DENORM: setoxm1d
  11034	cmpi.b		%d1,&QNAN
  11035	beq.l		src_qnan		# QNAN: return src QNAN
  11036	bra.l		src_snan		# remaining tag: return src as SNAN
  11037
  11038	global		ftanh
  11039ftanh:
  11040	mov.b		STAG(%a6),%d1		# d1 = src operand tag
  11041	beq.l		stanh			# tag == 0 (norm): stanh
  11042	cmpi.b		%d1,&ZERO
  11043	beq.l		src_zero		# ZERO: zero w/ sign of src
  11044	cmpi.b		%d1,&INF
  11045	beq.l		src_one			# INF: one w/ sign of src
  11046	cmpi.b		%d1,&DENORM
  11047	beq.l		stanhd			# DENORM: stanhd
  11048	cmpi.b		%d1,&QNAN
  11049	beq.l		src_qnan		# QNAN: return src QNAN
  11050	bra.l		src_snan		# remaining tag: return src as SNAN
  11051
  11052	global		fatan
  11053fatan:
  11054	mov.b		STAG(%a6),%d1		# d1 = src operand tag
  11055	beq.l		satan			# tag == 0 (norm): satan
  11056	cmpi.b		%d1,&ZERO
  11057	beq.l		src_zero		# ZERO: zero w/ sign of src
  11058	cmpi.b		%d1,&INF
  11059	beq.l		spi_2			# INF: pi/2 w/ sign of src
  11060	cmpi.b		%d1,&DENORM
  11061	beq.l		satand			# DENORM: satand
  11062	cmpi.b		%d1,&QNAN
  11063	beq.l		src_qnan		# QNAN: return src QNAN
  11064	bra.l		src_snan		# remaining tag: return src as SNAN
  11065
  11066	global		fasin
  11067fasin:
  11068	mov.b		STAG(%a6),%d1		# d1 = src operand tag
  11069	beq.l		sasin			# tag == 0 (norm): sasin
  11070	cmpi.b		%d1,&ZERO
  11071	beq.l		src_zero		# ZERO: zero w/ sign of src
  11072	cmpi.b		%d1,&INF
  11073	beq.l		t_operr			# INF: operand error
  11074	cmpi.b		%d1,&DENORM
  11075	beq.l		sasind			# DENORM: sasind
  11076	cmpi.b		%d1,&QNAN
  11077	beq.l		src_qnan		# QNAN: return src QNAN
  11078	bra.l		src_snan		# remaining tag: return src as SNAN
  11079
  11080	global		fatanh
  11081fatanh:
  11082	mov.b		STAG(%a6),%d1		# d1 = src operand tag
  11083	beq.l		satanh			# tag == 0 (norm): satanh
  11084	cmpi.b		%d1,&ZERO
  11085	beq.l		src_zero		# ZERO: zero w/ sign of src
  11086	cmpi.b		%d1,&INF
  11087	beq.l		t_operr			# INF: operand error
  11088	cmpi.b		%d1,&DENORM
  11089	beq.l		satanhd			# DENORM: satanhd
  11090	cmpi.b		%d1,&QNAN
  11091	beq.l		src_qnan		# QNAN: return src QNAN
  11092	bra.l		src_snan		# remaining tag: return src as SNAN
  11093
  11094	global		fsine
  11095fsine:
  11096	mov.b		STAG(%a6),%d1		# d1 = src operand tag
  11097	beq.l		ssin			# tag == 0 (norm): ssin
  11098	cmpi.b		%d1,&ZERO
  11099	beq.l		src_zero		# ZERO: zero w/ sign of src
  11100	cmpi.b		%d1,&INF
  11101	beq.l		t_operr			# INF: operand error
  11102	cmpi.b		%d1,&DENORM
  11103	beq.l		ssind			# DENORM: ssind
  11104	cmpi.b		%d1,&QNAN
  11105	beq.l		src_qnan		# QNAN: return src QNAN
  11106	bra.l		src_snan		# remaining tag: return src as SNAN
  11107
  11108	global		ftan
  11109ftan:
  11110	mov.b		STAG(%a6),%d1		# d1 = src operand tag
  11111	beq.l		stan			# tag == 0 (norm): stan
  11112	cmpi.b		%d1,&ZERO
  11113	beq.l		src_zero		# ZERO: zero w/ sign of src
  11114	cmpi.b		%d1,&INF
  11115	beq.l		t_operr			# INF: operand error
  11116	cmpi.b		%d1,&DENORM
  11117	beq.l		stand			# DENORM: stand
  11118	cmpi.b		%d1,&QNAN
  11119	beq.l		src_qnan		# QNAN: return src QNAN
  11120	bra.l		src_snan		# remaining tag: return src as SNAN
  11121
  11122	global		fetox
  11123fetox:
  11124	mov.b		STAG(%a6),%d1		# d1 = src operand tag
  11125	beq.l		setox			# tag == 0 (norm): setox
  11126	cmpi.b		%d1,&ZERO
  11127	beq.l		ld_pone			# ZERO: return +1
  11128	cmpi.b		%d1,&INF
  11129	beq.l		szr_inf			# INF: +INF, or +0 if src is negative
  11130	cmpi.b		%d1,&DENORM
  11131	beq.l		setoxd			# DENORM: setoxd
  11132	cmpi.b		%d1,&QNAN
  11133	beq.l		src_qnan		# QNAN: return src QNAN
  11134	bra.l		src_snan		# remaining tag: return src as SNAN
  11135
  11136	global		ftwotox
  11137ftwotox:
  11138	mov.b		STAG(%a6),%d1		# d1 = src operand tag
  11139	beq.l		stwotox			# tag == 0 (norm): stwotox
  11140	cmpi.b		%d1,&ZERO
  11141	beq.l		ld_pone			# ZERO: return +1
  11142	cmpi.b		%d1,&INF
  11143	beq.l		szr_inf			# INF: +INF, or +0 if src is negative
  11144	cmpi.b		%d1,&DENORM
  11145	beq.l		stwotoxd		# DENORM: stwotoxd
  11146	cmpi.b		%d1,&QNAN
  11147	beq.l		src_qnan		# QNAN: return src QNAN
  11148	bra.l		src_snan		# remaining tag: return src as SNAN
  11149
  11150	global		ftentox
  11151ftentox:
  11152	mov.b		STAG(%a6),%d1		# d1 = src operand tag
  11153	beq.l		stentox			# tag == 0 (norm): stentox
  11154	cmpi.b		%d1,&ZERO
  11155	beq.l		ld_pone			# ZERO: return +1
  11156	cmpi.b		%d1,&INF
  11157	beq.l		szr_inf			# INF: +INF, or +0 if src is negative
  11158	cmpi.b		%d1,&DENORM
  11159	beq.l		stentoxd		# DENORM: stentoxd
  11160	cmpi.b		%d1,&QNAN
  11161	beq.l		src_qnan		# QNAN: return src QNAN
  11162	bra.l		src_snan		# remaining tag: return src as SNAN
  11163
  11164	global		flogn
  11165flogn:
  11166	mov.b		STAG(%a6),%d1		# d1 = src operand tag
  11167	beq.l		slogn			# tag == 0 (norm): slogn
  11168	cmpi.b		%d1,&ZERO
  11169	beq.l		t_dz2			# ZERO: handled by t_dz2 (defined elsewhere)
  11170	cmpi.b		%d1,&INF
  11171	beq.l		sopr_inf		# INF: +INF, or operr if src is negative
  11172	cmpi.b		%d1,&DENORM
  11173	beq.l		slognd			# DENORM: slognd
  11174	cmpi.b		%d1,&QNAN
  11175	beq.l		src_qnan		# QNAN: return src QNAN
  11176	bra.l		src_snan		# remaining tag: return src as SNAN
  11177
  11178	global		flog10
  11179flog10:
  11180	mov.b		STAG(%a6),%d1		# d1 = src operand tag
  11181	beq.l		slog10			# tag == 0 (norm): slog10
  11182	cmpi.b		%d1,&ZERO
  11183	beq.l		t_dz2			# ZERO: handled by t_dz2 (defined elsewhere)
  11184	cmpi.b		%d1,&INF
  11185	beq.l		sopr_inf		# INF: +INF, or operr if src is negative
  11186	cmpi.b		%d1,&DENORM
  11187	beq.l		slog10d			# DENORM: slog10d
  11188	cmpi.b		%d1,&QNAN
  11189	beq.l		src_qnan		# QNAN: return src QNAN
  11190	bra.l		src_snan		# remaining tag: return src as SNAN
  11191
  11192	global		flog2
  11193flog2:
  11194	mov.b		STAG(%a6),%d1		# d1 = src operand tag
  11195	beq.l		slog2			# tag == 0 (norm): slog2
  11196	cmpi.b		%d1,&ZERO
  11197	beq.l		t_dz2			# ZERO: handled by t_dz2 (defined elsewhere)
  11198	cmpi.b		%d1,&INF
  11199	beq.l		sopr_inf		# INF: +INF, or operr if src is negative
  11200	cmpi.b		%d1,&DENORM
  11201	beq.l		slog2d			# DENORM: slog2d
  11202	cmpi.b		%d1,&QNAN
  11203	beq.l		src_qnan		# QNAN: return src QNAN
  11204	bra.l		src_snan		# remaining tag: return src as SNAN
  11205
  11206	global		fcosh
  11207fcosh:
  11208	mov.b		STAG(%a6),%d1		# d1 = src operand tag
  11209	beq.l		scosh			# tag == 0 (norm): scosh
  11210	cmpi.b		%d1,&ZERO
  11211	beq.l		ld_pone			# ZERO: return +1
  11212	cmpi.b		%d1,&INF
  11213	beq.l		ld_pinf			# INF: return +INF regardless of src sign
  11214	cmpi.b		%d1,&DENORM
  11215	beq.l		scoshd			# DENORM: scoshd
  11216	cmpi.b		%d1,&QNAN
  11217	beq.l		src_qnan		# QNAN: return src QNAN
  11218	bra.l		src_snan		# remaining tag: return src as SNAN
  11219
  11220	global		facos
  11221facos:
  11222	mov.b		STAG(%a6),%d1		# d1 = src operand tag
  11223	beq.l		sacos			# tag == 0 (norm): sacos
  11224	cmpi.b		%d1,&ZERO
  11225	beq.l		ld_ppi2			# ZERO: return +pi/2
  11226	cmpi.b		%d1,&INF
  11227	beq.l		t_operr			# INF: operand error
  11228	cmpi.b		%d1,&DENORM
  11229	beq.l		sacosd			# DENORM: sacosd
  11230	cmpi.b		%d1,&QNAN
  11231	beq.l		src_qnan		# QNAN: return src QNAN
  11232	bra.l		src_snan		# remaining tag: return src as SNAN
  11233
  11234	global		fcos
  11235fcos:
  11236	mov.b		STAG(%a6),%d1		# d1 = src operand tag
  11237	beq.l		scos			# tag == 0 (norm): scos
  11238	cmpi.b		%d1,&ZERO
  11239	beq.l		ld_pone			# ZERO: return +1
  11240	cmpi.b		%d1,&INF
  11241	beq.l		t_operr			# INF: operand error
  11242	cmpi.b		%d1,&DENORM
  11243	beq.l		scosd			# DENORM: scosd
  11244	cmpi.b		%d1,&QNAN
  11245	beq.l		src_qnan		# QNAN: return src QNAN
  11246	bra.l		src_snan		# remaining tag: return src as SNAN
  11247
  11248	global		fgetexp
  11249fgetexp:
  11250	mov.b		STAG(%a6),%d1		# d1 = src operand tag
  11251	beq.l		sgetexp			# tag == 0 (norm): sgetexp
  11252	cmpi.b		%d1,&ZERO
  11253	beq.l		src_zero		# ZERO: zero w/ sign of src
  11254	cmpi.b		%d1,&INF
  11255	beq.l		t_operr			# INF: operand error
  11256	cmpi.b		%d1,&DENORM
  11257	beq.l		sgetexpd		# DENORM: sgetexpd
  11258	cmpi.b		%d1,&QNAN
  11259	beq.l		src_qnan		# QNAN: return src QNAN
  11260	bra.l		src_snan		# remaining tag: return src as SNAN
  11261
  11262	global		fgetman
  11263fgetman:
  11264	mov.b		STAG(%a6),%d1		# d1 = src operand tag
  11265	beq.l		sgetman			# tag == 0 (norm): sgetman
  11266	cmpi.b		%d1,&ZERO
  11267	beq.l		src_zero		# ZERO: zero w/ sign of src
  11268	cmpi.b		%d1,&INF
  11269	beq.l		t_operr			# INF: operand error
  11270	cmpi.b		%d1,&DENORM
  11271	beq.l		sgetmand		# DENORM: sgetmand
  11272	cmpi.b		%d1,&QNAN
  11273	beq.l		src_qnan		# QNAN: return src QNAN
  11274	bra.l		src_snan		# remaining tag: return src as SNAN
  11275
  11276	global		fsincos
  11277fsincos:
  11278	mov.b		STAG(%a6),%d1		# d1 = src operand tag
  11279	beq.l		ssincos			# tag == 0 (norm): ssincos
  11280	cmpi.b		%d1,&ZERO
  11281	beq.l		ssincosz		# ZERO: sin = signed zero, cos = +1
  11282	cmpi.b		%d1,&INF
  11283	beq.l		ssincosi		# INF: cos = QNAN, then operand error
  11284	cmpi.b		%d1,&DENORM
  11285	beq.l		ssincosd		# DENORM: ssincosd
  11286	cmpi.b		%d1,&QNAN
  11287	beq.l		ssincosqnan		# QNAN: stored to cos reg and returned
  11288	bra.l		ssincossnan		# remaining tag: treated as SNAN
  11289
  11290	global		fmod
  11291fmod:
  11292	mov.b		STAG(%a6),%d1		# d1 = src operand tag
  11293	beq.l		smod_snorm		# tag == 0 (norm): dispatch on dst tag
  11294	cmpi.b		%d1,&ZERO
  11295	beq.l		smod_szero		# src ZERO: dispatch on dst tag
  11296	cmpi.b		%d1,&INF
  11297	beq.l		smod_sinf		# src INF: dispatch on dst tag
  11298	cmpi.b		%d1,&DENORM
  11299	beq.l		smod_sdnrm		# src DENORM: same entry as norm
  11300	cmpi.b		%d1,&QNAN
  11301	beq.l		sop_sqnan		# src QNAN: choose which NAN to return
  11302	bra.l		sop_ssnan		# remaining tag: treated as src SNAN
  11303
  11304	global		frem
  11305frem:
  11306	mov.b		STAG(%a6),%d1		# d1 = src operand tag
  11307	beq.l		srem_snorm		# tag == 0 (norm): dispatch on dst tag
  11308	cmpi.b		%d1,&ZERO
  11309	beq.l		srem_szero		# src ZERO: dispatch on dst tag
  11310	cmpi.b		%d1,&INF
  11311	beq.l		srem_sinf		# src INF: dispatch on dst tag
  11312	cmpi.b		%d1,&DENORM
  11313	beq.l		srem_sdnrm		# src DENORM: same entry as norm
  11314	cmpi.b		%d1,&QNAN
  11315	beq.l		sop_sqnan		# src QNAN: choose which NAN to return
  11316	bra.l		sop_ssnan		# remaining tag: treated as src SNAN
  11317
  11318	global		fscale
  11319fscale:
  11320	mov.b		STAG(%a6),%d1		# d1 = src operand tag
  11321	beq.l		sscale_snorm		# tag == 0 (norm): dispatch on dst tag
  11322	cmpi.b		%d1,&ZERO
  11323	beq.l		sscale_szero		# src ZERO: dispatch on dst tag
  11324	cmpi.b		%d1,&INF
  11325	beq.l		sscale_sinf		# src INF: dispatch on dst tag
  11326	cmpi.b		%d1,&DENORM
  11327	beq.l		sscale_sdnrm		# src DENORM: same entry as norm
  11328	cmpi.b		%d1,&QNAN
  11329	beq.l		sop_sqnan		# src QNAN: choose which NAN to return
  11330	bra.l		sop_ssnan		# remaining tag: treated as src SNAN
  11331
  11332#########################################################################
  11333# XDEF ****************************************************************	#
  11334#	fgen_except(): catch an exception during transcendental		#
  11335#		       emulation					#
  11336#									#
  11337# XREF ****************************************************************	#
  11338#	fmul() - emulate a multiply instruction				#
  11339#	fadd() - emulate an add instruction				#
  11340#	fin() - emulate an fmove instruction				#
  11341#									#
  11342# INPUT ***************************************************************	#
  11343#	fp0 = destination operand					#
  11344#	d1  = type of instruction that took exception			#
  11345#	fsave frame = source operand					#
  11346#									#
  11347# OUTPUT **************************************************************	#
  11348#	fp0 = result							#
  11349#	fp1 = EXOP							#
  11350#									#
  11351# ALGORITHM ***********************************************************	#
  11352#	An exception occurred on the last instruction of the		#
  11353# transcendental emulation. hopefully, this won't be happening much	#
  11354# because it will be VERY slow.						#
  11355#	The only exceptions capable of passing through here are		#
  11356# Overflow, Underflow, and Unsupported Data Type.			#
  11357#									#
  11358#########################################################################
  11359
  11360	global		fgen_except
  11361fgen_except:
  11362	cmpi.b		0x3(%sp),&0x7		# is exception UNSUPP?
  11363	beq.b		fge_unsupp		# yes
  11364
  11365	mov.b		&NORM,STAG(%a6)		# not UNSUPP: tag src as NORM
  11366
  11367fge_cont:
  11368	mov.b		&NORM,DTAG(%a6)		# dst is tagged NORM in both cases
  11369
  11370# ok, I have a problem with putting the dst op at FP_DST. the emulation
  11371# routines aren't supposed to alter the operands but we've just squashed
  11372# FP_DST here...
  11373
  11374# 8/17/93 - this turns out to be more of a "cleanliness" standpoint
  11375# than a potential bug. to begin with, only the dyadic functions
  11376# frem,fmod, and fscale would get the dst trashed here. But, for
  11377# the 060SP, the FP_DST is never used again anyways.
  11378	fmovm.x		&0x80,FP_DST(%a6)	# dst op is in fp0
  11379
  11380	lea		0x4(%sp),%a0		# pass: ptr to src op
  11381	lea		FP_DST(%a6),%a1		# pass: ptr to dst op
  11382
  11383	cmpi.b		%d1,&FMOV_OP		# d1 selects which instruction to re-emulate
  11384	beq.b		fge_fin			# it was an "fmov"
  11385	cmpi.b		%d1,&FADD_OP
  11386	beq.b		fge_fadd		# it was an "fadd"
  11387fge_fmul:
  11388	bsr.l		fmul			# neither fmov nor fadd: emulate as fmul
  11389	rts
  11390fge_fadd:
  11391	bsr.l		fadd
  11392	rts
  11393fge_fin:
  11394	bsr.l		fin
  11395	rts
  11396
  11397fge_unsupp:
  11398	mov.b		&DENORM,STAG(%a6)	# UNSUPP: tag src as DENORM
  11399	bra.b		fge_cont		# rejoin common path
  11400
  11401#
  11402# This table holds the offsets of the emulation routines for each individual
  11403# math operation relative to the address of this table. Included are
  11404# routines like fadd/fmul/fabs as well as the transcendentals.
  11405# The location within the table is determined by the extension bits of the
  11406# operation longword.
  11407#
  11408
  11409	swbeg		&109			# 109 = number of table entries (0x00-0x6c)
  11410tbl_unsupp:
  11411	long		fin		- tbl_unsupp	# 00: fmove
  11412	long		fint		- tbl_unsupp	# 01: fint
  11413	long		fsinh		- tbl_unsupp	# 02: fsinh
  11414	long		fintrz		- tbl_unsupp	# 03: fintrz
  11415	long		fsqrt		- tbl_unsupp	# 04: fsqrt
  11416	long		tbl_unsupp	- tbl_unsupp	# 05: offset 0 (no routine listed)
  11417	long		flognp1		- tbl_unsupp	# 06: flognp1
  11418	long		tbl_unsupp	- tbl_unsupp	# 07: offset 0 (no routine listed)
  11419	long		fetoxm1		- tbl_unsupp	# 08: fetoxm1
  11420	long		ftanh		- tbl_unsupp	# 09: ftanh
  11421	long		fatan		- tbl_unsupp	# 0a: fatan
  11422	long		tbl_unsupp	- tbl_unsupp	# 0b: offset 0 (no routine listed)
  11423	long		fasin		- tbl_unsupp	# 0c: fasin
  11424	long		fatanh		- tbl_unsupp	# 0d: fatanh
  11425	long		fsine		- tbl_unsupp	# 0e: fsin
  11426	long		ftan		- tbl_unsupp	# 0f: ftan
  11427	long		fetox		- tbl_unsupp	# 10: fetox
  11428	long		ftwotox		- tbl_unsupp	# 11: ftwotox
  11429	long		ftentox		- tbl_unsupp	# 12: ftentox
  11430	long		tbl_unsupp	- tbl_unsupp	# 13: offset 0 (no routine listed)
  11431	long		flogn		- tbl_unsupp	# 14: flogn
  11432	long		flog10		- tbl_unsupp	# 15: flog10
  11433	long		flog2		- tbl_unsupp	# 16: flog2
  11434	long		tbl_unsupp	- tbl_unsupp	# 17: offset 0 (no routine listed)
  11435	long		fabs		- tbl_unsupp	# 18: fabs
  11436	long		fcosh		- tbl_unsupp	# 19: fcosh
  11437	long		fneg		- tbl_unsupp	# 1a: fneg
  11438	long		tbl_unsupp	- tbl_unsupp	# 1b: offset 0 (no routine listed)
  11439	long		facos		- tbl_unsupp	# 1c: facos
  11440	long		fcos		- tbl_unsupp	# 1d: fcos
  11441	long		fgetexp		- tbl_unsupp	# 1e: fgetexp
  11442	long		fgetman		- tbl_unsupp	# 1f: fgetman
  11443	long		fdiv		- tbl_unsupp	# 20: fdiv
  11444	long		fmod		- tbl_unsupp	# 21: fmod
  11445	long		fadd		- tbl_unsupp	# 22: fadd
  11446	long		fmul		- tbl_unsupp	# 23: fmul
  11447	long		fsgldiv		- tbl_unsupp	# 24: fsgldiv
  11448	long		frem		- tbl_unsupp	# 25: frem
  11449	long		fscale		- tbl_unsupp	# 26: fscale
  11450	long		fsglmul		- tbl_unsupp	# 27: fsglmul
  11451	long		fsub		- tbl_unsupp	# 28: fsub
  11452	long		tbl_unsupp	- tbl_unsupp	# 29: offset 0 (no routine listed)
  11453	long		tbl_unsupp	- tbl_unsupp	# 2a: offset 0 (no routine listed)
  11454	long		tbl_unsupp	- tbl_unsupp	# 2b: offset 0 (no routine listed)
  11455	long		tbl_unsupp	- tbl_unsupp	# 2c: offset 0 (no routine listed)
  11456	long		tbl_unsupp	- tbl_unsupp	# 2d: offset 0 (no routine listed)
  11457	long		tbl_unsupp	- tbl_unsupp	# 2e: offset 0 (no routine listed)
  11458	long		tbl_unsupp	- tbl_unsupp	# 2f: offset 0 (no routine listed)
  11459	long		fsincos		- tbl_unsupp	# 30: fsincos
  11460	long		fsincos		- tbl_unsupp	# 31: fsincos
  11461	long		fsincos		- tbl_unsupp	# 32: fsincos
  11462	long		fsincos		- tbl_unsupp	# 33: fsincos
  11463	long		fsincos		- tbl_unsupp	# 34: fsincos
  11464	long		fsincos		- tbl_unsupp	# 35: fsincos
  11465	long		fsincos		- tbl_unsupp	# 36: fsincos
  11466	long		fsincos		- tbl_unsupp	# 37: fsincos
  11467	long		fcmp		- tbl_unsupp	# 38: fcmp
  11468	long		tbl_unsupp	- tbl_unsupp	# 39: offset 0 (no routine listed)
  11469	long		ftst		- tbl_unsupp	# 3a: ftst
  11470	long		tbl_unsupp	- tbl_unsupp	# 3b: offset 0 (no routine listed)
  11471	long		tbl_unsupp	- tbl_unsupp	# 3c: offset 0 (no routine listed)
  11472	long		tbl_unsupp	- tbl_unsupp	# 3d: offset 0 (no routine listed)
  11473	long		tbl_unsupp	- tbl_unsupp	# 3e: offset 0 (no routine listed)
  11474	long		tbl_unsupp	- tbl_unsupp	# 3f: offset 0 (no routine listed)
  11475	long		fsin		- tbl_unsupp	# 40: fsmove
  11476	long		fssqrt		- tbl_unsupp	# 41: fssqrt
  11477	long		tbl_unsupp	- tbl_unsupp	# 42: offset 0 (no routine listed)
  11478	long		tbl_unsupp	- tbl_unsupp	# 43: offset 0 (no routine listed)
  11479	long		fdin		- tbl_unsupp	# 44: fdmove
  11480	long		fdsqrt		- tbl_unsupp	# 45: fdsqrt
  11481	long		tbl_unsupp	- tbl_unsupp	# 46: offset 0 (no routine listed)
  11482	long		tbl_unsupp	- tbl_unsupp	# 47: offset 0 (no routine listed)
  11483	long		tbl_unsupp	- tbl_unsupp	# 48: offset 0 (no routine listed)
  11484	long		tbl_unsupp	- tbl_unsupp	# 49: offset 0 (no routine listed)
  11485	long		tbl_unsupp	- tbl_unsupp	# 4a: offset 0 (no routine listed)
  11486	long		tbl_unsupp	- tbl_unsupp	# 4b: offset 0 (no routine listed)
  11487	long		tbl_unsupp	- tbl_unsupp	# 4c: offset 0 (no routine listed)
  11488	long		tbl_unsupp	- tbl_unsupp	# 4d: offset 0 (no routine listed)
  11489	long		tbl_unsupp	- tbl_unsupp	# 4e: offset 0 (no routine listed)
  11490	long		tbl_unsupp	- tbl_unsupp	# 4f: offset 0 (no routine listed)
  11491	long		tbl_unsupp	- tbl_unsupp	# 50: offset 0 (no routine listed)
  11492	long		tbl_unsupp	- tbl_unsupp	# 51: offset 0 (no routine listed)
  11493	long		tbl_unsupp	- tbl_unsupp	# 52: offset 0 (no routine listed)
  11494	long		tbl_unsupp	- tbl_unsupp	# 53: offset 0 (no routine listed)
  11495	long		tbl_unsupp	- tbl_unsupp	# 54: offset 0 (no routine listed)
  11496	long		tbl_unsupp	- tbl_unsupp	# 55: offset 0 (no routine listed)
  11497	long		tbl_unsupp	- tbl_unsupp	# 56: offset 0 (no routine listed)
  11498	long		tbl_unsupp	- tbl_unsupp	# 57: offset 0 (no routine listed)
  11499	long		fsabs		- tbl_unsupp	# 58: fsabs
  11500	long		tbl_unsupp	- tbl_unsupp	# 59: offset 0 (no routine listed)
  11501	long		fsneg		- tbl_unsupp	# 5a: fsneg
  11502	long		tbl_unsupp	- tbl_unsupp	# 5b: offset 0 (no routine listed)
  11503	long		fdabs		- tbl_unsupp	# 5c: fdabs
  11504	long		tbl_unsupp	- tbl_unsupp	# 5d: offset 0 (no routine listed)
  11505	long		fdneg		- tbl_unsupp	# 5e: fdneg
  11506	long		tbl_unsupp	- tbl_unsupp	# 5f: offset 0 (no routine listed)
  11507	long		fsdiv		- tbl_unsupp	# 60: fsdiv
  11508	long		tbl_unsupp	- tbl_unsupp	# 61: offset 0 (no routine listed)
  11509	long		fsadd		- tbl_unsupp	# 62: fsadd
  11510	long		fsmul		- tbl_unsupp	# 63: fsmul
  11511	long		fddiv		- tbl_unsupp	# 64: fddiv
  11512	long		tbl_unsupp	- tbl_unsupp	# 65: offset 0 (no routine listed)
  11513	long		fdadd		- tbl_unsupp	# 66: fdadd
  11514	long		fdmul		- tbl_unsupp	# 67: fdmul
  11515	long		fssub		- tbl_unsupp	# 68: fssub
  11516	long		tbl_unsupp	- tbl_unsupp	# 69: offset 0 (no routine listed)
  11517	long		tbl_unsupp	- tbl_unsupp	# 6a: offset 0 (no routine listed)
  11518	long		tbl_unsupp	- tbl_unsupp	# 6b: offset 0 (no routine listed)
  11519	long		fdsub		- tbl_unsupp	# 6c: fdsub
  11520
  11521#########################################################################
  11522# XDEF ****************************************************************	#
  11523#	fmul(): emulates the fmul instruction				#
  11524#	fsmul(): emulates the fsmul instruction				#
  11525#	fdmul(): emulates the fdmul instruction				#
  11526#									#
  11527# XREF ****************************************************************	#
  11528#	scale_to_zero_src() - scale src exponent to zero		#
  11529#	scale_to_zero_dst() - scale dst exponent to zero		#
  11530#	unf_res() - return default underflow result			#
  11531#	ovf_res() - return default overflow result			#
  11532#	res_qnan() - return QNAN result					#
  11533#	res_snan() - return SNAN result					#
  11534#									#
  11535# INPUT ***************************************************************	#
  11536#	a0 = pointer to extended precision source operand		#
  11537#	a1 = pointer to extended precision destination operand		#
  11538#	d0  rnd prec,mode						#
  11539#									#
  11540# OUTPUT **************************************************************	#
  11541#	fp0 = result							#
  11542#	fp1 = EXOP (if exception occurred)				#
  11543#									#
  11544# ALGORITHM ***********************************************************	#
  11545#	Handle NANs, infinities, and zeroes as special cases. Divide	#
  11546# norms/denorms into ext/sgl/dbl precision.				#
  11547#	For norms/denorms, scale the exponents such that a multiply	#
  11548# instruction won't cause an exception. Use the regular fmul to		#
  11549# compute a result. Check if the regular operands would have taken	#
  11550# an exception. If so, return the default overflow/underflow result	#
  11551# and return the EXOP if exceptions are enabled. Else, scale the	#
  11552# result operand to the proper exponent.				#
  11553#									#
  11554#########################################################################
  11555
  11556	align		0x10
  11557tbl_fmul_ovfl:
  11558	long		0x3fff - 0x7ffe		# ext_max: fmul compares its scale factor to this
  11559	long		0x3fff - 0x407e		# sgl_max (entries indexed by rnd precision)
  11560	long		0x3fff - 0x43fe		# dbl_max
  11561tbl_fmul_unfl:
  11562	long		0x3fff + 0x0001		# ext_unfl: fmul compares its scale factor to this
  11563	long		0x3fff - 0x3f80		# sgl_unfl (entries indexed by rnd precision)
  11564	long		0x3fff - 0x3c00		# dbl_unfl
  11565
  11566	global		fsmul
  11567fsmul:
  11568	andi.b		&0x30,%d0		# keep rnd mode; clear rnd prec
  11569	ori.b		&s_mode*0x10,%d0	# insert sgl prec
  11570	bra.b		fmul			# join the common multiply path
  11571
  11572	global		fdmul
  11573fdmul:
  11574	andi.b		&0x30,%d0		# keep rnd mode; clear rnd prec
  11575	ori.b		&d_mode*0x10,%d0	# insert dbl prec; falls through to fmul
  11576
  11577	global		fmul
  11578fmul:
  11579	mov.l		%d0,L_SCR3(%a6)		# store rnd info
  11580
  11581	clr.w		%d1			# zero d1.w; it becomes the tbl_fmul_op index
  11582	mov.b		DTAG(%a6),%d1		# fetch dst optype tag
  11583	lsl.b		&0x3,%d1		# dst tag picks a group of 8 table entries
  11584	or.b		STAG(%a6),%d1		# combine dst,src tags
  11585	bne.w		fmul_not_norm		# optimize on non-norm input
  11586
  11587fmul_norm:					# also the table target for DENORM operands
  11588	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)	# copy dst operand to FP_SCR1
  11589	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
  11590	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
  11591
  11592	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)	# copy src operand to FP_SCR0
  11593	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
  11594	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
  11595
  11596	bsr.l		scale_to_zero_src	# scale src exponent
  11597	mov.l		%d0,-(%sp)		# save scale factor 1
  11598
  11599	bsr.l		scale_to_zero_dst	# scale dst exponent
  11600
  11601	add.l		%d0,(%sp)		# SCALE_FACTOR = scale1 + scale2
  11602
  11603	mov.w		2+L_SCR3(%a6),%d1	# fetch low word of rnd info (holds prec)
  11604	lsr.b		&0x6,%d1		# shift prec to lo bits; NOTE(review): d1.w is the index, so bits 15:8 are assumed zero - confirm
  11605	mov.l		(%sp)+,%d0		# load S.F.
  11606	cmp.l		%d0,(tbl_fmul_ovfl.w,%pc,%d1.w*4) # would result ovfl?
  11607	beq.w		fmul_may_ovfl		# result may rnd to overflow
  11608	blt.w		fmul_ovfl		# result will overflow
  11609
  11610	cmp.l		%d0,(tbl_fmul_unfl.w,%pc,%d1.w*4) # would result unfl?
  11611	beq.w		fmul_may_unfl		# result may rnd to no unfl
  11612	bgt.w		fmul_unfl		# result will underflow
  11613
  11614#
  11615# NORMAL:
  11616# - the result of the multiply operation will neither overflow nor underflow.
  11617# - do the multiply to the proper precision and rounding mode.
  11618# - scale the result exponent using the scale factor. if both operands were
  11619# normalized then we really don't need to go through this scaling. but for now,
  11620# this will do.
  11621# - fmul_normal_exit expects the rounded result in fp0 and the scale factor in d0.
  11622fmul_normal:
  11623	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand into fp0
  11624
  11625	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
  11626	fmov.l		&0x0,%fpsr		# clear FPSR
  11627
  11628	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
  11629
  11630	fmov.l		%fpsr,%d1		# save status
  11631	fmov.l		&0x0,%fpcr		# clear FPCR
  11632
  11633	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
  11634
  11635fmul_normal_exit:				# also reached from fmul_may_ovfl and fmul_may_unfl
  11636	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
  11637	mov.l		%d2,-(%sp)		# save d2
  11638	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
  11639	mov.l		%d1,%d2			# make a copy
  11640	andi.l		&0x7fff,%d1		# strip sign
  11641	andi.w		&0x8000,%d2		# keep old sign
  11642	sub.l		%d0,%d1			# subtract scale factor from exponent
  11643	or.w		%d2,%d1			# concat old sign,new exp
  11644	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
  11645	mov.l		(%sp)+,%d2		# restore d2
  11646	fmovm.x		FP_SCR0(%a6),&0x80	# return rescaled result in fp0
  11647	rts
  11648
  11649#
  11650# OVERFLOW:
  11651# - the result of the multiply operation is an overflow.
  11652# - do the multiply to the proper precision and rounding mode in order to
  11653# set the inexact bits.
  11654# - calculate the default result and return it in fp0.
  11655# - if overflow or inexact is enabled, we need a multiply result rounded to
  11656# extended precision. if the original operation was extended, then we have this
  11657# result. if the original operation was single or double, we have to do another
  11658# multiply using extended precision and the correct rounding mode. the result
  11659# of this operation then has its exponent scaled by -0x6000 to create the
  11660# exceptional operand.
  11661# - the exceptional operand (EXOP) is returned in fp1.
  11662fmul_ovfl:
  11663	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand into fp0
  11664
  11665	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
  11666	fmov.l		&0x0,%fpsr		# clear FPSR
  11667
  11668	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
  11669
  11670	fmov.l		%fpsr,%d1		# save status
  11671	fmov.l		&0x0,%fpcr		# clear FPCR
  11672
  11673	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
  11674
  11675# save setting this until now because this is where fmul_may_ovfl may jump in
  11676fmul_ovfl_tst:
  11677	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
  11678
  11679	mov.b		FPCR_ENABLE(%a6),%d1
  11680	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
  11681	bne.b		fmul_ovfl_ena		# yes; build the EXOP first
  11682
  11683# calculate the default result
  11684fmul_ovfl_dis:
  11685	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
  11686	sne		%d1			# set sign param accordingly
  11687	mov.l		L_SCR3(%a6),%d0		# pass rnd prec,mode (scale factor in d0 is no longer needed)
  11688	bsr.l		ovf_res			# calculate default result
  11689	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
  11690	fmovm.x		(%a0),&0x80		# return default result in fp0 (a0 presumably set by ovf_res - confirm)
  11691	rts
  11692
  11693#
  11694# OVFL or INEX is enabled; Create EXOP:
  11695# - if precision is extended, then we have the EXOP. simply bias the exponent
  11696# with an extra -0x6000. if the precision is single or double, we need to
  11697# calculate a result rounded to extended precision.
  11698#
  11699fmul_ovfl_ena:
  11700	mov.l		L_SCR3(%a6),%d1
  11701	andi.b		&0xc0,%d1		# test the rnd prec
  11702	bne.b		fmul_ovfl_ena_sd	# it's sgl or dbl
  11703
  11704fmul_ovfl_ena_cont:
  11705	fmovm.x		&0x80,FP_SCR0(%a6)	# move result to stack
  11706
  11707	mov.l		%d2,-(%sp)		# save d2
  11708	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
  11709	mov.w		%d1,%d2			# make a copy
  11710	andi.l		&0x7fff,%d1		# strip sign
  11711	sub.l		%d0,%d1			# subtract scale factor from exponent
  11712	subi.l		&0x6000,%d1		# subtract bias
  11713	andi.w		&0x7fff,%d1		# clear sign bit
  11714	andi.w		&0x8000,%d2		# keep old sign
  11715	or.w		%d2,%d1			# concat old sign,new exp
  11716	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
  11717	mov.l		(%sp)+,%d2		# restore d2
  11718	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
  11719	bra.b		fmul_ovfl_dis		# then produce the default result in fp0
  11720
  11721fmul_ovfl_ena_sd:
  11722	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand
  11723
  11724	mov.l		L_SCR3(%a6),%d1
  11725	andi.b		&0x30,%d1		# keep rnd mode only
  11726	fmov.l		%d1,%fpcr		# set FPCR
  11727
  11728	fmul.x		FP_SCR0(%a6),%fp0	# redo multiply with the precision bits cleared
  11729
  11730	fmov.l		&0x0,%fpcr		# clear FPCR
  11731	bra.b		fmul_ovfl_ena_cont
  11732
  11733#
  11734# may OVERFLOW:
  11735# - the result of the multiply operation MAY overflow.
  11736# - do the multiply to the proper precision and rounding mode in order to
  11737# set the inexact bits.
  11738# - if |result| >= 2 go to fmul_ovfl_tst; otherwise exit via fmul_normal_exit.
  11739#
  11740fmul_may_ovfl:
  11741	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
  11742
  11743	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
  11744	fmov.l		&0x0,%fpsr		# clear FPSR
  11745
  11746	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
  11747
  11748	fmov.l		%fpsr,%d1		# save status
  11749	fmov.l		&0x0,%fpcr		# clear FPCR
  11750
  11751	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
  11752
  11753	fabs.x		%fp0,%fp1		# fp1 = |result|; fp0 keeps the signed result
  11754	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
  11755	fbge.w		fmul_ovfl_tst		# yes; overflow has occurred
  11756
  11757# no, it didn't overflow; we have correct result
  11758	bra.w		fmul_normal_exit
  11759
  11760#
  11761# UNDERFLOW:
  11762# - the result of the multiply operation is an underflow.
  11763# - do the multiply in extended precision, round-to-zero, and let unf_res()
  11764# round it and build the default result.
  11765# - return the default result in fp0.
  11766# - if underflow or inexact is enabled, we need a multiply result rounded to
  11767# extended precision using the user's rounding mode. since the first multiply
  11768# used round-to-zero, the multiply is done again (see fmul_unfl_ena) with the
  11769# correct rounding mode. the result
  11770# of this operation then has its exponent biased by +0x6000 to create the
  11771# exceptional operand.
  11772#
  11773fmul_unfl:
  11774	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
  11775
  11776# for fun, let's use only extended precision, round to zero. then, let
  11777# the unf_res() routine figure out all the rest.
  11778# will we get the correct answer?
  11779	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand
  11780
  11781	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
  11782	fmov.l		&0x0,%fpsr		# clear FPSR
  11783
  11784	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
  11785
  11786	fmov.l		%fpsr,%d1		# save status
  11787	fmov.l		&0x0,%fpcr		# clear FPCR
  11788
  11789	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
  11790
  11791	mov.b		FPCR_ENABLE(%a6),%d1
  11792	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
  11793	bne.b		fmul_unfl_ena		# yes
  11794
  11795fmul_unfl_dis:
  11796	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
  11797
  11798	lea		FP_SCR0(%a6),%a0	# pass: result addr
  11799	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
  11800	bsr.l		unf_res			# calculate default result
  11801	or.b		%d0,FPSR_CC(%a6)	# unf_res may have set 'Z'
  11802	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
  11803	rts
  11804
  11805#
  11806# UNFL or INEX is enabled; create the EXOP in fp1, then fall back to fmul_unfl_dis.
  11807#
  11808fmul_unfl_ena:
  11809	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1
  11810
  11811	mov.l		L_SCR3(%a6),%d1
  11812	andi.b		&0xc0,%d1		# is precision extended?
  11813	bne.b		fmul_unfl_ena_sd	# no, sgl or dbl
  11814
  11815# if the rnd mode is anything but RZ, then we have to re-do the above
  11816# multiplication because we used RZ for all.
  11817	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
  11818
  11819fmul_unfl_ena_cont:
  11820	fmov.l		&0x0,%fpsr		# clear FPSR
  11821
  11822	fmul.x		FP_SCR0(%a6),%fp1	# execute multiply
  11823
  11824	fmov.l		&0x0,%fpcr		# clear FPCR
  11825
  11826	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack (overwrites src copy)
  11827	mov.l		%d2,-(%sp)		# save d2
  11828	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
  11829	mov.l		%d1,%d2			# make a copy
  11830	andi.l		&0x7fff,%d1		# strip sign
  11831	andi.w		&0x8000,%d2		# keep old sign
  11832	sub.l		%d0,%d1			# subtract scale factor from exponent
  11833	addi.l		&0x6000,%d1		# add bias
  11834	andi.w		&0x7fff,%d1		# clear sign position
  11835	or.w		%d2,%d1			# concat old sign,new exp
  11836	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
  11837	mov.l		(%sp)+,%d2		# restore d2
  11838	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
  11839	bra.w		fmul_unfl_dis		# fp0 still holds the RZ result for unf_res
  11840
  11841fmul_unfl_ena_sd:
  11842	mov.l		L_SCR3(%a6),%d1
  11843	andi.b		&0x30,%d1		# use only rnd mode
  11844	fmov.l		%d1,%fpcr		# set FPCR
  11845
  11846	bra.b		fmul_unfl_ena_cont
  11847
  11848# MAY UNDERFLOW:
  11849# -use the correct rounding mode and precision. this code favors operations
  11850# that do not underflow.
  11851fmul_may_unfl:
  11852	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand
  11853
  11854	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
  11855	fmov.l		&0x0,%fpsr		# clear FPSR
  11856
  11857	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply
  11858
  11859	fmov.l		%fpsr,%d1		# save status
  11860	fmov.l		&0x0,%fpcr		# clear FPCR
  11861
  11862	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
  11863
  11864	fabs.x		%fp0,%fp1		# make a copy of result
  11865	fcmp.b		%fp1,&0x2		# compare |result| with 2.b
  11866	fbgt.w		fmul_normal_exit	# |result| > 2; no underflow occurred
  11867	fblt.w		fmul_unfl		# |result| < 2; underflow occurred
  11868
  11869#
  11870# we still don't know if underflow occurred. result is ~ equal to 2. but,
  11871# we don't know if the result was an underflow that rounded up to a 2 or
  11872# a normalized number that rounded down to a 2. so, redo the entire operation
  11873# using RZ as the rounding mode to see what the pre-rounded result is.
  11874# this case should be relatively rare.
  11875#
  11876	fmovm.x		FP_SCR1(%a6),&0x40	# load dst operand into fp1
  11877
  11878	mov.l		L_SCR3(%a6),%d1
  11879	andi.b		&0xc0,%d1		# keep rnd prec
  11880	ori.b		&rz_mode*0x10,%d1	# insert RZ
  11881
  11882	fmov.l		%d1,%fpcr		# set FPCR
  11883	fmov.l		&0x0,%fpsr		# clear FPSR
  11884
  11885	fmul.x		FP_SCR0(%a6),%fp1	# execute multiply
  11886
  11887	fmov.l		&0x0,%fpcr		# clear FPCR
  11888	fabs.x		%fp1			# make absolute value
  11889	fcmp.b		%fp1,&0x2		# is |result| < 2.b?
  11890	fbge.w		fmul_normal_exit	# no; no underflow occurred
  11891	bra.w		fmul_unfl		# yes, underflow occurred
  11892
  11893################################################################################
  11894
  11895#
  11896# Multiply: inputs are not both normalized; what are they?
  11897#
  11898fmul_not_norm:
  11899	mov.w		(tbl_fmul_op.b,%pc,%d1.w*2),%d1	# fetch handler offset for (dst tag*8 + src tag)
  11900	jmp		(tbl_fmul_op.b,%pc,%d1.w)	# jump to tbl_fmul_op + offset
  11901
  11902	swbeg		&48
  11903tbl_fmul_op:					# entries are labelled DST x SRC
  11904	short		fmul_norm	- tbl_fmul_op # NORM x NORM
  11905	short		fmul_zero	- tbl_fmul_op # NORM x ZERO
  11906	short		fmul_inf_src	- tbl_fmul_op # NORM x INF
  11907	short		fmul_res_qnan	- tbl_fmul_op # NORM x QNAN
  11908	short		fmul_norm	- tbl_fmul_op # NORM x DENORM
  11909	short		fmul_res_snan	- tbl_fmul_op # NORM x SNAN
  11910	short		tbl_fmul_op	- tbl_fmul_op # (filler: offset 0)
  11911	short		tbl_fmul_op	- tbl_fmul_op # (filler: offset 0)
  11912
  11913	short		fmul_zero	- tbl_fmul_op # ZERO x NORM
  11914	short		fmul_zero	- tbl_fmul_op # ZERO x ZERO
  11915	short		fmul_res_operr	- tbl_fmul_op # ZERO x INF
  11916	short		fmul_res_qnan	- tbl_fmul_op # ZERO x QNAN
  11917	short		fmul_zero	- tbl_fmul_op # ZERO x DENORM
  11918	short		fmul_res_snan	- tbl_fmul_op # ZERO x SNAN
  11919	short		tbl_fmul_op	- tbl_fmul_op # (filler: offset 0)
  11920	short		tbl_fmul_op	- tbl_fmul_op # (filler: offset 0)
  11921
  11922	short		fmul_inf_dst	- tbl_fmul_op # INF x NORM
  11923	short		fmul_res_operr	- tbl_fmul_op # INF x ZERO
  11924	short		fmul_inf_dst	- tbl_fmul_op # INF x INF
  11925	short		fmul_res_qnan	- tbl_fmul_op # INF x QNAN
  11926	short		fmul_inf_dst	- tbl_fmul_op # INF x DENORM
  11927	short		fmul_res_snan	- tbl_fmul_op # INF x SNAN
  11928	short		tbl_fmul_op	- tbl_fmul_op # (filler: offset 0)
  11929	short		tbl_fmul_op	- tbl_fmul_op # (filler: offset 0)
  11930
  11931	short		fmul_res_qnan	- tbl_fmul_op # QNAN x NORM
  11932	short		fmul_res_qnan	- tbl_fmul_op # QNAN x ZERO
  11933	short		fmul_res_qnan	- tbl_fmul_op # QNAN x INF
  11934	short		fmul_res_qnan	- tbl_fmul_op # QNAN x QNAN
  11935	short		fmul_res_qnan	- tbl_fmul_op # QNAN x DENORM
  11936	short		fmul_res_snan	- tbl_fmul_op # QNAN x SNAN
  11937	short		tbl_fmul_op	- tbl_fmul_op # (filler: offset 0)
  11938	short		tbl_fmul_op	- tbl_fmul_op # (filler: offset 0)
  11939
  11940	short		fmul_norm	- tbl_fmul_op # DENORM x NORM
  11941	short		fmul_zero	- tbl_fmul_op # DENORM x ZERO
  11942	short		fmul_inf_src	- tbl_fmul_op # DENORM x INF
  11943	short		fmul_res_qnan	- tbl_fmul_op # DENORM x QNAN
  11944	short		fmul_norm	- tbl_fmul_op # DENORM x DENORM
  11945	short		fmul_res_snan	- tbl_fmul_op # DENORM x SNAN
  11946	short		tbl_fmul_op	- tbl_fmul_op # (filler: offset 0)
  11947	short		tbl_fmul_op	- tbl_fmul_op # (filler: offset 0)
  11948
  11949	short		fmul_res_snan	- tbl_fmul_op # SNAN x NORM
  11950	short		fmul_res_snan	- tbl_fmul_op # SNAN x ZERO
  11951	short		fmul_res_snan	- tbl_fmul_op # SNAN x INF
  11952	short		fmul_res_snan	- tbl_fmul_op # SNAN x QNAN
  11953	short		fmul_res_snan	- tbl_fmul_op # SNAN x DENORM
  11954	short		fmul_res_snan	- tbl_fmul_op # SNAN x SNAN
  11955	short		tbl_fmul_op	- tbl_fmul_op # (filler: offset 0)
  11956	short		tbl_fmul_op	- tbl_fmul_op # (filler: offset 0)
  11957
  11958fmul_res_operr:				# local targets for the 16-bit tbl_fmul_op offsets
  11959	bra.l		res_operr		# long branch to the shared OPERR handler
  11960fmul_res_snan:
  11961	bra.l		res_snan		# long branch to the shared SNAN handler
  11962fmul_res_qnan:
  11963	bra.l		res_qnan		# long branch to the shared QNAN handler
  11964
  11965#
  11966# Multiply: (Zero x Zero) || (Zero x norm) || (Zero x denorm)
  11967# Returns a ZERO in fp0 whose sign is the exclusive-or of the operand signs.
  11968	global		fmul_zero		# global for fsglmul
  11969fmul_zero:
  11970	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
  11971	mov.b		DST_EX(%a1),%d1
  11972	eor.b		%d0,%d1			# bit 7 of d1 = src sign ^ dst sign
  11973	bpl.b		fmul_zero_p		# result ZERO is pos.
  11974fmul_zero_n:
  11975	fmov.s		&0x80000000,%fp0	# load -ZERO
  11976	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/N
  11977	rts
  11978fmul_zero_p:
  11979	fmov.s		&0x00000000,%fp0	# load +ZERO
  11980	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
  11981	rts
  11982
  11983#
  11984# Multiply: (inf x inf) || (inf x norm) || (inf x denorm)
  11985#
  11986# Note: The j-bit for an infinity is a don't-care. However, to be
  11987# strictly compatible w/ the 68881/882, we make sure to return an
  11988# INF w/ the j-bit set if the input INF j-bit was set. Destination
  11989# INFs take priority. The result sign is the exclusive-or of the operand signs.
  11990#
  11991	global		fmul_inf_dst		# global for fsglmul
  11992fmul_inf_dst:
  11993	fmovm.x		DST(%a1),&0x80		# return INF result in fp0
  11994	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
  11995	mov.b		DST_EX(%a1),%d1
  11996	eor.b		%d0,%d1			# bit 7 of d1 = src sign ^ dst sign
  11997	bpl.b		fmul_inf_dst_p		# result INF is pos.
  11998fmul_inf_dst_n:				# shared with fmul_inf_src
  11999	fabs.x		%fp0			# clear result sign
  12000	fneg.x		%fp0			# set result sign
  12001	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N
  12002	rts
  12003fmul_inf_dst_p:				# shared with fmul_inf_src
  12004	fabs.x		%fp0			# clear result sign
  12005	mov.b		&inf_bmask,FPSR_CC(%a6)	# set INF
  12006	rts
  12007
  12008	global		fmul_inf_src		# global for fsglmul
  12009fmul_inf_src:
  12010	fmovm.x		SRC(%a0),&0x80		# return INF result in fp0
  12011	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
  12012	mov.b		DST_EX(%a1),%d1
  12013	eor.b		%d0,%d1			# bit 7 of d1 = src sign ^ dst sign
  12014	bpl.b		fmul_inf_dst_p		# result INF is pos.
  12015	bra.b		fmul_inf_dst_n		# result INF is neg.
  12016
  12017#########################################################################
  12018# XDEF ****************************************************************	#
  12019#	fin(): emulates the fmove instruction				#
  12020#	fsin(): emulates the fsmove instruction				#
  12021#	fdin(): emulates the fdmove instruction				#
  12022#									#
  12023# XREF ****************************************************************	#
  12024#	norm() - normalize mantissa for EXOP on denorm			#
  12025#	scale_to_zero_src() - scale src exponent to zero		#
  12026#	ovf_res() - return default overflow result			#
  12027#	unf_res() - return default underflow result			#
  12028#	res_qnan_1op() - return QNAN result				#
  12029#	res_snan_1op() - return SNAN result				#
  12030#									#
  12031# INPUT ***************************************************************	#
  12032#	a0 = pointer to extended precision source operand		#
  12033#	d0 = round prec/mode						#
  12034#									#
  12035# OUTPUT **************************************************************	#
  12036#	fp0 = result							#
  12037#	fp1 = EXOP (if exception occurred)				#
  12038#									#
  12039# ALGORITHM ***********************************************************	#
  12040#	Handle NANs, infinities, and zeroes as special cases. Divide	#
  12041# norms into extended, single, and double precision.			#
  12042#	Norms can be emulated w/ a regular fmove instruction. For	#
  12043# sgl/dbl, must scale exponent and perform an "fmove". Check to see	#
  12044# if the result would have overflowed/underflowed. If so, use unf_res()	#
  12045# or ovf_res() to return the default result. Also return EXOP if	#
  12046# exception is enabled. If no exception, return the default result.	#
  12047#	Unnorms don't pass through here.				#
  12048#									#
  12049#########################################################################
  12050
  12051	global		fsin
  12052fsin:
  12053	andi.b		&0x30,%d0		# keep rnd mode; clear rnd prec
  12054	ori.b		&s_mode*0x10,%d0	# insert sgl precision
  12055	bra.b		fin			# join the common move-in path
  12056
  12057	global		fdin
  12058fdin:
  12059	andi.b		&0x30,%d0		# keep rnd mode; clear rnd prec
  12060	ori.b		&d_mode*0x10,%d0	# insert dbl precision; falls through to fin
  12061
  12062	global		fin
  12063fin:
  12064	mov.l		%d0,L_SCR3(%a6)		# store rnd info
  12065
  12066	mov.b		STAG(%a6),%d1		# fetch src optype tag
  12067	bne.w		fin_not_norm		# nonzero tag: not a NORM
  12068
  12069#
  12070# FP MOVE IN: NORMs and DENORMs ONLY!
  12071#
  12072fin_norm:
  12073	andi.b		&0xc0,%d0		# is precision extended?
  12074	bne.w		fin_not_ext		# no, so go handle dbl or sgl
  12075
  12076#
  12077# precision selected is extended. so...we cannot get an underflow
  12078# or overflow because of rounding to the correct precision. so...
  12079# skip the scaling and unscaling...
  12080#
  12081	tst.b		SRC_EX(%a0)		# is the operand negative?
  12082	bpl.b		fin_norm_done		# no
  12083	bset		&neg_bit,FPSR_CC(%a6)	# yes, so set 'N' ccode bit
  12084fin_norm_done:
  12085	fmovm.x		SRC(%a0),&0x80		# return operand unchanged in fp0
  12086	rts
  12087
  12088#
  12089# for an extended precision DENORM, the UNFL exception bit is set
  12090# the accrued bit is NOT set in this instance(no inexactness!)
  12091#
  12092fin_denorm:
  12093	andi.b		&0xc0,%d0		# is precision extended?
  12094	bne.w		fin_not_ext		# no, so go handle dbl or sgl
  12095
  12096	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
  12097	tst.b		SRC_EX(%a0)		# is the operand negative?
  12098	bpl.b		fin_denorm_done		# no
  12099	bset		&neg_bit,FPSR_CC(%a6)	# yes, so set 'N' ccode bit
  12100fin_denorm_done:
  12101	fmovm.x		SRC(%a0),&0x80		# return result in fp0
  12102	btst		&unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
  12103	bne.b		fin_denorm_unfl_ena	# yes; also build the EXOP in fp1
  12104	rts
  12105
  12106#
  12107# the input is an extended DENORM and underflow is enabled in the FPCR.
  12108# normalize the mantissa and add the bias of 0x6000 to the resulting negative
  12109# exponent and insert back into the operand.
  12110#
  12111fin_denorm_unfl_ena:
  12112	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)	# work on a copy in FP_SCR0
  12113	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
  12114	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
  12115	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
  12116	bsr.l		norm			# normalize result
  12117	neg.w		%d0			# new exponent = -(shft val)
  12118	addi.w		&0x6000,%d0		# add new bias to exponent
  12119	mov.w		FP_SCR0_EX(%a6),%d1	# fetch old sign,exp
  12120	andi.w		&0x8000,%d1		# keep old sign
  12121	andi.w		&0x7fff,%d0		# clear sign position
  12122	or.w		%d1,%d0			# concat new exp,old sign
  12123	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
  12124	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
  12125	rts
  12126
  12127#
  12128# operand is to be rounded to single or double precision
  12129# (d0.b holds only the precision bits here; both callers masked it with 0xc0)
  12130fin_not_ext:
  12131	cmpi.b		%d0,&s_mode*0x10	# separate sgl/dbl prec
  12132	bne.b		fin_dbl
  12133
  12134#
  12135# operand is to be rounded to single precision
  12136#
  12137fin_sgl:
  12138	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)	# copy src operand to FP_SCR0
  12139	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
  12140	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
  12141	bsr.l		scale_to_zero_src	# calculate scale factor
  12142
  12143	cmpi.l		%d0,&0x3fff-0x3f80	# will move in underflow?
  12144	bge.w		fin_sd_unfl		# yes; go handle underflow
  12145	cmpi.l		%d0,&0x3fff-0x407e	# will move in overflow?
  12146	beq.w		fin_sd_may_ovfl		# maybe; go check
  12147	blt.w		fin_sd_ovfl		# yes; go handle overflow
  12148
  12149#
  12150# operand will NOT overflow or underflow when moved into the fp reg file
  12151# (fin_sd_normal_exit expects the rounded value in fp0 and the scale factor in d0)
  12152fin_sd_normal:
  12153	fmov.l		&0x0,%fpsr		# clear FPSR
  12154	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
  12155
  12156	fmov.x		FP_SCR0(%a6),%fp0	# perform move
  12157
  12158	fmov.l		%fpsr,%d1		# save FPSR
  12159	fmov.l		&0x0,%fpcr		# clear FPCR
  12160
  12161	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
  12162
  12163fin_sd_normal_exit:				# also reached from fin_sd_may_ovfl
  12164	mov.l		%d2,-(%sp)		# save d2
  12165	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
  12166	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
  12167	mov.w		%d1,%d2			# make a copy
  12168	andi.l		&0x7fff,%d1		# strip sign
  12169	sub.l		%d0,%d1			# subtract scale factor from exponent
  12170	andi.w		&0x8000,%d2		# keep old sign
  12171	or.w		%d1,%d2			# concat old sign,new exponent
  12172	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
  12173	mov.l		(%sp)+,%d2		# restore d2
  12174	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
  12175	rts
  12176
  12177#
  12178# operand is to be rounded to double precision
  12179#
  12180fin_dbl:
  12181	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)	# copy src operand to FP_SCR0
  12182	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
  12183	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
  12184	bsr.l		scale_to_zero_src	# calculate scale factor
  12185
  12186	cmpi.l		%d0,&0x3fff-0x3c00	# will move in underflow?
  12187	bge.w		fin_sd_unfl		# yes; go handle underflow
  12188	cmpi.l		%d0,&0x3fff-0x43fe	# will move in overflow?
  12189	beq.w		fin_sd_may_ovfl		# maybe; go check
  12190	blt.w		fin_sd_ovfl		# yes; go handle overflow
  12191	bra.w		fin_sd_normal		# no; go handle normalized op
  12192
  12193#
  12194# operand WILL underflow when moved in to the fp register file
  12195#
  12196fin_sd_unfl:
  12197	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
  12198
  12199	tst.b		FP_SCR0_EX(%a6)		# is operand negative?
  12200	bpl.b		fin_sd_unfl_tst		# no
  12201	bset		&neg_bit,FPSR_CC(%a6)	# set 'N' ccode bit
  12202
  12203# if underflow or inexact is enabled, then go calculate the EXOP first.
  12204fin_sd_unfl_tst:
  12205	mov.b		FPCR_ENABLE(%a6),%d1
  12206	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
  12207	bne.b		fin_sd_unfl_ena		# yes
  12208
  12209fin_sd_unfl_dis:
  12210	lea		FP_SCR0(%a6),%a0	# pass: result addr
  12211	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
  12212	bsr.l		unf_res			# calculate default result
  12213	or.b		%d0,FPSR_CC(%a6)	# unf_res may have set 'Z'
  12214	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
  12215	rts
  12216
  12217#
  12218# operand will underflow AND underflow or inexact is enabled.
  12219# Therefore, we must return the result rounded to extended precision.
  12220# The EXOP is built in FP_SCR1 so that FP_SCR0 stays intact for unf_res.
  12221fin_sd_unfl_ena:
  12222	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)	# copy mantissa to FP_SCR1
  12223	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
  12224	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent
  12225
  12226	mov.l		%d2,-(%sp)		# save d2
  12227	mov.w		%d1,%d2			# make a copy
  12228	andi.l		&0x7fff,%d1		# strip sign
  12229	sub.l		%d0,%d1			# subtract scale factor
  12230	andi.w		&0x8000,%d2		# extract old sign
  12231	addi.l		&0x6000,%d1		# add new bias
  12232	andi.w		&0x7fff,%d1		# clear sign position
  12233	or.w		%d1,%d2			# concat old sign,new exp
  12234	mov.w		%d2,FP_SCR1_EX(%a6)	# insert new exponent
  12235	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
  12236	mov.l		(%sp)+,%d2		# restore d2
  12237	bra.b		fin_sd_unfl_dis		# then produce the default result
  12238
  12239#
  12240# operand WILL overflow.
  12241#
  12242fin_sd_ovfl:
  12243	fmov.l		&0x0,%fpsr		# clear FPSR
  12244	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
  12245
  12246	fmov.x		FP_SCR0(%a6),%fp0	# perform move
  12247
  12248	fmov.l		&0x0,%fpcr		# clear FPCR
  12249	fmov.l		%fpsr,%d1		# save FPSR
  12250
  12251	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
  12252
  12253fin_sd_ovfl_tst:				# fin_sd_may_ovfl joins here
  12254	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
  12255
  12256	mov.b		FPCR_ENABLE(%a6),%d1
  12257	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
  12258	bne.b		fin_sd_ovfl_ena		# yes
  12259
  12260#
  12261# neither OVFL nor INEX is enabled (or the EXOP has already been built);
  12262# create the default result by calling ovf_res().
  12263#
  12264fin_sd_ovfl_dis:
  12265	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
  12266	sne		%d1			# set sign param accordingly
  12267	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
  12268	bsr.l		ovf_res			# calculate default result
  12269	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
  12270	fmovm.x		(%a0),&0x80		# return default result in fp0 (a0 presumably set by ovf_res - confirm)
  12271	rts
  12272
  12273#
  12274# OVFL or INEX is enabled.
  12275# the INEX2 bit has already been updated by the round to the correct precision.
  12276# now, build the EXOP from the scaled operand in FP_SCR0 (the FPSR is not altered).
  12277#
  12278fin_sd_ovfl_ena:
  12279	mov.l		%d2,-(%sp)		# save d2
  12280	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
  12281	mov.l		%d1,%d2			# make a copy
  12282	andi.l		&0x7fff,%d1		# strip sign
  12283	andi.w		&0x8000,%d2		# keep old sign
  12284	sub.l		%d0,%d1			# subtract scale factor from exponent
  12285	sub.l		&0x6000,%d1		# subtract bias
  12286	andi.w		&0x7fff,%d1		# clear sign position
  12287	or.w		%d2,%d1			# concat old sign,new exp
  12288	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
  12289	mov.l		(%sp)+,%d2		# restore d2
  12290	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
  12291	bra.b		fin_sd_ovfl_dis		# then produce the default result
  12292
  12293#
  12294# the move in MAY overflow. so do the move and test whether |result| reached 2.
  12295#
  12296fin_sd_may_ovfl:
  12297	fmov.l		&0x0,%fpsr		# clear FPSR
  12298	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
  12299
  12300	fmov.x		FP_SCR0(%a6),%fp0	# perform the move
  12301
  12302	fmov.l		%fpsr,%d1		# save status
  12303	fmov.l		&0x0,%fpcr		# clear FPCR
  12304
  12305	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
  12306
  12307	fabs.x		%fp0,%fp1		# fp1 = |result|; fp0 keeps the signed result
  12308	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
  12309	fbge.w		fin_sd_ovfl_tst		# yes; overflow has occurred
  12310
  12311# no, it didn't overflow; we have correct result
  12312	bra.w		fin_sd_normal_exit
  12313
  12314##########################################################################
  12315
  12316#
  12317# operand is not a NORM: check its optype and branch accordingly
  12318# (d1.b holds the src optype tag loaded in fin)
  12319fin_not_norm:
  12320	cmpi.b		%d1,&DENORM		# weed out DENORM
  12321	beq.w		fin_denorm
  12322	cmpi.b		%d1,&SNAN		# weed out SNANs
  12323	beq.l		res_snan_1op
  12324	cmpi.b		%d1,&QNAN		# weed out QNANs
  12325	beq.l		res_qnan_1op
  12326
  12327#
  12328# do the fmove in; at this point, only possible ops are ZERO and INF.
  12329# use fmov to determine ccodes.
  12330# prec:mode should be zero at this point but it won't affect answer anyways.
  12331#
  12332	fmov.x		SRC(%a0),%fp0		# do fmove in
  12333	fmov.l		%fpsr,%d0		# no exceptions possible
  12334	rol.l		&0x8,%d0		# put ccodes in lo byte
  12335	mov.b		%d0,FPSR_CC(%a6)	# insert correct ccodes
  12336	rts
  12337
  12338#########################################################################
  12339# XDEF ****************************************************************	#
  12340#	fdiv(): emulates the fdiv instruction				#
  12341#	fsdiv(): emulates the fsdiv instruction				#
  12342#	fddiv(): emulates the fddiv instruction				#
  12343#									#
  12344# XREF ****************************************************************	#
  12345#	scale_to_zero_src() - scale src exponent to zero		#
  12346#	scale_to_zero_dst() - scale dst exponent to zero		#
  12347#	unf_res() - return default underflow result			#
  12348#	ovf_res() - return default overflow result			#
  12349#	res_qnan() - return QNAN result					#
  12350#	res_snan() - return SNAN result					#
  12351#									#
  12352# INPUT ***************************************************************	#
  12353#	a0 = pointer to extended precision source operand		#
  12354#	a1 = pointer to extended precision destination operand		#
  12355#	d0  rnd prec,mode						#
  12356#									#
  12357# OUTPUT **************************************************************	#
  12358#	fp0 = result							#
  12359#	fp1 = EXOP (if exception occurred)				#
  12360#									#
  12361# ALGORITHM ***********************************************************	#
  12362#	Handle NANs, infinities, and zeroes as special cases. Divide	#
  12363# norms/denorms into ext/sgl/dbl precision.				#
  12364#	For norms/denorms, scale the exponents such that a divide	#
  12365# instruction won't cause an exception. Use the regular fdiv to		#
  12366# compute a result. Check if the regular operands would have taken	#
  12367# an exception. If so, return the default overflow/underflow result	#
  12368# and return the EXOP if exceptions are enabled. Else, scale the	#
  12369# result operand to the proper exponent.				#
  12370#									#
  12371#########################################################################
  12372
  12373	align		0x10
  12374tbl_fdiv_unfl:				# compared against the scale factor in fdiv_norm
  12375	long		0x3fff - 0x0000		# ext_unfl
  12376	long		0x3fff - 0x3f81		# sgl_unfl
  12377	long		0x3fff - 0x3c01		# dbl_unfl
  12378
  12379tbl_fdiv_ovfl:				# same layout: one long per rnd prec (ext,sgl,dbl)
  12380	long		0x3fff - 0x7ffe		# ext overflow exponent
  12381	long		0x3fff - 0x407e		# sgl overflow exponent
  12382	long		0x3fff - 0x43fe		# dbl overflow exponent
  12383
  12384	global		fsdiv
  12385fsdiv:
  12386	andi.b		&0x30,%d0		# keep rnd mode; clear rnd prec
  12387	ori.b		&s_mode*0x10,%d0	# insert sgl prec
  12388	bra.b		fdiv			# join the common divide path
  12389
  12390	global		fddiv
  12391fddiv:
  12392	andi.b		&0x30,%d0		# keep rnd mode; clear rnd prec
  12393	ori.b		&d_mode*0x10,%d0	# insert dbl prec; falls through to fdiv
  12394
  12395	global		fdiv
  12396fdiv:
  12397	mov.l		%d0,L_SCR3(%a6)		# store rnd info
  12398
  12399	clr.w		%d1			# zero d1.w before building the tag index
  12400	mov.b		DTAG(%a6),%d1		# fetch dst optype tag
  12401	lsl.b		&0x3,%d1		# dst tag goes in bits 5:3
  12402	or.b		STAG(%a6),%d1		# combine dst,src tags
  12403
  12404	bne.w		fdiv_not_norm		# optimize on non-norm input
  12405
  12406#
  12407# DIVIDE: NORMs and DENORMs ONLY!
  12408#
# Copies both operands to scratch (dst -> FP_SCR1, src -> FP_SCR0), scales
# each one with scale_to_zero_{src,dst}, and uses the combined scale factor
# to pick the normal, may-overflow, may-underflow or underflow path.
  12409fdiv_norm:
  12410	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
  12411	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
  12412	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
  12413
  12414	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
  12415	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
  12416	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
  12417
  12418	bsr.l		scale_to_zero_src	# scale src exponent
  12419	mov.l		%d0,-(%sp)		# save scale factor 1 (src)
  12420
  12421	bsr.l		scale_to_zero_dst	# scale dst exponent; d0 = scale factor 2
  12422
  12423	neg.l		(%sp)			# (sp) = -scale1
  12424	add.l		%d0,(%sp)		# (sp) = scale2 - scale1 = SCALE FACTOR
  12425
  12426	mov.w		2+L_SCR3(%a6),%d1	# fetch low word of saved rnd info
  12427	lsr.b		&0x6,%d1		# prec field (bits 7:6) -> bits 1:0
  12428	mov.l		(%sp)+,%d0		# load S.F.
  12429	cmp.l		%d0,(tbl_fdiv_ovfl.b,%pc,%d1.w*4) # will result overflow?
  12430	ble.w		fdiv_may_ovfl		# result may overflow; go check
  12431
  12432	cmp.l		%d0,(tbl_fdiv_unfl.w,%pc,%d1.w*4) # will result underflow?
  12433	beq.w		fdiv_may_unfl		# maybe
  12434	bgt.w		fdiv_unfl		# yes; go handle underflow
  12435
  12436fdiv_normal:
# Neither overflow nor underflow is possible: divide the scaled operands
# under the user's rounding prec/mode, then undo the scaling.
  12437	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
  12438
  12439	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR from saved rnd prec,mode
  12440	fmov.l		&0x0,%fpsr		# clear FPSR
  12441
  12442	fdiv.x		FP_SCR0(%a6),%fp0	# perform divide
  12443
  12444	fmov.l		%fpsr,%d1		# save FPSR
  12445	fmov.l		&0x0,%fpcr		# clear FPCR
  12446
  12447	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
  12448
# Entry: fp0 = scaled result, d0 = scale factor. Rewrites the exponent of
# the result with the scale factor removed and returns it in fp0.
  12449fdiv_normal_exit:
  12450	fmovm.x		&0x80,FP_SCR0(%a6)	# store result on stack
  12451	mov.l		%d2,-(%sp)		# store d2
  12452	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
  12453	mov.l		%d1,%d2			# make a copy
  12454	andi.l		&0x7fff,%d1		# strip sign
  12455	andi.w		&0x8000,%d2		# keep old sign
  12456	sub.l		%d0,%d1			# exp - S.F. (undo the scaling)
  12457	or.w		%d2,%d1			# concat old sign,new exp
  12458	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
  12459	mov.l		(%sp)+,%d2		# restore d2
  12460	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
  12461	rts
  12462
  12463tbl_fdiv_ovfl2:
# Exponent limits per precision (ext, sgl, dbl) that fdiv_may_ovfl compares
# the unscaled result exponent against.
  12464	long		0x7fff
  12465	long		0x407f
  12466	long		0x43ff
  12467
# The result did not overflow after all: recover the scale factor that
# fdiv_may_ovfl left on the stack and finish through the normal exit.
  12468fdiv_no_ovfl:
  12469	mov.l		(%sp)+,%d0		# restore scale factor
  12470	bra.b		fdiv_normal_exit
  12471
# The divide may overflow. Perform it on the scaled operands, compute the
# exponent the unscaled result would have, and compare it with the
# tbl_fdiv_ovfl2 entry selected by d1 (still the precision index set up in
# fdiv_norm; this path does not modify d1).
  12472fdiv_may_ovfl:
  12473	mov.l		%d0,-(%sp)		# save scale factor
  12474
  12475	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
  12476
  12477	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
  12478	fmov.l		&0x0,%fpsr		# clear FPSR
  12479
  12480	fdiv.x		FP_SCR0(%a6),%fp0	# execute divide
  12481
  12482	fmov.l		%fpsr,%d0		# fetch resulting status
  12483	fmov.l		&0x0,%fpcr		# clear FPCR
  12484
  12485	or.l		%d0,USER_FPSR(%a6)	# save INEX,N
  12486
  12487	fmovm.x		&0x01,-(%sp)		# save result to stack
  12488	mov.w		(%sp),%d0		# fetch new exponent
  12489	add.l		&0xc,%sp		# clear result from stack
  12490	andi.l		&0x7fff,%d0		# strip sign
  12491	sub.l		(%sp),%d0		# exp - saved S.F. (S.F. stays on stack)
  12492	cmp.l		%d0,(tbl_fdiv_ovfl2.b,%pc,%d1.w*4) # compare with precision limit
  12493	blt.b		fdiv_no_ovfl		# below limit; no overflow
  12494	mov.l		(%sp)+,%d0		# overflow: pop S.F., fall into fdiv_ovfl_tst
  12495
  12496fdiv_ovfl_tst:
# Overflow occurred. Record it in USER_FPSR, then either build an EXOP
# first (OVFL or INEX enabled) or go straight to the default result.
  12497	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
  12498
  12499	mov.b		FPCR_ENABLE(%a6),%d1
  12500	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
  12501	bne.b		fdiv_ovfl_ena		# yes
  12502
# Default overflow result: ovf_res is called with d1 = sign flag and
# d0 = prec:rnd; afterwards d0 is ORed into FPSR_CC and fp0 is loaded
# from (a0).
  12503fdiv_ovfl_dis:
  12504	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
  12505	sne		%d1			# set sign param accordingly
  12506	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
  12507	bsr.l		ovf_res			# calculate default result
  12508	or.b		%d0,FPSR_CC(%a6)	# set INF if applicable
  12509	fmovm.x		(%a0),&0x80		# return default result in fp0
  12510	rts
  12511
  12512fdiv_ovfl_ena:
# OVFL or INEX is enabled: build the EXOP in fp1 before producing the
# default result. Entry: d0 = scale factor, fp0 = scaled divide result.
  12513	mov.l		L_SCR3(%a6),%d1
  12514	andi.b		&0xc0,%d1		# is precision extended?
  12515	bne.b		fdiv_ovfl_ena_sd	# no, do sgl or dbl
  12516
# fp0 holds the result to convert into the EXOP: unscale its exponent and
# subtract the 0x6000 bias, keeping the sign.
  12517fdiv_ovfl_ena_cont:
  12518	fmovm.x		&0x80,FP_SCR0(%a6)	# move result to stack
  12519
  12520	mov.l		%d2,-(%sp)		# save d2
  12521	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
  12522	mov.w		%d1,%d2			# make a copy
  12523	andi.l		&0x7fff,%d1		# strip sign
  12524	sub.l		%d0,%d1			# exp - S.F. (undo the scaling)
  12525	subi.l		&0x6000,%d1		# subtract bias
  12526	andi.w		&0x7fff,%d1		# clear sign bit
  12527	andi.w		&0x8000,%d2		# keep old sign
  12528	or.w		%d2,%d1			# concat old sign,new exp
  12529	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
  12530	mov.l		(%sp)+,%d2		# restore d2
  12531	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
  12532	bra.b		fdiv_ovfl_dis		# then produce the default result
  12533
# sgl/dbl precision: redo the divide with the precision field cleared so
# that only the rounding mode is applied, then continue as above.
  12534fdiv_ovfl_ena_sd:
  12535	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand
  12536
  12537	mov.l		L_SCR3(%a6),%d1
  12538	andi.b		&0x30,%d1		# keep rnd mode
  12539	fmov.l		%d1,%fpcr		# set FPCR
  12540
  12541	fdiv.x		FP_SCR0(%a6),%fp0	# execute divide
  12542
  12543	fmov.l		&0x0,%fpcr		# clear FPCR
  12544	bra.b		fdiv_ovfl_ena_cont
  12545
  12546fdiv_unfl:
# The result underflows. Flag UNFL, perform the divide with round-to-zero,
# then either build an EXOP (UNFL or INEX enabled) or return the default
# underflow result. Entry: d0 = scale factor.
  12547	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
  12548
  12549	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
  12550
  12551	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR: RZ mode, prec field zero
  12552	fmov.l		&0x0,%fpsr		# clear FPSR
  12553
  12554	fdiv.x		FP_SCR0(%a6),%fp0	# execute divide
  12555
  12556	fmov.l		%fpsr,%d1		# save status
  12557	fmov.l		&0x0,%fpcr		# clear FPCR
  12558
  12559	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
  12560
  12561	mov.b		FPCR_ENABLE(%a6),%d1
  12562	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
  12563	bne.b		fdiv_unfl_ena		# yes
  12564
# Default underflow result: unf_res works on the operand at FP_SCR0; d0 is
# not modified on this path, so it presumably still carries the scale
# factor for unf_res (confirm against unf_res).
  12565fdiv_unfl_dis:
  12566	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
  12567
  12568	lea		FP_SCR0(%a6),%a0	# pass: result addr
  12569	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
  12570	bsr.l		unf_res			# calculate default result
  12571	or.b		%d0,FPSR_CC(%a6)	# 'Z' may have been set
  12572	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
  12573	rts
  12574
  12575#
  12576# UNFL is enabled.
  12577#
# Build the EXOP in fp1 by repeating the divide into fp1 (fp0 keeps the
# round-to-zero result from fdiv_unfl), unscaling the exponent and adding
# the 0x6000 bias. Entry: d0 = scale factor.
  12578fdiv_unfl_ena:
  12579	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1
  12580
  12581	mov.l		L_SCR3(%a6),%d1
  12582	andi.b		&0xc0,%d1		# is precision extended?
  12583	bne.b		fdiv_unfl_ena_sd	# no, sgl or dbl
  12584
  12585	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
  12586
  12587fdiv_unfl_ena_cont:
  12588	fmov.l		&0x0,%fpsr		# clear FPSR
  12589
  12590	fdiv.x		FP_SCR0(%a6),%fp1	# execute divide
  12591
  12592	fmov.l		&0x0,%fpcr		# clear FPCR
  12593
  12594	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
  12595	mov.l		%d2,-(%sp)		# save d2
  12596	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
  12597	mov.l		%d1,%d2			# make a copy
  12598	andi.l		&0x7fff,%d1		# strip sign
  12599	andi.w		&0x8000,%d2		# keep old sign
  12600	sub.l		%d0,%d1			# exp - S.F. (undo the scaling)
  12601	addi.l		&0x6000,%d1		# add bias
  12602	andi.w		&0x7fff,%d1		# clear sign position
  12603	or.w		%d2,%d1			# concat old sign,new exp
  12604	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exp
  12605	mov.l		(%sp)+,%d2		# restore d2
  12606	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
  12607	bra.w		fdiv_unfl_dis		# then produce the default result
  12608
# sgl/dbl precision: use only the rounding mode (precision field cleared)
# for the EXOP divide.
  12609fdiv_unfl_ena_sd:
  12610	mov.l		L_SCR3(%a6),%d1
  12611	andi.b		&0x30,%d1		# use only rnd mode
  12612	fmov.l		%d1,%fpcr		# set FPCR
  12613
  12614	bra.b		fdiv_unfl_ena_cont
  12615
  12616#
  12617# the divide operation MAY underflow:
  12618#
# Perform the divide on the scaled operands under the user's rounding
# prec/mode and compare |result| with 1 to decide. Entry: d0 = scale factor
# (left untouched here for fdiv_normal_exit / fdiv_unfl).
  12619fdiv_may_unfl:
  12620	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
  12621
  12622	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
  12623	fmov.l		&0x0,%fpsr		# clear FPSR
  12624
  12625	fdiv.x		FP_SCR0(%a6),%fp0	# execute divide
  12626
  12627	fmov.l		%fpsr,%d1		# save status
  12628	fmov.l		&0x0,%fpcr		# clear FPCR
  12629
  12630	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
  12631
  12632	fabs.x		%fp0,%fp1		# fp1 = |result| (fp0 is preserved)
  12633	fcmp.b		%fp1,&0x1		# compare |result| against 1
  12634	fbgt.w		fdiv_normal_exit	# |result| > 1; no underflow occurred
  12635	fblt.w		fdiv_unfl		# |result| < 1; underflow occurred
  12636
  12637#
  12638# we still don't know if underflow occurred. result is ~ equal to 1. but,
  12639# we don't know if the result was an underflow that rounded up to a 1
  12640# or a normalized number that rounded down to a 1. so, redo the entire
  12641# operation using RZ as the rounding mode to see what the pre-rounded
  12642# result is. this case should be relatively rare.
  12643#
  12644	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1
  12645
  12646	mov.l		L_SCR3(%a6),%d1
  12647	andi.b		&0xc0,%d1		# keep rnd prec
  12648	ori.b		&rz_mode*0x10,%d1	# insert RZ
  12649
  12650	fmov.l		%d1,%fpcr		# set FPCR
  12651	fmov.l		&0x0,%fpsr		# clear FPSR
  12652
  12653	fdiv.x		FP_SCR0(%a6),%fp1	# execute divide
  12654
  12655	fmov.l		&0x0,%fpcr		# clear FPCR
  12656	fabs.x		%fp1			# make absolute value
  12657	fcmp.b		%fp1,&0x1		# is |result| < 1.b?
  12658	fbge.w		fdiv_normal_exit	# no; no underflow occurred
  12659	bra.w		fdiv_unfl		# yes; underflow occurred
  12660
  12661############################################################################
  12662
  12663#
  12664# Divide: inputs are not both normalized; what are they?
  12665#
# d1 holds the combined tags built at fdiv entry, (DTAG << 3) | STAG. It
# indexes a table of 16-bit offsets relative to tbl_fdiv_op, and the jmp
# goes to tbl_fdiv_op + offset. Row comments read "dst type / src type".
  12666fdiv_not_norm:
  12667	mov.w		(tbl_fdiv_op.b,%pc,%d1.w*2),%d1	# fetch handler offset
  12668	jmp		(tbl_fdiv_op.b,%pc,%d1.w*1)	# jump to handler
  12669
  12670	swbeg		&48
  12671tbl_fdiv_op:
  12672	short		fdiv_norm	- tbl_fdiv_op # NORM / NORM
  12673	short		fdiv_inf_load	- tbl_fdiv_op # NORM / ZERO
  12674	short		fdiv_zero_load	- tbl_fdiv_op # NORM / INF
  12675	short		fdiv_res_qnan	- tbl_fdiv_op # NORM / QNAN
  12676	short		fdiv_norm	- tbl_fdiv_op # NORM / DENORM
  12677	short		fdiv_res_snan	- tbl_fdiv_op # NORM / SNAN
  12678	short		tbl_fdiv_op	- tbl_fdiv_op # (unused slot, offset 0)
  12679	short		tbl_fdiv_op	- tbl_fdiv_op # (unused slot, offset 0)
  12680
  12681	short		fdiv_zero_load	- tbl_fdiv_op # ZERO / NORM
  12682	short		fdiv_res_operr	- tbl_fdiv_op # ZERO / ZERO
  12683	short		fdiv_zero_load	- tbl_fdiv_op # ZERO / INF
  12684	short		fdiv_res_qnan	- tbl_fdiv_op # ZERO / QNAN
  12685	short		fdiv_zero_load	- tbl_fdiv_op # ZERO / DENORM
  12686	short		fdiv_res_snan	- tbl_fdiv_op # ZERO / SNAN
  12687	short		tbl_fdiv_op	- tbl_fdiv_op # (unused slot, offset 0)
  12688	short		tbl_fdiv_op	- tbl_fdiv_op # (unused slot, offset 0)
  12689
  12690	short		fdiv_inf_dst	- tbl_fdiv_op # INF / NORM
  12691	short		fdiv_inf_dst	- tbl_fdiv_op # INF / ZERO
  12692	short		fdiv_res_operr	- tbl_fdiv_op # INF / INF
  12693	short		fdiv_res_qnan	- tbl_fdiv_op # INF / QNAN
  12694	short		fdiv_inf_dst	- tbl_fdiv_op # INF / DENORM
  12695	short		fdiv_res_snan	- tbl_fdiv_op # INF / SNAN
  12696	short		tbl_fdiv_op	- tbl_fdiv_op # (unused slot, offset 0)
  12697	short		tbl_fdiv_op	- tbl_fdiv_op # (unused slot, offset 0)
  12698
  12699	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / NORM
  12700	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / ZERO
  12701	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / INF
  12702	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / QNAN
  12703	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / DENORM
  12704	short		fdiv_res_snan	- tbl_fdiv_op # QNAN / SNAN
  12705	short		tbl_fdiv_op	- tbl_fdiv_op # (unused slot, offset 0)
  12706	short		tbl_fdiv_op	- tbl_fdiv_op # (unused slot, offset 0)
  12707
  12708	short		fdiv_norm	- tbl_fdiv_op # DENORM / NORM
  12709	short		fdiv_inf_load	- tbl_fdiv_op # DENORM / ZERO
  12710	short		fdiv_zero_load	- tbl_fdiv_op # DENORM / INF
  12711	short		fdiv_res_qnan	- tbl_fdiv_op # DENORM / QNAN
  12712	short		fdiv_norm	- tbl_fdiv_op # DENORM / DENORM
  12713	short		fdiv_res_snan	- tbl_fdiv_op # DENORM / SNAN
  12714	short		tbl_fdiv_op	- tbl_fdiv_op # (unused slot, offset 0)
  12715	short		tbl_fdiv_op	- tbl_fdiv_op # (unused slot, offset 0)
  12716
  12717	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / NORM
  12718	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / ZERO
  12719	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / INF
  12720	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / QNAN
  12721	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / DENORM
  12722	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / SNAN
  12723	short		tbl_fdiv_op	- tbl_fdiv_op # (unused slot, offset 0)
  12724	short		tbl_fdiv_op	- tbl_fdiv_op # (unused slot, offset 0)
  12725
  12726fdiv_res_qnan:
# Local trampolines: tbl_fdiv_op stores 16-bit offsets relative to the
# table, so the shared NAN/OPERR handlers are reached through these nearby
# long branches.
  12727	bra.l		res_qnan
  12728fdiv_res_snan:
  12729	bra.l		res_snan
  12730fdiv_res_operr:
  12731	bra.l		res_operr
  12732
  12733	global		fdiv_zero_load		# global for fsgldiv
# Result is a ZERO whose sign is the exclusive-or of the operand signs.
# Loads +/-0 into fp0 and writes the matching ccodes to FPSR_CC.
  12734fdiv_zero_load:
  12735	mov.b		SRC_EX(%a0),%d0		# result sign is exclusive
  12736	mov.b		DST_EX(%a1),%d1		# or of input signs.
  12737	eor.b		%d0,%d1			# bit 7 of d1 = result sign
  12738	bpl.b		fdiv_zero_load_p	# result is positive
  12739	fmov.s		&0x80000000,%fp0	# load a -ZERO
  12740	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6)	# set Z/N
  12741	rts
  12742fdiv_zero_load_p:
  12743	fmov.s		&0x00000000,%fp0	# load a +ZERO
  12744	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
  12745	rts
  12746
  12747#
  12748# The destination was In Range and the source was a ZERO. The result,
  12749# Therefore, is an INF w/ the proper sign.
  12750# So, determine the sign and return a new INF (w/ the j-bit cleared).
  12751# The DZ and ADZ bits are also set in the low word of USER_FPSR.
  12752	global		fdiv_inf_load		# global for fsgldiv
  12753fdiv_inf_load:
  12754	ori.w		&dz_mask+adz_mask,2+USER_FPSR(%a6) # set DZ/ADZ
  12755	mov.b		SRC_EX(%a0),%d0		# load both signs
  12756	mov.b		DST_EX(%a1),%d1
  12757	eor.b		%d0,%d1			# bit 7 of d1 = result sign
  12758	bpl.b		fdiv_inf_load_p		# result is positive
  12759	fmov.s		&0xff800000,%fp0	# make result -INF
  12760	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N
  12761	rts
  12762fdiv_inf_load_p:
  12763	fmov.s		&0x7f800000,%fp0	# make result +INF
  12764	mov.b		&inf_bmask,FPSR_CC(%a6)	# set INF
  12765	rts
  12766
  12767#
  12768# The destination was an INF w/ an In Range or ZERO source, the result is
  12769# an INF w/ the proper sign.
  12770# The 68881/882 returns the destination INF w/ the new sign(if the j-bit of the
  12771# dst INF is set, then the j-bit of the result INF is also set).
  12772#
  12773	global		fdiv_inf_dst		# global for fsgldiv
  12774fdiv_inf_dst:
  12775	mov.b		DST_EX(%a1),%d0		# load both signs
  12776	mov.b		SRC_EX(%a0),%d1
  12777	eor.b		%d0,%d1			# bit 7 of d1 = result sign
  12778	bpl.b		fdiv_inf_dst_p		# result is positive
  12779
  12780	fmovm.x		DST(%a1),&0x80		# return result in fp0
  12781	fabs.x		%fp0			# clear sign bit
  12782	fneg.x		%fp0			# set sign bit
  12783	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/NEG
  12784	rts
  12785
  12786fdiv_inf_dst_p:
  12787	fmovm.x		DST(%a1),&0x80		# return result in fp0
  12788	fabs.x		%fp0			# return positive INF
  12789	mov.b		&inf_bmask,FPSR_CC(%a6) # set INF
  12790	rts
  12791
  12792#########################################################################
  12793# XDEF ****************************************************************	#
  12794#	fneg(): emulates the fneg instruction				#
  12795#	fsneg(): emulates the fsneg instruction				#
  12796#	fdneg(): emulates the fdneg instruction				#
  12797#									#
  12798# XREF ****************************************************************	#
  12799#	norm() - normalize a denorm to provide EXOP			#
  12800#	scale_to_zero_src() - scale sgl/dbl source exponent		#
  12801#	ovf_res() - return default overflow result			#
  12802#	unf_res() - return default underflow result			#
  12803#	res_qnan_1op() - return QNAN result				#
  12804#	res_snan_1op() - return SNAN result				#
  12805#									#
  12806# INPUT ***************************************************************	#
  12807#	a0 = pointer to extended precision source operand		#
  12808#	d0 = rnd prec,mode						#
  12809#									#
  12810# OUTPUT **************************************************************	#
  12811#	fp0 = result							#
  12812#	fp1 = EXOP (if exception occurred)				#
  12813#									#
  12814# ALGORITHM ***********************************************************	#
  12815#	Handle NANs, zeroes, and infinities as special cases. Separate	#
  12816# norms/denorms into ext/sgl/dbl precisions. Extended precision can be	#
  12817# emulated by simply flipping sign bit. Sgl/dbl operands must be scaled	#
  12818# and an actual fneg performed to see if overflow/underflow would have	#
  12819# occurred. If so, return default underflow/overflow result. Else,	#
  12820# scale the result exponent and return result. FPSR gets set based on	#
  12821# the result value.							#
  12822#									#
  12823#########################################################################
  12824
  12825	global		fsneg
# fsneg/fdneg force the rounding precision in d0 and then share the fneg
# body; fdneg reaches fneg by fall-through.
  12826fsneg:
  12827	andi.b		&0x30,%d0		# keep rnd mode bits; clear rnd prec
  12828	ori.b		&s_mode*0x10,%d0	# insert sgl precision
  12829	bra.b		fneg
  12830
  12831	global		fdneg
  12832fdneg:
  12833	andi.b		&0x30,%d0		# keep rnd mode bits; clear rnd prec
  12834	ori.b		&d_mode*0x10,%d0	# insert dbl prec
  12835
  12836	global		fneg
  12837fneg:
  12838	mov.l		%d0,L_SCR3(%a6)		# store rnd info
  12839	mov.b		STAG(%a6),%d1		# d1 = src operand tag
  12840	bne.w		fneg_not_norm		# nonzero tag: not a NORM;
# a zero tag falls through to fneg_norm.
  12841
  12842#
  12843# NEGATE SIGN : norms and denorms ONLY!
  12844#
  12845fneg_norm:
  12846	andi.b		&0xc0,%d0		# is precision extended?
  12847	bne.w		fneg_not_ext		# no; go handle sgl or dbl
  12848
  12849#
  12850# precision selected is extended. so...we can not get an underflow
  12851# or overflow because of rounding to the correct precision. so...
  12852# skip the scaling and unscaling...
  12853# the operand is copied to FP_SCR0 with its sign bit toggled.
  12854	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
  12855	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
  12856	mov.w		SRC_EX(%a0),%d0
  12857	eori.w		&0x8000,%d0		# negate sign
  12858	bpl.b		fneg_norm_load		# sign is positive
  12859	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
  12860fneg_norm_load:
  12861	mov.w		%d0,FP_SCR0_EX(%a6)	# store negated {sgn,exp}
  12862	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
  12863	rts
  12864
  12865#
  12866# for an extended precision DENORM, the UNFL exception bit is set
  12867# the accrued bit is NOT set in this instance(no inexactness!)
  12868#
# Entry: d0 = rnd prec,mode (as at fneg). sgl/dbl precision is handed to
# fneg_not_ext; extended precision is handled here.
  12869fneg_denorm:
  12870	andi.b		&0xc0,%d0		# is precision extended?
  12871	bne.b		fneg_not_ext		# no; go handle sgl or dbl
  12872
  12873	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
  12874
  12875	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
  12876	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
  12877	mov.w		SRC_EX(%a0),%d0
  12878	eori.w		&0x8000,%d0		# negate sign
  12879	bpl.b		fneg_denorm_done	# result is positive
  12880	mov.b		&neg_bmask,FPSR_CC(%a6)	# result negative; set 'N' ccode bit
  12881fneg_denorm_done:
  12882	mov.w		%d0,FP_SCR0_EX(%a6)	# store negated {sgn,exp}
  12883	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
  12884
  12885	btst		&unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
  12886	bne.b		fneg_ext_unfl_ena	# yes; also build the EXOP
  12887	rts
  12888
  12889#
  12890# the input is an extended DENORM and underflow is enabled in the FPCR.
  12891# normalize the mantissa and add the bias of 0x6000 to the resulting negative
  12892# exponent and insert back into the operand.
  12893# The operand in FP_SCR0 already carries the negated sign; the EXOP is
  12894fneg_ext_unfl_ena:
  12895	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
  12896	bsr.l		norm			# normalize result
  12897	neg.w		%d0			# new exponent = -(shft val)
  12898	addi.w		&0x6000,%d0		# add new bias to exponent
  12899	mov.w		FP_SCR0_EX(%a6),%d1	# fetch old sign,exp
  12900	andi.w		&0x8000,%d1		# keep old sign
  12901	andi.w		&0x7fff,%d0		# clear sign position
  12902	or.w		%d1,%d0			# concat old sign, new exponent
  12903	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
  12904	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
  12905	rts
  12906
  12907#
  12908# operand is either single or double
  12909# (entry: d0.b holds only the rounding precision bits)
  12910fneg_not_ext:
  12911	cmpi.b		%d0,&s_mode*0x10	# separate sgl/dbl prec
  12912	bne.b		fneg_dbl
  12913
  12914#
  12915# operand is to be rounded to single precision
  12916#
  12917fneg_sgl:
  12918	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
  12919	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
  12920	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
  12921	bsr.l		scale_to_zero_src	# calculate scale factor (d0)
  12922
  12923	cmpi.l		%d0,&0x3fff-0x3f80	# will move in underflow?
  12924	bge.w		fneg_sd_unfl		# yes; go handle underflow
  12925	cmpi.l		%d0,&0x3fff-0x407e	# will move in overflow?
  12926	beq.w		fneg_sd_may_ovfl	# maybe; go check
  12927	blt.w		fneg_sd_ovfl		# yes; go handle overflow
# otherwise fall through to fneg_sd_normal
  12928
  12929#
  12930# operand will NOT overflow or underflow when moved in to the fp reg file
  12931# (entry: FP_SCR0 = scaled operand, d0 = scale factor)
  12932fneg_sd_normal:
  12933	fmov.l		&0x0,%fpsr		# clear FPSR
  12934	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
  12935
  12936	fneg.x		FP_SCR0(%a6),%fp0	# perform negation
  12937
  12938	fmov.l		%fpsr,%d1		# save FPSR
  12939	fmov.l		&0x0,%fpcr		# clear FPCR
  12940
  12941	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
  12942
# Entry: fp0 = scaled result, d0 = scale factor. Rewrites the exponent of
# the result with the scale factor removed and returns it in fp0.
  12943fneg_sd_normal_exit:
  12944	mov.l		%d2,-(%sp)		# save d2
  12945	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
  12946	mov.w		FP_SCR0_EX(%a6),%d1	# load sgn,exp
  12947	mov.w		%d1,%d2			# make a copy
  12948	andi.l		&0x7fff,%d1		# strip sign
  12949	sub.l		%d0,%d1			# exp - S.F. (undo the scaling)
  12950	andi.w		&0x8000,%d2		# keep old sign
  12951	or.w		%d1,%d2			# concat old sign,new exp
  12952	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
  12953	mov.l		(%sp)+,%d2		# restore d2
  12954	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
  12955	rts
  12956
  12957#
  12958# operand is to be rounded to double precision
  12959#
  12960fneg_dbl:
  12961	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
  12962	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
  12963	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
  12964	bsr.l		scale_to_zero_src	# calculate scale factor (d0)
  12965
  12966	cmpi.l		%d0,&0x3fff-0x3c00	# will move in underflow?
  12967	bge.b		fneg_sd_unfl		# yes; go handle underflow
  12968	cmpi.l		%d0,&0x3fff-0x43fe	# will move in overflow?
  12969	beq.w		fneg_sd_may_ovfl	# maybe; go check
  12970	blt.w		fneg_sd_ovfl		# yes; go handle overflow
  12971	bra.w		fneg_sd_normal		# no; go handle normalized op
  12972
  12973#
  12974# operand WILL underflow when moved in to the fp register file
  12975# (no fneg is executed here; the sign in FP_SCR0 is toggled directly)
  12976fneg_sd_unfl:
  12977	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
  12978
  12979	eori.b		&0x80,FP_SCR0_EX(%a6)	# negate sign
  12980	bpl.b		fneg_sd_unfl_tst	# result is positive
  12981	bset		&neg_bit,FPSR_CC(%a6)	# set 'N' ccode bit
  12982
  12983# if underflow or inexact is enabled, go calculate EXOP first.
  12984fneg_sd_unfl_tst:
  12985	mov.b		FPCR_ENABLE(%a6),%d1
  12986	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
  12987	bne.b		fneg_sd_unfl_ena	# yes
  12988
# Default underflow result: unf_res works on the operand at FP_SCR0; d0 is
# not modified on this path, so it presumably still carries the scale
# factor for unf_res (confirm against unf_res).
  12989fneg_sd_unfl_dis:
  12990	lea		FP_SCR0(%a6),%a0	# pass: result addr
  12991	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
  12992	bsr.l		unf_res			# calculate default result
  12993	or.b		%d0,FPSR_CC(%a6)	# unf_res may have set 'Z'
  12994	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
  12995	rts
  12996
  12997#
  12998# operand will underflow AND underflow is enabled.
  12999# Therefore, we must return the result rounded to extended precision.
  13000# The EXOP is built in FP_SCR1 so FP_SCR0 stays intact for unf_res.
  13001fneg_sd_unfl_ena:
  13002	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
  13003	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
  13004	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent (sign already negated)
  13005
  13006	mov.l		%d2,-(%sp)		# save d2
  13007	mov.l		%d1,%d2			# make a copy
  13008	andi.l		&0x7fff,%d1		# strip sign
  13009	andi.w		&0x8000,%d2		# keep old sign
  13010	sub.l		%d0,%d1			# subtract scale factor
  13011	addi.l		&0x6000,%d1		# add new bias
  13012	andi.w		&0x7fff,%d1		# clear sign position
  13013	or.w		%d2,%d1			# concat new sign,new exp
  13014	mov.w		%d1,FP_SCR1_EX(%a6)	# insert new exp
  13015	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
  13016	mov.l		(%sp)+,%d2		# restore d2
  13017	bra.b		fneg_sd_unfl_dis	# then produce the default result
  13018
  13019#
  13020# operand WILL overflow.
  13021# (entry: FP_SCR0 = scaled operand, d0 = scale factor)
  13022fneg_sd_ovfl:
  13023	fmov.l		&0x0,%fpsr		# clear FPSR
  13024	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
  13025
  13026	fneg.x		FP_SCR0(%a6),%fp0	# perform negation
  13027
  13028	fmov.l		&0x0,%fpcr		# clear FPCR
  13029	fmov.l		%fpsr,%d1		# save FPSR
  13030
  13031	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
  13032
  13033fneg_sd_ovfl_tst:
  13034	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
  13035
  13036	mov.b		FPCR_ENABLE(%a6),%d1
  13037	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
  13038	bne.b		fneg_sd_ovfl_ena	# yes
  13039
  13040#
  13041# OVFL is not enabled; therefore, we must create the default result by
  13042# calling ovf_res().
  13043# afterwards d0 is ORed into FPSR_CC and fp0 is loaded from (a0).
  13044fneg_sd_ovfl_dis:
  13045	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
  13046	sne		%d1			# set sign param accordingly
  13047	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
  13048	bsr.l		ovf_res			# calculate default result
  13049	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
  13050	fmovm.x		(%a0),&0x80		# return default result in fp0
  13051	rts
  13052
  13053#
  13054# OVFL is enabled.
  13055# the INEX2 bit has already been updated by the round to the correct precision.
  13056# now, round to extended(and don't alter the FPSR).
  13057#
# Entry: FP_SCR0 = scaled SOURCE operand, d0 = scale factor.
# Exit:  fp1 = EXOP; control continues at fneg_sd_ovfl_dis.
#
# The EXOP is built from FP_SCR0, which on this path still holds the
# un-negated source (the fneg result went to fp0 only). The underflow path
# (fneg_sd_unfl) toggles the sign in FP_SCR0 before building its EXOP; here
# the sign must be toggled explicitly, otherwise the EXOP in fp1 carries
# the sign opposite to the negated result.
  13058fneg_sd_ovfl_ena:
  13059	mov.l		%d2,-(%sp)		# save d2
  13060	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp} of source
  13061	mov.l		%d1,%d2			# make a copy
  13062	andi.l		&0x7fff,%d1		# strip sign
  13063	andi.w		&0x8000,%d2		# isolate source sign
	eori.w		&0x8000,%d2		# EXOP sign = negated source sign
  13064	sub.l		%d0,%d1			# exp - S.F. (undo the scaling)
  13065	subi.l		&0x6000,%d1		# subtract bias
  13066	andi.w		&0x7fff,%d1		# clear sign position
  13067	or.w		%d2,%d1			# concat sign,exp
  13068	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
  13069	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
  13070	mov.l		(%sp)+,%d2		# restore d2
  13071	bra.b		fneg_sd_ovfl_dis	# then produce the default result
  13072
  13073#
  13074# the move in MAY overflow. so...
  13075# perform the negation on the scaled operand and inspect the result.
  13076fneg_sd_may_ovfl:
  13077	fmov.l		&0x0,%fpsr		# clear FPSR
  13078	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
  13079
  13080	fneg.x		FP_SCR0(%a6),%fp0	# perform negation
  13081
  13082	fmov.l		%fpsr,%d1		# save status
  13083	fmov.l		&0x0,%fpcr		# clear FPCR
  13084
  13085	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
  13086
  13087	fabs.x		%fp0,%fp1		# fp1 = |result| (fp0 is preserved)
  13088	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
  13089	fbge.w		fneg_sd_ovfl_tst	# yes; overflow has occurred
  13090
  13091# no, it didn't overflow; we have correct result
  13092	bra.w		fneg_sd_normal_exit
  13093
  13094##########################################################################
  13095
  13096#
  13097# input is not normalized; what is it?
  13098# (entry: d1.b = source tag from STAG, d0 = rnd prec,mode)
  13099fneg_not_norm:
  13100	cmpi.b		%d1,&DENORM		# weed out DENORM
  13101	beq.w		fneg_denorm
  13102	cmpi.b		%d1,&SNAN		# weed out SNAN
  13103	beq.l		res_snan_1op
  13104	cmpi.b		%d1,&QNAN		# weed out QNAN
  13105	beq.l		res_qnan_1op
  13106
  13107#
  13108# do the fneg; at this point, only possible ops are ZERO and INF.
  13109# use fneg to determine ccodes.
  13110# prec:mode should be zero at this point but it won't affect answer anyways.
  13111#
  13112	fneg.x		SRC_EX(%a0),%fp0	# do fneg
  13113	fmov.l		%fpsr,%d0		# fetch resulting FPSR
  13114	rol.l		&0x8,%d0		# put ccodes in lo byte
  13115	mov.b		%d0,FPSR_CC(%a6)	# insert correct ccodes
  13116	rts
  13117
  13118#########################################################################
  13119# XDEF ****************************************************************	#
  13120#	ftst(): emulates the ftst instruction				#
  13121#									#
  13122# XREF ****************************************************************	#
  13123#	res{s,q}nan_1op() - set NAN result for monadic instruction	#
  13124#									#
  13125# INPUT ***************************************************************	#
  13126#	a0 = pointer to extended precision source operand		#
  13127#									#
  13128# OUTPUT **************************************************************	#
  13129#	none								#
  13130#									#
  13131# ALGORITHM ***********************************************************	#
  13132#	Check the source operand tag (STAG) and set the FPSR according	#
  13133# to the operand type and sign.						#
  13134#									#
  13135#########################################################################
  13136
  13137	global		ftst
# Dispatch on the source tag; a zero tag (NORM) falls through.
  13138ftst:
  13139	mov.b		STAG(%a6),%d1		# d1 = src operand tag
  13140	bne.b		ftst_not_norm		# optimize on non-norm input
  13141
  13142#
  13143# Norm:
  13144# only 'N' is written, and only when the operand is negative.
  13145ftst_norm:
  13146	tst.b		SRC_EX(%a0)		# is operand negative?
  13147	bmi.b		ftst_norm_m		# yes
  13148	rts
  13149ftst_norm_m:
  13150	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
  13151	rts
  13152
  13153#
  13154# input is not normalized; what is it?
  13155# (entry: d1.b = source tag; anything left after the tests is a DENORM)
  13156ftst_not_norm:
  13157	cmpi.b		%d1,&ZERO		# weed out ZERO
  13158	beq.b		ftst_zero
  13159	cmpi.b		%d1,&INF		# weed out INF
  13160	beq.b		ftst_inf
  13161	cmpi.b		%d1,&SNAN		# weed out SNAN
  13162	beq.l		res_snan_1op
  13163	cmpi.b		%d1,&QNAN		# weed out QNAN
  13164	beq.l		res_qnan_1op
  13165
  13166#
  13167# Denorm:
  13168# handled like a NORM: only 'N' is written, and only when negative.
  13169ftst_denorm:
  13170	tst.b		SRC_EX(%a0)		# is operand negative?
  13171	bmi.b		ftst_denorm_m		# yes
  13172	rts
  13173ftst_denorm_m:
  13174	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
  13175	rts
  13176
  13177#
  13178# Infinity:
  13179# writes 'I', plus 'N' when the operand is negative.
  13180ftst_inf:
  13181	tst.b		SRC_EX(%a0)		# is operand negative?
  13182	bmi.b		ftst_inf_m		# yes
  13183ftst_inf_p:
  13184	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
  13185	rts
  13186ftst_inf_m:
  13187	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'I','N' ccode bits
  13188	rts
  13189
  13190#
  13191# Zero:
  13192# writes 'Z', plus 'N' when the operand is negative.
  13193ftst_zero:
  13194	tst.b		SRC_EX(%a0)		# is operand negative?
  13195	bmi.b		ftst_zero_m		# yes
  13196ftst_zero_p:
  13197	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
  13198	rts
  13199ftst_zero_m:
  13200	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6)	# set 'Z','N' ccode bits
  13201	rts
  13202
  13203#########################################################################
  13204# XDEF ****************************************************************	#
  13205#	fint(): emulates the fint instruction				#
  13206#									#
  13207# XREF ****************************************************************	#
  13208#	res_{s,q}nan_1op() - set NAN result for monadic operation	#
  13209#									#
  13210# INPUT ***************************************************************	#
  13211#	a0 = pointer to extended precision source operand		#
  13212#	d0 = round precision/mode					#
  13213#									#
  13214# OUTPUT **************************************************************	#
  13215#	fp0 = result							#
  13216#									#
  13217# ALGORITHM ***********************************************************	#
  13218#	Separate according to operand type. Unnorms don't pass through	#
  13219# here. For norms, load the rounding mode/prec, execute a "fint", then	#
  13220# store the resulting FPSR bits.					#
  13221#	For denorms, force the j-bit to a one and do the same as for	#
  13222# norms. Denorms are so low that the answer will either be a zero or a	#
  13223# one.									#
  13224#	For zeroes/infs/NANs, return the same while setting the FPSR	#
  13225# as appropriate.							#
  13226#									#
  13227#########################################################################
  13228
  13229	global		fint
# Dispatch on the source tag; a zero tag (NORM) falls through.
  13230fint:
  13231	mov.b		STAG(%a6),%d1		# d1 = src operand tag
  13232	bne.b		fint_not_norm		# optimize on non-norm input
  13233
  13234#
  13235# Norm:
  13236# (also entered from fint_denorm with a0 -> FP_SCR0)
  13237fint_norm:
  13238	andi.b		&0x30,%d0		# keep rnd mode; set prec = ext
  13239
  13240	fmov.l		%d0,%fpcr		# set FPCR
  13241	fmov.l		&0x0,%fpsr		# clear FPSR
  13242
  13243	fint.x		SRC(%a0),%fp0		# execute fint
  13244
  13245	fmov.l		&0x0,%fpcr		# clear FPCR
  13246	fmov.l		%fpsr,%d0		# save FPSR
  13247	or.l		%d0,USER_FPSR(%a6)	# set exception bits
  13248
  13249	rts
  13250
  13251#
  13252# input is not normalized; what is it?
  13253# (entry: d1.b = source tag; anything left after the tests goes to QNAN)
  13254fint_not_norm:
  13255	cmpi.b		%d1,&ZERO		# weed out ZERO
  13256	beq.b		fint_zero
  13257	cmpi.b		%d1,&INF		# weed out INF
  13258	beq.b		fint_inf
  13259	cmpi.b		%d1,&DENORM		# weed out DENORM
  13260	beq.b		fint_denorm
  13261	cmpi.b		%d1,&SNAN		# weed out SNAN
  13262	beq.l		res_snan_1op
  13263	bra.l		res_qnan_1op		# weed out QNAN
  13264
  13265#
  13266# Denorm:
  13267#
  13268# for DENORMs, the result will be either (+/-)ZERO or (+/-)1.
  13269# also, the INEX2 and AINEX exception bits will be set.
  13270# so, we could either set these manually or force the DENORM
  13271# to a very small NORM and ship it to the NORM routine.
  13272# I do the latter.
  13273#
# Only the exponent word and the top mantissa byte of FP_SCR0 are written
# here; the remaining mantissa bytes keep whatever FP_SCR0 held before.
  13274fint_denorm:
  13275	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp
  13276	mov.b		&0x80,FP_SCR0_HI(%a6)	# force DENORM ==> small NORM
  13277	lea		FP_SCR0(%a6),%a0	# a0 -> substitute operand
  13278	bra.b		fint_norm
  13279
  13280#
  13281# Zero: the result is a ZERO with the sign of the source operand.
  13282# FPSR_CC is overwritten with 'Z' (plus 'N' for -ZERO).
  13283fint_zero:
  13284	tst.b		SRC_EX(%a0)		# is ZERO negative?
  13285	bmi.b		fint_zero_m		# yes
  13286fint_zero_p:
  13287	fmov.s		&0x00000000,%fp0	# return +ZERO in fp0
  13288	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
  13289	rts
  13290fint_zero_m:
  13291	fmov.s		&0x80000000,%fp0	# return -ZERO in fp0
  13292	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
  13293	rts
  13294
  13295#
  13296# Infinity: the source INF is loaded into fp0 as the result.
  13297# FPSR_CC is overwritten with 'I' (plus 'N' for -INF).
  13298fint_inf:
  13299	fmovm.x		SRC(%a0),&0x80		# return result in fp0
  13300	tst.b		SRC_EX(%a0)		# is INF negative?
  13301	bmi.b		fint_inf_m		# yes
  13302fint_inf_p:
  13303	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
  13304	rts
  13305fint_inf_m:
  13306	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
  13307	rts
  13308
  13309#########################################################################
  13310# XDEF ****************************************************************	#
  13311#	fintrz(): emulates the fintrz instruction			#
  13312#									#
  13313# XREF ****************************************************************	#
  13314#	res_{s,q}nan_1op() - set NAN result for monadic operation	#
  13315#									#
  13316# INPUT ***************************************************************	#
  13317#	a0 = pointer to extended precision source operand		#
  13318#	d0 = round precision/mode					#
  13319#									#
  13320# OUTPUT **************************************************************	#
  13321#	fp0 = result							#
  13322#									#
  13323# ALGORITHM ***********************************************************	#
  13324#	Separate according to operand type. Unnorms don't pass through	#
  13325# here. For norms, execute a "fintrz" without loading the rounding	#
  13326# mode/prec from d0, then store the resulting FPSR bits.		#
  13327#	For denorms, force the j-bit to a one and do the same as for	#
  13328# norms. Denorms are so low that, when truncated, the answer will	#
  13329# always be a zero.							#
  13330#	For zeroes/infs/NANs, return the same while setting the FPSR	#
  13331# as appropriate.							#
  13332#									#
  13333#########################################################################
  13334
  13335	global		fintrz
  13336fintrz:
  13337	mov.b		STAG(%a6),%d1		# d1 = source operand type tag
  13338	bne.b		fintrz_not_norm		# nonzero tag: input is not a NORM
  13339
  13340#
  13341# Norm: execute the hardware "fintrz" and fold the FPSR bits it produced
  13342# into USER_FPSR. The rnd prec/mode passed in d0 is not loaded into FPCR.
  13343fintrz_norm:
  13344	fmov.l		&0x0,%fpsr		# clear FPSR
  13345
  13346	fintrz.x	SRC(%a0),%fp0		# execute fintrz
  13347
  13348	fmov.l		%fpsr,%d0		# save FPSR
  13349	or.l		%d0,USER_FPSR(%a6)	# set exception bits
  13350
  13351	rts
  13352
  13353#
  13354# input is not normalized; dispatch on the operand tag held in d1.
  13355#
  13356fintrz_not_norm:
  13357	cmpi.b		%d1,&ZERO		# weed out ZERO
  13358	beq.b		fintrz_zero
  13359	cmpi.b		%d1,&INF		# weed out INF
  13360	beq.b		fintrz_inf
  13361	cmpi.b		%d1,&DENORM		# weed out DENORM
  13362	beq.b		fintrz_denorm
  13363	cmpi.b		%d1,&SNAN		# weed out SNAN
  13364	beq.l		res_snan_1op
  13365	bra.l		res_qnan_1op		# anything left is handled as QNAN
  13366
  13367#
  13368# Denorm:
  13369#
  13370# for DENORMs, the result will be (+/-)ZERO.
  13371# also, the INEX2 and AINEX exception bits will be set.
  13372# so, we could either set these manually or force the DENORM
  13373# to a very small NORM and ship it to the NORM routine.
  13374# I do the latter: the substitute operand is built in FP_SCR0.
  13375# NOTE(review): only the top mantissa byte of FP_SCR0 is written here.
  13376fintrz_denorm:
  13377	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp
  13378	mov.b		&0x80,FP_SCR0_HI(%a6)	# force DENORM ==> small NORM
  13379	lea		FP_SCR0(%a6),%a0	# a0 = ptr to substitute operand
  13380	bra.b		fintrz_norm
  13381
  13382#
  13383# Zero: the result is a ZERO with the sign of the source operand.
  13384# FPSR_CC is overwritten with 'Z' (plus 'N' for -ZERO).
  13385fintrz_zero:
  13386	tst.b		SRC_EX(%a0)		# is ZERO negative?
  13387	bmi.b		fintrz_zero_m		# yes
  13388fintrz_zero_p:
  13389	fmov.s		&0x00000000,%fp0	# return +ZERO in fp0
  13390	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
  13391	rts
  13392fintrz_zero_m:
  13393	fmov.s		&0x80000000,%fp0	# return -ZERO in fp0
  13394	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
  13395	rts
  13396
  13397#
  13398# Infinity: the source INF is loaded into fp0 as the result.
  13399# FPSR_CC is overwritten with 'I' (plus 'N' for -INF).
  13400fintrz_inf:
  13401	fmovm.x		SRC(%a0),&0x80		# return result in fp0
  13402	tst.b		SRC_EX(%a0)		# is INF negative?
  13403	bmi.b		fintrz_inf_m		# yes
  13404fintrz_inf_p:
  13405	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
  13406	rts
  13407fintrz_inf_m:
  13408	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
  13409	rts
  13410
  13411#########################################################################
  13412# XDEF ****************************************************************	#
  13413#	fabs():  emulates the fabs instruction				#
  13414#	fsabs(): emulates the fsabs instruction				#
  13415#	fdabs(): emulates the fdabs instruction				#
  13416#									#
  13417# XREF **************************************************************** #
  13418#	norm() - normalize denorm mantissa to provide EXOP		#
  13419#	scale_to_zero_src() - make exponent = 0; get scale factor	#
  13420#	unf_res() - calculate underflow result				#
  13421#	ovf_res() - calculate overflow result				#
  13422#	res_{s,q}nan_1op() - set NAN result for monadic operation	#
  13423#									#
  13424# INPUT *************************************************************** #
  13425#	a0 = pointer to extended precision source operand		#
  13426#	d0 = rnd precision/mode						#
  13427#									#
  13428# OUTPUT ************************************************************** #
  13429#	fp0 = result							#
  13430#	fp1 = EXOP (if exception occurred)				#
  13431#									#
  13432# ALGORITHM ***********************************************************	#
  13433#	Handle NANs, infinities, and zeroes as special cases. Divide	#
  13434# norms into extended, single, and double precision.			#
  13435#	Simply clear sign for extended precision norm. Ext prec denorm	#
  13436# gets an EXOP created for it since it's an underflow.			#
  13437#	Double and single precision can overflow and underflow. First,	#
  13438# scale the operand such that the exponent is zero. Perform an "fabs"	#
  13439# using the correct rnd mode/prec. Check to see if the original		#
  13440# exponent would take an exception. If so, use unf_res() or ovf_res()	#
  13441# to calculate the default result. Also, create the EXOP for the	#
  13442# exceptional case. If no exception should occur, insert the correct	#
  13443# result exponent and return.						#
  13444#	Unnorms don't pass through here.				#
  13445#									#
  13446#########################################################################
  13447
  13448	global		fsabs
  13449fsabs:
  13450	andi.b		&0x30,%d0		# clear rnd prec
  13451	ori.b		&s_mode*0x10,%d0	# insert sgl precision
  13452	bra.b		fabs			# continue in the common code
  13453
  13454	global		fdabs
  13455fdabs:
  13456	andi.b		&0x30,%d0		# clear rnd prec
  13457	ori.b		&d_mode*0x10,%d0	# insert dbl precision; fall into fabs
  13458
  13459	global		fabs
  13460fabs:
  13461	mov.l		%d0,L_SCR3(%a6)		# store rnd info
  13462	mov.b		STAG(%a6),%d1		# d1 = source operand type tag
  13463	bne.w		fabs_not_norm		# optimize on non-norm input
  13464
  13465#
  13466# ABSOLUTE VALUE: norms and denorms ONLY!
  13467#
  13468fabs_norm:
  13469	andi.b		&0xc0,%d0		# is precision extended?
  13470	bne.b		fabs_not_ext		# no; go handle sgl or dbl
  13471
  13472#
  13473# precision selected is extended, so we can not get an underflow
  13474# or overflow because of rounding to the correct precision.
  13475# skip the scaling and unscaling: copy the operand and clear its sign.
  13476#
  13477	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
  13478	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
  13479	mov.w		SRC_EX(%a0),%d1		# d1 = {sgn,exp} word
  13480	bclr		&15,%d1			# force absolute value
  13481	mov.w		%d1,FP_SCR0_EX(%a6)	# insert exponent
  13482	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
  13483	rts
  13484
  13485#
  13486# for an extended precision DENORM, the UNFL exception bit is set.
  13487# the accrued bit is NOT set in this instance (no inexactness!)
  13488#
  13489fabs_denorm:
  13490	andi.b		&0xc0,%d0		# is precision extended?
  13491	bne.b		fabs_not_ext		# no
  13492
  13493	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
  13494
  13495	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
  13496	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
  13497	mov.w		SRC_EX(%a0),%d0		# d0 = {sgn,exp} word
  13498	bclr		&15,%d0			# clear sign
  13499	mov.w		%d0,FP_SCR0_EX(%a6)	# insert exponent
  13500
  13501	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
  13502
  13503	btst		&unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
  13504	bne.b		fabs_ext_unfl_ena	# yes; also build the EXOP in fp1
  13505	rts
  13506
  13507#
  13508# the input is an extended DENORM and underflow is enabled in the FPCR.
  13509# normalize the mantissa, add the bias of 0x6000 to the resulting negative
  13510# exponent, insert it back into the operand and return that as the EXOP.
  13511#
  13512fabs_ext_unfl_ena:
  13513	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
  13514	bsr.l		norm			# normalize result; d0 is used as shft val
  13515	neg.w		%d0			# new exponent = -(shft val)
  13516	addi.w		&0x6000,%d0		# add new bias to exponent
  13517	mov.w		FP_SCR0_EX(%a6),%d1	# fetch old sign,exp
  13518	andi.w		&0x8000,%d1		# keep old sign
  13519	andi.w		&0x7fff,%d0		# clear sign position
  13520	or.w		%d1,%d0			# concat old sign, new exponent
  13521	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
  13522	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
  13523	rts
  13524
  13525#
  13526# operand is either single or double (d0 holds only the precision bits here)
  13527#
  13528fabs_not_ext:
  13529	cmpi.b		%d0,&s_mode*0x10	# separate sgl/dbl prec
  13530	bne.b		fabs_dbl
  13531
  13532#
  13533# operand is to be rounded to single precision
  13534#
  13535fabs_sgl:
  13536	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
  13537	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
  13538	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
  13539	bsr.l		scale_to_zero_src	# calculate scale factor (in d0)
  13540
  13541	cmpi.l		%d0,&0x3fff-0x3f80	# will move in underflow?
  13542	bge.w		fabs_sd_unfl		# yes; go handle underflow
  13543	cmpi.l		%d0,&0x3fff-0x407e	# will move in overflow?
  13544	beq.w		fabs_sd_may_ovfl	# maybe; go check
  13545	blt.w		fabs_sd_ovfl		# yes; else fall into fabs_sd_normal
  13546
  13547#
  13548# operand will NOT overflow or underflow when moved in to the fp reg file
  13549#
  13550fabs_sd_normal:
  13551	fmov.l		&0x0,%fpsr		# clear FPSR
  13552	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
  13553
  13554	fabs.x		FP_SCR0(%a6),%fp0	# perform absolute
  13555
  13556	fmov.l		%fpsr,%d1		# save FPSR
  13557	fmov.l		&0x0,%fpcr		# clear FPCR
  13558
  13559	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
  13560
  13561fabs_sd_normal_exit:
  13562	mov.l		%d2,-(%sp)		# save d2
  13563	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
  13564	mov.w		FP_SCR0_EX(%a6),%d1	# load sgn,exp
  13565	mov.l		%d1,%d2			# make a copy
  13566	andi.l		&0x7fff,%d1		# strip sign
  13567	sub.l		%d0,%d1			# subtract scale factor to unscale
  13568	andi.w		&0x8000,%d2		# keep old sign
  13569	or.w		%d1,%d2			# concat old sign,new exp
  13570	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
  13571	mov.l		(%sp)+,%d2		# restore d2
  13572	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
  13573	rts
  13574
  13575#
  13576# operand is to be rounded to double precision
  13577#
  13578fabs_dbl:
  13579	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
  13580	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
  13581	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
  13582	bsr.l		scale_to_zero_src	# calculate scale factor (in d0)
  13583
  13584	cmpi.l		%d0,&0x3fff-0x3c00	# will move in underflow?
  13585	bge.b		fabs_sd_unfl		# yes; go handle underflow
  13586	cmpi.l		%d0,&0x3fff-0x43fe	# will move in overflow?
  13587	beq.w		fabs_sd_may_ovfl	# maybe; go check
  13588	blt.w		fabs_sd_ovfl		# yes; go handle overflow
  13589	bra.w		fabs_sd_normal		# no; go handle normalized op
  13590
  13591#
  13592# operand WILL underflow when moved in to the fp register file
  13593#
  13594fabs_sd_unfl:
  13595	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
  13596
  13597	bclr		&0x7,FP_SCR0_EX(%a6)	# force absolute value (clear sign)
  13598
  13599# if underflow or inexact is enabled, go calculate EXOP first.
  13600	mov.b		FPCR_ENABLE(%a6),%d1
  13601	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
  13602	bne.b		fabs_sd_unfl_ena	# yes
  13603
  13604fabs_sd_unfl_dis:
  13605	lea		FP_SCR0(%a6),%a0	# pass: result addr
  13606	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
  13607	bsr.l		unf_res			# calculate default result
  13608	or.b		%d0,FPSR_CC(%a6)	# set possible 'Z' ccode
  13609	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
  13610	rts
  13611
  13612#
  13613# operand will underflow AND underflow (or inexact) is enabled.
  13614# Build the EXOP in FP_SCR1 with a rebiased exponent and return it in fp1.
  13615#
  13616fabs_sd_unfl_ena:
  13617	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
  13618	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
  13619	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent
  13620
  13621	mov.l		%d2,-(%sp)		# save d2
  13622	mov.l		%d1,%d2			# make a copy
  13623	andi.l		&0x7fff,%d1		# strip sign
  13624	andi.w		&0x8000,%d2		# keep old sign
  13625	sub.l		%d0,%d1			# subtract scale factor
  13626	addi.l		&0x6000,%d1		# add new bias
  13627	andi.w		&0x7fff,%d1
  13628	or.w		%d2,%d1			# concat new sign,new exp
  13629	mov.w		%d1,FP_SCR1_EX(%a6)	# insert new exp
  13630	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
  13631	mov.l		(%sp)+,%d2		# restore d2
  13632	bra.b		fabs_sd_unfl_dis	# EXOP done; now build default result
  13633
  13634#
  13635# operand WILL overflow.
  13636#
  13637fabs_sd_ovfl:
  13638	fmov.l		&0x0,%fpsr		# clear FPSR
  13639	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
  13640
  13641	fabs.x		FP_SCR0(%a6),%fp0	# perform absolute
  13642
  13643	fmov.l		&0x0,%fpcr		# clear FPCR
  13644	fmov.l		%fpsr,%d1		# save FPSR
  13645
  13646	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
  13647
  13648fabs_sd_ovfl_tst:
  13649	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
  13650
  13651	mov.b		FPCR_ENABLE(%a6),%d1
  13652	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
  13653	bne.b		fabs_sd_ovfl_ena	# yes
  13654
  13655#
  13656# OVFL is not enabled; therefore, we must create the default result by
  13657# calling ovf_res(). (fabs_sd_ovfl_ena also branches back here.)
  13658#
  13659fabs_sd_ovfl_dis:
  13660	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
  13661	sne		%d1			# set sign param accordingly
  13662	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
  13663	bsr.l		ovf_res			# calculate default result
  13664	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
  13665	fmovm.x		(%a0),&0x80		# return default result in fp0
  13666	rts
  13667
  13668#
  13669# OVFL (or INEX) is enabled.
  13670# the INEX2 bit has already been updated by the round to the correct precision.
  13671# now, build the EXOP with a rebiased exponent (and don't alter the FPSR).
  13672#
  13673fabs_sd_ovfl_ena:
  13674	mov.l		%d2,-(%sp)		# save d2
  13675	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
  13676	mov.l		%d1,%d2			# make a copy
  13677	andi.l		&0x7fff,%d1		# strip sign
  13678	andi.w		&0x8000,%d2		# keep old sign
  13679	sub.l		%d0,%d1			# subtract scale factor to unscale
  13680	subi.l		&0x6000,%d1		# subtract bias
  13681	andi.w		&0x7fff,%d1
  13682	or.w		%d2,%d1			# concat sign,exp
  13683	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
  13684	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
  13685	mov.l		(%sp)+,%d2		# restore d2
  13686	bra.b		fabs_sd_ovfl_dis	# EXOP done; now build default result
  13687
  13688#
  13689# the move in MAY overflow. so perform it and inspect the scaled result.
  13690#
  13691fabs_sd_may_ovfl:
  13692	fmov.l		&0x0,%fpsr		# clear FPSR
  13693	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
  13694
  13695	fabs.x		FP_SCR0(%a6),%fp0	# perform absolute
  13696
  13697	fmov.l		%fpsr,%d1		# save status
  13698	fmov.l		&0x0,%fpcr		# clear FPCR
  13699
  13700	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
  13701
  13702	fabs.x		%fp0,%fp1		# make a copy of result
  13703	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
  13704	fbge.w		fabs_sd_ovfl_tst	# yes; overflow has occurred
  13705
  13706# no, it didn't overflow; we have correct result
  13707	bra.w		fabs_sd_normal_exit
  13708
  13709##########################################################################
  13710
  13711#
  13712# input is not normalized; dispatch on the operand tag held in d1.
  13713#
  13714fabs_not_norm:
  13715	cmpi.b		%d1,&DENORM		# weed out DENORM
  13716	beq.w		fabs_denorm
  13717	cmpi.b		%d1,&SNAN		# weed out SNAN
  13718	beq.l		res_snan_1op
  13719	cmpi.b		%d1,&QNAN		# weed out QNAN
  13720	beq.l		res_qnan_1op
  13721
  13722	fabs.x		SRC(%a0),%fp0		# force absolute value
  13723
  13724	cmpi.b		%d1,&INF		# weed out INF
  13725	beq.b		fabs_inf
  13726fabs_zero:
  13727	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
  13728	rts
  13729fabs_inf:
  13730	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
  13731	rts
  13732
  13733#########################################################################
  13734# XDEF ****************************************************************	#
  13735#	fcmp(): fp compare op routine					#
  13736#									#
  13737# XREF ****************************************************************	#
  13738#	res_qnan() - return QNAN result					#
  13739#	res_snan() - return SNAN result					#
  13740#									#
  13741# INPUT ***************************************************************	#
  13742#	a0 = pointer to extended precision source operand		#
  13743#	a1 = pointer to extended precision destination operand		#
  13744#	d0 = round prec/mode						#
  13745#									#
  13746# OUTPUT ************************************************************** #
  13747#	None								#
  13748#									#
  13749# ALGORITHM ***********************************************************	#
  13750#	Handle NANs and denorms as special cases. For everything else,	#
  13751# just use the actual fcmp instruction to produce the correct condition	#
  13752# codes.								#
  13753#									#
  13754#########################################################################
  13755
  13756	global		fcmp
  13757fcmp:
  13758	clr.w		%d1
  13759	mov.b		DTAG(%a6),%d1
  13760	lsl.b		&0x3,%d1
  13761	or.b		STAG(%a6),%d1		# d1 = (DTAG << 3) | STAG
  13762	bne.b		fcmp_not_norm		# optimize on non-norm input
  13763
  13764#
  13765# COMPARE FP OPs : NORMs, ZEROs, INFs, and "corrected" DENORMs
  13766#
  13767fcmp_norm:
  13768	fmovm.x		DST(%a1),&0x80		# load dst op
  13769
  13770	fcmp.x		%fp0,SRC(%a0)		# do compare
  13771
  13772	fmov.l		%fpsr,%d0		# save FPSR
  13773	rol.l		&0x8,%d0		# extract ccode bits
  13774	mov.b		%d0,FPSR_CC(%a6)	# set ccode bits(no exc bits are set)
  13775
  13776	rts
  13777
  13778#
  13779# fcmp: inputs are not both normalized; dispatch through tbl_fcmp_op,
  13780# indexed by d1 = (DTAG << 3) | STAG. Entries are labelled "dst - src".
  13781fcmp_not_norm:
  13782	mov.w		(tbl_fcmp_op.b,%pc,%d1.w*2),%d1	# d1 = entry (offset from table)
  13783	jmp		(tbl_fcmp_op.b,%pc,%d1.w*1)	# jump to tbl_fcmp_op + offset
  13784
  13785	swbeg		&48
  13786tbl_fcmp_op:
  13787	short		fcmp_norm	- tbl_fcmp_op # NORM - NORM
  13788	short		fcmp_norm	- tbl_fcmp_op # NORM - ZERO
  13789	short		fcmp_norm	- tbl_fcmp_op # NORM - INF
  13790	short		fcmp_res_qnan	- tbl_fcmp_op # NORM - QNAN
  13791	short		fcmp_nrm_dnrm	- tbl_fcmp_op # NORM - DENORM
  13792	short		fcmp_res_snan	- tbl_fcmp_op # NORM - SNAN
  13793	short		tbl_fcmp_op	- tbl_fcmp_op # unused slot
  13794	short		tbl_fcmp_op	- tbl_fcmp_op # unused slot
  13795
  13796	short		fcmp_norm	- tbl_fcmp_op # ZERO - NORM
  13797	short		fcmp_norm	- tbl_fcmp_op # ZERO - ZERO
  13798	short		fcmp_norm	- tbl_fcmp_op # ZERO - INF
  13799	short		fcmp_res_qnan	- tbl_fcmp_op # ZERO - QNAN
  13800	short		fcmp_dnrm_s	- tbl_fcmp_op # ZERO - DENORM
  13801	short		fcmp_res_snan	- tbl_fcmp_op # ZERO - SNAN
  13802	short		tbl_fcmp_op	- tbl_fcmp_op # unused slot
  13803	short		tbl_fcmp_op	- tbl_fcmp_op # unused slot
  13804
  13805	short		fcmp_norm	- tbl_fcmp_op # INF - NORM
  13806	short		fcmp_norm	- tbl_fcmp_op # INF - ZERO
  13807	short		fcmp_norm	- tbl_fcmp_op # INF - INF
  13808	short		fcmp_res_qnan	- tbl_fcmp_op # INF - QNAN
  13809	short		fcmp_dnrm_s	- tbl_fcmp_op # INF - DENORM
  13810	short		fcmp_res_snan	- tbl_fcmp_op # INF - SNAN
  13811	short		tbl_fcmp_op	- tbl_fcmp_op # unused slot
  13812	short		tbl_fcmp_op	- tbl_fcmp_op # unused slot
  13813
  13814	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - NORM
  13815	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - ZERO
  13816	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - INF
  13817	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - QNAN
  13818	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - DENORM
  13819	short		fcmp_res_snan	- tbl_fcmp_op # QNAN - SNAN
  13820	short		tbl_fcmp_op	- tbl_fcmp_op # unused slot
  13821	short		tbl_fcmp_op	- tbl_fcmp_op # unused slot
  13822
  13823	short		fcmp_dnrm_nrm	- tbl_fcmp_op # DENORM - NORM
  13824	short		fcmp_dnrm_d	- tbl_fcmp_op # DENORM - ZERO
  13825	short		fcmp_dnrm_d	- tbl_fcmp_op # DENORM - INF
  13826	short		fcmp_res_qnan	- tbl_fcmp_op # DENORM - QNAN
  13827	short		fcmp_dnrm_sd	- tbl_fcmp_op # DENORM - DENORM
  13828	short		fcmp_res_snan	- tbl_fcmp_op # DENORM - SNAN
  13829	short		tbl_fcmp_op	- tbl_fcmp_op # unused slot
  13830	short		tbl_fcmp_op	- tbl_fcmp_op # unused slot
  13831
  13832	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - NORM
  13833	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - ZERO
  13834	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - INF
  13835	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - QNAN
  13836	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - DENORM
  13837	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - SNAN
  13838	short		tbl_fcmp_op	- tbl_fcmp_op # unused slot
  13839	short		tbl_fcmp_op	- tbl_fcmp_op # unused slot
  13840
  13841# unlike all other functions for QNAN and SNAN, fcmp does NOT set the
  13842# 'N' bit for a negative QNAN or SNAN input so we must squelch it here.
  13843fcmp_res_qnan:
  13844	bsr.l		res_qnan
  13845	andi.b		&0xf7,FPSR_CC(%a6)	# squelch the 'N' ccode bit
  13846	rts
  13847fcmp_res_snan:
  13848	bsr.l		res_snan
  13849	andi.b		&0xf7,FPSR_CC(%a6)	# squelch the 'N' ccode bit
  13850	rts
  13851
  13852#
  13853# DENORMs are a little more difficult.
  13854# If you have 2 DENORMs, then you can just force the j-bit to a one
  13855# and use the fcmp_norm routine.
  13856# If you have a DENORM and an INF or ZERO, just force the DENORM's j-bit to a one
  13857# and use the fcmp_norm routine.
  13858# If you have a DENORM and a NORM with opposite signs, then use fcmp_norm, also.
  13859# But with a DENORM and a NORM of the same sign, the neg bit is set if the
  13860# (1) signs are (+) and the DENORM is the dst or
  13861# (2) signs are (-) and the DENORM is the src
  13862#
  13863
  13864fcmp_dnrm_s:
  13865	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
  13866	mov.l		SRC_HI(%a0),%d0
  13867	bset		&31,%d0			# DENORM src; make into small norm
  13868	mov.l		%d0,FP_SCR0_HI(%a6)
  13869	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
  13870	lea		FP_SCR0(%a6),%a0	# a0 = patched copy of src in FP_SCR0
  13871	bra.w		fcmp_norm		# compare using the patched src
  13872
  13873fcmp_dnrm_d:
  13874	mov.w		DST_EX(%a1),FP_SCR0_EX(%a6) # copy {sgn,exp} word only
  13875	mov.l		DST_HI(%a1),%d0
  13876	bset		&31,%d0			# DENORM dst; make into small norm
  13877	mov.l		%d0,FP_SCR0_HI(%a6)
  13878	mov.l		DST_LO(%a1),FP_SCR0_LO(%a6)
  13879	lea		FP_SCR0(%a6),%a1	# a1 = patched copy of dst in FP_SCR0
  13880	bra.w		fcmp_norm		# compare using the patched dst
  13881
  13882fcmp_dnrm_sd:
  13883	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
  13884	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
  13885	mov.l		DST_HI(%a1),%d0
  13886	bset		&31,%d0			# DENORM dst; make into small norm
  13887	mov.l		%d0,FP_SCR1_HI(%a6)
  13888	mov.l		SRC_HI(%a0),%d0
  13889	bset		&31,%d0			# DENORM src; make into small norm
  13890	mov.l		%d0,FP_SCR0_HI(%a6)
  13891	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
  13892	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
  13893	lea		FP_SCR1(%a6),%a1	# a1 = patched copy of dst in FP_SCR1
  13894	lea		FP_SCR0(%a6),%a0	# a0 = patched copy of src in FP_SCR0
  13895	bra.w		fcmp_norm
  13896
  13897fcmp_nrm_dnrm:
  13898	mov.b		SRC_EX(%a0),%d0		# determine if like signs
  13899	mov.b		DST_EX(%a1),%d1
  13900	eor.b		%d0,%d1
  13901	bmi.w		fcmp_dnrm_s		# unlike signs; let fcmp_norm decide
  13902
  13903# signs are the same, so must determine the answer ourselves.
  13904	tst.b		%d0			# is src op negative?
  13905	bmi.b		fcmp_nrm_dnrm_m		# yes
  13906	rts					# no; FPSR_CC is left untouched
  13907fcmp_nrm_dnrm_m:
  13908	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
  13909	rts
  13910
  13911fcmp_dnrm_nrm:
  13912	mov.b		SRC_EX(%a0),%d0		# determine if like signs
  13913	mov.b		DST_EX(%a1),%d1
  13914	eor.b		%d0,%d1
  13915	bmi.w		fcmp_dnrm_d		# unlike signs; let fcmp_norm decide
  13916
  13917# signs are the same, so must determine the answer ourselves.
  13918	tst.b		%d0			# is src op negative?
  13919	bpl.b		fcmp_dnrm_nrm_m		# no
  13920	rts					# yes; FPSR_CC is left untouched
  13921fcmp_dnrm_nrm_m:
  13922	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
  13923	rts
  13924
  13925#########################################################################
  13926# XDEF ****************************************************************	#
  13927#	fsglmul(): emulates the fsglmul instruction			#
  13928#									#
  13929# XREF ****************************************************************	#
  13930#	scale_to_zero_src() - scale src exponent to zero		#
  13931#	scale_to_zero_dst() - scale dst exponent to zero		#
  13932#	unf_res4() - return default underflow result for sglop		#
  13933#	ovf_res() - return default overflow result			#
  13934#	res_qnan() - return QNAN result					#
  13935#	res_snan() - return SNAN result					#
  13936#									#
  13937# INPUT ***************************************************************	#
  13938#	a0 = pointer to extended precision source operand		#
  13939#	a1 = pointer to extended precision destination operand		#
  13940#	d0 = rnd prec,mode						#
  13941#									#
  13942# OUTPUT **************************************************************	#
  13943#	fp0 = result							#
  13944#	fp1 = EXOP (if exception occurred)				#
  13945#									#
  13946# ALGORITHM ***********************************************************	#
  13947#	Handle NANs, infinities, and zeroes as special cases. Divide	#
  13948# norms/denorms into ext/sgl/dbl precision.				#
  13949#	For norms/denorms, scale the exponents such that a multiply	#
  13950# instruction won't cause an exception. Use the regular fsglmul to	#
  13951# compute a result. Check if the regular operands would have taken	#
  13952# an exception. If so, return the default overflow/underflow result	#
  13953# and return the EXOP if exceptions are enabled. Else, scale the	#
  13954# result operand to the proper exponent.				#
  13955#									#
  13956#########################################################################
  13957
  13958	global		fsglmul
  13959fsglmul:
  13960	mov.l		%d0,L_SCR3(%a6)		# store rnd info
  13961
  13962	clr.w		%d1
  13963	mov.b		DTAG(%a6),%d1
  13964	lsl.b		&0x3,%d1
  13965	or.b		STAG(%a6),%d1		# d1 = (DTAG << 3) | STAG
  13966
  13967	bne.w		fsglmul_not_norm	# optimize on non-norm input
  13968
  13969fsglmul_norm:
  13970	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)	# FP_SCR1 = copy of dst
  13971	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
  13972	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
  13973
  13974	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)	# FP_SCR0 = copy of src
  13975	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
  13976	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
  13977
  13978	bsr.l		scale_to_zero_src	# scale exponent
  13979	mov.l		%d0,-(%sp)		# save scale factor 1
  13980
  13981	bsr.l		scale_to_zero_dst	# scale dst exponent
  13982
  13983	add.l		(%sp)+,%d0		# SCALE_FACTOR = scale1 + scale2
  13984
  13985	cmpi.l		%d0,&0x3fff-0x7ffe	# would result ovfl?
  13986	beq.w		fsglmul_may_ovfl	# result may rnd to overflow
  13987	blt.w		fsglmul_ovfl		# result will overflow
  13988
  13989	cmpi.l		%d0,&0x3fff+0x0001	# would result unfl?
  13990	beq.w		fsglmul_may_unfl	# result may rnd to no unfl
  13991	bgt.w		fsglmul_unfl		# result will underflow
  13992
  13993fsglmul_normal:
  13994	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
  13995
  13996	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
  13997	fmov.l		&0x0,%fpsr		# clear FPSR
  13998
  13999	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply
  14000
  14001	fmov.l		%fpsr,%d1		# save status
  14002	fmov.l		&0x0,%fpcr		# clear FPCR
  14003
  14004	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
  14005
  14006fsglmul_normal_exit:
  14007	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
  14008	mov.l		%d2,-(%sp)		# save d2
  14009	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
  14010	mov.l		%d1,%d2			# make a copy
  14011	andi.l		&0x7fff,%d1		# strip sign
  14012	andi.w		&0x8000,%d2		# keep old sign
  14013	sub.l		%d0,%d1			# subtract scale factor to unscale
  14014	or.w		%d2,%d1			# concat old sign,new exp
  14015	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
  14016	mov.l		(%sp)+,%d2		# restore d2
  14017	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
  14018	rts
  14019
  14020fsglmul_ovfl:
  14021	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
  14022
  14023	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
  14024	fmov.l		&0x0,%fpsr		# clear FPSR
  14025
  14026	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply
  14027
  14028	fmov.l		%fpsr,%d1		# save status
  14029	fmov.l		&0x0,%fpcr		# clear FPCR
  14030
  14031	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
  14032
  14033fsglmul_ovfl_tst:
  14034
  14035# save setting this until now because this is where fsglmul_may_ovfl may jump in
  14036	or.l		&ovfl_inx_mask, USER_FPSR(%a6) # set ovfl/aovfl/ainex
  14037
  14038	mov.b		FPCR_ENABLE(%a6),%d1
  14039	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
  14040	bne.b		fsglmul_ovfl_ena	# yes
  14041
  14042fsglmul_ovfl_dis:
  14043	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
  14044	sne		%d1			# set sign param accordingly
  14045	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
  14046	andi.b		&0x30,%d0		# force prec = ext
  14047	bsr.l		ovf_res			# calculate default result
  14048	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
  14049	fmovm.x		(%a0),&0x80		# return default result in fp0
  14050	rts
  14051
  14052fsglmul_ovfl_ena:
  14053	fmovm.x		&0x80,FP_SCR0(%a6)	# move result to stack
  14054
  14055	mov.l		%d2,-(%sp)		# save d2
  14056	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
  14057	mov.l		%d1,%d2			# make a copy
  14058	andi.l		&0x7fff,%d1		# strip sign
  14059	sub.l		%d0,%d1			# subtract scale factor to unscale
  14060	subi.l		&0x6000,%d1		# subtract bias
  14061	andi.w		&0x7fff,%d1
  14062	andi.w		&0x8000,%d2		# keep old sign
  14063	or.w		%d2,%d1			# concat old sign,new exp
  14064	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
  14065	mov.l		(%sp)+,%d2		# restore d2
  14066	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
  14067	bra.b		fsglmul_ovfl_dis	# EXOP done; now build default result
  14068
  14069fsglmul_may_ovfl:
  14070	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
  14071
  14072	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
  14073	fmov.l		&0x0,%fpsr		# clear FPSR
  14074
  14075	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply
  14076
  14077	fmov.l		%fpsr,%d1		# save status
  14078	fmov.l		&0x0,%fpcr		# clear FPCR
  14079
  14080	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
  14081
  14082	fabs.x		%fp0,%fp1		# make a copy of result
  14083	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
  14084	fbge.w		fsglmul_ovfl_tst	# yes; overflow has occurred
  14085
  14086# no, it didn't overflow; we have correct result
  14087	bra.w		fsglmul_normal_exit	# unscale and return it
  14088
  14089fsglmul_unfl:
  14090	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
  14091
  14092	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
  14093
  14094	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR (RZ rounding)
  14095	fmov.l		&0x0,%fpsr		# clear FPSR
  14096
  14097	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply
  14098
  14099	fmov.l		%fpsr,%d1		# save status
  14100	fmov.l		&0x0,%fpcr		# clear FPCR
  14101
  14102	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
  14103
  14104	mov.b		FPCR_ENABLE(%a6),%d1
  14105	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
  14106	bne.b		fsglmul_unfl_ena	# yes
  14107
  14108fsglmul_unfl_dis:
  14109	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
  14110
  14111	lea		FP_SCR0(%a6),%a0	# pass: result addr
  14112	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
  14113	bsr.l		unf_res4		# calculate default result
  14114	or.b		%d0,FPSR_CC(%a6)	# 'Z' bit may have been set
  14115	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
  14116	rts
  14117
  14118#
  14119# UNFL (or INEX) is enabled: redo the multiply into fp1 and return it,
  14120# with a rebiased exponent, as the EXOP.
  14121fsglmul_unfl_ena:
  14122	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op
  14123
  14124	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
  14125	fmov.l		&0x0,%fpsr		# clear FPSR
  14126
  14127	fsglmul.x	FP_SCR0(%a6),%fp1	# execute sgl multiply
  14128
  14129	fmov.l		&0x0,%fpcr		# clear FPCR
  14130
  14131	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
  14132	mov.l		%d2,-(%sp)		# save d2
  14133	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
  14134	mov.l		%d1,%d2			# make a copy
  14135	andi.l		&0x7fff,%d1		# strip sign
  14136	andi.w		&0x8000,%d2		# keep old sign
  14137	sub.l		%d0,%d1			# subtract scale factor to unscale
  14138	addi.l		&0x6000,%d1		# add bias
  14139	andi.w		&0x7fff,%d1
  14140	or.w		%d2,%d1			# concat old sign,new exp
  14141	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
  14142	mov.l		(%sp)+,%d2		# restore d2
  14143	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
  14144	bra.w		fsglmul_unfl_dis	# EXOP done; now build default result
  14145
  14146fsglmul_may_unfl:
  14147	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op into fp0
  14148
  14149	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
  14150	fmov.l		&0x0,%fpsr		# clear FPSR
  14151
  14152	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply
  14153
  14154	fmov.l		%fpsr,%d1		# save status
  14155	fmov.l		&0x0,%fpcr		# clear FPCR
  14156
  14157	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
  14158
  14159	fabs.x		%fp0,%fp1		# fp1 = |result|; fp0 keeps the result
  14160	fcmp.b		%fp1,&0x2		# compare |result| with 2.b
  14161	fbgt.w		fsglmul_normal_exit	# greater; no underflow occurred
  14162	fblt.w		fsglmul_unfl		# less; underflow occurred
  14163
  14164#
  14165# we still don't know if underflow occurred. result is ~ equal to 2. but,
  14166# we don't know if the result was an underflow that rounded up to a 2 or
  14167# a normalized number that rounded down to a 2. so, redo the entire operation
  14168# using RZ as the rounding mode to see what the pre-rounded result is.
  14169# this case should be relatively rare.
  14170#
  14171	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1
  14172
  14173	mov.l		L_SCR3(%a6),%d1		# fetch rnd prec,mode
  14174	andi.b		&0xc0,%d1		# keep rnd prec
  14175	ori.b		&rz_mode*0x10,%d1	# insert RZ
  14176
  14177	fmov.l		%d1,%fpcr		# set FPCR
  14178	fmov.l		&0x0,%fpsr		# clear FPSR
  14179
  14180	fsglmul.x	FP_SCR0(%a6),%fp1	# execute sgl multiply
  14181
  14182	fmov.l		&0x0,%fpcr		# clear FPCR
  14183	fabs.x		%fp1			# make absolute value
  14184	fcmp.b		%fp1,&0x2		# is |result| < 2.b?
  14185	fbge.w		fsglmul_normal_exit	# no; no underflow occurred
  14186	bra.w		fsglmul_unfl		# yes, underflow occurred
  14187
  14188##############################################################################
  14189
  14190#
  14191# Single Precision Multiply: inputs are not both normalized; what are they?
  14192#
  14193fsglmul_not_norm:
  14194	mov.w		(tbl_fsglmul_op.b,%pc,%d1.w*2),%d1	# fetch handler offset for index d1
  14195	jmp		(tbl_fsglmul_op.b,%pc,%d1.w*1)	# jump to tbl_fsglmul_op + offset
  14196
  14197	swbeg		&48
  14198tbl_fsglmul_op:
  14199	short		fsglmul_norm		- tbl_fsglmul_op # NORM x NORM
  14200	short		fsglmul_zero		- tbl_fsglmul_op # NORM x ZERO
  14201	short		fsglmul_inf_src		- tbl_fsglmul_op # NORM x INF
  14202	short		fsglmul_res_qnan	- tbl_fsglmul_op # NORM x QNAN
  14203	short		fsglmul_norm		- tbl_fsglmul_op # NORM x DENORM
  14204	short		fsglmul_res_snan	- tbl_fsglmul_op # NORM x SNAN
  14205	short		tbl_fsglmul_op		- tbl_fsglmul_op #
  14206	short		tbl_fsglmul_op		- tbl_fsglmul_op #
  14207
  14208	short		fsglmul_zero		- tbl_fsglmul_op # ZERO x NORM
  14209	short		fsglmul_zero		- tbl_fsglmul_op # ZERO x ZERO
  14210	short		fsglmul_res_operr	- tbl_fsglmul_op # ZERO x INF
  14211	short		fsglmul_res_qnan	- tbl_fsglmul_op # ZERO x QNAN
  14212	short		fsglmul_zero		- tbl_fsglmul_op # ZERO x DENORM
  14213	short		fsglmul_res_snan	- tbl_fsglmul_op # ZERO x SNAN
  14214	short		tbl_fsglmul_op		- tbl_fsglmul_op #
  14215	short		tbl_fsglmul_op		- tbl_fsglmul_op #
  14216
  14217	short		fsglmul_inf_dst		- tbl_fsglmul_op # INF x NORM
  14218	short		fsglmul_res_operr	- tbl_fsglmul_op # INF x ZERO
  14219	short		fsglmul_inf_dst		- tbl_fsglmul_op # INF x INF
  14220	short		fsglmul_res_qnan	- tbl_fsglmul_op # INF x QNAN
  14221	short		fsglmul_inf_dst		- tbl_fsglmul_op # INF x DENORM
  14222	short		fsglmul_res_snan	- tbl_fsglmul_op # INF x SNAN
  14223	short		tbl_fsglmul_op		- tbl_fsglmul_op #
  14224	short		tbl_fsglmul_op		- tbl_fsglmul_op #
  14225
  14226	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x NORM
  14227	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x ZERO
  14228	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x INF
  14229	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x QNAN
  14230	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x DENORM
  14231	short		fsglmul_res_snan	- tbl_fsglmul_op # QNAN x SNAN
  14232	short		tbl_fsglmul_op		- tbl_fsglmul_op #
  14233	short		tbl_fsglmul_op		- tbl_fsglmul_op #
  14234
  14235	short		fsglmul_norm		- tbl_fsglmul_op # DENORM x NORM
  14236	short		fsglmul_zero		- tbl_fsglmul_op # DENORM x ZERO
  14237	short		fsglmul_inf_src		- tbl_fsglmul_op # DENORM x INF
  14238	short		fsglmul_res_qnan	- tbl_fsglmul_op # DENORM x QNAN
  14239	short		fsglmul_norm		- tbl_fsglmul_op # DENORM x DENORM
  14240	short		fsglmul_res_snan	- tbl_fsglmul_op # DENORM x SNAN
  14241	short		tbl_fsglmul_op		- tbl_fsglmul_op #
  14242	short		tbl_fsglmul_op		- tbl_fsglmul_op #
  14243
  14244	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x NORM
  14245	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x ZERO
  14246	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x INF
  14247	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x QNAN
  14248	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x DENORM
  14249	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x SNAN
  14250	short		tbl_fsglmul_op		- tbl_fsglmul_op #
  14251	short		tbl_fsglmul_op		- tbl_fsglmul_op #
  14252
  14253fsglmul_res_operr:
  14254	bra.l		res_operr		# long branch to the OPERR handler
  14255fsglmul_res_snan:
  14256	bra.l		res_snan		# long branch to the SNAN handler
  14257fsglmul_res_qnan:
  14258	bra.l		res_qnan		# long branch to the QNAN handler
  14259fsglmul_zero:
  14260	bra.l		fmul_zero		# reuse fmul's zero-result handler
  14261fsglmul_inf_src:
  14262	bra.l		fmul_inf_src		# reuse fmul's src-is-INF handler
  14263fsglmul_inf_dst:
  14264	bra.l		fmul_inf_dst		# reuse fmul's dst-is-INF handler
  14265
  14266#########################################################################
  14267# XDEF ****************************************************************	#
  14268#	fsgldiv(): emulates the fsgldiv instruction			#
  14269#									#
  14270# XREF ****************************************************************	#
  14271#	scale_to_zero_src() - scale src exponent to zero		#
  14272#	scale_to_zero_dst() - scale dst exponent to zero		#
  14273#	unf_res4() - return default underflow result for sglop		#
  14274#	ovf_res() - return default overflow result			#
  14275#	res_qnan() - return QNAN result					#
  14276#	res_snan() - return SNAN result					#
  14277#									#
  14278# INPUT ***************************************************************	#
  14279#	a0 = pointer to extended precision source operand		#
  14280#	a1 = pointer to extended precision destination operand		#
  14281#	d0  rnd prec,mode						#
  14282#									#
  14283# OUTPUT **************************************************************	#
  14284#	fp0 = result							#
  14285#	fp1 = EXOP (if exception occurred)				#
  14286#									#
  14287# ALGORITHM ***********************************************************	#
  14288#	Handle NANs, infinities, and zeroes as special cases. Divide	#
  14289# norms/denorms into ext/sgl/dbl precision.				#
  14290#	For norms/denorms, scale the exponents such that a divide	#
  14291# instruction won't cause an exception. Use the regular fsgldiv to	#
  14292# compute a result. Check if the regular operands would have taken	#
  14293# an exception. If so, return the default overflow/underflow result	#
  14294# and return the EXOP if exceptions are enabled. Else, scale the	#
  14295# result operand to the proper exponent.				#
  14296#									#
  14297#########################################################################
  14298
  14299	global		fsgldiv
  14300fsgldiv:
  14301	mov.l		%d0,L_SCR3(%a6)		# store rnd info
  14302
  14303	clr.w		%d1			# d1.w = 0; becomes the table index
  14304	mov.b		DTAG(%a6),%d1		# fetch dst tag
  14305	lsl.b		&0x3,%d1		# move dst tag up by 3 bits
  14306	or.b		STAG(%a6),%d1		# combine dst,src tags
  14307
  14308	bne.w		fsgldiv_not_norm	# optimize on non-norm input
  14309
  14310#
  14311# DIVIDE: NORMs and DENORMs ONLY!
  14312#
  14313fsgldiv_norm:
  14314	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)	# copy dst operand to FP_SCR1
  14315	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
  14316	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
  14317
  14318	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)	# copy src operand to FP_SCR0
  14319	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
  14320	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
  14321
  14322	bsr.l		scale_to_zero_src	# calculate scale factor 1
  14323	mov.l		%d0,-(%sp)		# save scale factor 1
  14324
  14325	bsr.l		scale_to_zero_dst	# calculate scale factor 2
  14326
  14327	neg.l		(%sp)			# S.F. = scale1 - scale2
  14328	add.l		%d0,(%sp)
  14329
  14330	mov.w		2+L_SCR3(%a6),%d1	# fetch precision,mode
  14331	lsr.b		&0x6,%d1		# NOTE(review): d1 looks unused before it is overwritten
  14332	mov.l		(%sp)+,%d0		# d0 = combined scale factor
  14333	cmpi.l		%d0,&0x3fff-0x7ffe	# will result overflow?
  14334	ble.w		fsgldiv_may_ovfl	# maybe
  14335
  14336	cmpi.l		%d0,&0x3fff-0x0000	# will result underflow?
  14337	beq.w		fsgldiv_may_unfl	# maybe
  14338	bgt.w		fsgldiv_unfl		# yes; go handle underflow
  14339
  14340fsgldiv_normal:
  14341	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op into fp0
  14342
  14343	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
  14344	fmov.l		&0x0,%fpsr		# clear FPSR
  14345
  14346	fsgldiv.x	FP_SCR0(%a6),%fp0	# perform sgl divide
  14347
  14348	fmov.l		%fpsr,%d1		# save FPSR
  14349	fmov.l		&0x0,%fpcr		# clear FPCR
  14350
  14351	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
  14352
  14353fsgldiv_normal_exit:
  14354	fmovm.x		&0x80,FP_SCR0(%a6)	# store result (fp0) on stack
  14355	mov.l		%d2,-(%sp)		# save d2
  14356	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
  14357	mov.l		%d1,%d2			# make a copy
  14358	andi.l		&0x7fff,%d1		# strip sign
  14359	andi.w		&0x8000,%d2		# keep old sign
  14360	sub.l		%d0,%d1			# add scale factor
  14361	or.w		%d2,%d1			# concat old sign,new exp
  14362	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
  14363	mov.l		(%sp)+,%d2		# restore d2
  14364	fmovm.x		FP_SCR0(%a6),&0x80	# return rescaled result in fp0
  14365	rts
  14366
  14367fsgldiv_may_ovfl:
  14368	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op into fp0
  14369
  14370	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
  14371	fmov.l		&0x0,%fpsr		# clear FPSR
  14372
  14373	fsgldiv.x	FP_SCR0(%a6),%fp0	# execute divide
  14374
  14375	fmov.l		%fpsr,%d1		# save status
  14376	fmov.l		&0x0,%fpcr		# clear FPCR
  14377
  14378	or.l		%d1,USER_FPSR(%a6)	# save INEX,N
  14379
  14380	fmovm.x		&0x01,-(%sp)		# save result to stack
  14381	mov.w		(%sp),%d1		# fetch new exponent
  14382	add.l		&0xc,%sp		# clear result
  14383	andi.l		&0x7fff,%d1		# strip sign
  14384	sub.l		%d0,%d1			# add scale factor
  14385	cmp.l		%d1,&0x7fff		# did divide overflow?
  14386	blt.b		fsgldiv_normal_exit	# no; else fall into fsgldiv_ovfl_tst
  14387
  14388fsgldiv_ovfl_tst:
  14389	or.w		&ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
  14390
  14391	mov.b		FPCR_ENABLE(%a6),%d1	# fetch exception enable byte
  14392	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
  14393	bne.b		fsgldiv_ovfl_ena	# yes; else fall into fsgldiv_ovfl_dis
  14394
  14395fsgldiv_ovfl_dis:
  14396	btst		&neg_bit,FPSR_CC(%a6)	# is result negative
  14397	sne		%d1			# set sign param accordingly
  14398	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
  14399	andi.b		&0x30,%d0		# kill precision
  14400	bsr.l		ovf_res			# calculate default result
  14401	or.b		%d0,FPSR_CC(%a6)	# set INF if applicable
  14402	fmovm.x		(%a0),&0x80		# return default result (at a0) in fp0
  14403	rts
  14404
  14405fsgldiv_ovfl_ena:
  14406	fmovm.x		&0x80,FP_SCR0(%a6)	# move result (fp0) to stack
  14407
  14408	mov.l		%d2,-(%sp)		# save d2
  14409	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
  14410	mov.l		%d1,%d2			# make a copy
  14411	andi.l		&0x7fff,%d1		# strip sign
  14412	andi.w		&0x8000,%d2		# keep old sign
  14413	sub.l		%d0,%d1			# add scale factor
  14414	subi.l		&0x6000,%d1		# subtract new bias
  14415	andi.w		&0x7fff,%d1		# clear ms bit
  14416	or.w		%d2,%d1			# concat old sign,new exp
  14417	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
  14418	mov.l		(%sp)+,%d2		# restore d2
  14419	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
  14420	bra.b		fsgldiv_ovfl_dis	# then build default result in fp0
  14421
  14422fsgldiv_unfl:
  14423	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
  14424
  14425	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op into fp0
  14426
  14427	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR: force RZ rnd mode
  14428	fmov.l		&0x0,%fpsr		# clear FPSR
  14429
  14430	fsgldiv.x	FP_SCR0(%a6),%fp0	# execute sgl divide
  14431
  14432	fmov.l		%fpsr,%d1		# save status
  14433	fmov.l		&0x0,%fpcr		# clear FPCR
  14434
  14435	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
  14436
  14437	mov.b		FPCR_ENABLE(%a6),%d1	# fetch exception enable byte
  14438	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
  14439	bne.b		fsgldiv_unfl_ena	# yes; else fall into fsgldiv_unfl_dis
  14440
  14441fsgldiv_unfl_dis:
  14442	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result (fp0) to FP_SCR0
  14443
  14444	lea		FP_SCR0(%a6),%a0	# pass: result addr
  14445	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
  14446	bsr.l		unf_res4		# calculate default result
  14447	or.b		%d0,FPSR_CC(%a6)	# merge ccodes from d0; 'Z' bit may have been set
  14448	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
  14449	rts
  14450
  14451#
  14452# UNFL or INEX is enabled: build the EXOP in fp1 first.
  14453#
  14454fsgldiv_unfl_ena:
  14455	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1
  14456
  14457	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
  14458	fmov.l		&0x0,%fpsr		# clear FPSR
  14459
  14460	fsgldiv.x	FP_SCR0(%a6),%fp1	# execute sgl divide
  14461
  14462	fmov.l		&0x0,%fpcr		# clear FPCR
  14463
  14464	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
  14465	mov.l		%d2,-(%sp)		# save d2
  14466	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
  14467	mov.l		%d1,%d2			# make a copy
  14468	andi.l		&0x7fff,%d1		# strip sign
  14469	andi.w		&0x8000,%d2		# keep old sign
  14470	sub.l		%d0,%d1			# add scale factor
  14471	addi.l		&0x6000,%d1		# add bias
  14472	andi.w		&0x7fff,%d1		# clear top bit
  14473	or.w		%d2,%d1			# concat old sign, new exp
  14474	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
  14475	mov.l		(%sp)+,%d2		# restore d2
  14476	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
  14477	bra.b		fsgldiv_unfl_dis	# then build default result in fp0
  14478
  14479#
  14480# the divide operation MAY underflow:
  14481#
  14482fsgldiv_may_unfl:
  14483	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op into fp0
  14484
  14485	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
  14486	fmov.l		&0x0,%fpsr		# clear FPSR
  14487
  14488	fsgldiv.x	FP_SCR0(%a6),%fp0	# execute sgl divide
  14489
  14490	fmov.l		%fpsr,%d1		# save status
  14491	fmov.l		&0x0,%fpcr		# clear FPCR
  14492
  14493	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
  14494
  14495	fabs.x		%fp0,%fp1		# fp1 = |result|; fp0 keeps the result
  14496	fcmp.b		%fp1,&0x1		# compare |result| with 1.b
  14497	fbgt.w		fsgldiv_normal_exit	# greater; no underflow occurred
  14498	fblt.w		fsgldiv_unfl		# less; underflow occurred
  14499
  14500#
  14501# we still don't know if underflow occurred. result is ~ equal to 1. but,
  14502# we don't know if the result was an underflow that rounded up to a 1
  14503# or a normalized number that rounded down to a 1. so, redo the entire
  14504# operation using RZ as the rounding mode to see what the pre-rounded
  14505# result is. this case should be relatively rare.
  14506#
  14507	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into %fp1
  14508
  14509	clr.l		%d1			# clear scratch register (prec bits stay zero)
  14510	ori.b		&rz_mode*0x10,%d1	# force RZ rnd mode
  14511
  14512	fmov.l		%d1,%fpcr		# set FPCR
  14513	fmov.l		&0x0,%fpsr		# clear FPSR
  14514
  14515	fsgldiv.x	FP_SCR0(%a6),%fp1	# execute sgl divide
  14516
  14517	fmov.l		&0x0,%fpcr		# clear FPCR
  14518	fabs.x		%fp1			# make absolute value
  14519	fcmp.b		%fp1,&0x1		# is |result| < 1.b?
  14520	fbge.w		fsgldiv_normal_exit	# no; no underflow occurred
  14521	bra.w		fsgldiv_unfl		# yes; underflow occurred
  14522
  14523############################################################################
  14524
  14525#
  14526# Divide: inputs are not both normalized; what are they?
  14527#
  14528fsgldiv_not_norm:
  14529	mov.w		(tbl_fsgldiv_op.b,%pc,%d1.w*2),%d1	# fetch handler offset for index d1
  14530	jmp		(tbl_fsgldiv_op.b,%pc,%d1.w*1)	# jump to tbl_fsgldiv_op + offset
  14531
  14532	swbeg		&48
  14533tbl_fsgldiv_op:
  14534	short		fsgldiv_norm		- tbl_fsgldiv_op # NORM / NORM
  14535	short		fsgldiv_inf_load	- tbl_fsgldiv_op # NORM / ZERO
  14536	short		fsgldiv_zero_load	- tbl_fsgldiv_op # NORM / INF
  14537	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # NORM / QNAN
  14538	short		fsgldiv_norm		- tbl_fsgldiv_op # NORM / DENORM
  14539	short		fsgldiv_res_snan	- tbl_fsgldiv_op # NORM / SNAN
  14540	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
  14541	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
  14542
  14543	short		fsgldiv_zero_load	- tbl_fsgldiv_op # ZERO / NORM
  14544	short		fsgldiv_res_operr	- tbl_fsgldiv_op # ZERO / ZERO
  14545	short		fsgldiv_zero_load	- tbl_fsgldiv_op # ZERO / INF
  14546	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # ZERO / QNAN
  14547	short		fsgldiv_zero_load	- tbl_fsgldiv_op # ZERO / DENORM
  14548	short		fsgldiv_res_snan	- tbl_fsgldiv_op # ZERO / SNAN
  14549	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
  14550	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
  14551
  14552	short		fsgldiv_inf_dst		- tbl_fsgldiv_op # INF / NORM
  14553	short		fsgldiv_inf_dst		- tbl_fsgldiv_op # INF / ZERO
  14554	short		fsgldiv_res_operr	- tbl_fsgldiv_op # INF / INF
  14555	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # INF / QNAN
  14556	short		fsgldiv_inf_dst		- tbl_fsgldiv_op # INF / DENORM
  14557	short		fsgldiv_res_snan	- tbl_fsgldiv_op # INF / SNAN
  14558	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
  14559	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
  14560
  14561	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / NORM
  14562	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / ZERO
  14563	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / INF
  14564	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / QNAN
  14565	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / DENORM
  14566	short		fsgldiv_res_snan	- tbl_fsgldiv_op # QNAN / SNAN
  14567	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
  14568	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
  14569
  14570	short		fsgldiv_norm		- tbl_fsgldiv_op # DENORM / NORM
  14571	short		fsgldiv_inf_load	- tbl_fsgldiv_op # DENORM / ZERO
  14572	short		fsgldiv_zero_load	- tbl_fsgldiv_op # DENORM / INF
  14573	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # DENORM / QNAN
  14574	short		fsgldiv_norm		- tbl_fsgldiv_op # DENORM / DENORM
  14575	short		fsgldiv_res_snan	- tbl_fsgldiv_op # DENORM / SNAN
  14576	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
  14577	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
  14578
  14579	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / NORM
  14580	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / ZERO
  14581	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / INF
  14582	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / QNAN
  14583	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / DENORM
  14584	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / SNAN
  14585	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
  14586	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
  14587
  14588fsgldiv_res_qnan:
  14589	bra.l		res_qnan		# long branch to the QNAN handler
  14590fsgldiv_res_snan:
  14591	bra.l		res_snan		# long branch to the SNAN handler
  14592fsgldiv_res_operr:
  14593	bra.l		res_operr		# long branch to the OPERR handler
  14594fsgldiv_inf_load:
  14595	bra.l		fdiv_inf_load		# reuse fdiv's handler
  14596fsgldiv_zero_load:
  14597	bra.l		fdiv_zero_load		# reuse fdiv's handler
  14598fsgldiv_inf_dst:
  14599	bra.l		fdiv_inf_dst		# reuse fdiv's dst-is-INF handler
  14600
  14601#########################################################################
  14602# XDEF ****************************************************************	#
  14603#	fadd(): emulates the fadd instruction				#
  14604#	fsadd(): emulates the fsadd instruction				#
  14605#	fdadd(): emulates the fdadd instruction				#
  14606#									#
  14607# XREF ****************************************************************	#
  14608#	addsub_scaler2() - scale the operands so they won't take exc	#
  14609#	ovf_res() - return default overflow result			#
  14610#	unf_res() - return default underflow result			#
  14611#	res_qnan() - set QNAN result					#
  14612#	res_snan() - set SNAN result					#
  14613#	res_operr() - set OPERR result					#
  14614#	scale_to_zero_src() - set src operand exponent equal to zero	#
  14615#	scale_to_zero_dst() - set dst operand exponent equal to zero	#
  14616#									#
  14617# INPUT ***************************************************************	#
  14618#	a0 = pointer to extended precision source operand		#
  14619#	a1 = pointer to extended precision destination operand		#
  14620#									#
  14621# OUTPUT **************************************************************	#
  14622#	fp0 = result							#
  14623#	fp1 = EXOP (if exception occurred)				#
  14624#									#
  14625# ALGORITHM ***********************************************************	#
  14626#	Handle NANs, infinities, and zeroes as special cases. Divide	#
  14627# norms into extended, single, and double precision.			#
  14628#	Do addition after scaling exponents such that exception won't	#
  14629# occur. Then, check result exponent to see if exception would have	#
  14630# occurred. If so, return default result and maybe EXOP. Else, insert	#
  14631# the correct result exponent and return. Set FPSR bits as appropriate.	#
  14632#									#
  14633#########################################################################
  14634
  14635	global		fsadd
  14636fsadd:
  14637	andi.b		&0x30,%d0		# clear rnd prec
  14638	ori.b		&s_mode*0x10,%d0	# insert sgl prec
  14639	bra.b		fadd			# continue in the common fadd code
  14640
  14641	global		fdadd
  14642fdadd:
  14643	andi.b		&0x30,%d0		# clear rnd prec
  14644	ori.b		&d_mode*0x10,%d0	# insert dbl prec; fall through into fadd
  14645
  14646	global		fadd
  14647fadd:
  14648	mov.l		%d0,L_SCR3(%a6)		# store rnd info
  14649
  14650	clr.w		%d1			# d1.w = 0; becomes the table index
  14651	mov.b		DTAG(%a6),%d1		# fetch dst tag
  14652	lsl.b		&0x3,%d1		# move dst tag up by 3 bits
  14653	or.b		STAG(%a6),%d1		# combine dst,src tags
  14654
  14655	bne.w		fadd_not_norm		# optimize on non-norm input
  14656
  14657#
  14658# ADD: norms and denorms
  14659#
  14660fadd_norm:
  14661	bsr.l		addsub_scaler2		# scale exponents
  14662
  14663fadd_zero_entry:
  14664	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op into fp0
  14665
  14666	fmov.l		&0x0,%fpsr		# clear FPSR
  14667	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
  14668
  14669	fadd.x		FP_SCR0(%a6),%fp0	# execute add
  14670
  14671	fmov.l		&0x0,%fpcr		# clear FPCR
  14672	fmov.l		%fpsr,%d1		# fetch INEX2,N,Z
  14673
  14674	or.l		%d1,USER_FPSR(%a6)	# save exc and ccode bits
  14675
  14676	fbeq.w		fadd_zero_exit		# if result is zero, end now
  14677
  14678	mov.l		%d2,-(%sp)		# save d2
  14679
  14680	fmovm.x		&0x01,-(%sp)		# save result to stack
  14681
  14682	mov.w		2+L_SCR3(%a6),%d1	# fetch rnd prec,mode
  14683	lsr.b		&0x6,%d1		# d1 = rnd prec; indexes the tables below
  14684
  14685	mov.w		(%sp),%d2		# fetch new sign, exp
  14686	andi.l		&0x7fff,%d2		# strip sign
  14687	sub.l		%d0,%d2			# add scale factor
  14688
  14689	cmp.l		%d2,(tbl_fadd_ovfl.b,%pc,%d1.w*4) # is it an overflow?
  14690	bge.b		fadd_ovfl		# yes
  14691
  14692	cmp.l		%d2,(tbl_fadd_unfl.b,%pc,%d1.w*4) # is it an underflow?
  14693	blt.w		fadd_unfl		# yes
  14694	beq.w		fadd_may_unfl		# maybe; go find out
  14695
  14696fadd_normal:
  14697	mov.w		(%sp),%d1		# fetch {sgn,exp} of saved result
  14698	andi.w		&0x8000,%d1		# keep sign
  14699	or.w		%d2,%d1			# concat sign,new exp
  14700	mov.w		%d1,(%sp)		# insert new exponent
  14701
  14702	fmovm.x		(%sp)+,&0x80		# return result in fp0
  14703
  14704	mov.l		(%sp)+,%d2		# restore d2
  14705	rts
  14706
  14707fadd_zero_exit:
  14708#	fmov.s		&0x00000000,%fp0	# return zero in fp0
  14709	rts					# fp0 still holds the zero result of the add
  14710
  14711tbl_fadd_ovfl:
  14712	long		0x7fff			# ext ovfl: exp >= this overflows
  14713	long		0x407f			# sgl ovfl
  14714	long		0x43ff			# dbl ovfl
  14715
  14716tbl_fadd_unfl:
  14717	long	        0x0000			# ext unfl: exp < this underflows; equal = maybe
  14718	long		0x3f81			# sgl unfl
  14719	long		0x3c01			# dbl unfl
  14720
  14721fadd_ovfl:
  14722	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
  14723
  14724	mov.b		FPCR_ENABLE(%a6),%d1	# fetch exception enable byte
  14725	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
  14726	bne.b		fadd_ovfl_ena		# yes
  14727
  14728	add.l		&0xc,%sp		# discard the result saved on the stack
  14729fadd_ovfl_dis:
  14730	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
  14731	sne		%d1			# set sign param accordingly
  14732	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
  14733	bsr.l		ovf_res			# calculate default result
  14734	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
  14735	fmovm.x		(%a0),&0x80		# return default result in fp0
  14736	mov.l		(%sp)+,%d2		# restore d2
  14737	rts
  14738
  14739fadd_ovfl_ena:
  14740	mov.b		L_SCR3(%a6),%d1
  14741	andi.b		&0xc0,%d1		# is precision extended?
  14742	bne.b		fadd_ovfl_ena_sd	# no; prec = sgl or dbl
  14743
  14744fadd_ovfl_ena_cont:
  14745	mov.w		(%sp),%d1		# fetch {sgn,exp} of saved result
  14746	andi.w		&0x8000,%d1		# keep sign
  14747	subi.l		&0x6000,%d2		# subtract extra bias
  14748	andi.w		&0x7fff,%d2		# clear top bit
  14749	or.w		%d2,%d1			# concat sign,new exp
  14750	mov.w		%d1,(%sp)		# insert new exponent
  14751
  14752	fmovm.x		(%sp)+,&0x40		# return EXOP in fp1
  14753	bra.b		fadd_ovfl_dis		# then build default result in fp0
  14754
  14755fadd_ovfl_ena_sd:
  14756	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op into fp0
  14757
  14758	mov.l		L_SCR3(%a6),%d1
  14759	andi.b		&0x30,%d1		# keep rnd mode
  14760	fmov.l		%d1,%fpcr		# set FPCR
  14761
  14762	fadd.x		FP_SCR0(%a6),%fp0	# execute add
  14763
  14764	fmov.l		&0x0,%fpcr		# clear FPCR
  14765
  14766	add.l		&0xc,%sp		# drop the previously saved result
  14767	fmovm.x		&0x01,-(%sp)		# push the new result in its place
  14768	bra.b		fadd_ovfl_ena_cont	# go build the EXOP from it
  14769
  14770fadd_unfl:
  14771	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
  14772
  14773	add.l		&0xc,%sp		# discard the result saved on the stack
  14774
  14775	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op into fp0
  14776
  14777	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR: force RZ rnd mode
  14778	fmov.l		&0x0,%fpsr		# clear FPSR
  14779
  14780	fadd.x		FP_SCR0(%a6),%fp0	# execute add
  14781
  14782	fmov.l		&0x0,%fpcr		# clear FPCR
  14783	fmov.l		%fpsr,%d1		# save status
  14784
  14785	or.l		%d1,USER_FPSR(%a6)	# save INEX,N
  14786
  14787	mov.b		FPCR_ENABLE(%a6),%d1	# fetch exception enable byte
  14788	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
  14789	bne.b		fadd_unfl_ena		# yes; else fall into fadd_unfl_dis
  14790
  14791fadd_unfl_dis:
  14792	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result (fp0) to FP_SCR0
  14793
  14794	lea		FP_SCR0(%a6),%a0	# pass: result addr
  14795	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
  14796	bsr.l		unf_res			# calculate default result
  14797	or.b		%d0,FPSR_CC(%a6)	# merge ccodes from d0; 'Z' bit may have been set
  14798	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
  14799	mov.l		(%sp)+,%d2		# restore d2
  14800	rts
  14801
  14802fadd_unfl_ena:
  14803	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1
  14804
  14805	mov.l		L_SCR3(%a6),%d1
  14806	andi.b		&0xc0,%d1		# is precision extended?
  14807	bne.b		fadd_unfl_ena_sd	# no; sgl or dbl
  14808
  14809	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
  14810
  14811fadd_unfl_ena_cont:
  14812	fmov.l		&0x0,%fpsr		# clear FPSR
  14813
  14814	fadd.x		FP_SCR0(%a6),%fp1	# execute add
  14815
  14816	fmov.l		&0x0,%fpcr		# clear FPCR
  14817
  14818	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
  14819	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
  14820	mov.l		%d1,%d2			# make a copy (d2 is restored in fadd_unfl_dis)
  14821	andi.l		&0x7fff,%d1		# strip sign
  14822	andi.w		&0x8000,%d2		# keep old sign
  14823	sub.l		%d0,%d1			# add scale factor
  14824	addi.l		&0x6000,%d1		# add new bias
  14825	andi.w		&0x7fff,%d1		# clear top bit
  14826	or.w		%d2,%d1			# concat sign,new exp
  14827	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
  14828	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
  14829	bra.w		fadd_unfl_dis		# then build default result in fp0
  14830
  14831fadd_unfl_ena_sd:
  14832	mov.l		L_SCR3(%a6),%d1		# fetch rnd prec,mode
  14833	andi.b		&0x30,%d1		# use only rnd mode
  14834	fmov.l		%d1,%fpcr		# set FPCR
  14835
  14836	bra.b		fadd_unfl_ena_cont	# rejoin the common EXOP code
  14837
  14838#
  14839# result is equal to the smallest normalized number in the selected precision
  14840# if the precision is extended, this result could not have come from an
  14841# underflow that rounded up.
  14842#
  14843fadd_may_unfl:
  14844	mov.l		L_SCR3(%a6),%d1		# fetch rnd prec,mode
  14845	andi.b		&0xc0,%d1		# is precision extended?
  14846	beq.w		fadd_normal		# yes; no underflow occurred
  14847
  14848	mov.l		0x4(%sp),%d1		# extract hi(man)
  14849	cmpi.l		%d1,&0x80000000		# is hi(man) = 0x80000000?
  14850	bne.w		fadd_normal		# no; no underflow occurred
  14851
  14852	tst.l		0x8(%sp)		# is lo(man) = 0x0?
  14853	bne.w		fadd_normal		# no; no underflow occurred
  14854
  14855	btst		&inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
  14856	beq.w		fadd_normal		# no; no underflow occurred
  14857
  14858#
  14859# ok, so now the result has an exponent equal to the smallest normalized
  14860# exponent for the selected precision. also, the mantissa is equal to
  14861# 0x8000000000000000 and this mantissa is the result of rounding non-zero
  14862# g,r,s.
  14863# now, we must determine whether the pre-rounded result was an underflow
  14864# rounded "up" or a normalized number rounded "down".
  14865# so, we do this by re-executing the add using RZ as the rounding mode and
  14866# seeing if the new result is smaller or equal to the current result.
  14867#
  14868	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1
  14869
  14870	mov.l		L_SCR3(%a6),%d1
  14871	andi.b		&0xc0,%d1		# keep rnd prec
  14872	ori.b		&rz_mode*0x10,%d1	# insert rnd mode
  14873	fmov.l		%d1,%fpcr		# set FPCR
  14874	fmov.l		&0x0,%fpsr		# clear FPSR
  14875
  14876	fadd.x		FP_SCR0(%a6),%fp1	# execute add
  14877
  14878	fmov.l		&0x0,%fpcr		# clear FPCR
  14879
  14880	fabs.x		%fp0			# compare absolute values
  14881	fabs.x		%fp1
  14882	fcmp.x		%fp0,%fp1		# is first result > second?
  14883
  14884	fbgt.w		fadd_unfl		# yes; it's an underflow
  14885	bra.w		fadd_normal		# no; it's not an underflow
  14886
  14887##########################################################################
  14888
  14889#
  14890# Add: inputs are not both normalized; what are they?
  14891#
  14892fadd_not_norm:
  14893	mov.w		(tbl_fadd_op.b,%pc,%d1.w*2),%d1	# fetch handler offset for index d1
  14894	jmp		(tbl_fadd_op.b,%pc,%d1.w*1)	# jump to tbl_fadd_op + offset
  14895
  14896	swbeg		&48
  14897tbl_fadd_op:
  14898	short		fadd_norm	- tbl_fadd_op # NORM + NORM
  14899	short		fadd_zero_src	- tbl_fadd_op # NORM + ZERO
  14900	short		fadd_inf_src	- tbl_fadd_op # NORM + INF
  14901	short		fadd_res_qnan	- tbl_fadd_op # NORM + QNAN
  14902	short		fadd_norm	- tbl_fadd_op # NORM + DENORM
  14903	short		fadd_res_snan	- tbl_fadd_op # NORM + SNAN
  14904	short		tbl_fadd_op	- tbl_fadd_op #
  14905	short		tbl_fadd_op	- tbl_fadd_op #
  14906
  14907	short		fadd_zero_dst	- tbl_fadd_op # ZERO + NORM
  14908	short		fadd_zero_2	- tbl_fadd_op # ZERO + ZERO
  14909	short		fadd_inf_src	- tbl_fadd_op # ZERO + INF
  14910	short		fadd_res_qnan	- tbl_fadd_op # ZERO + QNAN
  14911	short		fadd_zero_dst	- tbl_fadd_op # ZERO + DENORM
  14912	short		fadd_res_snan	- tbl_fadd_op # ZERO + SNAN
  14913	short		tbl_fadd_op	- tbl_fadd_op #
  14914	short		tbl_fadd_op	- tbl_fadd_op #
  14915
  14916	short		fadd_inf_dst	- tbl_fadd_op # INF + NORM
  14917	short		fadd_inf_dst	- tbl_fadd_op # INF + ZERO
  14918	short		fadd_inf_2	- tbl_fadd_op # INF + INF
  14919	short		fadd_res_qnan	- tbl_fadd_op # INF + QNAN
  14920	short		fadd_inf_dst	- tbl_fadd_op # INF + DENORM
  14921	short		fadd_res_snan	- tbl_fadd_op # INF + SNAN
  14922	short		tbl_fadd_op	- tbl_fadd_op #
  14923	short		tbl_fadd_op	- tbl_fadd_op #
  14924
  14925	short		fadd_res_qnan	- tbl_fadd_op # QNAN + NORM
  14926	short		fadd_res_qnan	- tbl_fadd_op # QNAN + ZERO
  14927	short		fadd_res_qnan	- tbl_fadd_op # QNAN + INF
  14928	short		fadd_res_qnan	- tbl_fadd_op # QNAN + QNAN
  14929	short		fadd_res_qnan	- tbl_fadd_op # QNAN + DENORM
  14930	short		fadd_res_snan	- tbl_fadd_op # QNAN + SNAN
  14931	short		tbl_fadd_op	- tbl_fadd_op #
  14932	short		tbl_fadd_op	- tbl_fadd_op #
  14933
  14934	short		fadd_norm	- tbl_fadd_op # DENORM + NORM
  14935	short		fadd_zero_src	- tbl_fadd_op # DENORM + ZERO
  14936	short		fadd_inf_src	- tbl_fadd_op # DENORM + INF
  14937	short		fadd_res_qnan	- tbl_fadd_op # DENORM + QNAN
  14938	short		fadd_norm	- tbl_fadd_op # DENORM + DENORM
  14939	short		fadd_res_snan	- tbl_fadd_op # DENORM + SNAN
  14940	short		tbl_fadd_op	- tbl_fadd_op #
  14941	short		tbl_fadd_op	- tbl_fadd_op #
  14942
  14943	short		fadd_res_snan	- tbl_fadd_op # SNAN + NORM
  14944	short		fadd_res_snan	- tbl_fadd_op # SNAN + ZERO
  14945	short		fadd_res_snan	- tbl_fadd_op # SNAN + INF
  14946	short		fadd_res_snan	- tbl_fadd_op # SNAN + QNAN
  14947	short		fadd_res_snan	- tbl_fadd_op # SNAN + DENORM
  14948	short		fadd_res_snan	- tbl_fadd_op # SNAN + SNAN
  14949	short		tbl_fadd_op	- tbl_fadd_op #
  14950	short		tbl_fadd_op	- tbl_fadd_op #
  14951
  14952fadd_res_qnan:
       # Landing pads named by the fadd dispatch-table entries above; each one
       # simply long-branches to the corresponding shared NAN result routine.
  14953	bra.l		res_qnan		# QNAN operand: use common QNAN result code
  14954fadd_res_snan:
  14955	bra.l		res_snan		# SNAN operand: use common SNAN result code
  14956
  14957#
  14958# both operands are ZEROes
       # Only the sign bytes of the src (a0) and dst (a1) operands are examined.
       # fp0 receives the signed ZERO and FPSR_CC(%a6) is written with Z
       # (plus NEG when -ZERO is returned).
  14959#
  14960fadd_zero_2:
  14961	mov.b		SRC_EX(%a0),%d0		# are the signs opposite?
  14962	mov.b		DST_EX(%a1),%d1
  14963	eor.b		%d0,%d1			# d1 = src ^ dst; d0 still holds src sign
  14964	bmi.w		fadd_zero_2_chk_rm	# weed out (-ZERO)+(+ZERO)
  14965
  14966# the signs are the same. so determine whether they are positive or negative
  14967# and return the appropriately signed zero.
  14968	tst.b		%d0			# are ZEROes positive or negative?
  14969	bmi.b		fadd_zero_rm		# negative
  14970	fmov.s		&0x00000000,%fp0	# return +ZERO
  14971	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
  14972	rts
  14973
  14974#
  14975# the ZEROes have opposite signs:
  14976# - Therefore, we return +ZERO if the rounding modes are RN,RZ, or RP.
  14977# - -ZERO is returned in the case of RM.
  14978#
  14979fadd_zero_2_chk_rm:
  14980	mov.b		3+L_SCR3(%a6),%d1	# fetch byte 3 of the L_SCR3 rnd info
  14981	andi.b		&0x30,%d1		# extract rnd mode
  14982	cmpi.b		%d1,&rm_mode*0x10	# is rnd mode == RM?
  14983	beq.b		fadd_zero_rm		# yes
  14984	fmov.s		&0x00000000,%fp0	# return +ZERO
  14985	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
  14986	rts
  14987
       # shared exit for both paths above that must return -ZERO
  14988fadd_zero_rm:
  14989	fmov.s		&0x80000000,%fp0	# return -ZERO
  14990	mov.b		&neg_bmask+z_bmask,FPSR_CC(%a6) # set NEG/Z
  14991	rts
  14992
  14993#
  14994# one operand is a ZERO and the other is a DENORM or NORM. scale
  14995# the DENORM or NORM and jump to the regular fadd routine.
       # The non-zero operand is copied into its scratch slot (FP_SCR0 for the
       # src, FP_SCR1 for the dst) and scaled; the other scratch slot is cleared
       # to all zeroes before joining the common code at fadd_zero_entry.
       # NOTE(review): the scale routines presumably leave the scale factor in
       # d0 for fadd_zero_entry -- confirm against scale_to_zero_src/dst.
  14996#
  14997fadd_zero_dst:
       # dst is the ZERO (table cases ZERO + NORM, ZERO + DENORM)
  14998	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)	# copy src operand to FP_SCR0
  14999	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
  15000	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
  15001	bsr.l		scale_to_zero_src	# scale the operand
  15002	clr.w		FP_SCR1_EX(%a6)		# dst scratch operand = all zeroes
  15003	clr.l		FP_SCR1_HI(%a6)
  15004	clr.l		FP_SCR1_LO(%a6)
  15005	bra.w		fadd_zero_entry		# go execute fadd
  15006
  15007fadd_zero_src:
       # src is the ZERO (e.g. table case DENORM + ZERO)
  15008	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)	# copy dst operand to FP_SCR1
  15009	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
  15010	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
  15011	bsr.l		scale_to_zero_dst	# scale the operand
  15012	clr.w		FP_SCR0_EX(%a6)		# src scratch operand = all zeroes
  15013	clr.l		FP_SCR0_HI(%a6)
  15014	clr.l		FP_SCR0_LO(%a6)
  15015	bra.w		fadd_zero_entry		# go execute fadd
  15016
  15017#
  15018# both operands are INFs. an OPERR will result if the INFs have
  15019# different signs. else, an INF of the same sign is returned
  15020#
  15021fadd_inf_2:
  15022	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
  15023	mov.b		DST_EX(%a1),%d1
  15024	eor.b		%d1,%d0			# d0 = src ^ dst sign bytes
  15025	bmi.l		res_operr		# weed out (-INF)+(+INF)
  15026
  15027# ok, so it's not an OPERR. but, we do have to remember to return the
  15028# src INF since that's where the 881/882 gets the j-bit from...
       # (execution falls through into fadd_inf_src below)
  15029
  15030#
  15031# src operand is INF and dst is one of {ZERO, INF, DENORM, NORM}
       # fp0 = src INF; FPSR_CC(%a6) = INF (plus NEG for a negative INF)
  15032#
  15033fadd_inf_src:
  15034	fmovm.x		SRC(%a0),&0x80		# return src INF
  15035	tst.b		SRC_EX(%a0)		# is INF positive?
  15036	bpl.b		fadd_inf_done		# yes; we're done
  15037	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
  15038	rts
  15039
  15040#
  15041# dst operand is INF and src is one of {ZERO, DENORM, NORM}
       # fp0 = dst INF; FPSR_CC(%a6) = INF (plus NEG for a negative INF)
  15042#
  15043fadd_inf_dst:
  15044	fmovm.x		DST(%a1),&0x80		# return dst INF
  15045	tst.b		DST_EX(%a1)		# is INF positive?
  15046	bpl.b		fadd_inf_done		# yes; we're done
  15047	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
  15048	rts
  15049
       # common exit for a positive INF result (fp0 already loaded)
  15050fadd_inf_done:
  15051	mov.b		&inf_bmask,FPSR_CC(%a6) # set INF
  15052	rts
  15053
  15054#########################################################################
  15055# XDEF ****************************************************************	#
  15056#	fsub(): emulates the fsub instruction				#
  15057#	fssub(): emulates the fssub instruction				#
  15058#	fdsub(): emulates the fdsub instruction				#
  15059#									#
  15060# XREF ****************************************************************	#
  15061#	addsub_scaler2() - scale the operands so they won't take exc	#
  15062#	ovf_res() - return default overflow result			#
  15063#	unf_res() - return default underflow result			#
  15064#	res_qnan() - set QNAN result					#
  15065#	res_snan() - set SNAN result					#
  15066#	res_operr() - set OPERR result					#
  15067#	scale_to_zero_src() - set src operand exponent equal to zero	#
  15068#	scale_to_zero_dst() - set dst operand exponent equal to zero	#
  15069#									#
  15070# INPUT ***************************************************************	#
  15071#	a0 = pointer to extended precision source operand		#
  15072#	a1 = pointer to extended precision destination operand		#
  15073#									#
  15074# OUTPUT **************************************************************	#
  15075#	fp0 = result							#
  15076#	fp1 = EXOP (if exception occurred)				#
  15077#									#
  15078# ALGORITHM ***********************************************************	#
  15079#	Handle NANs, infinities, and zeroes as special cases. Divide	#
  15080# norms into extended, single, and double precision.			#
  15081#	Do subtraction after scaling exponents such that exception won't#
  15082# occur. Then, check result exponent to see if exception would have	#
  15083# occurred. If so, return default result and maybe EXOP. Else, insert	#
  15084# the correct result exponent and return. Set FPSR bits as appropriate.	#
  15085#									#
  15086#########################################################################
  15087
  15088	global		fssub
       # fssub: force single rounding precision in d0, then use the fsub body.
  15089fssub:
  15090	andi.b		&0x30,%d0		# keep rnd mode; clear rnd prec
  15091	ori.b		&s_mode*0x10,%d0	# insert sgl prec
  15092	bra.b		fsub
  15093
  15094	global		fdsub
       # fdsub: force double rounding precision in d0, then fall into fsub.
  15095fdsub:
  15096	andi.b		&0x30,%d0		# keep rnd mode; clear rnd prec
  15097	ori.b		&d_mode*0x10,%d0	# insert dbl prec
  15098
  15099	global		fsub
       # fsub: d0 = rnd prec,mode; a0 = src operand; a1 = dst operand.
       # Computes dst - src into fp0 and merges the resulting status into
       # USER_FPSR(%a6).
  15100fsub:
  15101	mov.l		%d0,L_SCR3(%a6)		# store rnd info
  15102
  15103	clr.w		%d1
  15104	mov.b		DTAG(%a6),%d1
  15105	lsl.b		&0x3,%d1
  15106	or.b		STAG(%a6),%d1		# d1 = (DTAG << 3) | STAG
  15107
  15108	bne.w		fsub_not_norm		# optimize on non-norm input
  15109
  15110#
  15111# SUB: norms and denorms
  15112#
  15113fsub_norm:
  15114	bsr.l		addsub_scaler2		# scale exponents
  15115
       # entry point shared with fsub_zero_dst/fsub_zero_src. Expects the scaled
       # src in FP_SCR0, the scaled dst in FP_SCR1 and the scale amount in d0
       # (d0 is subtracted from the result exponent below).
  15116fsub_zero_entry:
  15117	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
  15118
  15119	fmov.l		&0x0,%fpsr		# clear FPSR
  15120	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
  15121
  15122	fsub.x		FP_SCR0(%a6),%fp0	# execute subtract
  15123
  15124	fmov.l		&0x0,%fpcr		# clear FPCR
  15125	fmov.l		%fpsr,%d1		# fetch INEX2, N, Z
  15126
  15127	or.l		%d1,USER_FPSR(%a6)	# save exc and ccode bits
  15128
  15129	fbeq.w		fsub_zero_exit		# if result zero, end now
  15130
  15131	mov.l		%d2,-(%sp)		# save d2
  15132
  15133	fmovm.x		&0x01,-(%sp)		# save result to stack
  15134
  15135	mov.w		2+L_SCR3(%a6),%d1	# fetch low word of rnd info
  15136	lsr.b		&0x6,%d1		# d1 = rnd prec (index into tables below)
  15137
  15138	mov.w		(%sp),%d2		# fetch new exponent
  15139	andi.l		&0x7fff,%d2		# strip sign
  15140	sub.l		%d0,%d2			# undo scaling: d2 = exponent - scale factor
  15141
  15142	cmp.l		%d2,(tbl_fsub_ovfl.b,%pc,%d1.w*4) # is it an overflow?
  15143	bge.b		fsub_ovfl		# yes
  15144
  15145	cmp.l		%d2,(tbl_fsub_unfl.b,%pc,%d1.w*4) # is it an underflow?
  15146	blt.w		fsub_unfl		# yes
  15147	beq.w		fsub_may_unfl		# maybe; go find out
  15148
       # no exception: patch the unscaled exponent (d2) into the stacked result,
       # reload it into fp0 and restore d2.
  15149fsub_normal:
  15150	mov.w		(%sp),%d1
  15151	andi.w		&0x8000,%d1		# keep sign
  15152	or.w		%d2,%d1			# insert new exponent
  15153	mov.w		%d1,(%sp)		# insert new exponent
  15154
  15155	fmovm.x		(%sp)+,&0x80		# return result in fp0
  15156
  15157	mov.l		(%sp)+,%d2		# restore d2
  15158	rts
  15159
       # zero result: fp0 is left as produced by the subtract above
  15160fsub_zero_exit:
  15161#	fmov.s		&0x00000000,%fp0	# return zero in fp0
  15162	rts
  15163
  15164tbl_fsub_ovfl:
       # overflow thresholds, indexed by rnd prec (d1.w*4) in fsub_zero_entry:
       # an unscaled result exponent >= the entry is treated as an overflow.
  15165	long		0x7fff			# ext ovfl
  15166	long		0x407f			# sgl ovfl
  15167	long		0x43ff			# dbl ovfl
  15168
  15169tbl_fsub_unfl:
       # underflow thresholds, same indexing: an exponent below the entry is an
       # underflow; an exponent equal to it is checked by fsub_may_unfl.
  15170	long	        0x0000			# ext unfl
  15171	long		0x3f81			# sgl unfl
  15172	long		0x3c01			# dbl unfl
  15173
  15174fsub_ovfl:
       # Overflow path. On entry the stack holds the 12-byte scaled result with
       # the saved d2 beneath it, and d2 holds the unscaled result exponent.
  15175	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
  15176
  15177	mov.b		FPCR_ENABLE(%a6),%d1
  15178	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
  15179	bne.b		fsub_ovfl_ena		# yes
  15180
  15181	add.l		&0xc,%sp		# discard the stacked result
       # build the default overflow result; also reached from fsub_ovfl_ena_cont
       # after the EXOP has been placed in fp1.
  15182fsub_ovfl_dis:
  15183	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
  15184	sne		%d1			# set sign param accordingly
  15185	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
  15186	bsr.l		ovf_res			# calculate default result
  15187	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
  15188	fmovm.x		(%a0),&0x80		# return default result in fp0
  15189	mov.l		(%sp)+,%d2		# restore d2
  15190	rts
  15191
  15192fsub_ovfl_ena:
       # NOTE(review): this reads byte 0 of L_SCR3, whereas fsub_zero_2_chk_rm
       # reads 3+L_SCR3 and fsub_unfl_ena loads the whole long before masking
       # the low byte -- confirm which byte really carries the rnd prec bits.
  15193	mov.b		L_SCR3(%a6),%d1
  15194	andi.b		&0xc0,%d1		# is precision extended?
  15195	bne.b		fsub_ovfl_ena_sd	# no
  15196
       # create the EXOP from the stacked result: same sign, exponent d2 - 0x6000
  15197fsub_ovfl_ena_cont:
  15198	mov.w		(%sp),%d1		# fetch {sgn,exp}
  15199	andi.w		&0x8000,%d1		# keep sign
  15200	subi.l		&0x6000,%d2		# subtract new bias
  15201	andi.w		&0x7fff,%d2		# clear top bit
  15202	or.w		%d2,%d1			# concat sign,exp
  15203	mov.w		%d1,(%sp)		# insert new exponent
  15204
  15205	fmovm.x		(%sp)+,&0x40		# return EXOP in fp1
  15206	bra.b		fsub_ovfl_dis
  15207
       # sgl/dbl precision: redo the subtract with only the rnd mode bits in the
       # FPCR and replace the stacked result with that one.
  15208fsub_ovfl_ena_sd:
  15209	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
  15210
  15211	mov.l		L_SCR3(%a6),%d1
  15212	andi.b		&0x30,%d1		# keep rnd mode; clear rnd prec
  15213	fmov.l		%d1,%fpcr		# set FPCR
  15214
  15215	fsub.x		FP_SCR0(%a6),%fp0	# execute subtract
  15216
  15217	fmov.l		&0x0,%fpcr		# clear FPCR
  15218
  15219	add.l		&0xc,%sp		# drop the old stacked result
  15220	fmovm.x		&0x01,-(%sp)		# push the recomputed result
  15221	bra.b		fsub_ovfl_ena_cont
  15222
  15223fsub_unfl:
       # Underflow path. On entry the stack holds the 12-byte scaled result with
       # the saved d2 beneath it, and d0 still holds the scale factor.
  15224	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
  15225
  15226	add.l		&0xc,%sp		# discard the stacked result
  15227
  15228	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
  15229
  15230	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
  15231	fmov.l		&0x0,%fpsr		# clear FPSR
  15232
  15233	fsub.x		FP_SCR0(%a6),%fp0	# execute subtract
  15234
  15235	fmov.l		&0x0,%fpcr		# clear FPCR
  15236	fmov.l		%fpsr,%d1		# save status
  15237
  15238	or.l		%d1,USER_FPSR(%a6)
  15239
  15240	mov.b		FPCR_ENABLE(%a6),%d1
  15241	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
  15242	bne.b		fsub_unfl_ena		# yes
  15243
       # build the default underflow result from fp0; also reached from
       # fsub_unfl_ena_cont after the EXOP has been placed in fp1.
  15244fsub_unfl_dis:
  15245	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
  15246
  15247	lea		FP_SCR0(%a6),%a0	# pass: result addr
  15248	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
  15249	bsr.l		unf_res			# calculate default result
  15250	or.b		%d0,FPSR_CC(%a6)	# 'Z' may have been set
  15251	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
  15252	mov.l		(%sp)+,%d2		# restore d2
  15253	rts
  15254
       # UNFL or INEX is enabled: compute the EXOP in fp1 first
  15255fsub_unfl_ena:
  15256	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1
  15257
  15258	mov.l		L_SCR3(%a6),%d1
  15259	andi.b		&0xc0,%d1		# is precision extended?
  15260	bne.b		fsub_unfl_ena_sd	# no
  15261
  15262	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
  15263
  15264fsub_unfl_ena_cont:
  15265	fmov.l		&0x0,%fpsr		# clear FPSR
  15266
  15267	fsub.x		FP_SCR0(%a6),%fp1	# execute subtract
  15268
  15269	fmov.l		&0x0,%fpcr		# clear FPCR
  15270
  15271	fmovm.x		&0x40,FP_SCR0(%a6)	# store fp1 result to FP_SCR0
  15272	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
  15273	mov.l		%d1,%d2			# make a copy
  15274	andi.l		&0x7fff,%d1		# strip sign
  15275	andi.w		&0x8000,%d2		# keep old sign
  15276	sub.l		%d0,%d1			# subtract scale factor
  15277	addi.l		&0x6000,%d1		# add new bias
  15278	andi.w		&0x7fff,%d1		# clear top bit
  15279	or.w		%d2,%d1			# concat sgn,exp
  15280	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
  15281	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
  15282	bra.w		fsub_unfl_dis
  15283
       # sgl/dbl precision: run the EXOP subtract with only the rnd mode bits set
  15284fsub_unfl_ena_sd:
  15285	mov.l		L_SCR3(%a6),%d1
  15286	andi.b		&0x30,%d1		# keep rnd mode; clear rnd prec
  15287	fmov.l		%d1,%fpcr		# set FPCR
  15288
  15289	bra.b		fsub_unfl_ena_cont
  15290
  15291#
  15292# result is equal to the smallest normalized number in the selected precision
  15293# if the precision is extended, this result could not have come from an
  15294# underflow that rounded up.
  15295#
  15296fsub_may_unfl:
  15297	mov.l		L_SCR3(%a6),%d1
  15298	andi.b		&0xc0,%d1		# is precision extended?
  15299	beq.w		fsub_normal		# yes; no underflow occurred
  15300
  15301	mov.l		0x4(%sp),%d1		# hi(man) of the stacked result
  15302	cmpi.l		%d1,&0x80000000		# is hi(man) = 0x80000000?
  15303	bne.w		fsub_normal		# no; no underflow occurred
  15304
  15305	tst.l		0x8(%sp)		# is lo(man) = 0x0?
  15306	bne.w		fsub_normal		# no; no underflow occurred
  15307
  15308	btst		&inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
  15309	beq.w		fsub_normal		# no; no underflow occurred
  15310
  15311#
  15312# ok, so now the result has an exponent equal to the smallest normalized
  15313# exponent for the selected precision. also, the mantissa is equal to
  15314# 0x8000000000000000 and this mantissa is the result of rounding non-zero
  15315# g,r,s.
  15316# now, we must determine whether the pre-rounded result was an underflow
  15317# rounded "up" or a normalized number rounded "down".
  15318# so, we do this by re-executing the subtract using RZ as the rounding mode
  15319# and seeing if the new result is smaller or equal to the current result.
  15320#
  15321	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1
  15322
  15323	mov.l		L_SCR3(%a6),%d1
  15324	andi.b		&0xc0,%d1		# keep rnd prec
  15325	ori.b		&rz_mode*0x10,%d1	# insert rnd mode
  15326	fmov.l		%d1,%fpcr		# set FPCR
  15327	fmov.l		&0x0,%fpsr		# clear FPSR
  15328
  15329	fsub.x		FP_SCR0(%a6),%fp1	# execute subtract
  15330
  15331	fmov.l		&0x0,%fpcr		# clear FPCR
  15332
  15333	fabs.x		%fp0			# compare absolute values
  15334	fabs.x		%fp1
  15335	fcmp.x		%fp0,%fp1		# is first result > second?
  15336
  15337	fbgt.w		fsub_unfl		# yes; it's an underflow
  15338	bra.w		fsub_normal		# no; it's not an underflow
  15339
  15340##########################################################################
  15341
  15342#
  15343# Sub: inputs are not both normalized; what are they?
       # d1 = (DTAG << 3) | STAG as built in fsub. Each table entry is the 16-bit
       # offset of a handler from tbl_fsub_op; comments read "dst - src".
       # Entries of "tbl_fsub_op - tbl_fsub_op" fill the unused tag values.
  15344#
  15345fsub_not_norm:
  15346	mov.w		(tbl_fsub_op.b,%pc,%d1.w*2),%d1	# fetch handler offset
  15347	jmp		(tbl_fsub_op.b,%pc,%d1.w*1)	# jump to tbl_fsub_op + offset
  15348
  15349	swbeg		&48			# 48 table entries follow
  15350tbl_fsub_op:
  15351	short		fsub_norm	- tbl_fsub_op # NORM - NORM
  15352	short		fsub_zero_src	- tbl_fsub_op # NORM - ZERO
  15353	short		fsub_inf_src	- tbl_fsub_op # NORM - INF
  15354	short		fsub_res_qnan	- tbl_fsub_op # NORM - QNAN
  15355	short		fsub_norm	- tbl_fsub_op # NORM - DENORM
  15356	short		fsub_res_snan	- tbl_fsub_op # NORM - SNAN
  15357	short		tbl_fsub_op	- tbl_fsub_op #
  15358	short		tbl_fsub_op	- tbl_fsub_op #
  15359
  15360	short		fsub_zero_dst	- tbl_fsub_op # ZERO - NORM
  15361	short		fsub_zero_2	- tbl_fsub_op # ZERO - ZERO
  15362	short		fsub_inf_src	- tbl_fsub_op # ZERO - INF
  15363	short		fsub_res_qnan	- tbl_fsub_op # ZERO - QNAN
  15364	short		fsub_zero_dst	- tbl_fsub_op # ZERO - DENORM
  15365	short		fsub_res_snan	- tbl_fsub_op # ZERO - SNAN
  15366	short		tbl_fsub_op	- tbl_fsub_op #
  15367	short		tbl_fsub_op	- tbl_fsub_op #
  15368
  15369	short		fsub_inf_dst	- tbl_fsub_op # INF - NORM
  15370	short		fsub_inf_dst	- tbl_fsub_op # INF - ZERO
  15371	short		fsub_inf_2	- tbl_fsub_op # INF - INF
  15372	short		fsub_res_qnan	- tbl_fsub_op # INF - QNAN
  15373	short		fsub_inf_dst	- tbl_fsub_op # INF - DENORM
  15374	short		fsub_res_snan	- tbl_fsub_op # INF - SNAN
  15375	short		tbl_fsub_op	- tbl_fsub_op #
  15376	short		tbl_fsub_op	- tbl_fsub_op #
  15377
  15378	short		fsub_res_qnan	- tbl_fsub_op # QNAN - NORM
  15379	short		fsub_res_qnan	- tbl_fsub_op # QNAN - ZERO
  15380	short		fsub_res_qnan	- tbl_fsub_op # QNAN - INF
  15381	short		fsub_res_qnan	- tbl_fsub_op # QNAN - QNAN
  15382	short		fsub_res_qnan	- tbl_fsub_op # QNAN - DENORM
  15383	short		fsub_res_snan	- tbl_fsub_op # QNAN - SNAN
  15384	short		tbl_fsub_op	- tbl_fsub_op #
  15385	short		tbl_fsub_op	- tbl_fsub_op #
  15386
  15387	short		fsub_norm	- tbl_fsub_op # DENORM - NORM
  15388	short		fsub_zero_src	- tbl_fsub_op # DENORM - ZERO
  15389	short		fsub_inf_src	- tbl_fsub_op # DENORM - INF
  15390	short		fsub_res_qnan	- tbl_fsub_op # DENORM - QNAN
  15391	short		fsub_norm	- tbl_fsub_op # DENORM - DENORM
  15392	short		fsub_res_snan	- tbl_fsub_op # DENORM - SNAN
  15393	short		tbl_fsub_op	- tbl_fsub_op #
  15394	short		tbl_fsub_op	- tbl_fsub_op #
  15395
  15396	short		fsub_res_snan	- tbl_fsub_op # SNAN - NORM
  15397	short		fsub_res_snan	- tbl_fsub_op # SNAN - ZERO
  15398	short		fsub_res_snan	- tbl_fsub_op # SNAN - INF
  15399	short		fsub_res_snan	- tbl_fsub_op # SNAN - QNAN
  15400	short		fsub_res_snan	- tbl_fsub_op # SNAN - DENORM
  15401	short		fsub_res_snan	- tbl_fsub_op # SNAN - SNAN
  15402	short		tbl_fsub_op	- tbl_fsub_op #
  15403	short		tbl_fsub_op	- tbl_fsub_op #
  15404
  15405fsub_res_qnan:
       # Landing pads named by the tbl_fsub_op entries; each one simply
       # long-branches to the corresponding shared NAN result routine.
  15406	bra.l		res_qnan		# QNAN operand: use common QNAN result code
  15407fsub_res_snan:
  15408	bra.l		res_snan		# SNAN operand: use common SNAN result code
  15409
  15410#
  15411# both operands are ZEROes
       # Only the sign bytes of the src (a0) and dst (a1) operands are examined.
       # fp0 receives the signed ZERO and FPSR_CC(%a6) is written with Z
       # (plus NEG when -ZERO is returned).
  15412#
  15413fsub_zero_2:
  15414	mov.b		SRC_EX(%a0),%d0
  15415	mov.b		DST_EX(%a1),%d1
  15416	eor.b		%d1,%d0			# d0 = src ^ dst; d1 still holds dst sign
  15417	bpl.b		fsub_zero_2_chk_rm	# same signs; result depends on rnd mode
  15418
  15419# the signs are opposite, so, return a ZERO w/ the sign of the dst ZERO
       # The dst sign must be tested in d1: d0 was overwritten by the eor above
       # and is always negative on this path, which made (+ZERO)-(-ZERO) return
       # -ZERO instead of +ZERO.
  15420	tst.b		%d1			# is dst negative?
  15421	bmi.b		fsub_zero_2_rm		# yes
  15422	fmov.s		&0x00000000,%fp0	# no; return +ZERO
  15423	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
  15424	rts
  15425
  15426#
  15427# the ZEROes have the same signs:
  15428# - Therefore, we return +ZERO if the rounding mode is RN,RZ, or RP
  15429# - -ZERO is returned in the case of RM.
  15430#
  15431fsub_zero_2_chk_rm:
  15432	mov.b		3+L_SCR3(%a6),%d1	# fetch byte 3 of the L_SCR3 rnd info
  15433	andi.b		&0x30,%d1		# extract rnd mode
  15434	cmpi.b		%d1,&rm_mode*0x10	# is rnd mode = RM?
  15435	beq.b		fsub_zero_2_rm		# yes
  15436	fmov.s		&0x00000000,%fp0	# no; return +ZERO
  15437	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
  15438	rts
  15439
       # shared exit for both paths that must return -ZERO
  15440fsub_zero_2_rm:
  15441	fmov.s		&0x80000000,%fp0	# return -ZERO
  15442	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6)	# set Z/NEG
  15443	rts
  15444
  15445#
  15446# one operand is a ZERO and the other is a DENORM or a NORM.
  15447# scale the DENORM or NORM and jump to the regular fsub routine.
       # The non-zero operand is copied into its scratch slot (FP_SCR0 for the
       # src, FP_SCR1 for the dst) and scaled; the other scratch slot is cleared
       # to all zeroes. fsub_zero_entry then subtracts d0 from the result
       # exponent, so the scale routine is expected to leave the scale factor
       # in d0 (NOTE(review): confirm against scale_to_zero_src/dst).
  15448#
  15449fsub_zero_dst:
       # dst is the ZERO (table cases ZERO - NORM, ZERO - DENORM)
  15450	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)	# copy src operand to FP_SCR0
  15451	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
  15452	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
  15453	bsr.l		scale_to_zero_src	# scale the operand
  15454	clr.w		FP_SCR1_EX(%a6)		# dst scratch operand = all zeroes
  15455	clr.l		FP_SCR1_HI(%a6)
  15456	clr.l		FP_SCR1_LO(%a6)
  15457	bra.w		fsub_zero_entry		# go execute fsub
  15458
  15459fsub_zero_src:
       # src is the ZERO (table cases NORM - ZERO, DENORM - ZERO)
  15460	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)	# copy dst operand to FP_SCR1
  15461	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
  15462	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
  15463	bsr.l		scale_to_zero_dst	# scale the operand
  15464	clr.w		FP_SCR0_EX(%a6)		# src scratch operand = all zeroes
  15465	clr.l		FP_SCR0_HI(%a6)
  15466	clr.l		FP_SCR0_LO(%a6)
  15467	bra.w		fsub_zero_entry		# go execute fsub
  15468
  15469#
  15470# both operands are INFs. an OPERR will result if the INFs have the
  15471# same signs. else, the negated src INF is returned (via fsub_inf_src).
  15472#
  15473fsub_inf_2:
  15474	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
  15475	mov.b		DST_EX(%a1),%d1
  15476	eor.b		%d1,%d0			# d0 = src ^ dst sign bytes
  15477	bpl.l		res_operr		# weed out same-signed INFs, e.g. (+INF)-(+INF)
  15478
  15479# ok, so it's not an OPERR. but we do have to remember to return
  15480# the src INF since that's where the 881/882 gets the j-bit.
       # (execution falls through into fsub_inf_src below)
  15481
       # src operand is INF: fp0 = -(src INF); FPSR_CC(%a6) = INF (plus NEG)
  15482fsub_inf_src:
  15483	fmovm.x		SRC(%a0),&0x80		# return src INF
  15484	fneg.x		%fp0			# invert sign
  15485	fbge.w		fsub_inf_done		# sign is now positive
  15486	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
  15487	rts
  15488
       # dst operand is INF: fp0 = dst INF; FPSR_CC(%a6) = INF (plus NEG)
  15489fsub_inf_dst:
  15490	fmovm.x		DST(%a1),&0x80		# return dst INF
  15491	tst.b		DST_EX(%a1)		# is INF negative?
  15492	bpl.b		fsub_inf_done		# no
  15493	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
  15494	rts
  15495
       # common exit for a positive INF result (fp0 already loaded)
  15496fsub_inf_done:
  15497	mov.b		&inf_bmask,FPSR_CC(%a6)	# set INF
  15498	rts
  15499
  15500#########################################################################
  15501# XDEF ****************************************************************	#
  15502#	fsqrt(): emulates the fsqrt instruction				#
  15503#	fssqrt(): emulates the fssqrt instruction			#
  15504#	fdsqrt(): emulates the fdsqrt instruction			#
  15505#									#
  15506# XREF ****************************************************************	#
  15507#	scale_sqrt() - scale the source operand				#
  15508#	unf_res() - return default underflow result			#
  15509#	ovf_res() - return default overflow result			#
  15510#	res_qnan_1op() - return QNAN result				#
  15511#	res_snan_1op() - return SNAN result				#
  15512#									#
  15513# INPUT ***************************************************************	#
  15514#	a0 = pointer to extended precision source operand		#
  15515#	d0 = rnd prec,mode						#
  15516#									#
  15517# OUTPUT **************************************************************	#
  15518#	fp0 = result							#
  15519#	fp1 = EXOP (if exception occurred)				#
  15520#									#
  15521# ALGORITHM ***********************************************************	#
  15522#	Handle NANs, infinities, and zeroes as special cases. Divide	#
  15523# norms/denorms into ext/sgl/dbl precision.				#
  15524#	For norms/denorms, scale the exponents such that a sqrt		#
  15525# instruction won't cause an exception. Use the regular fsqrt to	#
  15526# compute a result. Check if the regular operands would have taken	#
  15527# an exception. If so, return the default overflow/underflow result	#
  15528# and return the EXOP if exceptions are enabled. Else, scale the	#
  15529# result operand to the proper exponent.				#
  15530#									#
  15531#########################################################################
  15532
  15533	global		fssqrt
       # fssqrt: force single rounding precision in d0, then use the fsqrt body.
  15534fssqrt:
  15535	andi.b		&0x30,%d0		# keep rnd mode; clear rnd prec
  15536	ori.b		&s_mode*0x10,%d0	# insert sgl precision
  15537	bra.b		fsqrt
  15538
  15539	global		fdsqrt
       # fdsqrt: force double rounding precision in d0, then fall into fsqrt.
  15540fdsqrt:
  15541	andi.b		&0x30,%d0		# keep rnd mode; clear rnd prec
  15542	ori.b		&d_mode*0x10,%d0	# insert dbl precision
  15543
  15544	global		fsqrt
       # fsqrt: d0 = rnd prec,mode; a0 = src operand. Result is returned in fp0
       # and the resulting status is merged into USER_FPSR(%a6).
  15545fsqrt:
  15546	mov.l		%d0,L_SCR3(%a6)		# store rnd info
  15547	clr.w		%d1
  15548	mov.b		STAG(%a6),%d1		# d1 = src operand tag
  15549	bne.w		fsqrt_not_norm		# optimize on non-norm input
  15550
  15551#
  15552# SQUARE ROOT: norms and denorms ONLY!
  15553#
  15554fsqrt_norm:
  15555	tst.b		SRC_EX(%a0)		# is operand negative?
  15556	bmi.l		res_operr		# yes
  15557
  15558	andi.b		&0xc0,%d0		# is precision extended?
  15559	bne.b		fsqrt_not_ext		# no; go handle sgl or dbl
  15560
       # extended precision: take the square root of the operand directly.
       # NOTE(review): unlike fsqrt_sd_normal, this path returns without writing
       # 0 back to %fpcr -- confirm that the caller resets it.
  15561	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
  15562	fmov.l		&0x0,%fpsr		# clear FPSR
  15563
  15564	fsqrt.x		(%a0),%fp0		# execute square root
  15565
  15566	fmov.l		%fpsr,%d1
  15567	or.l		%d1,USER_FPSR(%a6)	# set N,INEX
  15568
  15569	rts
  15570
  15571fsqrt_denorm:
       # DENORM source (reached from fsqrt_not_norm). Negative operands give
       # OPERR; sgl/dbl precision goes through fsqrt_not_ext; for extended
       # precision the operand is copied to FP_SCR0, scaled, and handled by the
       # common fsqrt_sd_normal path.
  15572	tst.b		SRC_EX(%a0)		# is operand negative?
  15573	bmi.l		res_operr		# yes
  15574
  15575	andi.b		&0xc0,%d0		# is precision extended?
  15576	bne.b		fsqrt_not_ext		# no; go handle sgl or dbl
  15577
  15578	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)	# copy src operand to FP_SCR0
  15579	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
  15580	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
  15581
  15582	bsr.l		scale_sqrt		# calculate scale factor
  15583
  15584	bra.w		fsqrt_sd_normal
  15585
  15586#
  15587# operand is either single or double
       # (d0 holds only the rnd prec bits here; both callers masked it with 0xc0)
  15588#
  15589fsqrt_not_ext:
  15590	cmpi.b		%d0,&s_mode*0x10	# separate sgl/dbl prec
  15591	bne.w		fsqrt_dbl
  15592
  15593#
  15594# operand is to be rounded to single precision
  15595#
  15596fsqrt_sgl:
  15597	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)	# copy src operand to FP_SCR0
  15598	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
  15599	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
  15600
  15601	bsr.l		scale_sqrt		# calculate scale factor
  15602
       # classify by the scale factor in d0 against the sgl exponent limits
  15603	cmpi.l		%d0,&0x3fff-0x3f81	# will move in underflow?
  15604	beq.w		fsqrt_sd_may_unfl	# maybe; go check
  15605	bgt.w		fsqrt_sd_unfl		# yes; go handle underflow
  15606	cmpi.l		%d0,&0x3fff-0x407f	# will move in overflow?
  15607	beq.w		fsqrt_sd_may_ovfl	# maybe; go check
  15608	blt.w		fsqrt_sd_ovfl		# yes; go handle overflow
       # otherwise fall through into fsqrt_sd_normal
  15609
  15610#
  15611# operand will NOT overflow or underflow when moved in to the fp reg file
       # FP_SCR0 holds the scaled operand and d0 the scale factor.
  15612#
  15613fsqrt_sd_normal:
  15614	fmov.l		&0x0,%fpsr		# clear FPSR
  15615	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
  15616
  15617	fsqrt.x		FP_SCR0(%a6),%fp0	# perform square root
  15618
  15619	fmov.l		%fpsr,%d1		# save FPSR
  15620	fmov.l		&0x0,%fpcr		# clear FPCR
  15621
  15622	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
  15623
       # rewrite the exponent of the result in fp0 as (exponent - d0), keeping
       # the sign; also entered from fsqrt_sd_may_ovfl.
  15624fsqrt_sd_normal_exit:
  15625	mov.l		%d2,-(%sp)		# save d2
  15626	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
  15627	mov.w		FP_SCR0_EX(%a6),%d1	# load sgn,exp
  15628	mov.l		%d1,%d2			# make a copy
  15629	andi.l		&0x7fff,%d1		# strip sign
  15630	sub.l		%d0,%d1			# subtract scale factor
  15631	andi.w		&0x8000,%d2		# keep old sign
  15632	or.w		%d1,%d2			# concat old sign,new exp
  15633	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
  15634	mov.l		(%sp)+,%d2		# restore d2
  15635	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
  15636	rts
  15637
  15638#
  15639# operand is to be rounded to double precision
  15640#
  15641fsqrt_dbl:
  15642	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)	# copy src operand to FP_SCR0
  15643	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
  15644	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
  15645
  15646	bsr.l		scale_sqrt		# calculate scale factor
  15647
       # classify by the scale factor in d0 against the dbl exponent limits
  15648	cmpi.l		%d0,&0x3fff-0x3c01	# will move in underflow?
  15649	beq.w		fsqrt_sd_may_unfl	# maybe; go check
  15650	bgt.b		fsqrt_sd_unfl		# yes; go handle underflow
  15651	cmpi.l		%d0,&0x3fff-0x43ff	# will move in overflow?
  15652	beq.w		fsqrt_sd_may_ovfl	# maybe; go check
  15653	blt.w		fsqrt_sd_ovfl		# yes; go handle overflow
  15654	bra.w		fsqrt_sd_normal		# no; go handle normalized op
  15655
  15656# we're on the line here and the distinguishing characteristic is whether
  15657# the scaled exponent is 3fff or 3ffe. the test below treats 3fff (bit 0 set)
  15658# as the safe case; otherwise we fall through to underflow.
  15659fsqrt_sd_may_unfl:
  15660	btst		&0x0,1+FP_SCR0_EX(%a6)	# is exponent 0x3fff?
  15661	bne.w		fsqrt_sd_normal		# yes, so no underflow
  15662
  15663#
  15664# operand WILL underflow when moved in to the fp register file
       # FP_SCR0 holds the scaled operand and d0 the scale factor.
  15665#
  15666fsqrt_sd_unfl:
  15667	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
  15668
  15669	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
  15670	fmov.l		&0x0,%fpsr		# clear FPSR
  15671
  15672	fsqrt.x		FP_SCR0(%a6),%fp0	# execute square root
  15673
  15674	fmov.l		%fpsr,%d1		# save status
  15675	fmov.l		&0x0,%fpcr		# clear FPCR
  15676
  15677	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
  15678
  15679# if underflow or inexact is enabled, go calculate EXOP first.
  15680	mov.b		FPCR_ENABLE(%a6),%d1
  15681	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
  15682	bne.b		fsqrt_sd_unfl_ena	# yes
  15683
       # build the default underflow result from fp0; also reached from
       # fsqrt_sd_unfl_ena after the EXOP has been placed in fp1.
  15684fsqrt_sd_unfl_dis:
  15685	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
  15686
  15687	lea		FP_SCR0(%a6),%a0	# pass: result addr
  15688	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
  15689	bsr.l		unf_res			# calculate default result
  15690	or.b		%d0,FPSR_CC(%a6)	# set possible 'Z' ccode
  15691	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
  15692	rts
  15693
  15694#
  15695# operand will underflow AND underflow is enabled.
  15696# Therefore, we must return the result rounded to extended precision.
       # The EXOP is built in FP_SCR1 from the FP_SCR0 mantissa, with exponent
       # (FP_SCR0 exponent - d0 + 0x6000) and the original sign.
  15697#
  15698fsqrt_sd_unfl_ena:
  15699	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
  15700	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
  15701	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent
  15702
  15703	mov.l		%d2,-(%sp)		# save d2
  15704	mov.l		%d1,%d2			# make a copy
  15705	andi.l		&0x7fff,%d1		# strip sign
  15706	andi.w		&0x8000,%d2		# keep old sign
  15707	sub.l		%d0,%d1			# subtract scale factor
  15708	addi.l		&0x6000,%d1		# add new bias
  15709	andi.w		&0x7fff,%d1		# clear top bit
  15710	or.w		%d2,%d1			# concat new sign,new exp
  15711	mov.w		%d1,FP_SCR1_EX(%a6)	# insert new exp
  15712	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
  15713	mov.l		(%sp)+,%d2		# restore d2
  15714	bra.b		fsqrt_sd_unfl_dis
  15715
  15716#
  15717# operand WILL overflow.
       # FP_SCR0 holds the scaled operand and d0 the scale factor.
  15718#
  15719fsqrt_sd_ovfl:
  15720	fmov.l		&0x0,%fpsr		# clear FPSR
  15721	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
  15722
  15723	fsqrt.x		FP_SCR0(%a6),%fp0	# perform square root
  15724
  15725	fmov.l		&0x0,%fpcr		# clear FPCR
  15726	fmov.l		%fpsr,%d1		# save FPSR
  15727
  15728	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
  15729
       # also entered from fsqrt_sd_may_ovfl once an overflow has been detected
  15730fsqrt_sd_ovfl_tst:
  15731	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
  15732
  15733	mov.b		FPCR_ENABLE(%a6),%d1
  15734	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
  15735	bne.b		fsqrt_sd_ovfl_ena	# yes
  15736
  15737#
  15738# OVFL is not enabled; therefore, we must create the default result by
  15739# calling ovf_res().
  15740#
  15741fsqrt_sd_ovfl_dis:
  15742	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
  15743	sne		%d1			# set sign param accordingly
  15744	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
  15745	bsr.l		ovf_res			# calculate default result
  15746	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
  15747	fmovm.x		(%a0),&0x80		# return default result in fp0
  15748	rts
  15749
  15750#
  15751# OVFL is enabled.
  15752# the INEX2 bit has already been updated by the round to the correct precision.
  15753# now, round to extended(and don't alter the FPSR).
       # The EXOP is built in place in FP_SCR0 with exponent
       # (FP_SCR0 exponent - d0 - 0x6000) and the original sign.
  15754#
  15755fsqrt_sd_ovfl_ena:
  15756	mov.l		%d2,-(%sp)		# save d2
  15757	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
  15758	mov.l		%d1,%d2			# make a copy
  15759	andi.l		&0x7fff,%d1		# strip sign
  15760	andi.w		&0x8000,%d2		# keep old sign
  15761	sub.l		%d0,%d1			# subtract scale factor
  15762	subi.l		&0x6000,%d1		# subtract bias
  15763	andi.w		&0x7fff,%d1		# clear top bit
  15764	or.w		%d2,%d1			# concat sign,exp
  15765	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
  15766	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
  15767	mov.l		(%sp)+,%d2		# restore d2
  15768	bra.b		fsqrt_sd_ovfl_dis
  15769
  15770#
  15771# the move in MAY overflow. so...
       # an odd scaled exponent (0x3fff) is treated as a definite overflow;
       # otherwise the root is computed and compared against 1 to decide.
  15772#
  15773fsqrt_sd_may_ovfl:
  15774	btst		&0x0,1+FP_SCR0_EX(%a6)	# is exponent 0x3fff?
  15775	bne.w		fsqrt_sd_ovfl		# yes, so overflow
  15776
  15777	fmov.l		&0x0,%fpsr		# clear FPSR
  15778	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
  15779
  15780	fsqrt.x		FP_SCR0(%a6),%fp0	# perform square root
  15781
  15782	fmov.l		%fpsr,%d1		# save status
  15783	fmov.l		&0x0,%fpcr		# clear FPCR
  15784
  15785	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
  15786
  15787	fmov.x		%fp0,%fp1		# make a copy of result
  15788	fcmp.b		%fp1,&0x1		# is |result| >= 1.b?
  15789	fbge.w		fsqrt_sd_ovfl_tst	# yes; overflow has occurred
  15790
  15791# no, it didn't overflow; we have correct result
  15792	bra.w		fsqrt_sd_normal_exit
  15793
  15794#
  15795# input is not normalized; what is it?
       # d1 = source operand tag (loaded from STAG in fsqrt)
  15796#
  15797fsqrt_not_norm:
  15798	cmpi.b		%d1,&DENORM		# weed out DENORM
  15799	beq.w		fsqrt_denorm
  15800	cmpi.b		%d1,&ZERO		# weed out ZERO
  15801	beq.b		fsqrt_zero
  15802	cmpi.b		%d1,&INF		# weed out INF
  15803	beq.b		fsqrt_inf
  15804	cmpi.b		%d1,&SNAN		# weed out SNAN
  15805	beq.l		res_snan_1op
  15806	bra.l		res_qnan_1op		# anything else is handled as a QNAN
  15809
  15810#
  15811#	fsqrt(+0) = +0
  15812#	fsqrt(-0) = -0
  15813#	fsqrt(+INF) = +INF
  15814#	fsqrt(-INF) = OPERR
       # Each case loads fp0 and writes the matching ccodes to FPSR_CC(%a6).
  15815#
  15816fsqrt_zero:
  15817	tst.b		SRC_EX(%a0)		# is ZERO positive or negative?
  15818	bmi.b		fsqrt_zero_m		# negative
  15819fsqrt_zero_p:
  15820	fmov.s		&0x00000000,%fp0	# return +ZERO
  15821	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
  15822	rts
  15823fsqrt_zero_m:
  15824	fmov.s		&0x80000000,%fp0	# return -ZERO
  15825	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6)	# set 'Z','N' ccode bits
  15826	rts
  15827
  15828fsqrt_inf:
  15829	tst.b		SRC_EX(%a0)		# is INF positive or negative?
  15830	bmi.l		res_operr		# negative
  15831fsqrt_inf_p:
  15832	fmovm.x		SRC(%a0),&0x80		# return +INF in fp0
  15833	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
  15834	rts
  15835
  15836##########################################################################
  15837
  15838#########################################################################
  15839# XDEF ****************************************************************	#
  15840#	addsub_scaler2(): scale inputs to fadd/fsub such that no	#
  15841#			  OVFL/UNFL exceptions will result		#
  15842#									#
  15843# XREF ****************************************************************	#
  15844#	norm() - normalize mantissa after adjusting exponent		#
  15845#									#
  15846# INPUT ***************************************************************	#
  15847#	a0 = ptr to fp op1(src); presumably FP_SRC(a6) - confirm	#
  15848#	a1 = ptr to fp op2(dst); presumably FP_DST(a6) - confirm	#
  15849#									#
  15850# OUTPUT **************************************************************	#
  15851#	FP_SCR0(a6) = fp op1 scaled(src)				#
  15852#	FP_SCR1(a6) = fp op2 scaled(dst)				#
  15853#	d0         = scale amount					#
  15854#									#
  15855# ALGORITHM ***********************************************************	#
  15856#	If the DST exponent is > the SRC exponent, set the DST exponent	#
  15857# equal to 0x3fff and scale the SRC exponent by the value that the	#
  15858# DST exponent was scaled by. If the SRC exponent is greater or equal,	#
  15859# do the opposite. Return this scale factor in d0.			#
  15860#	If the two exponents differ by > the number of mantissa bits	#
  15861# plus two, then set the smallest exponent to a very small value as a	#
  15862# quick shortcut.							#
  15863#									#
  15864#########################################################################
  15865
  15866	global		addsub_scaler2
  15867addsub_scaler2:
  15868	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)	# copy src operand to FP_SCR0
  15869	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)	# copy dst operand to FP_SCR1
  15870	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
  15871	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
  15872	mov.w		SRC_EX(%a0),%d0		# d0 = src {sgn,exp}
  15873	mov.w		DST_EX(%a1),%d1		# d1 = dst {sgn,exp}
  15874	mov.w		%d0,FP_SCR0_EX(%a6)
  15875	mov.w		%d1,FP_SCR1_EX(%a6)
  15876
  15877	andi.w		&0x7fff,%d0		# strip src sign
  15878	andi.w		&0x7fff,%d1		# strip dst sign
  15879	mov.w		%d0,L_SCR1(%a6)		# store src exponent
  15880	mov.w		%d1,2+L_SCR1(%a6)	# store dst exponent
  15881
  15882	cmp.w		%d0, %d1		# is src exp >= dst exp?
  15883	bge.l		src_exp_ge2
  15884
  15885# dst exp is >  src exp; scale dst to exp = 0x3fff
  15886dst_exp_gt2:
  15887	bsr.l		scale_to_zero_dst	# d0 = scale value for dst
  15888	mov.l		%d0,-(%sp)		# save scale factor
  15889
  15890	cmpi.b		STAG(%a6),&DENORM	# is src denormalized?
  15891	bne.b		cmpexp12
  15892
  15893	lea		FP_SCR0(%a6),%a0	# pass ptr to src copy
  15894	bsr.l		norm			# normalize the denorm; result is new exp
  15895	neg.w		%d0			# new exp = -(shft val)
  15896	mov.w		%d0,L_SCR1(%a6)		# insert new src exp
  15897
  15898cmpexp12:
  15899	mov.w		2+L_SCR1(%a6),%d0	# fetch dst (larger) exponent
  15900	subi.w		&mantissalen+2,%d0	# subtract mantissalen+2 from larger exp
  15901
  15902	cmp.w		%d0,L_SCR1(%a6)		# is difference >= len(mantissa)+2?
  15903	bge.b		quick_scale12
  15904
  15905	mov.w		L_SCR1(%a6),%d0		# fetch src exponent
  15906	add.w		0x2(%sp),%d0		# scale src exponent by scale factor
  15907	mov.w		FP_SCR0_EX(%a6),%d1
  15908	and.w		&0x8000,%d1		# isolate src sign
  15909	or.w		%d1,%d0			# concat {sgn,new exp}
  15910	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new src exponent
  15911
  15912	mov.l		(%sp)+,%d0		# return SCALE factor
  15913	rts
  15914
  15915quick_scale12:
  15916	andi.w		&0x8000,FP_SCR0_EX(%a6)	# zero src exponent
  15917	bset		&0x0,1+FP_SCR0_EX(%a6)	# set exp = 1
  15918
  15919	mov.l		(%sp)+,%d0		# return SCALE factor
  15920	rts
  15921
  15922# src exp is >= dst exp; scale src to exp = 0x3fff
  15923src_exp_ge2:
  15924	bsr.l		scale_to_zero_src	# d0 = scale value for src
  15925	mov.l		%d0,-(%sp)		# save scale factor
  15926
  15927	cmpi.b		DTAG(%a6),&DENORM	# is dst denormalized?
  15928	bne.b		cmpexp22
  15929	lea		FP_SCR1(%a6),%a0	# pass ptr to dst copy
  15930	bsr.l		norm			# normalize the denorm; result is new exp
  15931	neg.w		%d0			# new exp = -(shft val)
  15932	mov.w		%d0,2+L_SCR1(%a6)	# insert new dst exp
  15933
  15934cmpexp22:
  15935	mov.w		L_SCR1(%a6),%d0		# fetch src (larger) exponent
  15936	subi.w		&mantissalen+2,%d0	# subtract mantissalen+2 from larger exp
  15937
  15938	cmp.w		%d0,2+L_SCR1(%a6)	# is difference >= len(mantissa)+2?
  15939	bge.b		quick_scale22
  15940
  15941	mov.w		2+L_SCR1(%a6),%d0	# fetch dst exponent
  15942	add.w		0x2(%sp),%d0		# scale dst exponent by scale factor
  15943	mov.w		FP_SCR1_EX(%a6),%d1
  15944	andi.w		&0x8000,%d1		# isolate dst sign
  15945	or.w		%d1,%d0			# concat {sgn,new exp}
  15946	mov.w		%d0,FP_SCR1_EX(%a6)	# insert new dst exponent
  15947
  15948	mov.l		(%sp)+,%d0		# return SCALE factor
  15949	rts
  15950
  15951quick_scale22:
  15952	andi.w		&0x8000,FP_SCR1_EX(%a6)	# zero dst exponent
  15953	bset		&0x0,1+FP_SCR1_EX(%a6)	# set exp = 1
  15954
  15955	mov.l		(%sp)+,%d0		# return SCALE factor
  15956	rts
  15957
  15958##########################################################################
  15959
  15960#########################################################################
  15961# XDEF ****************************************************************	#
  15962#	scale_to_zero_src(): scale the exponent of extended precision	#
  15963#			     value at FP_SCR0(a6).			#
  15964#									#
  15965# XREF ****************************************************************	#
  15966#	norm() - normalize the mantissa if the operand was a DENORM	#
  15967#									#
  15968# INPUT ***************************************************************	#
  15969#	FP_SCR0(a6) = extended precision operand to be scaled		#
  15970#									#
  15971# OUTPUT **************************************************************	#
  15972#	FP_SCR0(a6) = scaled extended precision operand			#
  15973#	d0	    = scale value					#
  15974#									#
  15975# ALGORITHM ***********************************************************	#
  15976#	Set the exponent of the input operand to 0x3fff. Save the value	#
  15977# of the difference between the original and new exponent. Then,	#
  15978# normalize the operand if it was a DENORM. Add this normalization	#
  15979# value to the previous value. Return the result.			#
  15980#									#
  15981#########################################################################
  15982
  15983	global		scale_to_zero_src
  15984scale_to_zero_src:
  15985	mov.w		FP_SCR0_EX(%a6),%d1	# extract operand's {sgn,exp}
  15986	mov.w		%d1,%d0			# make a copy
  15987
  15988	andi.l		&0x7fff,%d1		# extract operand's exponent
  15989
  15990	andi.w		&0x8000,%d0		# extract operand's sgn
  15991	or.w		&0x3fff,%d0		# insert new operand's exponent(=0)
  15992
  15993	mov.w		%d0,FP_SCR0_EX(%a6)	# insert biased exponent
  15994
  15995	cmpi.b		STAG(%a6),&DENORM	# is operand a DENORM?
  15996	beq.b		stzs_denorm		# yes; normalize the DENORM
  15997
  15998stzs_norm:
  15999	mov.l		&0x3fff,%d0
  16000	sub.l		%d1,%d0			# scale = BIAS + (-exp)
  16001
  16002	rts
  16003
  16004stzs_denorm:
  16005	lea		FP_SCR0(%a6),%a0	# pass ptr to src op
  16006	bsr.l		norm			# normalize denorm
  16007	neg.l		%d0			# new exponent = -(shft val)
  16008	mov.l		%d0,%d1			# pass new exponent to stzs_norm in d1
  16009	bra.b		stzs_norm		# finish scaling
  16010
  16011###
  16012
  16013#########################################################################
  16014# XDEF ****************************************************************	#
  16015#	scale_sqrt(): scale the input operand exponent so a subsequent	#
  16016#		      fsqrt operation won't take an exception.		#
  16017#									#
  16018# XREF ****************************************************************	#
  16019#	norm() - normalize the mantissa if the operand was a DENORM	#
  16020#									#
  16021# INPUT ***************************************************************	#
  16022#	FP_SCR0(a6) = extended precision operand to be scaled		#
  16023#									#
  16024# OUTPUT **************************************************************	#
  16025#	FP_SCR0(a6) = scaled extended precision operand			#
  16026#	d0	    = scale value					#
  16027#									#
  16028# ALGORITHM ***********************************************************	#
  16029#	If the input operand is a DENORM, normalize it.			#
  16030#	If the exponent of the input operand is even, set the exponent	#
  16031# to 0x3ffe and return a scale factor of "(0x3ffe-exp)/2". If the	#
  16032# exponent of the input operand is odd, set the exponent to 0x3fff and	#
  16033# return a scale factor of "(0x3fff-exp)/2".				#
  16034#									#
  16035#########################################################################
  16036
  16037	global		scale_sqrt
  16038scale_sqrt:
  16039	cmpi.b		STAG(%a6),&DENORM	# is operand a DENORM?
  16040	beq.b		ss_denorm		# yes; normalize the DENORM
  16041
  16042	mov.w		FP_SCR0_EX(%a6),%d1	# extract operand's {sgn,exp}
  16043	andi.l		&0x7fff,%d1		# extract operand's exponent
  16044
  16045	andi.w		&0x8000,FP_SCR0_EX(%a6)	# keep only operand's sgn
  16046
  16047	btst		&0x0,%d1		# is exp even or odd?
  16048	beq.b		ss_norm_even
  16049
  16050	ori.w		&0x3fff,FP_SCR0_EX(%a6)	# odd: insert new exponent 0x3fff
  16051
  16052	mov.l		&0x3fff,%d0
  16053	sub.l		%d1,%d0			# scale = 0x3fff + (-exp)
  16054	asr.l		&0x1,%d0		# divide scale factor by 2
  16055	rts
  16056
  16057ss_norm_even:
  16058	ori.w		&0x3ffe,FP_SCR0_EX(%a6)	# even: insert new exponent 0x3ffe
  16059
  16060	mov.l		&0x3ffe,%d0
  16061	sub.l		%d1,%d0			# scale = 0x3ffe + (-exp)
  16062	asr.l		&0x1,%d0		# divide scale factor by 2
  16063	rts
  16064
  16065ss_denorm:
  16066	lea		FP_SCR0(%a6),%a0	# pass ptr to src op
  16067	bsr.l		norm			# normalize denorm; d0 = shft val
  16068
  16069	btst		&0x0,%d0		# is shft val even or odd?
  16070	beq.b		ss_denorm_even
  16071
  16072	ori.w		&0x3fff,FP_SCR0_EX(%a6)	# odd: insert new exponent 0x3fff
  16073
  16074	add.l		&0x3fff,%d0		# scale = 0x3fff + shft val
  16075	asr.l		&0x1,%d0		# divide scale factor by 2
  16076	rts
  16077
  16078ss_denorm_even:
  16079	ori.w		&0x3ffe,FP_SCR0_EX(%a6)	# even: insert new exponent 0x3ffe
  16080
  16081	add.l		&0x3ffe,%d0		# scale = 0x3ffe + shft val
  16082	asr.l		&0x1,%d0		# divide scale factor by 2
  16083	rts
  16084
  16085###
  16086
  16087#########################################################################
  16088# XDEF ****************************************************************	#
  16089#	scale_to_zero_dst(): scale the exponent of extended precision	#
  16090#			     value at FP_SCR1(a6).			#
  16091#									#
  16092# XREF ****************************************************************	#
  16093#	norm() - normalize the mantissa if the operand was a DENORM	#
  16094#									#
  16095# INPUT ***************************************************************	#
  16096#	FP_SCR1(a6) = extended precision operand to be scaled		#
  16097#									#
  16098# OUTPUT **************************************************************	#
  16099#	FP_SCR1(a6) = scaled extended precision operand			#
  16100#	d0	    = scale value					#
  16101#									#
  16102# ALGORITHM ***********************************************************	#
  16103#	Set the exponent of the input operand to 0x3fff. Save the value	#
  16104# of the difference between the original and new exponent. Then,	#
  16105# normalize the operand if it was a DENORM. Add this normalization	#
  16106# value to the previous value. Return the result.			#
  16107#									#
  16108#########################################################################
  16109
  16110	global		scale_to_zero_dst
  16111scale_to_zero_dst:
  16112	mov.w		FP_SCR1_EX(%a6),%d1	# extract operand's {sgn,exp}
  16113	mov.w		%d1,%d0			# make a copy
  16114
  16115	andi.l		&0x7fff,%d1		# extract operand's exponent
  16116
  16117	andi.w		&0x8000,%d0		# extract operand's sgn
  16118	or.w		&0x3fff,%d0		# insert new operand's exponent(=0)
  16119
  16120	mov.w		%d0,FP_SCR1_EX(%a6)	# insert biased exponent
  16121
  16122	cmpi.b		DTAG(%a6),&DENORM	# is operand a DENORM?
  16123	beq.b		stzd_denorm		# yes; normalize the DENORM
  16124
  16125stzd_norm:
  16126	mov.l		&0x3fff,%d0
  16127	sub.l		%d1,%d0			# scale = BIAS + (-exp)
  16128	rts
  16129
  16130stzd_denorm:
  16131	lea		FP_SCR1(%a6),%a0	# pass ptr to dst op
  16132	bsr.l		norm			# normalize denorm
  16133	neg.l		%d0			# new exponent = -(shft val)
  16134	mov.l		%d0,%d1			# pass new exponent to stzd_norm in d1
  16135	bra.b		stzd_norm		# finish scaling
  16136
  16137##########################################################################
  16138
  16139#########################################################################
  16140# XDEF ****************************************************************	#
  16141#	res_qnan(): return default result w/ QNAN operand for dyadic	#
  16142#	res_snan(): return default result w/ SNAN operand for dyadic	#
  16143#	res_qnan_1op(): return dflt result w/ QNAN operand for monadic	#
  16144#	res_snan_1op(): return dflt result w/ SNAN operand for monadic	#
  16145#									#
  16146# XREF ****************************************************************	#
  16147#	None								#
  16148#									#
  16149# INPUT ***************************************************************	#
  16150#	FP_SRC(a6) = extended precision src operand			#
  16151#	FP_DST(a6) = extended precision dst operand			#
  16152#									#
  16153# OUTPUT **************************************************************	#
  16154#	fp0 = default result						#
  16155#									#
  16156# ALGORITHM ***********************************************************	#
  16157#	If either operand (but not both operands) of an operation is a	#
  16158# nonsignalling NAN, then that NAN is returned as the result. If both	#
  16159# operands are nonsignalling NANs, then the destination operand		#
  16160# nonsignalling NAN is returned as the result.				#
  16161#	If either operand to an operation is a signalling NAN (SNAN),	#
  16162# then, the SNAN bit is set in the FPSR EXC byte. If the SNAN trap	#
  16163# enable bit is set in the FPCR, then the trap is taken and the		#
  16164# destination is not modified. If the SNAN trap enable bit is not set,	#
  16165# then the SNAN is converted to a nonsignalling NAN (by setting the	#
  16166# SNAN bit in the operand to one), and the operation continues as	#
  16167# described in the preceding paragraph, for nonsignalling NANs.		#
  16168#	Make sure the appropriate FPSR bits are set before exiting.	#
  16169#									#
  16170#########################################################################
  16171
  16172	global		res_qnan
  16173	global		res_snan
  16174res_qnan:
  16175res_snan:
  16176	cmp.b		DTAG(%a6), &SNAN	# is the dst an SNAN?
  16177	beq.b		dst_snan2		# yes; dst SNAN takes priority
  16178	cmp.b		DTAG(%a6), &QNAN	# is the dst a  QNAN?
  16179	beq.b		dst_qnan2		# yes; dst QNAN is the result
  16180src_nan:
  16181	cmp.b		STAG(%a6), &QNAN	# is the src a  QNAN?
  16182	beq.b		src_qnan2		# yes; else fall into SNAN case
  16183	global		res_snan_1op
  16184res_snan_1op:
  16185src_snan2:
  16186	bset		&0x6, FP_SRC_HI(%a6)	# set SNAN bit
  16187	or.l		&nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6) # set NAN,AIOP,SNAN
  16188	lea		FP_SRC(%a6), %a0	# a0 = ptr to src operand
  16189	bra.b		nan_comp
  16190	global		res_qnan_1op
  16191res_qnan_1op:
  16192src_qnan2:
  16193	or.l		&nan_mask, USER_FPSR(%a6) # set NAN
  16194	lea		FP_SRC(%a6), %a0	# a0 = ptr to src operand
  16195	bra.b		nan_comp
  16196dst_snan2:
  16197	or.l		&nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6) # set NAN,AIOP,SNAN
  16198	bset		&0x6, FP_DST_HI(%a6)	# set SNAN bit
  16199	lea		FP_DST(%a6), %a0	# a0 = ptr to dst operand
  16200	bra.b		nan_comp
  16201dst_qnan2:
  16202	lea		FP_DST(%a6), %a0	# a0 = ptr to dst operand
  16203	cmp.b		STAG(%a6), &SNAN	# is the src an SNAN?
  16204	bne		nan_done		# no; only NAN needs setting
  16205	or.l		&aiop_mask+snan_mask, USER_FPSR(%a6) # yes; also set AIOP,SNAN
  16206nan_done:
  16207	or.l		&nan_mask, USER_FPSR(%a6) # set NAN
  16208nan_comp:
  16209	btst		&0x7, FTEMP_EX(%a0)	# is NAN neg?
  16210	beq.b		nan_not_neg		# no; leave N clear
  16211	or.l		&neg_mask, USER_FPSR(%a6) # yes; set N
  16212nan_not_neg:
  16213	fmovm.x		(%a0), &0x80		# return operand at (a0) in fp0
  16214	rts
  16215
  16216#########################################################################
  16217# XDEF ****************************************************************	#
  16218#	res_operr(): return default result during operand error		#
  16219#									#
  16220# XREF ****************************************************************	#
  16221#	None								#
  16222#									#
  16223# INPUT ***************************************************************	#
  16224#	None								#
  16225#									#
  16226# OUTPUT **************************************************************	#
  16227#	fp0 = default operand error result				#
  16228#									#
  16229# ALGORITHM ***********************************************************	#
  16230#	A nonsignalling NAN is returned as the default result when	#
  16231# an operand error occurs for the following cases:			#
  16232#									#
  16233#	Multiply: (Infinity x Zero)					#
  16234#	Divide  : (Zero / Zero) || (Infinity / Infinity)		#
  16235#	Sqrt    : (-Infinity)						#
  16236#########################################################################
  16237
  16238	global		res_operr
  16239res_operr:
  16240	or.l		&nan_mask+operr_mask+aiop_mask, USER_FPSR(%a6) # set NAN,OPERR,AIOP
  16241	fmovm.x		nan_return(%pc), &0x80	# return default NAN in fp0
  16242	rts
  16243
  16244nan_return:
  16245	long		0x7fff0000, 0xffffffff, 0xffffffff # exp=0x7fff, mantissa all ones
  16246
  16247#########################################################################
  16248# fdbcc(): routine to emulate the fdbcc instruction			#
  16249#									#
  16250# XDEF **************************************************************** #
  16251#	_fdbcc()							#
  16252#									#
  16253# XREF **************************************************************** #
  16254#	fetch_dreg() - fetch Dn value					#
  16255#	store_dreg_l() - store updated Dn value				#
  16256#									#
  16257# INPUT ***************************************************************	#
  16258#	d0 = displacement						#
  16259#									#
  16260# OUTPUT ************************************************************** #
  16261#	none								#
  16262#									#
  16263# ALGORITHM ***********************************************************	#
  16264#	This routine checks which conditional predicate is specified by	#
  16265# the stacked fdbcc instruction opcode and then branches to a routine	#
  16266# for that predicate. The corresponding fbcc instruction is then used	#
  16267# to see whether the condition (specified by the stacked FPSR) is true	#
  16268# or false.								#
  16269#	If a BSUN exception should be indicated, the BSUN and AIOP	#
  16270# bits are set in the stacked FPSR. If the BSUN exception is enabled,	#
  16271# the fbsun_flg is set in the SPCOND_FLG location on the stack. If an	#
  16272# enabled BSUN should not be flagged and the predicate is false, then	#
  16273# Dn is fetched and decremented by one. If Dn is not equal to -1, add	#
  16274# the displacement value to the stacked PC so that when an "rte" is	#
  16275# finally executed, the branch occurs.					#
  16276#									#
  16277#########################################################################
  16278	global		_fdbcc
  16279_fdbcc:
  16280	mov.l		%d0,L_SCR1(%a6)		# save displacement
  16281
  16282	mov.w		EXC_CMDREG(%a6),%d0	# fetch predicate
  16283
  16284	clr.l		%d1			# clear scratch reg
  16285	mov.b		FPSR_CC(%a6),%d1	# fetch fp ccodes
  16286	ror.l		&0x8,%d1		# rotate to top byte
  16287	fmov.l		%d1,%fpsr		# insert into FPSR
  16288
  16289	mov.w		(tbl_fdbcc.b,%pc,%d0.w*2),%d1 # load table offset for predicate
  16290	jmp		(tbl_fdbcc.b,%pc,%d1.w) # jump to fdbcc routine
  16291
  16292tbl_fdbcc:
  16293	short		fdbcc_f		-	tbl_fdbcc	# 00
  16294	short		fdbcc_eq	-	tbl_fdbcc	# 01
  16295	short		fdbcc_ogt	-	tbl_fdbcc	# 02
  16296	short		fdbcc_oge	-	tbl_fdbcc	# 03
  16297	short		fdbcc_olt	-	tbl_fdbcc	# 04
  16298	short		fdbcc_ole	-	tbl_fdbcc	# 05
  16299	short		fdbcc_ogl	-	tbl_fdbcc	# 06
  16300	short		fdbcc_or	-	tbl_fdbcc	# 07
  16301	short		fdbcc_un	-	tbl_fdbcc	# 08
  16302	short		fdbcc_ueq	-	tbl_fdbcc	# 09
  16303	short		fdbcc_ugt	-	tbl_fdbcc	# 10
  16304	short		fdbcc_uge	-	tbl_fdbcc	# 11
  16305	short		fdbcc_ult	-	tbl_fdbcc	# 12
  16306	short		fdbcc_ule	-	tbl_fdbcc	# 13
  16307	short		fdbcc_neq	-	tbl_fdbcc	# 14
  16308	short		fdbcc_t		-	tbl_fdbcc	# 15
  16309	short		fdbcc_sf	-	tbl_fdbcc	# 16
  16310	short		fdbcc_seq	-	tbl_fdbcc	# 17
  16311	short		fdbcc_gt	-	tbl_fdbcc	# 18
  16312	short		fdbcc_ge	-	tbl_fdbcc	# 19
  16313	short		fdbcc_lt	-	tbl_fdbcc	# 20
  16314	short		fdbcc_le	-	tbl_fdbcc	# 21
  16315	short		fdbcc_gl	-	tbl_fdbcc	# 22
  16316	short		fdbcc_gle	-	tbl_fdbcc	# 23
  16317	short		fdbcc_ngle	-	tbl_fdbcc	# 24
  16318	short		fdbcc_ngl	-	tbl_fdbcc	# 25
  16319	short		fdbcc_nle	-	tbl_fdbcc	# 26
  16320	short		fdbcc_nlt	-	tbl_fdbcc	# 27
  16321	short		fdbcc_nge	-	tbl_fdbcc	# 28
  16322	short		fdbcc_ngt	-	tbl_fdbcc	# 29
  16323	short		fdbcc_sneq	-	tbl_fdbcc	# 30
  16324	short		fdbcc_st	-	tbl_fdbcc	# 31
  16325
  16326#########################################################################
  16327#									#
  16328# IEEE Nonaware tests							#
  16329#									#
  16330# For the IEEE nonaware tests, only the false branch changes the	#
  16331# counter. However, the true branch may set bsun so we check to see	#
  16332# if the NAN bit is set, in which case BSUN and AIOP will be set.	#
  16333#									#
  16334# The cases EQ and NE are shared by the Aware and Nonaware groups	#
  16335# and are incapable of setting the BSUN exception bit.			#
  16336#									#
  16337# Typically, only one of the two possible branch directions could	#
  16338# have the NAN bit set.							#
  16339# (This is assuming the mutual exclusiveness of FPSR cc bit groupings	#
  16340#  is preserved.)							#
  16341#									#
  16342#########################################################################
  16343
  16344#
  16345# equal: (cannot set BSUN)
  16346#
  16347#	Z
  16348#
  16349fdbcc_eq:
  16350	fbeq.w		fdbcc_eq_yes		# equal?
  16351fdbcc_eq_no:
  16352	bra.w		fdbcc_false		# no; go handle counter
  16353fdbcc_eq_yes:
  16354	rts					# yes; do nothing
  16355
  16356#
  16357# not equal: (cannot set BSUN)
  16358#	_
  16359#	Z
  16360#
  16361fdbcc_neq:
  16362	fbneq.w		fdbcc_neq_yes		# not equal?
  16363fdbcc_neq_no:
  16364	bra.w		fdbcc_false		# no; go handle counter
  16365fdbcc_neq_yes:
  16366	rts					# yes; do nothing
  16367
  16368#
  16369# greater than:
  16370#	_______
  16371#	NANvZvN
  16372#
  16373fdbcc_gt:
  16374	fbgt.w		fdbcc_gt_yes		# greater than?
  16375	btst		&nan_bit, FPSR_CC(%a6)	# no; is NAN set in cc?
  16376	beq.w		fdbcc_false		# no;go handle counter
  16377	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP bits
  16378	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
  16379	bne.w		fdbcc_bsun		# yes; we have an exception
  16380	bra.w		fdbcc_false		# no; go handle counter
  16381fdbcc_gt_yes:
  16382	rts					# predicate true; do nothing
  16383
  16384#
  16385# not greater than:
  16386#
  16387#	NANvZvN
  16388#
  16389fdbcc_ngt:
  16390	fbngt.w		fdbcc_ngt_yes		# not greater than?
  16391fdbcc_ngt_no:
  16392	bra.w		fdbcc_false		# no; go handle counter
  16393fdbcc_ngt_yes:
  16394	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
  16395	beq.b		fdbcc_ngt_done		# no;go finish
  16396	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP bits
  16397	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
  16398	bne.w		fdbcc_bsun		# yes; we have an exception
  16399fdbcc_ngt_done:
  16400	rts					# predicate true; do nothing
  16401
  16402#
  16403# greater than or equal:
  16404#	   _____
  16405#	Zv(NANvN)
  16406#
  16407fdbcc_ge:
  16408	fbge.w		fdbcc_ge_yes		# greater than or equal?
  16409fdbcc_ge_no:
  16410	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
  16411	beq.w		fdbcc_false		# no;go handle counter
  16412	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP bits
  16413	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
  16414	bne.w		fdbcc_bsun		# yes; we have an exception
  16415	bra.w		fdbcc_false		# no; go handle counter
  16416fdbcc_ge_yes:
  16417	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
  16418	beq.b		fdbcc_ge_yes_done	# no;go do nothing
  16419	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP bits
  16420	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
  16421	bne.w		fdbcc_bsun		# yes; we have an exception
  16422fdbcc_ge_yes_done:
  16423	rts					# predicate true; do nothing
  16424
  16425#
  16426# not (greater than or equal):
  16427#	       _
  16428#	NANv(N^Z)
  16429#
  16430fdbcc_nge:
  16431	fbnge.w		fdbcc_nge_yes		# not (greater than or equal)?
  16432fdbcc_nge_no:
  16433	bra.w		fdbcc_false		# no; go handle counter
  16434fdbcc_nge_yes:
  16435	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
  16436	beq.b		fdbcc_nge_done		# no;go finish
  16437	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP bits
  16438	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
  16439	bne.w		fdbcc_bsun		# yes; we have an exception
  16440fdbcc_nge_done:
  16441	rts					# predicate true; do nothing
  16442
  16443#
  16444# less than:
  16445#	   _____
  16446#	N^(NANvZ)
  16447#
  16448fdbcc_lt:
  16449	fblt.w		fdbcc_lt_yes		# less than?
  16450fdbcc_lt_no:
  16451	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
  16452	beq.w		fdbcc_false		# no; go handle counter
  16453	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP bits
  16454	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
  16455	bne.w		fdbcc_bsun		# yes; we have an exception
  16456	bra.w		fdbcc_false		# no; go handle counter
  16457fdbcc_lt_yes:
  16458	rts					# predicate true; do nothing
  16459
  16460#
  16461# not less than:
  16462#	       _
  16463#	NANv(ZvN)
  16464#
  16465fdbcc_nlt:
  16466	fbnlt.w		fdbcc_nlt_yes		# not less than?
  16467fdbcc_nlt_no:
  16468	bra.w		fdbcc_false		# no; go handle counter
  16469fdbcc_nlt_yes:
  16470	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
  16471	beq.b		fdbcc_nlt_done		# no;go finish
  16472	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP bits
  16473	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
  16474	bne.w		fdbcc_bsun		# yes; we have an exception
  16475fdbcc_nlt_done:
  16476	rts					# predicate true; do nothing
  16477
  16478#
  16479# less than or equal:
  16480#	     ___
  16481#	Zv(N^NAN)
  16482#
  16483fdbcc_le:
  16484	fble.w		fdbcc_le_yes		# less than or equal?
  16485fdbcc_le_no:
  16486	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
  16487	beq.w		fdbcc_false		# no; go handle counter
  16488	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP bits
  16489	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
  16490	bne.w		fdbcc_bsun		# yes; we have an exception
  16491	bra.w		fdbcc_false		# no; go handle counter
  16492fdbcc_le_yes:
  16493	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
  16494	beq.b		fdbcc_le_yes_done	# no; go do nothing
  16495	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP bits
  16496	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
  16497	bne.w		fdbcc_bsun		# yes; we have an exception
  16498fdbcc_le_yes_done:
  16499	rts					# predicate true; do nothing
  16500
  16501#
  16502# not (less than or equal):
  16503#	     ___
  16504#	NANv(NvZ)
  16505#
  16506fdbcc_nle:
  16507	fbnle.w		fdbcc_nle_yes		# not (less than or equal)?
  16508fdbcc_nle_no:
  16509	bra.w		fdbcc_false		# no; go handle counter
  16510fdbcc_nle_yes:
  16511	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
  16512	beq.w		fdbcc_nle_done		# no; go finish
  16513	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP bits
  16514	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
  16515	bne.w		fdbcc_bsun		# yes; we have an exception
  16516fdbcc_nle_done:
  16517	rts					# predicate true; do nothing
  16518
  16519#
  16520# greater or less than:
  16521#	_____
  16522#	NANvZ
  16523#
  16524fdbcc_gl:
  16525	fbgl.w		fdbcc_gl_yes		# greater or less than?
  16526fdbcc_gl_no:
  16527	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
  16528	beq.w		fdbcc_false		# no; handle counter
  16529	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP bits
  16530	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
  16531	bne.w		fdbcc_bsun		# yes; we have an exception
  16532	bra.w		fdbcc_false		# no; go handle counter
  16533fdbcc_gl_yes:
  16534	rts					# predicate true; do nothing
  16535
  16536#
  16537# not (greater or less than):
  16538#
  16539#	NANvZ
  16540#
  16541fdbcc_ngl:
  16542	fbngl.w		fdbcc_ngl_yes		# not (greater or less than)?
  16543fdbcc_ngl_no:
  16544	bra.w		fdbcc_false		# no; go handle counter
  16545fdbcc_ngl_yes:
  16546	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
  16547	beq.b		fdbcc_ngl_done		# no; go finish
  16548	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP bits
  16549	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
  16550	bne.w		fdbcc_bsun		# yes; we have an exception
  16551fdbcc_ngl_done:
  16552	rts					# predicate true; do nothing
  16553
  16554#
  16555# greater, less, or equal:
  16556#	___
  16557#	NAN
  16558#
  16559fdbcc_gle:
  16560	fbgle.w		fdbcc_gle_yes		# greater, less, or equal?
  16561fdbcc_gle_no:
  16562	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # no (NAN); set BSUN, AIOP bits
  16563	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
  16564	bne.w		fdbcc_bsun		# yes; we have an exception
  16565	bra.w		fdbcc_false		# no; go handle counter
  16566fdbcc_gle_yes:
  16567	rts					# predicate true; do nothing
  16568
  16569#
  16570# not (greater, less, or equal):
  16571#
  16572#	NAN
  16573#
  16574fdbcc_ngle:
  16575	fbngle.w	fdbcc_ngle_yes		# not (greater, less, or equal)?
  16576fdbcc_ngle_no:
  16577	bra.w		fdbcc_false		# no; go handle counter
  16578fdbcc_ngle_yes:
  16579	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # yes (NAN); set BSUN, AIOP bits
  16580	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
  16581	bne.w		fdbcc_bsun		# yes; we have an exception
  16582	rts					# no; do nothing
  16583
  16584#########################################################################
  16585#									#
  16586# Miscellaneous tests							#
  16587#									#
  16588# For the IEEE miscellaneous tests, all but fdbf and fdbt can set bsun. #
  16589#									#
  16590#########################################################################
  16591
  16592#
  16593# false: (predicate never true; counter is always handled)
  16594#
  16595#	False
  16596#
  16597fdbcc_f:					# no bsun possible
  16598	bra.w		fdbcc_false		# go handle counter
  16599
  16600#
  16601# true: (predicate always true; counter is never touched)
  16602#
  16603#	True
  16604#
  16605fdbcc_t:					# no bsun possible
  16606	rts					# do nothing
  16607
  16608#
  16609# signalling false:
  16610#
  16611#	False
  16612#
  16613fdbcc_sf:
  16614	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set?
  16615	beq.w		fdbcc_false		# no;go handle counter
  16616	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP bits
  16617	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
  16618	bne.w		fdbcc_bsun		# yes; we have an exception
  16619	bra.w		fdbcc_false		# no; go handle counter
  16620
  16621#
  16622# signalling true:
  16623#
  16624#	True
  16625#
  16626fdbcc_st:
  16627	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set?
  16628	beq.b		fdbcc_st_done		# no;go finish
  16629	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP bits
  16630	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
  16631	bne.w		fdbcc_bsun		# yes; we have an exception
  16632fdbcc_st_done:
  16633	rts					# predicate true; do nothing
  16634
  16635#
  16636# signalling equal:
  16637#
  16638#	Z
  16639#
  16640fdbcc_seq:
  16641	fbseq.w		fdbcc_seq_yes		# signalling equal?
  16642fdbcc_seq_no:
  16643	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set?
  16644	beq.w		fdbcc_false		# no;go handle counter
  16645	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP bits
  16646	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
  16647	bne.w		fdbcc_bsun		# yes; we have an exception
  16648	bra.w		fdbcc_false		# no; go handle counter
  16649fdbcc_seq_yes:
  16650	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set?
  16651	beq.b		fdbcc_seq_yes_done	# no;go do nothing
  16652	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP bits
  16653	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
  16654	bne.w		fdbcc_bsun		# yes; we have an exception
  16655fdbcc_seq_yes_done:
  16656	rts					# yes; do nothing
  16657
  16658#
  16659# signalling not equal:
  16660#	_
  16661#	Z
  16662#
  16663fdbcc_sneq:
  16664	fbsneq.w	fdbcc_sneq_yes		# signalling not equal?
  16665fdbcc_sneq_no:
  16666	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set?
  16667	beq.w		fdbcc_false		# no;go handle counter
  16668	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP bits
  16669	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
  16670	bne.w		fdbcc_bsun		# yes; we have an exception
  16671	bra.w		fdbcc_false		# no; go handle counter
  16672fdbcc_sneq_yes:
  16673	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set?
  16674	beq.w		fdbcc_sneq_done		# no;go finish
  16675	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP bits
  16676	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
  16677	bne.w		fdbcc_bsun		# yes; we have an exception
  16678fdbcc_sneq_done:
  16679	rts					# predicate true; do nothing
  16680
  16681#########################################################################
  16682#									#
  16683# IEEE Aware tests							#
  16684#									#
  16685# For the IEEE aware tests, action is only taken if the result is false.#
  16686# Therefore, the opposite branch type is used to jump to the decrement	#
  16687# routine.								#
  16688# The BSUN exception will not be set for any of these tests.		#
  16689#									#
  16690#########################################################################
  16691
  16692#
  16693# ordered greater than:
  16694#	_______
  16695#	NANvZvN
  16696#
  16697fdbcc_ogt:
  16698	fbogt.w		fdbcc_ogt_yes		# ordered greater than?
  16699fdbcc_ogt_no:
  16700	bra.w		fdbcc_false		# no; decrement Dn and maybe branch
  16701fdbcc_ogt_yes:
  16702	rts					# yes; leave Dn and PC untouched
  16703
  16704#
  16705# unordered or less or equal:
  16706#	_______
  16707#	NANvZvN
  16708#
  16709fdbcc_ule:
  16710	fbule.w		fdbcc_ule_yes		# unordered or less or equal?
  16711fdbcc_ule_no:
  16712	bra.w		fdbcc_false		# no; decrement Dn and maybe branch
  16713fdbcc_ule_yes:
  16714	rts					# yes; leave Dn and PC untouched
  16715
  16716#
  16717# ordered greater than or equal:
  16718#	   _____
  16719#	Zv(NANvN)
  16720#
  16721fdbcc_oge:
  16722	fboge.w		fdbcc_oge_yes		# ordered greater than or equal?
  16723fdbcc_oge_no:
  16724	bra.w		fdbcc_false		# no; decrement Dn and maybe branch
  16725fdbcc_oge_yes:
  16726	rts					# yes; leave Dn and PC untouched
  16727
  16728#
  16729# unordered or less than:
  16730#	       _
  16731#	NANv(N^Z)
  16732#
  16733fdbcc_ult:
  16734	fbult.w		fdbcc_ult_yes		# unordered or less than?
  16735fdbcc_ult_no:
  16736	bra.w		fdbcc_false		# no; decrement Dn and maybe branch
  16737fdbcc_ult_yes:
  16738	rts					# yes; leave Dn and PC untouched
  16739
  16740#
  16741# ordered less than:
  16742#	   _____
  16743#	N^(NANvZ)
  16744#
  16745fdbcc_olt:
  16746	fbolt.w		fdbcc_olt_yes		# ordered less than?
  16747fdbcc_olt_no:
  16748	bra.w		fdbcc_false		# no; decrement Dn and maybe branch
  16749fdbcc_olt_yes:
  16750	rts					# yes; leave Dn and PC untouched
  16751
  16752#
  16753# unordered or greater or equal:
  16754#
  16755#	NANvZvN
  16756#
  16757fdbcc_uge:
  16758	fbuge.w		fdbcc_uge_yes		# unordered or greater or equal?
  16759fdbcc_uge_no:
  16760	bra.w		fdbcc_false		# no; decrement Dn and maybe branch
  16761fdbcc_uge_yes:
  16762	rts					# yes; leave Dn and PC untouched
  16763
  16764#
  16765# ordered less than or equal:
  16766#	     ___
  16767#	Zv(N^NAN)
  16768#
  16769fdbcc_ole:
  16770	fbole.w		fdbcc_ole_yes		# ordered less than or equal?
  16771fdbcc_ole_no:
  16772	bra.w		fdbcc_false		# no; decrement Dn and maybe branch
  16773fdbcc_ole_yes:
  16774	rts					# yes; leave Dn and PC untouched
  16775
  16776#
  16777# unordered or greater than:
  16778#	     ___
  16779#	NANv(NvZ)
  16780#
  16781fdbcc_ugt:
  16782	fbugt.w		fdbcc_ugt_yes		# unordered or greater than?
  16783fdbcc_ugt_no:
  16784	bra.w		fdbcc_false		# no; decrement Dn and maybe branch
  16785fdbcc_ugt_yes:
  16786	rts					# yes; leave Dn and PC untouched
  16787
  16788#
  16789# ordered greater or less than:
  16790#	_____
  16791#	NANvZ
  16792#
  16793fdbcc_ogl:
  16794	fbogl.w		fdbcc_ogl_yes		# ordered greater or less than?
  16795fdbcc_ogl_no:
  16796	bra.w		fdbcc_false		# no; decrement Dn and maybe branch
  16797fdbcc_ogl_yes:
  16798	rts					# yes; leave Dn and PC untouched
  16799
  16800#
  16801# unordered or equal:
  16802#
  16803#	NANvZ
  16804#
  16805fdbcc_ueq:
  16806	fbueq.w		fdbcc_ueq_yes		# unordered or equal?
  16807fdbcc_ueq_no:
  16808	bra.w		fdbcc_false		# no; decrement Dn and maybe branch
  16809fdbcc_ueq_yes:
  16810	rts					# yes; leave Dn and PC untouched
  16811
  16812#
  16813# ordered:
  16814#	___
  16815#	NAN
  16816#
  16817fdbcc_or:
  16818	fbor.w		fdbcc_or_yes		# ordered?
  16819fdbcc_or_no:
  16820	bra.w		fdbcc_false		# no; decrement Dn and maybe branch
  16821fdbcc_or_yes:
  16822	rts					# yes; leave Dn and PC untouched
  16823
  16824#
  16825# unordered:
  16826#
  16827#	NAN
  16828#
  16829fdbcc_un:
  16830	fbun.w		fdbcc_un_yes		# unordered?
  16831fdbcc_un_no:
  16832	bra.w		fdbcc_false		# no; decrement Dn and maybe branch
  16833fdbcc_un_yes:
  16834	rts					# yes; leave Dn and PC untouched
  16835
  16836#######################################################################
  16837
  16838#
  16839# the bsun exception bit was not set.
  16840#
  16841# (1) subtract 1 from the count register
  16842# (2) if (cr == -1) then
  16843#	pc = pc of next instruction
  16844#     else
  16845#	pc += sign_ext(16-bit displacement)
  16846#
  16847fdbcc_false:
  16848	mov.b		1+EXC_OPWORD(%a6), %d1	# d1.b = low byte of stacked opword
  16849	andi.w		&0x7, %d1		# d1 = count register number (0-7)
  16850
  16851	bsr.l		fetch_dreg		# fetch count value (used in d0 below)
  16852# d0 (count) and d1 (register number) are both reused below; NOTE(review): confirm fetch_dreg/store_dreg_l leave them intact
  16853
  16854	subq.w		&0x1, %d0		# Dn - 1 -> Dn (word-sized decrement)
  16855
  16856	bsr.l		store_dreg_l		# store new count value
  16857
  16858	cmpi.w		%d0, &-0x1		# is (Dn.w == -1)?
  16859	bne.b		fdbcc_false_cont	# no; count not expired, take the branch
  16860	rts					# yes; EXC_PC is left unchanged here
  16861
  16862fdbcc_false_cont:
  16863	mov.l		L_SCR1(%a6),%d0		# fetch displacement
  16864	add.l		USER_FPIAR(%a6),%d0	# add instruction PC
  16865	addq.l		&0x4,%d0		# +4: opword and cmd word precede the displacement word
  16866	mov.l		%d0,EXC_PC(%a6)		# set new PC
  16867	rts
  16868
  16869# the emulation routine set bsun and BSUN was enabled. have to
  16870# fix stack and jump to the bsun handler.
  16871# let the caller of this routine shift the stack frame up to
  16872# eliminate the effective address field.
  16873fdbcc_bsun:
  16874	mov.b		&fbsun_flg,SPCOND_FLG(%a6) # flag the enabled BSUN for the caller
  16875	rts					# caller reshapes the stack frame
  16876
  16877#########################################################################
  16878# ftrapcc(): routine to emulate the ftrapcc instruction			#
  16879#									#
  16880# XDEF ****************************************************************	#
  16881#	_ftrapcc()							#
  16882#									#
  16883# XREF ****************************************************************	#
  16884#	none								#
  16885#									#
  16886# INPUT *************************************************************** #
  16887#	none								#
  16888#									#
  16889# OUTPUT ************************************************************** #
  16890#	none								#
  16891#									#
  16892# ALGORITHM *********************************************************** #
  16893#	This routine checks which conditional predicate is specified by	#
  16894# the stacked ftrapcc instruction opcode and then branches to a routine	#
  16895# for that predicate. The corresponding fbcc instruction is then used	#
  16896# to see whether the condition (specified by the stacked FPSR) is true	#
  16897# or false.								#
  16898#	If a BSUN exception should be indicated, the BSUN and AIOP	#
  16899# bits are set in the stacked FPSR. If the BSUN exception is enabled,	#
  16900# the fbsun_flg is set in the SPCOND_FLG location on the stack. If an	#
  16901# enabled BSUN should not be flagged and the predicate is true, then	#
  16902# the ftrapcc_flg is set in the SPCOND_FLG location. These special	#
  16903# flags indicate to the calling routine to emulate the exceptional	#
  16904# condition.								#
  16905#									#
  16906#########################################################################
  16907
  16908	global		_ftrapcc
  16909_ftrapcc:
  16910	mov.w		EXC_CMDREG(%a6),%d0	# d0 = predicate (used as table index)
  16911
  16912	clr.l		%d1			# clear scratch reg
  16913	mov.b		FPSR_CC(%a6),%d1	# fetch stacked fp ccodes into low byte
  16914	ror.l		&0x8,%d1		# rotate ccodes into the top byte
  16915	fmov.l		%d1,%fpsr		# load FPSR so the fbcc tests below see them
  16916
  16917	mov.w		(tbl_ftrapcc.b,%pc,%d0.w*2), %d1 # d1 = handler offset from table
  16918	jmp		(tbl_ftrapcc.b,%pc,%d1.w) # jump to the predicate's ftrapcc routine
  16919
  16920tbl_ftrapcc:
  16921	short		ftrapcc_f	-	tbl_ftrapcc	# 00
  16922	short		ftrapcc_eq	-	tbl_ftrapcc	# 01
  16923	short		ftrapcc_ogt	-	tbl_ftrapcc	# 02
  16924	short		ftrapcc_oge	-	tbl_ftrapcc	# 03
  16925	short		ftrapcc_olt	-	tbl_ftrapcc	# 04
  16926	short		ftrapcc_ole	-	tbl_ftrapcc	# 05
  16927	short		ftrapcc_ogl	-	tbl_ftrapcc	# 06
  16928	short		ftrapcc_or	-	tbl_ftrapcc	# 07
  16929	short		ftrapcc_un	-	tbl_ftrapcc	# 08
  16930	short		ftrapcc_ueq	-	tbl_ftrapcc	# 09
  16931	short		ftrapcc_ugt	-	tbl_ftrapcc	# 10
  16932	short		ftrapcc_uge	-	tbl_ftrapcc	# 11
  16933	short		ftrapcc_ult	-	tbl_ftrapcc	# 12
  16934	short		ftrapcc_ule	-	tbl_ftrapcc	# 13
  16935	short		ftrapcc_neq	-	tbl_ftrapcc	# 14
  16936	short		ftrapcc_t	-	tbl_ftrapcc	# 15
  16937	short		ftrapcc_sf	-	tbl_ftrapcc	# 16
  16938	short		ftrapcc_seq	-	tbl_ftrapcc	# 17
  16939	short		ftrapcc_gt	-	tbl_ftrapcc	# 18
  16940	short		ftrapcc_ge	-	tbl_ftrapcc	# 19
  16941	short		ftrapcc_lt	-	tbl_ftrapcc	# 20
  16942	short		ftrapcc_le	-	tbl_ftrapcc	# 21
  16943	short		ftrapcc_gl	-	tbl_ftrapcc	# 22
  16944	short		ftrapcc_gle	-	tbl_ftrapcc	# 23
  16945	short		ftrapcc_ngle	-	tbl_ftrapcc	# 24
  16946	short		ftrapcc_ngl	-	tbl_ftrapcc	# 25
  16947	short		ftrapcc_nle	-	tbl_ftrapcc	# 26
  16948	short		ftrapcc_nlt	-	tbl_ftrapcc	# 27
  16949	short		ftrapcc_nge	-	tbl_ftrapcc	# 28
  16950	short		ftrapcc_ngt	-	tbl_ftrapcc	# 29
  16951	short		ftrapcc_sneq	-	tbl_ftrapcc	# 30
  16952	short		ftrapcc_st	-	tbl_ftrapcc	# 31
  16953
  16954#########################################################################
  16955#									#
  16956# IEEE Nonaware tests							#
  16957#									#
  16958# For the IEEE nonaware tests, we set the result based on the		#
  16959# floating point condition codes. In addition, we check to see		#
  16960# if the NAN bit is set, in which case BSUN and AIOP will be set.	#
  16961#									#
  16962# The cases EQ and NE are shared by the Aware and Nonaware groups	#
  16963# and are incapable of setting the BSUN exception bit.			#
  16964#									#
  16965# Typically, only one of the two possible branch directions could	#
  16966# have the NAN bit set.							#
  16967#									#
  16968#########################################################################
  16969
  16970#
  16971# equal:
  16972#
  16973#	Z
  16974#
  16975ftrapcc_eq:
  16976	fbeq.w		ftrapcc_trap		# equal? yes; go take trap
  16977ftrapcc_eq_no:
  16978	rts					# no; do nothing
  16979
  16980#
  16981# not equal:
  16982#	_
  16983#	Z
  16984#
  16985ftrapcc_neq:
  16986	fbneq.w		ftrapcc_trap		# not equal? yes; go take trap
  16987ftrapcc_neq_no:
  16988	rts					# no; do nothing
  16989
  16990#
  16991# greater than:
  16992#	_______
  16993#	NANvZvN
  16994#
  16995ftrapcc_gt:
  16996	fbgt.w		ftrapcc_trap		# greater than? yes; go take trap
  16997ftrapcc_gt_no:
  16998	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
  16999	beq.b		ftrapcc_gt_done		# no; go finish
  17000	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
  17001	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
  17002	bne.w		ftrapcc_bsun		# yes; flag the BSUN exception
  17003ftrapcc_gt_done:
  17004	rts					# no; do nothing
  17005
  17006#
  17007# not greater than:
  17008#
  17009#	NANvZvN
  17010#
  17011ftrapcc_ngt:
  17012	fbngt.w		ftrapcc_ngt_yes		# not greater than?
  17013ftrapcc_ngt_no:
  17014	rts					# no; do nothing
  17015ftrapcc_ngt_yes:
  17016	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
  17017	beq.w		ftrapcc_trap		# no; go take trap
  17018	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
  17019	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
  17020	bne.w		ftrapcc_bsun		# yes; flag the BSUN exception
  17021	bra.w		ftrapcc_trap		# no; go take trap
  17022
  17023#
  17024# greater than or equal:
  17025#	   _____
  17026#	Zv(NANvN)
  17027#
  17028ftrapcc_ge:
  17029	fbge.w		ftrapcc_ge_yes		# greater than or equal?
  17030ftrapcc_ge_no:
  17031	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
  17032	beq.b		ftrapcc_ge_done		# no; go finish
  17033	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
  17034	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
  17035	bne.w		ftrapcc_bsun		# yes; flag the BSUN exception
  17036ftrapcc_ge_done:
  17037	rts					# no; do nothing
  17038ftrapcc_ge_yes:
  17039	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
  17040	beq.w		ftrapcc_trap		# no; go take trap
  17041	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
  17042	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
  17043	bne.w		ftrapcc_bsun		# yes; flag the BSUN exception
  17044	bra.w		ftrapcc_trap		# no; go take trap
  17045
  17046#
  17047# not (greater than or equal):
  17048#	       _
  17049#	NANv(N^Z)
  17050#
  17051ftrapcc_nge:
  17052	fbnge.w		ftrapcc_nge_yes		# not (greater than or equal)?
  17053ftrapcc_nge_no:
  17054	rts					# no; do nothing
  17055ftrapcc_nge_yes:
  17056	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
  17057	beq.w		ftrapcc_trap		# no; go take trap
  17058	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
  17059	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
  17060	bne.w		ftrapcc_bsun		# yes; flag the BSUN exception
  17061	bra.w		ftrapcc_trap		# no; go take trap
  17062
  17063#
  17064# less than:
  17065#	   _____
  17066#	N^(NANvZ)
  17067#
  17068ftrapcc_lt:
  17069	fblt.w		ftrapcc_trap		# less than? yes; go take trap
  17070ftrapcc_lt_no:
  17071	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
  17072	beq.b		ftrapcc_lt_done		# no; go finish
  17073	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
  17074	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
  17075	bne.w		ftrapcc_bsun		# yes; flag the BSUN exception
  17076ftrapcc_lt_done:
  17077	rts					# no; do nothing
  17078
  17079#
  17080# not less than:
  17081#	       _
  17082#	NANv(ZvN)
  17083#
  17084ftrapcc_nlt:
  17085	fbnlt.w		ftrapcc_nlt_yes		# not less than?
  17086ftrapcc_nlt_no:
  17087	rts					# no; do nothing
  17088ftrapcc_nlt_yes:
  17089	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
  17090	beq.w		ftrapcc_trap		# no; go take trap
  17091	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
  17092	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
  17093	bne.w		ftrapcc_bsun		# yes; flag the BSUN exception
  17094	bra.w		ftrapcc_trap		# no; go take trap
  17095
  17096#
  17097# less than or equal:
  17098#	     ___
  17099#	Zv(N^NAN)
  17100#
  17101ftrapcc_le:
  17102	fble.w		ftrapcc_le_yes		# less than or equal?
  17103ftrapcc_le_no:
  17104	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
  17105	beq.b		ftrapcc_le_done		# no; go finish
  17106	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
  17107	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
  17108	bne.w		ftrapcc_bsun		# yes; flag the BSUN exception
  17109ftrapcc_le_done:
  17110	rts					# no; do nothing
  17111ftrapcc_le_yes:
  17112	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
  17113	beq.w		ftrapcc_trap		# no; go take trap
  17114	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
  17115	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
  17116	bne.w		ftrapcc_bsun		# yes; flag the BSUN exception
  17117	bra.w		ftrapcc_trap		# no; go take trap
  17118
  17119#
  17120# not (less than or equal):
  17121#	     ___
  17122#	NANv(NvZ)
  17123#
  17124ftrapcc_nle:
  17125	fbnle.w		ftrapcc_nle_yes		# not (less than or equal)?
  17126ftrapcc_nle_no:
  17127	rts					# no; do nothing
  17128ftrapcc_nle_yes:
  17129	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
  17130	beq.w		ftrapcc_trap		# no; go take trap
  17131	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
  17132	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
  17133	bne.w		ftrapcc_bsun		# yes; flag the BSUN exception
  17134	bra.w		ftrapcc_trap		# no; go take trap
  17135
  17136#
  17137# greater or less than:
  17138#	_____
  17139#	NANvZ
  17140#
  17141ftrapcc_gl:
  17142	fbgl.w		ftrapcc_trap		# greater or less than? yes; go take trap
  17143ftrapcc_gl_no:
  17144	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
  17145	beq.b		ftrapcc_gl_done		# no; go finish
  17146	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
  17147	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
  17148	bne.w		ftrapcc_bsun		# yes; flag the BSUN exception
  17149ftrapcc_gl_done:
  17150	rts					# no; do nothing
  17151
  17152#
  17153# not (greater or less than):
  17154#
  17155#	NANvZ
  17156#
  17157ftrapcc_ngl:
  17158	fbngl.w		ftrapcc_ngl_yes		# not (greater or less than)?
  17159ftrapcc_ngl_no:
  17160	rts					# no; do nothing
  17161ftrapcc_ngl_yes:
  17162	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
  17163	beq.w		ftrapcc_trap		# no; go take trap
  17164	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
  17165	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
  17166	bne.w		ftrapcc_bsun		# yes; flag the BSUN exception
  17167	bra.w		ftrapcc_trap		# no; go take trap
  17168
  17169#
  17170# greater, less, or equal:
  17171#	___
  17172#	NAN
  17173#
  17174ftrapcc_gle:
  17175	fbgle.w		ftrapcc_trap		# greater, less, or equal? yes; go take trap
  17176ftrapcc_gle_no:
  17177	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
  17178	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
  17179	bne.w		ftrapcc_bsun		# yes; flag the BSUN exception
  17180	rts					# no; do nothing
  17181
  17182#
  17183# not (greater, less, or equal):
  17184#
  17185#	NAN
  17186#
  17187ftrapcc_ngle:
  17188	fbngle.w	ftrapcc_ngle_yes	# not (greater, less, or equal)?
  17189ftrapcc_ngle_no:
  17190	rts					# no; do nothing
  17191ftrapcc_ngle_yes:
  17192	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
  17193	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
  17194	bne.w		ftrapcc_bsun		# yes; flag the BSUN exception
  17195	bra.w		ftrapcc_trap		# no; go take trap
  17196
  17197#########################################################################
  17198#									#
  17199# Miscellaneous tests							#
  17200#									#
  17201# The signalling tests here (sf, st, seq, sneq) set BSUN and AIOP when	#
  17202# the NAN condition code bit is set; the plain false and true tests	#
  17203# never set the BSUN exception bit.					#
  17204#									#
  17205#########################################################################
  17206
  17207#
  17208# false:
  17209#
  17210#	False
  17211#
  17212ftrapcc_f:
  17213	rts					# predicate is never true; no trap, do nothing
  17214
  17215#
  17216# true:
  17217#
  17218#	True
  17219#
  17220ftrapcc_t:
  17221	bra.w		ftrapcc_trap		# predicate is always true; go take trap
  17222
  17223#
  17224# signalling false:
  17225#
  17226#	False
  17227#
  17228ftrapcc_sf:
  17229	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
  17230	beq.b		ftrapcc_sf_done		# no; go finish
  17231	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
  17232	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
  17233	bne.w		ftrapcc_bsun		# yes; flag the BSUN exception
  17234ftrapcc_sf_done:
  17235	rts					# no; do nothing
  17236
  17237#
  17238# signalling true:
  17239#
  17240#	True
  17241#
  17242ftrapcc_st:
  17243	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
  17244	beq.w		ftrapcc_trap		# no; go take trap
  17245	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
  17246	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
  17247	bne.w		ftrapcc_bsun		# yes; flag the BSUN exception
  17248	bra.w		ftrapcc_trap		# no; go take trap
  17249
  17250#
  17251# signalling equal:
  17252#
  17253#	Z
  17254#
  17255ftrapcc_seq:
  17256	fbseq.w		ftrapcc_seq_yes		# signalling equal?
  17257ftrapcc_seq_no:
  17258	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
  17259	beq.w		ftrapcc_seq_done	# no; go finish
  17260	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
  17261	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
  17262	bne.w		ftrapcc_bsun		# yes; flag the BSUN exception
  17263ftrapcc_seq_done:
  17264	rts					# no; do nothing
  17265ftrapcc_seq_yes:
  17266	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
  17267	beq.w		ftrapcc_trap		# no; go take trap
  17268	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
  17269	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
  17270	bne.w		ftrapcc_bsun		# yes; flag the BSUN exception
  17271	bra.w		ftrapcc_trap		# no; go take trap
  17272
  17273#
  17274# signalling not equal:
  17275#	_
  17276#	Z
  17277#
  17278ftrapcc_sneq:
  17279	fbsneq.w	ftrapcc_sneq_yes	# signalling not equal?
  17280ftrapcc_sneq_no:
  17281	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
  17282	beq.w		ftrapcc_sneq_no_done	# no; go finish
  17283	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
  17284	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
  17285	bne.w		ftrapcc_bsun		# yes; flag the BSUN exception
  17286ftrapcc_sneq_no_done:
  17287	rts					# no; do nothing
  17288ftrapcc_sneq_yes:
  17289	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
  17290	beq.w		ftrapcc_trap		# no; go take trap
  17291	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
  17292	btst		&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
  17293	bne.w		ftrapcc_bsun		# yes; flag the BSUN exception
  17294	bra.w		ftrapcc_trap		# no; go take trap
  17295
  17296#########################################################################
  17297#									#
  17298# IEEE Aware tests							#
  17299#									#
  17300# For the IEEE aware tests, we only have to set the result based on the	#
  17301# floating point condition codes. The BSUN exception will not be	#
  17302# set for any of these tests.						#
  17303#									#
  17304#########################################################################
  17305
  17306#
  17307# ordered greater than:
  17308#	_______
  17309#	NANvZvN
  17310#
  17311ftrapcc_ogt:
  17312	fbogt.w		ftrapcc_trap		# ordered greater than? yes; go take trap
  17313ftrapcc_ogt_no:
  17314	rts					# no; do nothing
  17315
  17316#
  17317# unordered or less or equal:
  17318#	_______
  17319#	NANvZvN
  17320#
  17321ftrapcc_ule:
  17322	fbule.w		ftrapcc_trap		# unordered or less or equal? yes; go take trap
  17323ftrapcc_ule_no:
  17324	rts					# no; do nothing
  17325
  17326#
  17327# ordered greater than or equal:
  17328#	   _____
  17329#	Zv(NANvN)
  17330#
  17331ftrapcc_oge:
  17332	fboge.w		ftrapcc_trap		# ordered greater than or equal? yes; go take trap
  17333ftrapcc_oge_no:
  17334	rts					# no; do nothing
  17335
  17336#
  17337# unordered or less than:
  17338#	       _
  17339#	NANv(N^Z)
  17340#
  17341ftrapcc_ult:
  17342	fbult.w		ftrapcc_trap		# unordered or less than? yes; go take trap
  17343ftrapcc_ult_no:
  17344	rts					# no; do nothing
  17345
  17346#
  17347# ordered less than:
  17348#	   _____
  17349#	N^(NANvZ)
  17350#
  17351ftrapcc_olt:
  17352	fbolt.w		ftrapcc_trap		# ordered less than? yes; go take trap
  17353ftrapcc_olt_no:
  17354	rts					# no; do nothing
  17355
  17356#
  17357# unordered or greater or equal:
  17358#
  17359#	NANvZvN
  17360#
  17361ftrapcc_uge:
  17362	fbuge.w		ftrapcc_trap		# unordered or greater or equal? yes; go take trap
  17363ftrapcc_uge_no:
  17364	rts					# no; do nothing
  17365
  17366#
  17367# ordered less than or equal:
  17368#	     ___
  17369#	Zv(N^NAN)
  17370#
  17371ftrapcc_ole:
  17372	fbole.w		ftrapcc_trap		# ordered less than or equal? yes; go take trap
  17373ftrapcc_ole_no:
  17374	rts					# no; do nothing
  17375
  17376#
  17377# unordered or greater than:
  17378#	     ___
  17379#	NANv(NvZ)
  17380#
  17381ftrapcc_ugt:
  17382	fbugt.w		ftrapcc_trap		# unordered or greater than? yes; go take trap
  17383ftrapcc_ugt_no:
  17384	rts					# no; do nothing
  17385
  17386#
  17387# ordered greater or less than:
  17388#	_____
  17389#	NANvZ
  17390#
  17391ftrapcc_ogl:
  17392	fbogl.w		ftrapcc_trap		# ordered greater or less than? yes; go take trap
  17393ftrapcc_ogl_no:
  17394	rts					# no; do nothing
  17395
  17396#
  17397# unordered or equal:
  17398#
  17399#	NANvZ
  17400#
  17401ftrapcc_ueq:
  17402	fbueq.w		ftrapcc_trap		# unordered or equal? yes; go take trap
  17403ftrapcc_ueq_no:
  17404	rts					# no; do nothing
  17405
  17406#
  17407# ordered:
  17408#	___
  17409#	NAN
  17410#
  17411ftrapcc_or:
  17412	fbor.w		ftrapcc_trap		# ordered? yes; go take trap
  17413ftrapcc_or_no:
  17414	rts					# no; do nothing
  17415
  17416#
  17417# unordered:
  17418#
  17419#	NAN
  17420#
  17421ftrapcc_un:
  17422	fbun.w		ftrapcc_trap		# unordered? yes; go take trap
  17423ftrapcc_un_no:
  17424	rts					# no; do nothing
  17425
  17426#######################################################################
  17427
  17428# the bsun exception bit was not set.
  17429# we will need to jump to the ftrapcc vector. the stack frame
  17430# is the same size as that of the fp unimp instruction. the
  17431# only difference is that the <ea> field should hold the PC
  17432# of the ftrapcc instruction and the vector offset field
  17433# should denote the ftrapcc trap.
  17434ftrapcc_trap:
  17435	mov.b		&ftrapcc_flg,SPCOND_FLG(%a6) # tell the caller to take the ftrapcc trap
  17436	rts					# the caller builds the trap frame
  17437
  17438# the emulation routine set bsun and BSUN was enabled. have to
  17439# fix stack and jump to the bsun handler.
  17440# let the caller of this routine shift the stack frame up to
  17441# eliminate the effective address field.
  17442ftrapcc_bsun:
  17443	mov.b		&fbsun_flg,SPCOND_FLG(%a6) # flag the enabled BSUN for the caller
  17444	rts					# caller reshapes the stack frame
  17445
  17446#########################################################################
  17447# fscc(): routine to emulate the fscc instruction			#
  17448#									#
  17449# XDEF **************************************************************** #
  17450#	_fscc()								#
  17451#									#
  17452# XREF **************************************************************** #
  17453#	store_dreg_b() - store result to data register file		#
  17454#	dec_areg() - decrement an areg for -(an) mode			#
  17455#	inc_areg() - increment an areg for (an)+ mode			#
  17456#	_dmem_write_byte() - store result to memory			#
  17457#									#
  17458# INPUT ***************************************************************	#
  17459#	none								#
  17460#									#
  17461# OUTPUT ************************************************************** #
  17462#	none								#
  17463#									#
  17464# ALGORITHM ***********************************************************	#
  17465#	This routine checks which conditional predicate is specified by	#
  17466# the stacked fscc instruction opcode and then branches to a routine	#
  17467# for that predicate. The corresponding fbcc instruction is then used	#
  17468# to see whether the condition (specified by the stacked FPSR) is true	#
  17469# or false.								#
  17470#	If a BSUN exception should be indicated, the BSUN and AIOP	#
  17471# bits are set in the stacked FPSR. If the BSUN exception is enabled,	#
  17472# the fbsun_flg is set in the SPCOND_FLG location on the stack. If an	#
  17473# enabled BSUN should not be flagged and the predicate is true, then	#
  17474# the result is stored to the data register file or memory		#
  17475#									#
  17476#########################################################################
  17477
  17478	global		_fscc
  17479_fscc:
  17480	mov.w		EXC_CMDREG(%a6),%d0	# d0 = predicate (used as table index)
  17481
  17482	clr.l		%d1			# clear scratch reg
  17483	mov.b		FPSR_CC(%a6),%d1	# fetch stacked fp ccodes into low byte
  17484	ror.l		&0x8,%d1		# rotate ccodes into the top byte
  17485	fmov.l		%d1,%fpsr		# load FPSR so the fbcc tests below see them
  17486
  17487	mov.w		(tbl_fscc.b,%pc,%d0.w*2),%d1 # d1 = handler offset from table
  17488	jmp		(tbl_fscc.b,%pc,%d1.w)	# jump to the predicate's fscc routine
  17489
  17490tbl_fscc:
  17491	short		fscc_f		-	tbl_fscc	# 00
  17492	short		fscc_eq		-	tbl_fscc	# 01
  17493	short		fscc_ogt	-	tbl_fscc	# 02
  17494	short		fscc_oge	-	tbl_fscc	# 03
  17495	short		fscc_olt	-	tbl_fscc	# 04
  17496	short		fscc_ole	-	tbl_fscc	# 05
  17497	short		fscc_ogl	-	tbl_fscc	# 06
  17498	short		fscc_or		-	tbl_fscc	# 07
  17499	short		fscc_un		-	tbl_fscc	# 08
  17500	short		fscc_ueq	-	tbl_fscc	# 09
  17501	short		fscc_ugt	-	tbl_fscc	# 10
  17502	short		fscc_uge	-	tbl_fscc	# 11
  17503	short		fscc_ult	-	tbl_fscc	# 12
  17504	short		fscc_ule	-	tbl_fscc	# 13
  17505	short		fscc_neq	-	tbl_fscc	# 14
  17506	short		fscc_t		-	tbl_fscc	# 15
  17507	short		fscc_sf		-	tbl_fscc	# 16
  17508	short		fscc_seq	-	tbl_fscc	# 17
  17509	short		fscc_gt		-	tbl_fscc	# 18
  17510	short		fscc_ge		-	tbl_fscc	# 19
  17511	short		fscc_lt		-	tbl_fscc	# 20
  17512	short		fscc_le		-	tbl_fscc	# 21
  17513	short		fscc_gl		-	tbl_fscc	# 22
  17514	short		fscc_gle	-	tbl_fscc	# 23
  17515	short		fscc_ngle	-	tbl_fscc	# 24
  17516	short		fscc_ngl	-	tbl_fscc	# 25
  17517	short		fscc_nle	-	tbl_fscc	# 26
  17518	short		fscc_nlt	-	tbl_fscc	# 27
  17519	short		fscc_nge	-	tbl_fscc	# 28
  17520	short		fscc_ngt	-	tbl_fscc	# 29
  17521	short		fscc_sneq	-	tbl_fscc	# 30
  17522	short		fscc_st		-	tbl_fscc	# 31
  17523
  17524#########################################################################
  17525#									#
  17526# IEEE Nonaware tests							#
  17527#									#
  17528# For the IEEE nonaware tests, we set the result based on the		#
  17529# floating point condition codes. In addition, we check to see		#
  17530# if the NAN bit is set, in which case BSUN and AIOP will be set.	#
  17531#									#
  17532# The cases EQ and NE are shared by the Aware and Nonaware groups	#
  17533# and are incapable of setting the BSUN exception bit.			#
  17534#									#
  17535# Typically, only one of the two possible branch directions could	#
  17536# have the NAN bit set.							#
  17537#									#
  17538#########################################################################
  17539
  17540#
  17541# equal:
  17542#
  17543#	Z
  17544#
  17545fscc_eq:
  17546	fbeq.w		fscc_eq_yes		# equal?
  17547fscc_eq_no:
  17548	clr.b		%d0			# no; result byte = false (0x00)
  17549	bra.w		fscc_done		# go finish
  17550fscc_eq_yes:
  17551	st		%d0			# yes; result byte = true (0xff)
  17552	bra.w		fscc_done		# go finish
  17553
  17554#
  17555# not equal:
  17556#	_
  17557#	Z
  17558#
  17559fscc_neq:
  17560	fbneq.w		fscc_neq_yes		# not equal?
  17561fscc_neq_no:
  17562	clr.b		%d0			# no; result byte = false (0x00)
  17563	bra.w		fscc_done		# go finish
  17564fscc_neq_yes:
  17565	st		%d0			# yes; result byte = true (0xff)
  17566	bra.w		fscc_done		# go finish
  17567
  17568#
  17569# greater than:
  17570#	_______
  17571#	NANvZvN
  17572#
  17573fscc_gt:
  17574	fbgt.w		fscc_gt_yes		# greater than?
  17575fscc_gt_no:
  17576	clr.b		%d0			# no; result byte = false (0x00)
  17577	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
  17578	beq.w		fscc_done		# no; go finish
  17579	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
  17580	bra.w		fscc_chk_bsun		# continue at the BSUN check
  17581fscc_gt_yes:
  17582	st		%d0			# yes; result byte = true (0xff)
  17583	bra.w		fscc_done		# go finish
  17584
  17585#
  17586# not greater than:
  17587#
  17588#	NANvZvN
  17589#
  17590fscc_ngt:
  17591	fbngt.w		fscc_ngt_yes		# not greater than?
  17592fscc_ngt_no:
  17593	clr.b		%d0			# no; result byte = false (0x00)
  17594	bra.w		fscc_done		# go finish
  17595fscc_ngt_yes:
  17596	st		%d0			# yes; result byte = true (0xff)
  17597	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
  17598	beq.w		fscc_done		# no; go finish
  17599	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
  17600	bra.w		fscc_chk_bsun		# continue at the BSUN check
  17601
  17602#
  17603# greater than or equal:
  17604#	   _____
  17605#	Zv(NANvN)
  17606#
  17607fscc_ge:
  17608	fbge.w		fscc_ge_yes		# greater than or equal?
  17609fscc_ge_no:
  17610	clr.b		%d0			# no; result byte = false (0x00)
  17611	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
  17612	beq.w		fscc_done		# no; go finish
  17613	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
  17614	bra.w		fscc_chk_bsun		# continue at the BSUN check
  17615fscc_ge_yes:
  17616	st		%d0			# yes; result byte = true (0xff)
  17617	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
  17618	beq.w		fscc_done		# no; go finish
  17619	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
  17620	bra.w		fscc_chk_bsun		# continue at the BSUN check
  17621
  17622#
  17623# not (greater than or equal):
  17624#	       _
  17625#	NANv(N^Z)
  17626#
  17627fscc_nge:
  17628	fbnge.w		fscc_nge_yes		# not (greater than or equal)?
  17629fscc_nge_no:
  17630	clr.b		%d0			# no; result byte = false (0x00)
  17631	bra.w		fscc_done		# go finish
  17632fscc_nge_yes:
  17633	st		%d0			# yes; result byte = true (0xff)
  17634	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
  17635	beq.w		fscc_done		# no; go finish
  17636	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
  17637	bra.w		fscc_chk_bsun		# continue at the BSUN check
  17638
  17639#
  17640# less than:
  17641#	   _____
  17642#	N^(NANvZ)
  17643#
  17644fscc_lt:
  17645	fblt.w		fscc_lt_yes		# less than?
  17646fscc_lt_no:
  17647	clr.b		%d0			# no; result byte = false (0x00)
  17648	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
  17649	beq.w		fscc_done		# no; go finish
  17650	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
  17651	bra.w		fscc_chk_bsun		# continue at the BSUN check
  17652fscc_lt_yes:
  17653	st		%d0			# yes; result byte = true (0xff)
  17654	bra.w		fscc_done		# go finish
  17655
  17656# fscc_nlt: select the fscc result byte in d0 for the NLT predicate.
  17657# not less than:
  17658#	       _
  17659#	NANv(ZvN)
  17660# d0 = 0xff if true, 0x00 if false; a true result with NAN set also flags BSUN+AIOP.
  17661fscc_nlt:
  17662	fbnlt.w		fscc_nlt_yes		# not less than?
  17663fscc_nlt_no:
  17664	clr.b		%d0			# set false (false implies NAN clear; no BSUN check)
  17665	bra.w		fscc_done		# go finish
  17666fscc_nlt_yes:
  17667	st		%d0			# set true
  17668	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
  17669	beq.w		fscc_done		# no;go finish
  17670	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc + AIOP bits
  17671	bra.w		fscc_chk_bsun		# go check if BSUN is enabled
  17672
  17673# fscc_le: select the fscc result byte in d0 for the LE predicate.
  17674# less than or equal:
  17675#	     ___
  17676#	Zv(N^NAN)
  17677# d0 = 0xff if true, 0x00 if false; either outcome flags BSUN+AIOP if NAN is set.
  17678fscc_le:
  17679	fble.w		fscc_le_yes		# less than or equal?
  17680fscc_le_no:
  17681	clr.b		%d0			# set false
  17682	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
  17683	beq.w		fscc_done		# no;go finish
  17684	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc + AIOP bits
  17685	bra.w		fscc_chk_bsun		# go check if BSUN is enabled
  17686fscc_le_yes:
  17687	st		%d0			# set true
  17688	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc? (possible only if Z is set too)
  17689	beq.w		fscc_done		# no;go finish
  17690	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc + AIOP bits
  17691	bra.w		fscc_chk_bsun		# go check if BSUN is enabled
  17692
  17693# fscc_nle: select the fscc result byte in d0 for the NLE predicate.
  17694# not (less than or equal):
  17695#	     ___
  17696#	NANv(NvZ)
  17697# d0 = 0xff if true, 0x00 if false; a true result with NAN set also flags BSUN+AIOP.
  17698fscc_nle:
  17699	fbnle.w		fscc_nle_yes		# not (less than or equal)?
  17700fscc_nle_no:
  17701	clr.b		%d0			# set false (false implies NAN clear; no BSUN check)
  17702	bra.w		fscc_done		# go finish
  17703fscc_nle_yes:
  17704	st		%d0			# set true
  17705	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
  17706	beq.w		fscc_done		# no;go finish
  17707	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc + AIOP bits
  17708	bra.w		fscc_chk_bsun		# go check if BSUN is enabled
  17709
  17710# fscc_gl: select the fscc result byte in d0 for the GL predicate.
  17711# greater or less than:
  17712#	_____
  17713#	NANvZ
  17714# d0 = 0xff if true, 0x00 if false; a false result with NAN set also flags BSUN+AIOP.
  17715fscc_gl:
  17716	fbgl.w		fscc_gl_yes		# greater or less than?
  17717fscc_gl_no:
  17718	clr.b		%d0			# set false
  17719	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
  17720	beq.w		fscc_done		# no;go finish
  17721	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc + AIOP bits
  17722	bra.w		fscc_chk_bsun		# go check if BSUN is enabled
  17723fscc_gl_yes:
  17724	st		%d0			# set true (true implies NAN clear; no BSUN check)
  17725	bra.w		fscc_done		# go finish
  17726
  17727# fscc_ngl: select the fscc result byte in d0 for the NGL predicate.
  17728# not (greater or less than):
  17729#
  17730#	NANvZ
  17731# d0 = 0xff if true, 0x00 if false; a true result with NAN set also flags BSUN+AIOP.
  17732fscc_ngl:
  17733	fbngl.w		fscc_ngl_yes		# not (greater or less than)?
  17734fscc_ngl_no:
  17735	clr.b		%d0			# set false (false implies NAN clear; no BSUN check)
  17736	bra.w		fscc_done		# go finish
  17737fscc_ngl_yes:
  17738	st		%d0			# set true
  17739	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
  17740	beq.w		fscc_done		# no;go finish
  17741	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc + AIOP bits
  17742	bra.w		fscc_chk_bsun		# go check if BSUN is enabled
  17743
  17744# fscc_gle: select the fscc result byte in d0 for the GLE predicate.
  17745# greater, less, or equal:
  17746#	___
  17747#	NAN
  17748# d0 = 0xff if true, 0x00 if false; a false result always flags BSUN+AIOP.
  17749fscc_gle:
  17750	fbgle.w		fscc_gle_yes		# greater, less, or equal?
  17751fscc_gle_no:
  17752	clr.b		%d0			# set false (false means NAN is set; no btst needed)
  17753	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc + AIOP bits
  17754	bra.w		fscc_chk_bsun		# go check if BSUN is enabled
  17755fscc_gle_yes:
  17756	st		%d0			# set true (true implies NAN clear; no BSUN check)
  17757	bra.w		fscc_done		# go finish
  17758
  17759# fscc_ngle: select the fscc result byte in d0 for the NGLE predicate.
  17760# not (greater, less, or equal):
  17761#
  17762#	NAN
  17763# d0 = 0xff if true, 0x00 if false; a true result always flags BSUN+AIOP.
  17764fscc_ngle:
  17765	fbngle.w		fscc_ngle_yes	# not (greater, less, or equal)?
  17766fscc_ngle_no:
  17767	clr.b		%d0			# set false (false implies NAN clear; no BSUN check)
  17768	bra.w		fscc_done		# go finish
  17769fscc_ngle_yes:
  17770	st		%d0			# set true (true means NAN is set; no btst needed)
  17771	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc + AIOP bits
  17772	bra.w		fscc_chk_bsun		# go check if BSUN is enabled
  17773
  17774#########################################################################
  17775#									#
  17776# Miscellaneous tests							#
  17777#									#
  17778# The plain f/t tests just return a constant and never set BSUN. The	#
  17779# signalling tests (sf, st, seq, sneq) also flag BSUN whenever NAN is	#
  17780# set in the condition codes, regardless of the result.			#
  17781#									#
  17782#########################################################################
  17783
  17784# fscc_f: the F predicate; the result byte is always false and BSUN is never set.
  17785# false:
  17786#
  17787#	False
  17788#
  17789fscc_f:
  17790	clr.b		%d0			# set false (d0 = 0x00)
  17791	bra.w		fscc_done		# go finish
  17792
  17793# fscc_t: the T predicate; the result byte is always true and BSUN is never set.
  17794# true:
  17795#
  17796#	True
  17797#
  17798fscc_t:
  17799	st		%d0			# set true (d0 = 0xff)
  17800	bra.w		fscc_done		# go finish
  17801
  17802# fscc_sf: the SF predicate; the result byte is always false.
  17803# signalling false:
  17804#
  17805#	False
  17806# BSUN+AIOP are flagged if NAN is set in the condition codes.
  17807fscc_sf:
  17808	clr.b		%d0			# set false
  17809	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
  17810	beq.w		fscc_done		# no;go finish
  17811	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc + AIOP bits
  17812	bra.w		fscc_chk_bsun		# go check if BSUN is enabled
  17813
  17814# fscc_st: the ST predicate; the result byte is always true.
  17815# signalling true:
  17816#
  17817#	True
  17818# BSUN+AIOP are flagged if NAN is set in the condition codes.
  17819fscc_st:
  17820	st		%d0			# set true
  17821	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
  17822	beq.w		fscc_done		# no;go finish
  17823	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc + AIOP bits
  17824	bra.w		fscc_chk_bsun		# go check if BSUN is enabled
  17825
  17826# fscc_seq: select the fscc result byte in d0 for the SEQ predicate.
  17827# signalling equal:
  17828#
  17829#	Z
  17830# d0 = 0xff if true, 0x00 if false; either outcome flags BSUN+AIOP if NAN is set.
  17831fscc_seq:
  17832	fbseq.w		fscc_seq_yes		# signalling equal?
  17833fscc_seq_no:
  17834	clr.b		%d0			# set false
  17835	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
  17836	beq.w		fscc_done		# no;go finish
  17837	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc + AIOP bits
  17838	bra.w		fscc_chk_bsun		# go check if BSUN is enabled
  17839fscc_seq_yes:
  17840	st		%d0			# set true
  17841	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
  17842	beq.w		fscc_done		# no;go finish
  17843	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc + AIOP bits
  17844	bra.w		fscc_chk_bsun		# go check if BSUN is enabled
  17845
  17846# fscc_sneq: select the fscc result byte in d0 for the SNEQ predicate.
  17847# signalling not equal:
  17848#	_
  17849#	Z
  17850# d0 = 0xff if true, 0x00 if false; either outcome flags BSUN+AIOP if NAN is set.
  17851fscc_sneq:
  17852	fbsneq.w	fscc_sneq_yes		# signalling not equal?
  17853fscc_sneq_no:
  17854	clr.b		%d0			# set false
  17855	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
  17856	beq.w		fscc_done		# no;go finish
  17857	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc + AIOP bits
  17858	bra.w		fscc_chk_bsun		# go check if BSUN is enabled
  17859fscc_sneq_yes:
  17860	st		%d0			# set true
  17861	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
  17862	beq.w		fscc_done		# no;go finish
  17863	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc + AIOP bits
  17864	bra.w		fscc_chk_bsun		# go check if BSUN is enabled
  17865
  17866#########################################################################
  17867#									#
  17868# IEEE Aware tests							#
  17869#									#
  17870# For the IEEE aware tests, we only have to set the result based on the	#
  17871# floating point condition codes. The BSUN exception will not be	#
  17872# set for any of these tests.						#
  17873#									#
  17874#########################################################################
  17875
  17876# fscc_ogt: select the fscc result byte in d0 for the OGT predicate (BSUN untouched).
  17877# ordered greater than:
  17878#	_______
  17879#	NANvZvN
  17880#
  17881fscc_ogt:
  17882	fbogt.w		fscc_ogt_yes		# ordered greater than?
  17883fscc_ogt_no:
  17884	clr.b		%d0			# set false (d0 = 0x00)
  17885	bra.w		fscc_done		# go finish
  17886fscc_ogt_yes:
  17887	st		%d0			# set true (d0 = 0xff)
  17888	bra.w		fscc_done		# go finish
  17889
  17890# fscc_ule: select the fscc result byte in d0 for the ULE predicate (BSUN untouched).
  17891# unordered or less or equal:
  17892#
  17893#	NANvZvN
  17894# (no term is negated: this is the complement of "ordered greater than".)
  17895fscc_ule:
  17896	fbule.w		fscc_ule_yes		# unordered or less or equal?
  17897fscc_ule_no:
  17898	clr.b		%d0			# set false (d0 = 0x00)
  17899	bra.w		fscc_done		# go finish
  17900fscc_ule_yes:
  17901	st		%d0			# set true (d0 = 0xff)
  17902	bra.w		fscc_done		# go finish
  17903
  17904# fscc_oge: select the fscc result byte in d0 for the OGE predicate (BSUN untouched).
  17905# ordered greater than or equal:
  17906#	   _____
  17907#	Zv(NANvN)
  17908#
  17909fscc_oge:
  17910	fboge.w		fscc_oge_yes		# ordered greater than or equal?
  17911fscc_oge_no:
  17912	clr.b		%d0			# set false (d0 = 0x00)
  17913	bra.w		fscc_done		# go finish
  17914fscc_oge_yes:
  17915	st		%d0			# set true (d0 = 0xff)
  17916	bra.w		fscc_done		# go finish
  17917
  17918# fscc_ult: select the fscc result byte in d0 for the ULT predicate (BSUN untouched).
  17919# unordered or less than:
  17920#	       _
  17921#	NANv(N^Z)
  17922#
  17923fscc_ult:
  17924	fbult.w		fscc_ult_yes		# unordered or less than?
  17925fscc_ult_no:
  17926	clr.b		%d0			# set false (d0 = 0x00)
  17927	bra.w		fscc_done		# go finish
  17928fscc_ult_yes:
  17929	st		%d0			# set true (d0 = 0xff)
  17930	bra.w		fscc_done		# go finish
  17931
  17932# fscc_olt: select the fscc result byte in d0 for the OLT predicate (BSUN untouched).
  17933# ordered less than:
  17934#	   _____
  17935#	N^(NANvZ)
  17936#
  17937fscc_olt:
  17938	fbolt.w		fscc_olt_yes		# ordered less than?
  17939fscc_olt_no:
  17940	clr.b		%d0			# set false (d0 = 0x00)
  17941	bra.w		fscc_done		# go finish
  17942fscc_olt_yes:
  17943	st		%d0			# set true (d0 = 0xff)
  17944	bra.w		fscc_done		# go finish
  17945
  17946# fscc_uge: select the fscc result byte in d0 for the UGE predicate (BSUN untouched).
  17947# unordered or greater or equal:
  17948#	      _
  17949#	NANvZvN
  17950#
  17951fscc_uge:
  17952	fbuge.w		fscc_uge_yes		# unordered or greater or equal?
  17953fscc_uge_no:
  17954	clr.b		%d0			# set false (d0 = 0x00)
  17955	bra.w		fscc_done		# go finish
  17956fscc_uge_yes:
  17957	st		%d0			# set true (d0 = 0xff)
  17958	bra.w		fscc_done		# go finish
  17959
  17960# fscc_ole: select the fscc result byte in d0 for the OLE predicate (BSUN untouched).
  17961# ordered less than or equal:
  17962#	     ___
  17963#	Zv(N^NAN)
  17964#
  17965fscc_ole:
  17966	fbole.w		fscc_ole_yes		# ordered less than or equal?
  17967fscc_ole_no:
  17968	clr.b		%d0			# set false (d0 = 0x00)
  17969	bra.w		fscc_done		# go finish
  17970fscc_ole_yes:
  17971	st		%d0			# set true (d0 = 0xff)
  17972	bra.w		fscc_done		# go finish
  17973
  17974# fscc_ugt: select the fscc result byte in d0 for the UGT predicate (BSUN untouched).
  17975# unordered or greater than:
  17976#	     ___
  17977#	NANv(NvZ)
  17978#
  17979fscc_ugt:
  17980	fbugt.w		fscc_ugt_yes		# unordered or greater than?
  17981fscc_ugt_no:
  17982	clr.b		%d0			# set false (d0 = 0x00)
  17983	bra.w		fscc_done		# go finish
  17984fscc_ugt_yes:
  17985	st		%d0			# set true (d0 = 0xff)
  17986	bra.w		fscc_done		# go finish
  17987
  17988# fscc_ogl: select the fscc result byte in d0 for the OGL predicate (BSUN untouched).
  17989# ordered greater or less than:
  17990#	_____
  17991#	NANvZ
  17992#
  17993fscc_ogl:
  17994	fbogl.w		fscc_ogl_yes		# ordered greater or less than?
  17995fscc_ogl_no:
  17996	clr.b		%d0			# set false (d0 = 0x00)
  17997	bra.w		fscc_done		# go finish
  17998fscc_ogl_yes:
  17999	st		%d0			# set true (d0 = 0xff)
  18000	bra.w		fscc_done		# go finish
  18001
  18002# fscc_ueq: select the fscc result byte in d0 for the UEQ predicate (BSUN untouched).
  18003# unordered or equal:
  18004#
  18005#	NANvZ
  18006#
  18007fscc_ueq:
  18008	fbueq.w		fscc_ueq_yes		# unordered or equal?
  18009fscc_ueq_no:
  18010	clr.b		%d0			# set false (d0 = 0x00)
  18011	bra.w		fscc_done		# go finish
  18012fscc_ueq_yes:
  18013	st		%d0			# set true (d0 = 0xff)
  18014	bra.w		fscc_done		# go finish
  18015
  18016# fscc_or: select the fscc result byte in d0 for the OR (ordered) predicate (BSUN untouched).
  18017# ordered:
  18018#	___
  18019#	NAN
  18020#
  18021fscc_or:
  18022	fbor.w		fscc_or_yes		# ordered?
  18023fscc_or_no:
  18024	clr.b		%d0			# set false (d0 = 0x00)
  18025	bra.w		fscc_done		# go finish
  18026fscc_or_yes:
  18027	st		%d0			# set true (d0 = 0xff)
  18028	bra.w		fscc_done		# go finish
  18029
  18030# fscc_un: select the fscc result byte in d0 for the UN (unordered) predicate (BSUN untouched).
  18031# unordered:
  18032#
  18033#	NAN
  18034#
  18035fscc_un:
  18036	fbun.w		fscc_un_yes		# unordered?
  18037fscc_un_no:
  18038	clr.b		%d0			# set false (d0 = 0x00)
  18039	bra.w		fscc_done		# go finish
  18040fscc_un_yes:
  18041	st		%d0			# set true (d0 = 0xff)
  18042	bra.w		fscc_done		# go finish
  18043
  18044#######################################################################
  18045
  18046# fscc_chk_bsun: entered after a predicate routine has set the BSUN bit in USER_FPSR.
  18047# the bsun exception bit was set. now, check to see if BSUN
  18048# is enabled. if so, don't store the result; go flag the bsun special
  18049# condition (fscc_bsun) instead. if not, fall through into fscc_done.
  18050#
  18051fscc_chk_bsun:
  18052	btst		&bsun_bit,FPCR_ENABLE(%a6) # is BSUN enabled?
  18053	bne.w		fscc_bsun		# yes; skip the store
  18054
  18055# fscc_done: store the selected result byte (in d0 on entry).
  18056# either bsun was not set, or it was set but BSUN is not enabled.
  18057# the result has been selected.
  18058# now, check to see if the result is to be stored in the data register
  18059# file or in memory.
  18060#
  18061fscc_done:
  18062	mov.l		%d0,%a0			# save result for a moment
  18063
  18064	mov.b		1+EXC_OPWORD(%a6),%d1	# fetch lo opword
  18065	mov.l		%d1,%d0			# make a copy
  18066	andi.b		&0x38,%d1		# extract dst <ea> mode field (bits 5-3)
  18067
  18068	bne.b		fscc_mem_op		# mode != 0: it's a memory operation
  18069# mode == 0: destination is data register Dn.
  18070	mov.l		%d0,%d1
  18071	andi.w		&0x7,%d1		# pass index in d1
  18072	mov.l		%a0,%d0			# pass result in d0
  18073	bsr.l		store_dreg_b		# save result in regfile
  18074	rts
  18075
  18076# fscc_mem_op: store the result byte (held in a0) to memory at the stacked <ea>.
  18077# the stacked <ea> is correct with the exception of:
  18078#	-> Dn : <ea> is garbage
  18079#
  18080# if the addressing mode is post-increment or pre-decrement,
  18081# then the address registers have not been updated.
  18082# on entry d1 = lo opword & 0x38 (the <ea> mode field), as set by fscc_done.
  18083fscc_mem_op:
  18084	cmpi.b		%d1,&0x18		# is <ea> (An)+ ?
  18085	beq.b		fscc_mem_inc		# yes
  18086	cmpi.b		%d1,&0x20		# is <ea> -(An) ?
  18087	beq.b		fscc_mem_dec		# yes
  18088# any other memory mode: no address register update is needed.
  18089	mov.l		%a0,%d0			# pass result in d0
  18090	mov.l		EXC_EA(%a6),%a0		# fetch <ea>
  18091	bsr.l		_dmem_write_byte	# write result byte
  18092
  18093	tst.l		%d1			# did dstore fail? (d1 != 0 after the call)
  18094	bne.w		fscc_err		# yes
  18095
  18096	rts
  18097
  18098# addressing mode is post-increment. write the result byte. if the write
  18099# fails then don't update the address register. if write passes then
  18100# call inc_areg() to update the address register.
  18101fscc_mem_inc:
  18102	mov.l		%a0,%d0			# pass result in d0
  18103	mov.l		EXC_EA(%a6),%a0		# fetch <ea>
  18104	bsr.l		_dmem_write_byte	# write result byte
  18105
  18106	tst.l		%d1			# did dstore fail? (d1 != 0 after the call)
  18107	bne.w		fscc_err		# yes
  18108
  18109	mov.b		0x1+EXC_OPWORD(%a6),%d1	# fetch opword
  18110	andi.w		&0x7,%d1		# pass index in d1
  18111	movq.l		&0x1,%d0		# pass amt to inc by
  18112	bsr.l		inc_areg		# increment address register
  18113
  18114	rts
  18115
  18116# addressing mode is pre-decrement. write the result byte. if the write
  18117# fails then don't update the address register. if the write passes then
  18118# call dec_areg() to update the address register.
  18119fscc_mem_dec:
  18120	mov.l		%a0,%d0			# pass result in d0
  18121	mov.l		EXC_EA(%a6),%a0		# fetch <ea>
  18122	bsr.l		_dmem_write_byte	# write result byte
  18123
  18124	tst.l		%d1			# did dstore fail? (d1 != 0 after the call)
  18125	bne.w		fscc_err		# yes
  18126
  18127	mov.b		0x1+EXC_OPWORD(%a6),%d1	# fetch opword
  18128	andi.w		&0x7,%d1		# pass index in d1
  18129	movq.l		&0x1,%d0		# pass amt to dec by
  18130	bsr.l		dec_areg		# decrement address register
  18131
  18132	rts
  18133
  18134# the emulation routine set bsun and BSUN was enabled. the result is
  18135# not stored; this routine only records fbsun_flg in SPCOND_FLG.
  18136# let the caller of this routine shift the stack frame up to
  18137# eliminate the effective address field.
  18138fscc_bsun:
  18139	mov.b		&fbsun_flg,SPCOND_FLG(%a6) # flag the bsun special condition
  18140	rts
  18141
  18142# the byte write to memory has failed. store 0x00a1 in the frame's EXC_VOFF
  18143# slot and exit through facc_finish. NOTE(review): this comment used to name funimp_dacc(), which is not referenced here -- confirm.
  18144fscc_err:
  18145	mov.w		&0x00a1,EXC_VOFF(%a6)
  18146	bra.l		facc_finish
  18147
  18148#########################################################################
  18149# XDEF ****************************************************************	#
  18150#	fmovm_dynamic(): emulate "fmovm" dynamic instruction		#
  18151#									#
  18152# XREF ****************************************************************	#
  18153#	fetch_dreg() - fetch data register				#
  18154#	{i,d,}mem_read() - fetch data from memory			#
  18155#	_mem_write() - write data to memory				#
  18156#	iea_iacc() - instruction memory access error occurred		#
  18157#	iea_dacc() - data memory access error occurred			#
  18158#	restore() - restore An index regs if access error occurred	#
  18159#									#
  18160# INPUT ***************************************************************	#
  18161#	None								#
  18162#									#
  18163# OUTPUT **************************************************************	#
  18164#	If instr is "fmovm Dn,-(A7)" from supervisor mode,		#
  18165#		d0 = size of dump					#
  18166#		d1 = Dn							#
  18167#	Else if instruction access error,				#
  18168#		d0 = FSLW						#
  18169#	Else if data access error,					#
  18170#		d0 = FSLW						#
  18171#		a0 = address of fault					#
  18172#	Else								#
  18173#		none.							#
  18174#									#
  18175# ALGORITHM ***********************************************************	#
  18176#	The effective address must be calculated since this is entered	#
  18177# from an "Unimplemented Effective Address" exception handler. So, we	#
  18178# have our own fcalc_ea() routine here. If an access error is flagged	#
  18179# by a _{i,d,}mem_read() call, we must exit through the special		#
  18180# handler.								#
  18181#	The data register is determined and its value loaded to get the	#
  18182# string of FP registers affected. This value is used as an index into	#
  18183# a lookup table such that we can determine the number of bytes		#
  18184# involved.								#
  18185#	If the instruction is "fmovm.x <ea>,Dn", a _mem_read() is used	#
  18186# to read in all FP values. Again, _mem_read() may fail and require a	#
  18187# special exit.								#
  18188#	If the instruction is "fmovm.x DN,<ea>", a _mem_write() is used	#
  18189# to write all FP values. _mem_write() may also fail.			#
  18190#	If the instruction is "fmovm.x DN,-(a7)" from supervisor mode,	#
  18191# then we return the size of the dump and the string to the caller	#
  18192# so that the move can occur outside of this routine. This special	#
  18193# case is required so that moves to the system stack are handled	#
  18194# correctly.								#
  18195#									#
  18196# DYNAMIC:								#
  18197#	fmovm.x	dn, <ea>						#
  18198#	fmovm.x	<ea>, dn						#
  18199#									#
  18200#	      <WORD 1>		      <WORD2>				#
  18201#	1111 0010 00 |<ea>|	11@& 1000 0$$$ 0000			#
  18202#									#
  18203#	& = (0): predecrement addressing mode				#
  18204#	    (1): postincrement or control addressing mode		#
  18205#	@ = (0): move listed regs from memory to the FPU		#
  18206#	    (1): move listed regs from the FPU to memory		#
  18207#	$$$    : index of data register holding reg select mask		#
  18208#									#
  18209# NOTES:								#
  18210#	If the data register holds a zero, then the			#
  18211#	instruction is a nop.						#
  18212#									#
  18213#########################################################################
  18214
  18215	global		fmovm_dynamic
  18216fmovm_dynamic:
  18217
  18218# extract the data register in which the bit string resides...
  18219	mov.b		1+EXC_EXTWORD(%a6),%d1	# fetch extword
  18220	andi.w		&0x70,%d1		# extract reg bits
  18221	lsr.b		&0x4,%d1		# shift into lo bits
  18222
  18223# fetch the bit string into d0...
  18224	bsr.l		fetch_dreg		# fetch reg string
  18225
  18226	andi.l		&0x000000ff,%d0		# keep only lo byte
  18227
  18228	mov.l		%d0,-(%sp)		# save strg
  18229	mov.b		(tbl_fmovm_size.w,%pc,%d0),%d0 # d0 = # of bytes to move for this strg
  18230	mov.l		%d0,-(%sp)		# save size
  18231	bsr.l		fmovm_calc_ea		# calculate <ea>
  18232	mov.l		(%sp)+,%d0		# restore size
  18233	mov.l		(%sp)+,%d1		# restore strg
  18234
  18235# if the bit string is a zero, then the operation is a no-op
  18236# but, make sure that we've calculated ea and advanced the opword pointer
  18237	beq.w		fmovm_data_done		# strg == 0 (cc set by the mov.l into d1)
  18238
  18239# separate move ins from move outs...
  18240	btst		&0x5,EXC_EXTWORD(%a6)	# is it a move in or out?
  18241	beq.w		fmovm_data_in		# bit clear; it's a move in
  18242
  18243#############
  18244# MOVE OUT: #
  18245#############
  18246fmovm_data_out:
  18247	btst		&0x4,EXC_EXTWORD(%a6)	# control or predecrement?
  18248	bne.w		fmovm_out_ctrl		# control
  18249
  18250############################
  18251fmovm_out_predec:
  18252# for predecrement mode, the bit string is the opposite of both control
  18253# operations and postincrement mode. (bit7 = FP7 ... bit0 = FP0)
  18254# here, we convert it to be just like the others...
  18255	mov.b		(tbl_fmovm_convert.w,%pc,%d1.w*1),%d1
  18256
  18257	btst		&0x5,EXC_SR(%a6)	# user or supervisor mode?
  18258	beq.b		fmovm_out_ctrl		# user
  18259
  18260fmovm_out_predec_s:
  18261	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
  18262	bne.b		fmovm_out_ctrl		# no; do the move here
  18263
  18264# the operation was unfortunately an: fmovm.x dn,-(sp)
  18265# called from supervisor mode.
  18266# we're also passing "size" and "strg" back to the calling routine
  18267	rts					# d0 = size, d1 = converted strg
  18268
  18269############################
  18270fmovm_out_ctrl:
  18271	mov.l		%a0,%a1			# move <ea> to a1
  18272# build the register image in a temporary buffer on the stack.
  18273	sub.l		%d0,%sp			# subtract size of dump
  18274	lea		(%sp),%a0		# a0 = start of the buffer
  18275# bit 7 of d1 selects FP0; each lsl.b below moves the next register's bit into bit 7.
  18276	tst.b		%d1			# should FP0 be moved?
  18277	bpl.b		fmovm_out_ctrl_fp1	# no
  18278
  18279	mov.l		0x0+EXC_FP0(%a6),(%a0)+	# yes; copy from EXC_FP0 in the frame
  18280	mov.l		0x4+EXC_FP0(%a6),(%a0)+
  18281	mov.l		0x8+EXC_FP0(%a6),(%a0)+
  18282
  18283fmovm_out_ctrl_fp1:
  18284	lsl.b		&0x1,%d1		# should FP1 be moved?
  18285	bpl.b		fmovm_out_ctrl_fp2	# no
  18286
  18287	mov.l		0x0+EXC_FP1(%a6),(%a0)+	# yes; copy from EXC_FP1 in the frame
  18288	mov.l		0x4+EXC_FP1(%a6),(%a0)+
  18289	mov.l		0x8+EXC_FP1(%a6),(%a0)+
  18290
  18291fmovm_out_ctrl_fp2:
  18292	lsl.b		&0x1,%d1		# should FP2 be moved?
  18293	bpl.b		fmovm_out_ctrl_fp3	# no
  18294
  18295	fmovm.x		&0x20,(%a0)		# yes; store it with fmovm.x
  18296	add.l		&0xc,%a0		# advance past the 12 bytes written
  18297
  18298fmovm_out_ctrl_fp3:
  18299	lsl.b		&0x1,%d1		# should FP3 be moved?
  18300	bpl.b		fmovm_out_ctrl_fp4	# no
  18301
  18302	fmovm.x		&0x10,(%a0)		# yes
  18303	add.l		&0xc,%a0
  18304
  18305fmovm_out_ctrl_fp4:
  18306	lsl.b		&0x1,%d1		# should FP4 be moved?
  18307	bpl.b		fmovm_out_ctrl_fp5	# no
  18308
  18309	fmovm.x		&0x08,(%a0)		# yes
  18310	add.l		&0xc,%a0
  18311
  18312fmovm_out_ctrl_fp5:
  18313	lsl.b		&0x1,%d1		# should FP5 be moved?
  18314	bpl.b		fmovm_out_ctrl_fp6	# no
  18315
  18316	fmovm.x		&0x04,(%a0)		# yes
  18317	add.l		&0xc,%a0
  18318
  18319fmovm_out_ctrl_fp6:
  18320	lsl.b		&0x1,%d1		# should FP6 be moved?
  18321	bpl.b		fmovm_out_ctrl_fp7	# no
  18322
  18323	fmovm.x		&0x02,(%a0)		# yes
  18324	add.l		&0xc,%a0
  18325
  18326fmovm_out_ctrl_fp7:
  18327	lsl.b		&0x1,%d1		# should FP7 be moved?
  18328	bpl.b		fmovm_out_ctrl_done	# no
  18329
  18330	fmovm.x		&0x01,(%a0)		# yes
  18331	add.l		&0xc,%a0
  18332
  18333fmovm_out_ctrl_done:
  18334	mov.l		%a1,L_SCR1(%a6)		# save dst <ea> in L_SCR1
  18335
  18336	lea		(%sp),%a0		# pass: supervisor src
  18337	mov.l		%d0,-(%sp)		# save size
  18338	bsr.l		_dmem_write		# copy data to user mem
  18339
  18340	mov.l		(%sp)+,%d0		# restore size
  18341	add.l		%d0,%sp			# clear fpreg data from stack
  18342
  18343	tst.l		%d1			# did dstore err?
  18344	bne.w		fmovm_out_err		# yes
  18345
  18346	rts
  18347
  18348############
  18349# MOVE IN: #
  18350############
  18351fmovm_data_in:
  18352	mov.l		%a0,L_SCR1(%a6)		# save src <ea> in L_SCR1
  18353
  18354	sub.l		%d0,%sp			# make room for fpregs
  18355	lea		(%sp),%a1		# a1 = start of the stack buffer
  18356
  18357	mov.l		%d1,-(%sp)		# save bit string for later
  18358	mov.l		%d0,-(%sp)		# save # of bytes
  18359
  18360	bsr.l		_dmem_read		# copy data from user mem
  18361
  18362	mov.l		(%sp)+,%d0		# retrieve # of bytes
  18363
  18364	tst.l		%d1			# did dfetch fail?
  18365	bne.w		fmovm_in_err		# yes (bit string and buffer are still on the stack)
  18366
  18367	mov.l		(%sp)+,%d1		# load bit string
  18368
  18369	lea		(%sp),%a0		# addr of stack
  18370# bit 7 of d1 selects FP0; each lsl.b below moves the next register's bit into bit 7.
  18371	tst.b		%d1			# should FP0 be moved?
  18372	bpl.b		fmovm_data_in_fp1	# no
  18373
  18374	mov.l		(%a0)+,0x0+EXC_FP0(%a6)	# yes; copy into EXC_FP0 in the frame
  18375	mov.l		(%a0)+,0x4+EXC_FP0(%a6)
  18376	mov.l		(%a0)+,0x8+EXC_FP0(%a6)
  18377
  18378fmovm_data_in_fp1:
  18379	lsl.b		&0x1,%d1		# should FP1 be moved?
  18380	bpl.b		fmovm_data_in_fp2	# no
  18381
  18382	mov.l		(%a0)+,0x0+EXC_FP1(%a6)	# yes; copy into EXC_FP1 in the frame
  18383	mov.l		(%a0)+,0x4+EXC_FP1(%a6)
  18384	mov.l		(%a0)+,0x8+EXC_FP1(%a6)
  18385
  18386fmovm_data_in_fp2:
  18387	lsl.b		&0x1,%d1		# should FP2 be moved?
  18388	bpl.b		fmovm_data_in_fp3	# no
  18389
  18390	fmovm.x		(%a0)+,&0x20		# yes; load it with fmovm.x
  18391
  18392fmovm_data_in_fp3:
  18393	lsl.b		&0x1,%d1		# should FP3 be moved?
  18394	bpl.b		fmovm_data_in_fp4	# no
  18395
  18396	fmovm.x		(%a0)+,&0x10		# yes
  18397
  18398fmovm_data_in_fp4:
  18399	lsl.b		&0x1,%d1		# should FP4 be moved?
  18400	bpl.b		fmovm_data_in_fp5	# no
  18401
  18402	fmovm.x		(%a0)+,&0x08		# yes
  18403
  18404fmovm_data_in_fp5:
  18405	lsl.b		&0x1,%d1		# should FP5 be moved?
  18406	bpl.b		fmovm_data_in_fp6	# no
  18407
  18408	fmovm.x		(%a0)+,&0x04		# yes
  18409
  18410fmovm_data_in_fp6:
  18411	lsl.b		&0x1,%d1		# should FP6 be moved?
  18412	bpl.b		fmovm_data_in_fp7	# no
  18413
  18414	fmovm.x		(%a0)+,&0x02		# yes
  18415
  18416fmovm_data_in_fp7:
  18417	lsl.b		&0x1,%d1		# should FP7 be moved?
  18418	bpl.b		fmovm_data_in_done	# no
  18419
  18420	fmovm.x		(%a0)+,&0x01		# yes
  18421
  18422fmovm_data_in_done:
  18423	add.l		%d0,%sp			# remove fpregs from stack
  18424	rts
  18425
  18426#####################################
  18427# fmovm_dynamic branches here when the bit string is zero: nothing to move.
  18428fmovm_data_done:
  18429	rts
  18430
  18431##############################################################################
  18432
  18433#
  18434# table indexed by the operation's bit string that gives the number
  18435# of bytes that will be moved.
  18436#
  18437# number of bytes = (# of 1's in bit string) * 12(bytes/fpreg)
  18438# 256 one-byte entries, 8 per row; row k covers bit strings 8*k .. 8*k+7.
  18439tbl_fmovm_size:
  18440	byte	0x00,0x0c,0x0c,0x18,0x0c,0x18,0x18,0x24
  18441	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
  18442	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
  18443	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
  18444	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
  18445	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
  18446	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
  18447	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
  18448	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
  18449	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
  18450	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
  18451	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
  18452	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
  18453	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
  18454	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
  18455	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
  18456	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
  18457	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
  18458	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
  18459	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
  18460	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
  18461	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
  18462	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
  18463	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
  18464	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
  18465	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
  18466	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
  18467	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
  18468	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
  18469	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
  18470	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
  18471	byte	0x3c,0x48,0x48,0x54,0x48,0x54,0x54,0x60
  18472
  18473#
  18474# table to convert a pre-decrement bit string into a post-increment
  18475# or control bit string, i.e. entry i holds the 8-bit reversal of i.
  18476# ex:	0x00	==>	0x00
  18477#	0x01	==>	0x80
  18478#	0x02	==>	0x40
  18479#		.
  18480#		.
  18481#	0xfd	==>	0xbf
  18482#	0xfe	==>	0x7f
  18483#	0xff	==>	0xff
  18484# 256 one-byte entries, 8 per row; row k covers bit strings 8*k .. 8*k+7.
  18485tbl_fmovm_convert:
  18486	byte	0x00,0x80,0x40,0xc0,0x20,0xa0,0x60,0xe0
  18487	byte	0x10,0x90,0x50,0xd0,0x30,0xb0,0x70,0xf0
  18488	byte	0x08,0x88,0x48,0xc8,0x28,0xa8,0x68,0xe8
  18489	byte	0x18,0x98,0x58,0xd8,0x38,0xb8,0x78,0xf8
  18490	byte	0x04,0x84,0x44,0xc4,0x24,0xa4,0x64,0xe4
  18491	byte	0x14,0x94,0x54,0xd4,0x34,0xb4,0x74,0xf4
  18492	byte	0x0c,0x8c,0x4c,0xcc,0x2c,0xac,0x6c,0xec
  18493	byte	0x1c,0x9c,0x5c,0xdc,0x3c,0xbc,0x7c,0xfc
  18494	byte	0x02,0x82,0x42,0xc2,0x22,0xa2,0x62,0xe2
  18495	byte	0x12,0x92,0x52,0xd2,0x32,0xb2,0x72,0xf2
  18496	byte	0x0a,0x8a,0x4a,0xca,0x2a,0xaa,0x6a,0xea
  18497	byte	0x1a,0x9a,0x5a,0xda,0x3a,0xba,0x7a,0xfa
  18498	byte	0x06,0x86,0x46,0xc6,0x26,0xa6,0x66,0xe6
  18499	byte	0x16,0x96,0x56,0xd6,0x36,0xb6,0x76,0xf6
  18500	byte	0x0e,0x8e,0x4e,0xce,0x2e,0xae,0x6e,0xee
  18501	byte	0x1e,0x9e,0x5e,0xde,0x3e,0xbe,0x7e,0xfe
  18502	byte	0x01,0x81,0x41,0xc1,0x21,0xa1,0x61,0xe1
  18503	byte	0x11,0x91,0x51,0xd1,0x31,0xb1,0x71,0xf1
  18504	byte	0x09,0x89,0x49,0xc9,0x29,0xa9,0x69,0xe9
  18505	byte	0x19,0x99,0x59,0xd9,0x39,0xb9,0x79,0xf9
  18506	byte	0x05,0x85,0x45,0xc5,0x25,0xa5,0x65,0xe5
  18507	byte	0x15,0x95,0x55,0xd5,0x35,0xb5,0x75,0xf5
  18508	byte	0x0d,0x8d,0x4d,0xcd,0x2d,0xad,0x6d,0xed
  18509	byte	0x1d,0x9d,0x5d,0xdd,0x3d,0xbd,0x7d,0xfd
  18510	byte	0x03,0x83,0x43,0xc3,0x23,0xa3,0x63,0xe3
  18511	byte	0x13,0x93,0x53,0xd3,0x33,0xb3,0x73,0xf3
  18512	byte	0x0b,0x8b,0x4b,0xcb,0x2b,0xab,0x6b,0xeb
  18513	byte	0x1b,0x9b,0x5b,0xdb,0x3b,0xbb,0x7b,0xfb
  18514	byte	0x07,0x87,0x47,0xc7,0x27,0xa7,0x67,0xe7
  18515	byte	0x17,0x97,0x57,0xd7,0x37,0xb7,0x77,0xf7
  18516	byte	0x0f,0x8f,0x4f,0xcf,0x2f,0xaf,0x6f,0xef
  18517	byte	0x1f,0x9f,0x5f,0xdf,0x3f,0xbf,0x7f,0xff
  18518
  18519	global		fmovm_calc_ea
  18520###############################################
  18521# fmovm_calc_ea: calculate effective address  #
  18522###############################################
  18523fmovm_calc_ea:
  18524	mov.l		%d0,%a0			# move # bytes to a0
  18525# in: d0 = # of bytes to move. the handler jumped to gets a0 = # bytes and d1 = reg field.
  18526# currently, MODE and REG are taken from the EXC_OPWORD. this could be
  18527# easily changed if they were inputs passed in registers.
  18528	mov.w		EXC_OPWORD(%a6),%d0	# fetch opcode word
  18529	mov.w		%d0,%d1			# make a copy
  18530
  18531	andi.w		&0x3f,%d0		# extract {mode,reg} fields (table index)
  18532	andi.l		&0x7,%d1		# extract reg  field
  18533
  18534# jump to the corresponding function for each {MODE,REG} pair.
  18535	mov.w		(tbl_fea_mode.b,%pc,%d0.w*2),%d0 # fetch jmp distance
  18536	jmp		(tbl_fea_mode.b,%pc,%d0.w*1) # jmp to correct ea mode
  18537
  18538	swbeg		&64			# presumably marks a 64-entry switch table -- TODO confirm
  18539tbl_fea_mode:				# 64 word offsets from tbl_fea_mode, indexed by opword bits 5-0
  18540	short		tbl_fea_mode	-	tbl_fea_mode	# mode 0 (Dn): offset 0, no handler
  18541	short		tbl_fea_mode	-	tbl_fea_mode
  18542	short		tbl_fea_mode	-	tbl_fea_mode
  18543	short		tbl_fea_mode	-	tbl_fea_mode
  18544	short		tbl_fea_mode	-	tbl_fea_mode
  18545	short		tbl_fea_mode	-	tbl_fea_mode
  18546	short		tbl_fea_mode	-	tbl_fea_mode
  18547	short		tbl_fea_mode	-	tbl_fea_mode
  18548
  18549	short		tbl_fea_mode	-	tbl_fea_mode	# mode 1 (An): offset 0, no handler
  18550	short		tbl_fea_mode	-	tbl_fea_mode
  18551	short		tbl_fea_mode	-	tbl_fea_mode
  18552	short		tbl_fea_mode	-	tbl_fea_mode
  18553	short		tbl_fea_mode	-	tbl_fea_mode
  18554	short		tbl_fea_mode	-	tbl_fea_mode
  18555	short		tbl_fea_mode	-	tbl_fea_mode
  18556	short		tbl_fea_mode	-	tbl_fea_mode
  18557
  18558	short		faddr_ind_a0	-	tbl_fea_mode	# mode 2: (An)
  18559	short		faddr_ind_a1	-	tbl_fea_mode
  18560	short		faddr_ind_a2	-	tbl_fea_mode
  18561	short		faddr_ind_a3	-	tbl_fea_mode
  18562	short		faddr_ind_a4	-	tbl_fea_mode
  18563	short		faddr_ind_a5	-	tbl_fea_mode
  18564	short		faddr_ind_a6	-	tbl_fea_mode
  18565	short		faddr_ind_a7	-	tbl_fea_mode
  18566
  18567	short		faddr_ind_p_a0	-	tbl_fea_mode	# mode 3: (An)+
  18568	short		faddr_ind_p_a1	-	tbl_fea_mode
  18569	short		faddr_ind_p_a2	-	tbl_fea_mode
  18570	short		faddr_ind_p_a3	-	tbl_fea_mode
  18571	short		faddr_ind_p_a4	-	tbl_fea_mode
  18572	short		faddr_ind_p_a5	-	tbl_fea_mode
  18573	short		faddr_ind_p_a6	-	tbl_fea_mode
  18574	short		faddr_ind_p_a7	-	tbl_fea_mode
  18575
  18576	short		faddr_ind_m_a0	-	tbl_fea_mode	# mode 4: -(An)
  18577	short		faddr_ind_m_a1	-	tbl_fea_mode
  18578	short		faddr_ind_m_a2	-	tbl_fea_mode
  18579	short		faddr_ind_m_a3	-	tbl_fea_mode
  18580	short		faddr_ind_m_a4	-	tbl_fea_mode
  18581	short		faddr_ind_m_a5	-	tbl_fea_mode
  18582	short		faddr_ind_m_a6	-	tbl_fea_mode
  18583	short		faddr_ind_m_a7	-	tbl_fea_mode
  18584
  18585	short		faddr_ind_disp_a0	-	tbl_fea_mode	# mode 5: (d16,An)
  18586	short		faddr_ind_disp_a1	-	tbl_fea_mode
  18587	short		faddr_ind_disp_a2	-	tbl_fea_mode
  18588	short		faddr_ind_disp_a3	-	tbl_fea_mode
  18589	short		faddr_ind_disp_a4	-	tbl_fea_mode
  18590	short		faddr_ind_disp_a5	-	tbl_fea_mode
  18591	short		faddr_ind_disp_a6	-	tbl_fea_mode
  18592	short		faddr_ind_disp_a7	-	tbl_fea_mode
  18593
  18594	short		faddr_ind_ext	-	tbl_fea_mode	# mode 6: An indexed/extended forms share one handler
  18595	short		faddr_ind_ext	-	tbl_fea_mode
  18596	short		faddr_ind_ext	-	tbl_fea_mode
  18597	short		faddr_ind_ext	-	tbl_fea_mode
  18598	short		faddr_ind_ext	-	tbl_fea_mode
  18599	short		faddr_ind_ext	-	tbl_fea_mode
  18600	short		faddr_ind_ext	-	tbl_fea_mode
  18601	short		faddr_ind_ext	-	tbl_fea_mode
  18602
  18603	short		fabs_short	-	tbl_fea_mode	# mode 7: reg field selects the sub-mode
  18604	short		fabs_long	-	tbl_fea_mode
  18605	short		fpc_ind		-	tbl_fea_mode
  18606	short		fpc_ind_ext	-	tbl_fea_mode
  18607	short		tbl_fea_mode	-	tbl_fea_mode	# mode 7, reg 4-7: offset 0, no handler
  18608	short		tbl_fea_mode	-	tbl_fea_mode
  18609	short		tbl_fea_mode	-	tbl_fea_mode
  18610	short		tbl_fea_mode	-	tbl_fea_mode
  18611
  18612###################################
  18613# Address register indirect: (An) #
  18614###################################
       # faddr_ind_aN: return the current value of address register aN in a0.
       # a0 and a1 are read from EXC_DREGS+0x8/+0xc(%a6), a6 from (%a6) and
       # a7 from EXC_A7(%a6); a2-a5 are copied from the live registers.
       # No other register or memory location is written.
  18615faddr_ind_a0:
  18616	mov.l		EXC_DREGS+0x8(%a6),%a0	# Get current a0
  18617	rts
  18618
  18619faddr_ind_a1:
  18620	mov.l		EXC_DREGS+0xc(%a6),%a0	# Get current a1
  18621	rts
  18622
  18623faddr_ind_a2:
  18624	mov.l		%a2,%a0			# Get current a2
  18625	rts
  18626
  18627faddr_ind_a3:
  18628	mov.l		%a3,%a0			# Get current a3
  18629	rts
  18630
  18631faddr_ind_a4:
  18632	mov.l		%a4,%a0			# Get current a4
  18633	rts
  18634
  18635faddr_ind_a5:
  18636	mov.l		%a5,%a0			# Get current a5
  18637	rts
  18638
  18639faddr_ind_a6:
  18640	mov.l		(%a6),%a0		# Get current a6 (held at (%a6))
  18641	rts
  18642
  18643faddr_ind_a7:
  18644	mov.l		EXC_A7(%a6),%a0		# Get current a7
  18645	rts
  18646
  18647#####################################################
  18648# Address register indirect w/ postincrement: (An)+ #
  18649#####################################################
       # faddr_ind_p_aN:
       #   in : a0 = number of bytes to increment aN by
       #   out: a0 = aN before the increment (the <ea>); the same value is
       #        left in d0 and the incremented value in d1.
       # The incremented value is written back to aN: to the copy at
       # EXC_DREGS+0x8/+0xc(%a6) for a0/a1, to (%a6) for a6, to EXC_A7(%a6)
       # for a7, and to the live register for a2-a5.
  18650faddr_ind_p_a0:
  18651	mov.l		EXC_DREGS+0x8(%a6),%d0	# Get current a0
  18652	mov.l		%d0,%d1			# d1 = working copy
  18653	add.l		%a0,%d1			# Increment
  18654	mov.l		%d1,EXC_DREGS+0x8(%a6)	# Save incr value
  18655	mov.l		%d0,%a0			# return original a0 as <ea>
  18656	rts
  18657
  18658faddr_ind_p_a1:
  18659	mov.l		EXC_DREGS+0xc(%a6),%d0	# Get current a1
  18660	mov.l		%d0,%d1			# d1 = working copy
  18661	add.l		%a0,%d1			# Increment
  18662	mov.l		%d1,EXC_DREGS+0xc(%a6)	# Save incr value
  18663	mov.l		%d0,%a0			# return original a1 as <ea>
  18664	rts
  18665
  18666faddr_ind_p_a2:
  18667	mov.l		%a2,%d0			# Get current a2
  18668	mov.l		%d0,%d1			# d1 = working copy
  18669	add.l		%a0,%d1			# Increment
  18670	mov.l		%d1,%a2			# Save incr value
  18671	mov.l		%d0,%a0			# return original a2 as <ea>
  18672	rts
  18673
  18674faddr_ind_p_a3:
  18675	mov.l		%a3,%d0			# Get current a3
  18676	mov.l		%d0,%d1			# d1 = working copy
  18677	add.l		%a0,%d1			# Increment
  18678	mov.l		%d1,%a3			# Save incr value
  18679	mov.l		%d0,%a0			# return original a3 as <ea>
  18680	rts
  18681
  18682faddr_ind_p_a4:
  18683	mov.l		%a4,%d0			# Get current a4
  18684	mov.l		%d0,%d1			# d1 = working copy
  18685	add.l		%a0,%d1			# Increment
  18686	mov.l		%d1,%a4			# Save incr value
  18687	mov.l		%d0,%a0			# return original a4 as <ea>
  18688	rts
  18689
  18690faddr_ind_p_a5:
  18691	mov.l		%a5,%d0			# Get current a5
  18692	mov.l		%d0,%d1			# d1 = working copy
  18693	add.l		%a0,%d1			# Increment
  18694	mov.l		%d1,%a5			# Save incr value
  18695	mov.l		%d0,%a0			# return original a5 as <ea>
  18696	rts
  18697
  18698faddr_ind_p_a6:
  18699	mov.l		(%a6),%d0		# Get current a6
  18700	mov.l		%d0,%d1			# d1 = working copy
  18701	add.l		%a0,%d1			# Increment
  18702	mov.l		%d1,(%a6)		# Save incr value
  18703	mov.l		%d0,%a0			# return original a6 as <ea>
  18704	rts
  18705
  18706faddr_ind_p_a7:
  18707	mov.b		&mia7_flg,SPCOND_FLG(%a6) # set "special case" flag
  18708
  18709	mov.l		EXC_A7(%a6),%d0		# Get current a7
  18710	mov.l		%d0,%d1			# d1 = working copy
  18711	add.l		%a0,%d1			# Increment
  18712	mov.l		%d1,EXC_A7(%a6)		# Save incr value
  18713	mov.l		%d0,%a0			# return original a7 as <ea>
  18714	rts
  18715
  18716####################################################
  18717# Address register indirect w/ predecrement: -(An) #
  18718####################################################
       # faddr_ind_m_aN:
       #   in : a0 = number of bytes to decrement aN by
       #   out: a0 = aN after the decrement (the <ea>); d0 holds the same
       #        value.
       # The decremented value is also written back to aN (saved copy in
       # the frame for a0, a1, a6 and a7; live register for a2-a5).
  18719faddr_ind_m_a0:
  18720	mov.l		EXC_DREGS+0x8(%a6),%d0	# Get current a0
  18721	sub.l		%a0,%d0			# Decrement
  18722	mov.l		%d0,EXC_DREGS+0x8(%a6)	# Save decr value
  18723	mov.l		%d0,%a0			# return decremented a0 as <ea>
  18724	rts
  18725
  18726faddr_ind_m_a1:
  18727	mov.l		EXC_DREGS+0xc(%a6),%d0	# Get current a1
  18728	sub.l		%a0,%d0			# Decrement
  18729	mov.l		%d0,EXC_DREGS+0xc(%a6)	# Save decr value
  18730	mov.l		%d0,%a0			# return decremented a1 as <ea>
  18731	rts
  18732
  18733faddr_ind_m_a2:
  18734	mov.l		%a2,%d0			# Get current a2
  18735	sub.l		%a0,%d0			# Decrement
  18736	mov.l		%d0,%a2			# Save decr value
  18737	mov.l		%d0,%a0			# return decremented a2 as <ea>
  18738	rts
  18739
  18740faddr_ind_m_a3:
  18741	mov.l		%a3,%d0			# Get current a3
  18742	sub.l		%a0,%d0			# Decrement
  18743	mov.l		%d0,%a3			# Save decr value
  18744	mov.l		%d0,%a0			# return decremented a3 as <ea>
  18745	rts
  18746
  18747faddr_ind_m_a4:
  18748	mov.l		%a4,%d0			# Get current a4
  18749	sub.l		%a0,%d0			# Decrement
  18750	mov.l		%d0,%a4			# Save decr value
  18751	mov.l		%d0,%a0			# return decremented a4 as <ea>
  18752	rts
  18753
  18754faddr_ind_m_a5:
  18755	mov.l		%a5,%d0			# Get current a5
  18756	sub.l		%a0,%d0			# Decrement
  18757	mov.l		%d0,%a5			# Save decr value
  18758	mov.l		%d0,%a0			# return decremented a5 as <ea>
  18759	rts
  18760
  18761faddr_ind_m_a6:
  18762	mov.l		(%a6),%d0		# Get current a6
  18763	sub.l		%a0,%d0			# Decrement
  18764	mov.l		%d0,(%a6)		# Save decr value
  18765	mov.l		%d0,%a0			# return decremented a6 as <ea>
  18766	rts
  18767
  18768faddr_ind_m_a7:
  18769	mov.b		&mda7_flg,SPCOND_FLG(%a6) # set "special case" flag
  18770
  18771	mov.l		EXC_A7(%a6),%d0		# Get current a7
  18772	sub.l		%a0,%d0			# Decrement
  18773	mov.l		%d0,EXC_A7(%a6)		# Save decr value
  18774	mov.l		%d0,%a0			# return decremented a7 as <ea>
  18775	rts
  18776
  18777########################################################
  18778# Address register indirect w/ displacement: (d16, An) #
  18779########################################################
       # faddr_ind_disp_aN:
       #   Reads the 16-bit displacement word at EXC_EXTWPTR(%a6) with
       #   _imem_read_word() and advances EXC_EXTWPTR(%a6) by 2. If the
       #   read leaves d1 non-zero, control goes to iea_iacc and does not
       #   return here. Otherwise the displacement is sign-extended and
       #   added to aN.
       #   out: a0 = aN + d16
       # aN itself is not modified. d0 and d1 hold whatever
       # _imem_read_word() returned.
  18780faddr_ind_disp_a0:
  18781	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
  18782	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
  18783	bsr.l		_imem_read_word		# d0 = displacement word
  18784
  18785	tst.l		%d1			# did ifetch fail?
  18786	bne.l		iea_iacc		# yes
  18787
  18788	mov.w		%d0,%a0			# sign extend displacement
  18789
  18790	add.l		EXC_DREGS+0x8(%a6),%a0	# a0 + d16
  18791	rts
  18792
  18793faddr_ind_disp_a1:
  18794	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
  18795	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
  18796	bsr.l		_imem_read_word		# d0 = displacement word
  18797
  18798	tst.l		%d1			# did ifetch fail?
  18799	bne.l		iea_iacc		# yes
  18800
  18801	mov.w		%d0,%a0			# sign extend displacement
  18802
  18803	add.l		EXC_DREGS+0xc(%a6),%a0	# a1 + d16
  18804	rts
  18805
  18806faddr_ind_disp_a2:
  18807	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
  18808	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
  18809	bsr.l		_imem_read_word		# d0 = displacement word
  18810
  18811	tst.l		%d1			# did ifetch fail?
  18812	bne.l		iea_iacc		# yes
  18813
  18814	mov.w		%d0,%a0			# sign extend displacement
  18815
  18816	add.l		%a2,%a0			# a2 + d16
  18817	rts
  18818
  18819faddr_ind_disp_a3:
  18820	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
  18821	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
  18822	bsr.l		_imem_read_word		# d0 = displacement word
  18823
  18824	tst.l		%d1			# did ifetch fail?
  18825	bne.l		iea_iacc		# yes
  18826
  18827	mov.w		%d0,%a0			# sign extend displacement
  18828
  18829	add.l		%a3,%a0			# a3 + d16
  18830	rts
  18831
  18832faddr_ind_disp_a4:
  18833	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
  18834	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
  18835	bsr.l		_imem_read_word		# d0 = displacement word
  18836
  18837	tst.l		%d1			# did ifetch fail?
  18838	bne.l		iea_iacc		# yes
  18839
  18840	mov.w		%d0,%a0			# sign extend displacement
  18841
  18842	add.l		%a4,%a0			# a4 + d16
  18843	rts
  18844
  18845faddr_ind_disp_a5:
  18846	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
  18847	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
  18848	bsr.l		_imem_read_word		# d0 = displacement word
  18849
  18850	tst.l		%d1			# did ifetch fail?
  18851	bne.l		iea_iacc		# yes
  18852
  18853	mov.w		%d0,%a0			# sign extend displacement
  18854
  18855	add.l		%a5,%a0			# a5 + d16
  18856	rts
  18857
  18858faddr_ind_disp_a6:
  18859	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
  18860	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
  18861	bsr.l		_imem_read_word		# d0 = displacement word
  18862
  18863	tst.l		%d1			# did ifetch fail?
  18864	bne.l		iea_iacc		# yes
  18865
  18866	mov.w		%d0,%a0			# sign extend displacement
  18867
  18868	add.l		(%a6),%a0		# a6 + d16
  18869	rts
  18870
  18871faddr_ind_disp_a7:
  18872	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
  18873	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
  18874	bsr.l		_imem_read_word		# d0 = displacement word
  18875
  18876	tst.l		%d1			# did ifetch fail?
  18877	bne.l		iea_iacc		# yes
  18878
  18879	mov.w		%d0,%a0			# sign extend displacement
  18880
  18881	add.l		EXC_A7(%a6),%a0		# a7 + d16
  18882	rts
  18883
  18884########################################################################
  18885# Address register indirect w/ index(8-bit displacement): (d8, An, Xn) #
  18886#    "       "         "    w/   "  (base displacement): (bd, An, Xn)  #
  18887# Memory indirect postindexed: ([bd, An], Xn, od)		       #
  18888# Memory indirect preindexed: ([bd, An, Xn], od)		       #
  18889########################################################################
       # faddr_ind_ext:
       #   in : d1 = base register number; 8 is added before the call to
       #        fetch_dreg() (presumably to select the address-register
       #        half of its numbering -- confirm against fetch_dreg).
       #   out: a0 = <ea>
       # The extension word is read from the instruction stream. If bit 8
       # is set (full extension word format) the work is handed to
       # fcalc_mem_ind with a0 = base and d0 = extension word. Otherwise
       # the brief format is decoded here:
       #   <ea> = An + (Xn.{w,l} << scale) + sign-extended d8.
       # L_SCR1(%a6) is used as scratch for the extension word; d2 is
       # saved and restored around its use.
       # NOTE(review): on the iea_iacc path the base longword pushed
       # below is still on the stack; presumably iea_iacc rebuilds sp from
       # a6 -- confirm.
  18890faddr_ind_ext:
  18891	addq.l		&0x8,%d1
  18892	bsr.l		fetch_dreg		# fetch base areg
  18893	mov.l		%d0,-(%sp)		# hold base across the ifetch
  18894
  18895	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
  18896	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
  18897	bsr.l		_imem_read_word		# fetch extword in d0
  18898
  18899	tst.l		%d1			# did ifetch fail?
  18900	bne.l		iea_iacc		# yes
  18901
  18902	mov.l		(%sp)+,%a0		# a0 = base
  18903
  18904	btst		&0x8,%d0		# full extension word format?
  18905	bne.w		fcalc_mem_ind		# yes; decode it there
  18906
  18907	mov.l		%d0,L_SCR1(%a6)		# hold opword
  18908
  18909	mov.l		%d0,%d1
  18910	rol.w		&0x4,%d1		# bits 15-12 -> bits 3-0
  18911	andi.w		&0xf,%d1		# extract index regno
  18912
  18913# count on fetch_dreg() not to alter a0...
  18914	bsr.l		fetch_dreg		# fetch index
  18915
  18916	mov.l		%d2,-(%sp)		# save d2
  18917	mov.l		L_SCR1(%a6),%d2		# fetch opword
  18918
  18919	btst		&0xb,%d2		# is it word or long?
  18920	bne.b		faii8_long
  18921	ext.l		%d0			# sign extend word index
  18922faii8_long:
  18923	mov.l		%d2,%d1
  18924	rol.w		&0x7,%d1		# bits 10-9 -> bits 1-0
  18925	andi.l		&0x3,%d1		# extract scale value
  18926
  18927	lsl.l		%d1,%d0			# shift index by scale
  18928
  18929	extb.l		%d2			# sign extend displacement
  18930	add.l		%d2,%d0			# index + disp
  18931	add.l		%d0,%a0			# An + (index + disp)
  18932
  18933	mov.l		(%sp)+,%d2		# restore old d2
  18934	rts
  18935
  18936###########################
  18937# Absolute short: (XXX).W #
  18938###########################
       # fabs_short:
       #   Reads one word from the instruction stream at EXC_EXTWPTR(%a6),
       #   advances EXC_EXTWPTR(%a6) by 2 and returns that word,
       #   sign-extended to 32 bits, in a0. Goes to iea_iacc (no return
       #   here) if _imem_read_word() leaves d1 non-zero.
  18939fabs_short:
  18940	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
  18941	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
  18942	bsr.l		_imem_read_word		# fetch short address
  18943
  18944	tst.l		%d1			# did ifetch fail?
  18945	bne.l		iea_iacc		# yes
  18946
  18947	mov.w		%d0,%a0			# return <ea> in a0 (sign extended)
  18948	rts
  18949
  18950##########################
  18951# Absolute long: (XXX).L #
  18952##########################
       # fabs_long:
       #   Reads one longword from the instruction stream at
       #   EXC_EXTWPTR(%a6), advances EXC_EXTWPTR(%a6) by 4 and returns the
       #   longword in a0. Goes to iea_iacc (no return here) if
       #   _imem_read_long() leaves d1 non-zero.
  18953fabs_long:
  18954	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
  18955	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
  18956	bsr.l		_imem_read_long		# fetch long address
  18957
  18958	tst.l		%d1			# did ifetch fail?
  18959	bne.l		iea_iacc		# yes
  18960
  18961	mov.l		%d0,%a0			# return <ea> in a0
  18962	rts
  18963
  18964#######################################################
  18965# Program counter indirect w/ displacement: (d16, PC) #
  18966#######################################################
       # fpc_ind:
       #   Reads the 16-bit displacement from the instruction stream and
       #   returns a0 = (address of the displacement word) + sign-extended
       #   d16. EXC_EXTWPTR(%a6) is advanced by 2 before the read, so 2 is
       #   subtracted again after it has been added in as the PC base.
       #   Goes to iea_iacc (no return here) if the read fails.
  18967fpc_ind:
  18968	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
  18969	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
  18970	bsr.l		_imem_read_word		# fetch word displacement
  18971
  18972	tst.l		%d1			# did ifetch fail?
  18973	bne.l		iea_iacc		# yes
  18974
  18975	mov.w		%d0,%a0			# sign extend displacement
  18976
  18977	add.l		EXC_EXTWPTR(%a6),%a0	# pc + d16
  18978
  18979# the extwptr was increased by 2 above, before the ifetch. need to adjust here.
  18980	subq.l		&0x2,%a0		# adjust <ea>
  18981	rts
  18982
  18983##########################################################
  18984# PC indirect w/ index(8-bit displacement): (d8, PC, Xn) #
  18985# "     "     w/   "  (base displacement): (bd, PC, Xn)  #
  18986# PC memory indirect postindexed: ([bd, PC], Xn, od)     #
  18987# PC memory indirect preindexed: ([bd, PC, Xn], od)      #
  18988##########################################################
       # fpc_ind_ext:
       #   out: a0 = <ea>
       # Reads the extension word from the instruction stream and uses
       # the address of that extension word as the base. If bit 8 of the
       # extension word is set (full format) the work is handed to
       # fcalc_mem_ind with a0 = base and d0 = extension word. Otherwise
       # the brief format is decoded here:
       #   <ea> = base + (Xn.{w,l} << scale) + sign-extended d8.
       # L_SCR1(%a6) is used as scratch for the extension word; d2 is
       # saved and restored around its use.
  18989fpc_ind_ext:
  18990	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
  18991	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
  18992	bsr.l		_imem_read_word		# fetch ext word
  18993
  18994	tst.l		%d1			# did ifetch fail?
  18995	bne.l		iea_iacc		# yes
  18996
  18997	mov.l		EXC_EXTWPTR(%a6),%a0	# put base in a0
  18998	subq.l		&0x2,%a0		# adjust base
  18999
  19000	btst		&0x8,%d0		# full extension word format?
  19001	bne.w		fcalc_mem_ind		# calc memory indirect
  19002
  19003	mov.l		%d0,L_SCR1(%a6)		# store opword
  19004
  19005	mov.l		%d0,%d1			# make extword copy
  19006	rol.w		&0x4,%d1		# rotate reg num into place
  19007	andi.w		&0xf,%d1		# extract register number
  19008
  19009# count on fetch_dreg() not to alter a0...
  19010	bsr.l		fetch_dreg		# fetch index
  19011
  19012	mov.l		%d2,-(%sp)		# save d2
  19013	mov.l		L_SCR1(%a6),%d2		# fetch opword
  19014
  19015	btst		&0xb,%d2		# is index word or long?
  19016	bne.b		fpii8_long		# long
  19017	ext.l		%d0			# sign extend word index
  19018fpii8_long:
  19019	mov.l		%d2,%d1
  19020	rol.w		&0x7,%d1		# rotate scale value into place
  19021	andi.l		&0x3,%d1		# extract scale value
  19022
  19023	lsl.l		%d1,%d0			# shift index by scale
  19024
  19025	extb.l		%d2			# sign extend displacement
  19026	add.l		%d2,%d0			# disp + index
  19027	add.l		%d0,%a0			# PC base + (index + disp)
  19028
  19029	mov.l		(%sp)+,%d2		# restore temp register
  19030	rts
  19031
  19032# d2 = index
  19033# d3 = base
  19034# d4 = od
  19035# d5 = extword
       #
       # fcalc_mem_ind: decode a full-format extension word.
       #   in : d0 = extension word, a0 = base (An or PC value)
       #   out: a0 = <ea>
       # d2-d5 are saved on entry and restored before returning or before
       # leaving through fcea_err / fcea_iacc. L_SCR1(%a6) is used as
       # scratch for the extension word when an index register is fetched.
       # Any base and outer displacements are read from the instruction
       # stream, advancing EXC_EXTWPTR(%a6) by their size. For the memory
       # indirect forms a longword is read with _dmem_read_long().
  19036fcalc_mem_ind:
  19037	btst		&0x6,%d0		# is the index suppressed?
  19038	beq.b		fcalc_index
  19039
  19040	movm.l		&0x3c00,-(%sp)		# save d2-d5
  19041
  19042	mov.l		%d0,%d5			# put extword in d5
  19043	mov.l		%a0,%d3			# put base in d3
  19044
  19045	clr.l		%d2			# yes, so index = 0
  19046	bra.b		fbase_supp_ck
  19047
  19048# index:
  19049fcalc_index:
  19050	mov.l		%d0,L_SCR1(%a6)		# save d0 (opword)
  19051	bfextu		%d0{&16:&4},%d1		# fetch dreg index
  19052	bsr.l		fetch_dreg
  19053
  19054	movm.l		&0x3c00,-(%sp)		# save d2-d5
  19055	mov.l		%d0,%d2			# put index in d2
  19056	mov.l		L_SCR1(%a6),%d5		# put extword in d5
  19057	mov.l		%a0,%d3			# put base in d3
  19058
  19059	btst		&0xb,%d5		# is index word or long?
  19060	bne.b		fno_ext
  19061	ext.l		%d2			# sign extend word index
  19062
  19063fno_ext:
  19064	bfextu		%d5{&21:&2},%d0		# extract scale value
  19065	lsl.l		%d0,%d2			# shift index by scale
  19066
  19067# base address (passed as parameter in d3):
  19068# we clear the value here if it should actually be suppressed.
  19069fbase_supp_ck:
  19070	btst		&0x7,%d5		# is the base register suppressed?
  19071	beq.b		fno_base_sup
  19072	clr.l		%d3			# yes, so base = 0
  19073
  19074# base displacement:
  19075fno_base_sup:
  19076	bfextu		%d5{&26:&2},%d0		# get bd size
  19077#	beq.l		fmovm_error		# if (size == 0) it's reserved
  19078
  19079	cmpi.b		%d0,&0x2
  19080	blt.b		fno_bd			# size < 2: nothing to fetch
  19081	beq.b		fget_word_bd		# size == 2: word bd
  19082
       # size == 3: long bd
  19083	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
  19084	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
  19085	bsr.l		_imem_read_long
  19086
  19087	tst.l		%d1			# did ifetch fail?
  19088	bne.l		fcea_iacc		# yes
  19089
  19090	bra.b		fchk_ind
  19091
  19092fget_word_bd:
  19093	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
  19094	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
  19095	bsr.l		_imem_read_word
  19096
  19097	tst.l		%d1			# did ifetch fail?
  19098	bne.l		fcea_iacc		# yes
  19099
  19100	ext.l		%d0			# sign extend bd
  19101
  19102fchk_ind:
  19103	add.l		%d0,%d3			# base += bd
  19104
  19105# outer displacement:
  19106fno_bd:
  19107	bfextu		%d5{&30:&2},%d0		# extword bits 1-0; 0 = no mem indirect
  19108	beq.w		faii_bd
  19109
  19110	cmpi.b		%d0,&0x2
  19111	blt.b		fnull_od		# 1: null od
  19112	beq.b		fword_od		# 2: word od
  19113
       # 3: long od
  19114	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
  19115	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
  19116	bsr.l		_imem_read_long
  19117
  19118	tst.l		%d1			# did ifetch fail?
  19119	bne.l		fcea_iacc		# yes
  19120
  19121	bra.b		fadd_them
  19122
  19123fword_od:
  19124	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
  19125	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
  19126	bsr.l		_imem_read_word
  19127
  19128	tst.l		%d1			# did ifetch fail?
  19129	bne.l		fcea_iacc		# yes
  19130
  19131	ext.l		%d0			# sign extend od
  19132	bra.b		fadd_them
  19133
  19134fnull_od:
  19135	clr.l		%d0			# od = 0
  19136
  19137fadd_them:
  19138	mov.l		%d0,%d4			# put od in d4
  19139
  19140	btst		&0x2,%d5		# pre or post indexing?
  19141	beq.b		fpre_indexed
  19142
       # postindexed: fetch the pointer at (base + bd), then add index and od
  19143	mov.l		%d3,%a0
  19144	bsr.l		_dmem_read_long
  19145
  19146	tst.l		%d1			# did dfetch fail?
  19147	bne.w		fcea_err		# yes
  19148
  19149	add.l		%d2,%d0			# <ea> += index
  19150	add.l		%d4,%d0			# <ea> += od
  19151	bra.b		fdone_ea
  19152
       # preindexed: fetch the pointer at (base + bd + index), then add od
  19153fpre_indexed:
  19154	add.l		%d2,%d3			# preindexing
  19155	mov.l		%d3,%a0
  19156	bsr.l		_dmem_read_long
  19157
  19158	tst.l		%d1			# did dfetch fail?
  19159	bne.w		fcea_err		# yes
  19160
  19161	add.l		%d4,%d0			# ea += od
  19162	bra.b		fdone_ea
  19163
       # no memory indirection
  19164faii_bd:
  19165	add.l		%d2,%d3			# ea = (base + bd) + index
  19166	mov.l		%d3,%d0
  19167fdone_ea:
  19168	mov.l		%d0,%a0			# return <ea> in a0
  19169
  19170	movm.l		(%sp)+,&0x003c		# restore d2-d5
  19171	rts
  19172
  19173#########################################################
       # Error exits for fcalc_mem_ind. Both are entered with d2-d5 still
       # saved on the stack and restore them before leaving.
       #
       # fcea_err:  the _dmem_read_long() of the indirect pointer failed.
       #            Passes the address that was read (d3) in a0 and the
       #            word 0x0101 in d0 to iea_dacc.
       # fcea_iacc: an instruction-stream read failed; goes to iea_iacc.
  19174fcea_err:
  19175	mov.l		%d3,%a0			# a0 = address of failed read
  19176
  19177	movm.l		(%sp)+,&0x003c		# restore d2-d5
  19178	mov.w		&0x0101,%d0		# value handed to iea_dacc
  19179	bra.l		iea_dacc
  19180
  19181fcea_iacc:
  19182	movm.l		(%sp)+,&0x003c		# restore d2-d5
  19183	bra.l		iea_iacc
  19184
  19185fmovm_out_err:
       # fmovm_out_err / fmovm_in_err: error exits that call restore(),
       # load d0 with 0x00e1 (out) or 0x0161 (in), load a0 from
       # L_SCR1(%a6) and go to iea_dacc.
       # NOTE(review): presumably taken when a data access inside the
       # fmovm emulation fails, with L_SCR1 holding the faulting address
       # -- confirm against the callers, which are outside this section.
  19186	bsr.l		restore
  19187	mov.w		&0x00e1,%d0		# value handed to iea_dacc (out)
  19188	bra.b		fmovm_err
  19189
  19190fmovm_in_err:
  19191	bsr.l		restore
  19192	mov.w		&0x0161,%d0		# value handed to iea_dacc (in)
  19193
  19194fmovm_err:
  19195	mov.l		L_SCR1(%a6),%a0		# a0 = value saved in L_SCR1
  19196	bra.l		iea_dacc
  19197
  19198#########################################################################
  19199# XDEF ****************************************************************	#
  19200#	fmovm_ctrl(): emulate fmovm.l of control registers instr	#
  19201#									#
  19202# XREF ****************************************************************	#
  19203#	_imem_read_long() - read longword from memory			#
  19204#	iea_iacc() - _imem_read_long() failed; error recovery		#
  19205#									#
  19206# INPUT ***************************************************************	#
  19207#	None								#
  19208#									#
  19209# OUTPUT **************************************************************	#
  19210#	If _imem_read_long() doesn't fail:				#
  19211#		USER_FPCR(a6)  = new FPCR value				#
  19212#		USER_FPSR(a6)  = new FPSR value				#
  19213#		USER_FPIAR(a6) = new FPIAR value			#
  19214#									#
  19215# ALGORITHM ***********************************************************	#
  19216#	Decode the instruction type by looking at the extension word	#
  19217# in order to see how many control registers to fetch from memory.	#
  19218# Fetch them using _imem_read_long(). If this fetch fails, exit through	#
  19219# the special access error exit handler iea_iacc().			#
  19220#									#
  19221# Instruction word decoding:						#
  19222#									#
  19223#	fmovem.l #<data>, {FPIAR&|FPCR&|FPSR}				#
  19224#									#
  19225#		WORD1			WORD2				#
  19226#	1111 0010 00 111100	100$ $$00 0000 0000			#
  19227#									#
  19228#	$$$ (100): FPCR							#
  19229#	    (010): FPSR							#
  19230#	    (001): FPIAR						#
  19231#	    (000): FPIAR						#
  19232#									#
  19233#########################################################################
  19234
  19235	global		fmovm_ctrl
       # fmovm_ctrl: emulate fmovm.l #<data>,<ctrl reg list>.
       # The high byte of the extension word (EXC_EXTWORD(%a6)) selects
       # the register list:
       #   0x9c -> FPCR, FPSR, FPIAR	(fctrl_in_7)
       #   0x98 -> FPCR, FPSR		(fctrl_in_6)
       #   0x94 -> FPCR, FPIAR		(fctrl_in_5)
       #   anything else -> FPSR, FPIAR	(fctrl_in_3, fall through)
       # Each selected register is loaded with the next longword of the
       # instruction stream, in the order FPCR, FPSR, FPIAR, and stored
       # to USER_FPCR/USER_FPSR/USER_FPIAR(%a6). EXC_EXTWPTR(%a6) advances
       # by 4 per longword. Any failed read exits through iea_iacc.
  19236fmovm_ctrl:
  19237	mov.b		EXC_EXTWORD(%a6),%d0	# fetch reg select bits
  19238	cmpi.b		%d0,&0x9c		# fpcr & fpsr & fpiar ?
  19239	beq.w		fctrl_in_7		# yes
  19240	cmpi.b		%d0,&0x98		# fpcr & fpsr ?
  19241	beq.w		fctrl_in_6		# yes
  19242	cmpi.b		%d0,&0x94		# fpcr & fpiar ?
  19243	beq.b		fctrl_in_5		# yes
  19244
  19245# fmovem.l #<data>, fpsr/fpiar
  19246fctrl_in_3:
  19247	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
  19248	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
  19249	bsr.l		_imem_read_long		# fetch FPSR from mem
  19250
  19251	tst.l		%d1			# did ifetch fail?
  19252	bne.l		iea_iacc		# yes
  19253
  19254	mov.l		%d0,USER_FPSR(%a6)	# store new FPSR to stack
  19255	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
  19256	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
  19257	bsr.l		_imem_read_long		# fetch FPIAR from mem
  19258
  19259	tst.l		%d1			# did ifetch fail?
  19260	bne.l		iea_iacc		# yes
  19261
  19262	mov.l		%d0,USER_FPIAR(%a6)	# store new FPIAR to stack
  19263	rts
  19264
  19265# fmovem.l #<data>, fpcr/fpiar
  19266fctrl_in_5:
  19267	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
  19268	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
  19269	bsr.l		_imem_read_long		# fetch FPCR from mem
  19270
  19271	tst.l		%d1			# did ifetch fail?
  19272	bne.l		iea_iacc		# yes
  19273
  19274	mov.l		%d0,USER_FPCR(%a6)	# store new FPCR to stack
  19275	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
  19276	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
  19277	bsr.l		_imem_read_long		# fetch FPIAR from mem
  19278
  19279	tst.l		%d1			# did ifetch fail?
  19280	bne.l		iea_iacc		# yes
  19281
  19282	mov.l		%d0,USER_FPIAR(%a6)	# store new FPIAR to stack
  19283	rts
  19284
  19285# fmovem.l #<data>, fpcr/fpsr
  19286fctrl_in_6:
  19287	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
  19288	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
  19289	bsr.l		_imem_read_long		# fetch FPCR from mem
  19290
  19291	tst.l		%d1			# did ifetch fail?
  19292	bne.l		iea_iacc		# yes
  19293
  19294	mov.l		%d0,USER_FPCR(%a6)	# store new FPCR to stack
  19295	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
  19296	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
  19297	bsr.l		_imem_read_long		# fetch FPSR from mem
  19298
  19299	tst.l		%d1			# did ifetch fail?
  19300	bne.l		iea_iacc		# yes
  19301
  19302	mov.l		%d0,USER_FPSR(%a6)	# store new FPSR to stack
  19303	rts
  19304
  19305# fmovem.l #<data>, fpcr/fpsr/fpiar
  19306fctrl_in_7:
  19307	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
  19308	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
  19309	bsr.l		_imem_read_long		# fetch FPCR from mem
  19310
  19311	tst.l		%d1			# did ifetch fail?
  19312	bne.l		iea_iacc		# yes
  19313
  19314	mov.l		%d0,USER_FPCR(%a6)	# store new FPCR to stack
  19315	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
  19316	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
  19317	bsr.l		_imem_read_long		# fetch FPSR from mem
  19318
  19319	tst.l		%d1			# did ifetch fail?
  19320	bne.l		iea_iacc		# yes
  19321
  19322	mov.l		%d0,USER_FPSR(%a6)	# store new FPSR to stack
  19323	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
  19324	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
  19325	bsr.l		_imem_read_long		# fetch FPIAR from mem
  19326
  19327	tst.l		%d1			# did ifetch fail?
  19328	bne.l		iea_iacc		# yes
  19329
  19330	mov.l		%d0,USER_FPIAR(%a6)	# store new FPIAR to stack
  19331	rts
  19332
  19333#########################################################################
  19334# XDEF ****************************************************************	#
  19335#	_dcalc_ea(): calc correct <ea> from <ea> stacked on exception	#
  19336#									#
  19337# XREF ****************************************************************	#
  19338#	inc_areg() - increment an address register			#
  19339#	dec_areg() - decrement an address register			#
  19340#									#
  19341# INPUT ***************************************************************	#
  19342#	d0 = number of bytes to adjust <ea> by				#
  19343#									#
  19344# OUTPUT **************************************************************	#
  19345#	a0 = correct effective address					#
  19346#									#
  19347# ALGORITHM ***********************************************************	#
  19348# "Dummy" CALCulate Effective Address:					#
  19349#	The stacked <ea> for FP unimplemented instructions and opclass	#
  19350#	two packed instructions is correct with the exception of...	#
  19351#									#
  19352#	1) -(An)   : The register is not updated regardless of size.	#
  19353#		     Also, for extended precision and packed, the	#
  19354#		     stacked <ea> value is 8 bytes too big		#
  19355#	2) (An)+   : The register is not updated.			#
  19356#	3) #<data> : The upper longword of the immediate operand is	#
  19357#		     stacked b,w,l and s sizes are completely stacked.	#
  19358#		     d,x, and p are not.				#
  19359#									#
  19360#########################################################################
  19361
  19362	global		_dcalc_ea
       # _dcalc_ea:
       #   in : d0 = number of bytes to adjust the address register by
       #   out: a0 = <ea>
       # The mode and register fields are taken from the low byte of the
       # opword (1+EXC_OPWORD(%a6)):
       #   (An)+   : An is incremented via inc_areg(); a0 = EXC_EA(%a6).
       #   -(An)   : An is decremented via dec_areg(); a0 = EXC_EA(%a6),
       #             minus 8 (and written back to EXC_EA) when the byte
       #             count is 0xc.
       #   #<data> : SPCOND_FLG(%a6) = immed_flg; a0 = (USER_FPIAR) + 4.
       #   other   : a0 = EXC_EA(%a6) unchanged.
  19363_dcalc_ea:
  19364	mov.l		%d0, %a0		# move # bytes to %a0
  19365
  19366	mov.b		1+EXC_OPWORD(%a6), %d0	# fetch opcode word
  19367	mov.l		%d0, %d1		# make a copy
  19368
  19369	andi.w		&0x38, %d0		# extract mode field
  19370	andi.l		&0x7, %d1		# extract reg  field
  19371
  19372	cmpi.b		%d0,&0x18		# is mode (An)+ ?
  19373	beq.b		dcea_pi			# yes
  19374
  19375	cmpi.b		%d0,&0x20		# is mode -(An) ?
  19376	beq.b		dcea_pd			# yes
  19377
  19378	or.w		%d1,%d0			# concat mode,reg
  19379	cmpi.b		%d0,&0x3c		# is mode #<data>?
  19380
  19381	beq.b		dcea_imm		# yes
  19382
  19383	mov.l		EXC_EA(%a6),%a0		# return <ea>
  19384	rts
  19385
  19386# need to set immediate data flag here since we'll need to do
  19387# an imem_read to fetch this later.
  19388dcea_imm:
  19389	mov.b		&immed_flg,SPCOND_FLG(%a6)
  19390	lea		([USER_FPIAR,%a6],0x4),%a0 # return <ea> = (saved FPIAR) + 4
  19391	rts
  19392
  19393# here, the <ea> is stacked correctly. however, we must update the
  19394# address register...
  19395dcea_pi:
  19396	mov.l		%a0,%d0			# pass amt to inc by
  19397	bsr.l		inc_areg		# inc addr register
  19398
  19399	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
  19400	rts
  19401
  19402# the <ea> is stacked correctly for all but extended and packed which
  19403# the <ea>s are 8 bytes too large.
  19404# it would make no sense to have a pre-decrement to a7 in supervisor
  19405# mode so we don't even worry about this tricky case here : )
       # NOTE(review): the size test below reads d0 after the call, so it
       # relies on dec_areg() leaving d0 unchanged -- confirm.
  19406dcea_pd:
  19407	mov.l		%a0,%d0			# pass amt to dec by
  19408	bsr.l		dec_areg		# dec addr register
  19409
  19410	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
  19411
  19412	cmpi.b		%d0,&0xc		# is opsize ext or packed?
  19413	beq.b		dcea_pd2		# yes
  19414	rts
  19415dcea_pd2:
  19416	sub.l		&0x8,%a0		# correct <ea>
  19417	mov.l		%a0,EXC_EA(%a6)		# put correct <ea> on stack
  19418	rts
  19419
  19420#########################################################################
  19421# XDEF ****************************************************************	#
  19422#	_calc_ea_fout(): calculate correct stacked <ea> for extended	#
  19423#			 and packed data opclass 3 operations.		#
  19424#									#
  19425# XREF ****************************************************************	#
  19426#	None								#
  19427#									#
  19428# INPUT ***************************************************************	#
  19429#	None								#
  19430#									#
  19431# OUTPUT **************************************************************	#
  19432#	a0 = return correct effective address				#
  19433#									#
  19434# ALGORITHM ***********************************************************	#
  19435#	For opclass 3 extended and packed data operations, the <ea>	#
  19436# stacked for the exception is incorrect for -(an) and (an)+ addressing	#
  19437# modes. Also, while we're at it, the address register itself must get	#
  19438# updated.								#
  19439#	So, for -(an), we must subtract 8 off of the stacked <ea> value	#
  19440# and return that value as the correct <ea> and store that value in An.	#
  19441# For (an)+, the stacked <ea> is correct but we must adjust An by +12.	#
  19442#									#
  19443#########################################################################
  19444
  19445# This calc_ea is currently used to retrieve the correct <ea>
  19446# for fmove outs of type extended and packed.
       #   out: a0 = <ea>
       # The mode and register fields come from the low byte of the opword
       # (1+EXC_OPWORD(%a6)). (An)+ and -(An) are dispatched to ceaf_pi and
       # ceaf_pd with d1 = register number; for every other mode the
       # stacked EXC_EA(%a6) is returned as is.
  19447	global		_calc_ea_fout
  19448_calc_ea_fout:
  19449	mov.b		1+EXC_OPWORD(%a6),%d0	# fetch opcode word
  19450	mov.l		%d0,%d1			# make a copy
  19451
  19452	andi.w		&0x38,%d0		# extract mode field
  19453	andi.l		&0x7,%d1		# extract reg  field
  19454
  19455	cmpi.b		%d0,&0x18		# is mode (An)+ ?
  19456	beq.b		ceaf_pi			# yes
  19457
  19458	cmpi.b		%d0,&0x20		# is mode -(An) ?
  19459	beq.w		ceaf_pd			# yes
  19460
  19461	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
  19462	rts
  19463
  19464# (An)+ : extended and packed fmove out
  19465#	: stacked <ea> is correct
  19466#	: "An" not updated
       #   in : d1 = register number (0-7)
       #   out: a0 = EXC_EA(%a6); An has been advanced by 12 (0xc) bytes
       # Dispatches through tbl_ceaf_pi to the ceaf_piN stub that adds 12
       # to register N: the saved copy in the frame for a0, a1, a6 and a7,
       # the live register for a2-a5. The a7 stub also sets SPCOND_FLG(%a6)
       # to mia7_flg. d1 is overwritten with the table offset.
  19467ceaf_pi:
  19468	mov.w		(tbl_ceaf_pi.b,%pc,%d1.w*2),%d1	# d1 = table offset for An
  19469	mov.l		EXC_EA(%a6),%a0			# a0 = stacked <ea>
  19470	jmp		(tbl_ceaf_pi.b,%pc,%d1.w*1)	# go to ceaf_piN
  19471
  19472	swbeg		&0x8
  19473tbl_ceaf_pi:
  19474	short		ceaf_pi0 - tbl_ceaf_pi
  19475	short		ceaf_pi1 - tbl_ceaf_pi
  19476	short		ceaf_pi2 - tbl_ceaf_pi
  19477	short		ceaf_pi3 - tbl_ceaf_pi
  19478	short		ceaf_pi4 - tbl_ceaf_pi
  19479	short		ceaf_pi5 - tbl_ceaf_pi
  19480	short		ceaf_pi6 - tbl_ceaf_pi
  19481	short		ceaf_pi7 - tbl_ceaf_pi
  19482
  19483ceaf_pi0:
  19484	addi.l		&0xc,EXC_DREGS+0x8(%a6)	# saved a0 += 12
  19485	rts
  19486ceaf_pi1:
  19487	addi.l		&0xc,EXC_DREGS+0xc(%a6)	# saved a1 += 12
  19488	rts
  19489ceaf_pi2:
  19490	add.l		&0xc,%a2		# a2 += 12
  19491	rts
  19492ceaf_pi3:
  19493	add.l		&0xc,%a3		# a3 += 12
  19494	rts
  19495ceaf_pi4:
  19496	add.l		&0xc,%a4		# a4 += 12
  19497	rts
  19498ceaf_pi5:
  19499	add.l		&0xc,%a5		# a5 += 12
  19500	rts
  19501ceaf_pi6:
  19502	addi.l		&0xc,EXC_A6(%a6)	# saved a6 += 12
  19503	rts
  19504ceaf_pi7:
  19505	mov.b		&mia7_flg,SPCOND_FLG(%a6) # set "special case" flag
  19506	addi.l		&0xc,EXC_A7(%a6)	# saved a7 += 12
  19507	rts
  19508
  19509# -(An) : extended and packed fmove out
  19510#	: stacked <ea> = actual <ea> + 8
  19511#	: "An" not updated
       #   in : d1 = register number (0-7)
       #   out: a0 = EXC_EA(%a6) - 8; EXC_EA(%a6) holds the same corrected
       #        value and An is set to it
       # Dispatches through tbl_ceaf_pd to the ceaf_pdN stub that stores a0
       # into register N: the saved copy in the frame for a0, a1, a6 and
       # a7, the live register for a2-a5. The a7 stub also sets
       # SPCOND_FLG(%a6) to mda7_flg. d1 is overwritten with the table
       # offset.
  19512ceaf_pd:
  19513	mov.w		(tbl_ceaf_pd.b,%pc,%d1.w*2),%d1	# d1 = table offset for An
  19514	mov.l		EXC_EA(%a6),%a0			# a0 = stacked <ea>
  19515	sub.l		&0x8,%a0			# correct the return value
  19516	sub.l		&0x8,EXC_EA(%a6)		# correct the stacked <ea>
  19517	jmp		(tbl_ceaf_pd.b,%pc,%d1.w*1)	# go to ceaf_pdN
  19518
  19519	swbeg		&0x8
  19520tbl_ceaf_pd:
  19521	short		ceaf_pd0 - tbl_ceaf_pd
  19522	short		ceaf_pd1 - tbl_ceaf_pd
  19523	short		ceaf_pd2 - tbl_ceaf_pd
  19524	short		ceaf_pd3 - tbl_ceaf_pd
  19525	short		ceaf_pd4 - tbl_ceaf_pd
  19526	short		ceaf_pd5 - tbl_ceaf_pd
  19527	short		ceaf_pd6 - tbl_ceaf_pd
  19528	short		ceaf_pd7 - tbl_ceaf_pd
  19529
  19530ceaf_pd0:
  19531	mov.l		%a0,EXC_DREGS+0x8(%a6)	# saved a0 = corrected <ea>
  19532	rts
  19533ceaf_pd1:
  19534	mov.l		%a0,EXC_DREGS+0xc(%a6)	# saved a1 = corrected <ea>
  19535	rts
  19536ceaf_pd2:
  19537	mov.l		%a0,%a2			# a2 = corrected <ea>
  19538	rts
  19539ceaf_pd3:
  19540	mov.l		%a0,%a3			# a3 = corrected <ea>
  19541	rts
  19542ceaf_pd4:
  19543	mov.l		%a0,%a4			# a4 = corrected <ea>
  19544	rts
  19545ceaf_pd5:
  19546	mov.l		%a0,%a5			# a5 = corrected <ea>
  19547	rts
  19548ceaf_pd6:
  19549	mov.l		%a0,EXC_A6(%a6)		# saved a6 = corrected <ea>
  19550	rts
  19551ceaf_pd7:
  19552	mov.l		%a0,EXC_A7(%a6)		# saved a7 = corrected <ea>
  19553	mov.b		&mda7_flg,SPCOND_FLG(%a6) # set "special case" flag
  19554	rts
  19555
  19556#########################################################################
  19557# XDEF ****************************************************************	#
  19558#	_load_fop(): load operand for unimplemented FP exception	#
  19559#									#
  19560# XREF ****************************************************************	#
  19561#	set_tag_x() - determine ext prec optype tag			#
  19562#	set_tag_s() - determine sgl prec optype tag			#
  19563#	set_tag_d() - determine dbl prec optype tag			#
  19564#	unnorm_fix() - convert normalized number to denorm or zero	#
  19565#	norm() - normalize a denormalized number			#
  19566#	get_packed() - fetch a packed operand from memory		#
  19567#	_dcalc_ea() - calculate <ea>, fixing An in process		#
  19568#									#
  19569#	_imem_read_{word,long}() - read from instruction memory		#
  19570#	_dmem_read() - read from data memory				#
  19571#	_dmem_read_{byte,word,long}() - read from data memory		#
  19572#									#
  19573#	facc_in_{b,w,l,d,x}() - mem read failed; special exit point	#
  19574#									#
  19575# INPUT ***************************************************************	#
  19576#	None								#
  19577#									#
  19578# OUTPUT **************************************************************	#
  19579#	If memory access doesn't fail:					#
  19580#		FP_SRC(a6) = source operand in extended precision	#
  19581#		FP_DST(a6) = destination operand in extended precision	#
  19582#									#
  19583# ALGORITHM ***********************************************************	#
  19584#	This is called from the Unimplemented FP exception handler in	#
  19585# order to load the source and maybe destination operand into		#
  19586# FP_SRC(a6) and FP_DST(a6). If the instruction was opclass zero, load	#
  19587# the source and destination from the FP register file. Set the optype	#
  19588# tags for both if dyadic, one for monadic. If a number is an UNNORM,	#
  19589# convert it to a DENORM or a ZERO.					#
  19590#	If the instruction is opclass two (memory->reg), then fetch	#
  19591# the destination from the register file and the source operand from	#
  19592# memory. Tag and fix both as above w/ opclass zero instructions.	#
  19593#	If the source operand is byte,word,long, or single, it may be	#
  19594# in the data register file. If it's actually out in memory, use one of	#
  19595# the mem_read() routines to fetch it. If the mem_read() access returns	#
  19596# a failing value, exit through the special facc_in() routine which	#
  19597# will create an access error exception frame from the current exception #
  19598# frame.								#
  19599#	Immediate data and regular data accesses are separated because	#
  19600# if an immediate data access fails, the resulting fault status		#
  19601# longword stacked for the access error exception must have the		#
  19602# instruction bit set.							#
  19603#									#
  19604#########################################################################
  19605
  19606	global		_load_fop
  19607_load_fop:
  19608
  19609#  15     13 12 10  9 7  6       0
  19610# /        \ /   \ /  \ /         \
  19611# ---------------------------------
  19612# | opclass | RX  | RY | EXTENSION |  (2nd word of general FP instruction)
  19613# ---------------------------------
  19614#
  19615
  19616#	bfextu		EXC_CMDREG(%a6){&0:&3}, %d0 # extract opclass
  19617#	cmpi.b		%d0, &0x2		# which class is it? ('000,'010,'011)
  19618#	beq.w		op010			# handle <ea> -> fpn
  19619#	bgt.w		op011			# handle fpn -> <ea>
  19620
  19621# we're not using op011 for now...
  19622	btst		&0x6,EXC_CMDREG(%a6)	# bit 6 of the high byte == bit 14 of the word
  19623	bne.b		op010			# set => handled as opclass '010
  19624
  19625############################
  19626# OPCLASS '000: reg -> reg #
  19627############################
  19628op000:
  19629	mov.b		1+EXC_CMDREG(%a6),%d0	# fetch extension word lo
  19630	btst		&0x5,%d0		# testing extension bits
  19631	beq.b		op000_src		# (bit 5 == 0) => monadic
  19632	btst		&0x4,%d0		# (bit 5 == 1)
  19633	beq.b		op000_dst		# (bit 4 == 0) => dyadic
  19634	and.w		&0x007f,%d0		# extract extension bits {6:0}
  19635	cmpi.w		%d0,&0x0038		# is it an fcmp (dyadic) ?
  19636	bne.b		op000_src		# no; only the src operand is loaded
  19637
  19638op000_dst:
  19639	bfextu		EXC_CMDREG(%a6){&6:&3}, %d0 # extract dst field
  19640	bsr.l		load_fpn2		# fetch dst fpreg into FP_DST
  19641
  19642	bsr.l		set_tag_x		# get dst optype tag
  19643
  19644	cmpi.b		%d0, &UNNORM		# is dst fpreg an UNNORM?
  19645	beq.b		op000_dst_unnorm	# yes
  19646op000_dst_cont:
  19647	mov.b		%d0, DTAG(%a6)		# store the dst optype tag
  19648
  19649op000_src:
  19650	bfextu		EXC_CMDREG(%a6){&3:&3}, %d0 # extract src field
  19651	bsr.l		load_fpn1		# fetch src fpreg into FP_SRC
  19652
  19653	bsr.l		set_tag_x		# get src optype tag
  19654
  19655	cmpi.b		%d0, &UNNORM		# is src fpreg an UNNORM?
  19656	beq.b		op000_src_unnorm	# yes
  19657op000_src_cont:
  19658	mov.b		%d0, STAG(%a6)		# store the src optype tag
  19659	rts
  19660
  19661op000_dst_unnorm:
  19662	bsr.l		unnorm_fix		# fix the dst UNNORM
  19663	bra.b		op000_dst_cont		# %d0 (stored as DTAG) comes from unnorm_fix
  19664op000_src_unnorm:
  19665	bsr.l		unnorm_fix		# fix the src UNNORM
  19666	bra.b		op000_src_cont		# %d0 (stored as STAG) comes from unnorm_fix
  19667
  19668#############################
  19669# OPCLASS '010: <ea> -> reg #
  19670#############################
  19671op010:
  19672	mov.w		EXC_CMDREG(%a6),%d0	# fetch extension word
  19673	btst		&0x5,%d0		# testing extension bits
  19674	beq.b		op010_src		# (bit 5 == 0) => monadic
  19675	btst		&0x4,%d0		# (bit 5 == 1)
  19676	beq.b		op010_dst		# (bit 4 == 0) => dyadic
  19677	and.w		&0x007f,%d0		# extract extension bits {6:0}
  19678	cmpi.w		%d0,&0x0038		# is it an fcmp (dyadic) ?
  19679	bne.b		op010_src		# no; only the src operand is loaded
  19680
  19681op010_dst:
  19682	bfextu		EXC_CMDREG(%a6){&6:&3}, %d0 # extract dst field
  19683	bsr.l		load_fpn2		# fetch dst fpreg ptr
  19684
  19685	bsr.l		set_tag_x		# get dst type tag
  19686
  19687	cmpi.b		%d0, &UNNORM		# is dst fpreg an UNNORM?
  19688	beq.b		op010_dst_unnorm	# yes
  19689op010_dst_cont:
  19690	mov.b		%d0, DTAG(%a6)		# store the dst optype tag
  19691
  19692op010_src:
  19693	bfextu		EXC_CMDREG(%a6){&3:&3}, %d0 # extract src type field
  19694
  19695	bfextu		EXC_OPWORD(%a6){&10:&3}, %d1 # extract <ea> mode field
  19696	bne.w		fetch_from_mem		# src op is in memory
  19697
       # falls through only when the <ea> mode field extracted above is zero
  19698op010_dreg:
  19699	clr.b		STAG(%a6)		# either NORM or ZERO
  19700	bfextu		EXC_OPWORD(%a6){&13:&3}, %d1 # extract src reg field
  19701
  19702	mov.w		(tbl_op010_dreg.b,%pc,%d0.w*2), %d0 # jmp based on optype
  19703	jmp		(tbl_op010_dreg.b,%pc,%d0.w*1) # fetch src from dreg
  19704
  19705op010_dst_unnorm:
  19706	bsr.l		unnorm_fix		# fix the dst UNNORM
  19707	bra.b		op010_dst_cont
  19708
       # indexed by the src type field in %d0; a zero offset means the type
       # has no data-register handler in this table
  19709	swbeg		&0x8
  19710tbl_op010_dreg:
  19711	short		opd_long	- tbl_op010_dreg
  19712	short		opd_sgl		- tbl_op010_dreg
  19713	short		tbl_op010_dreg	- tbl_op010_dreg
  19714	short		tbl_op010_dreg	- tbl_op010_dreg
  19715	short		opd_word	- tbl_op010_dreg
  19716	short		tbl_op010_dreg	- tbl_op010_dreg
  19717	short		opd_byte	- tbl_op010_dreg
  19718	short		tbl_op010_dreg	- tbl_op010_dreg
  19719
  19720#
  19721# LONG: can be either NORM or ZERO...
  19722#
  19723opd_long:
       # source is a long held in a data register (reg number in %d1)
  19724	bsr.l		fetch_dreg		# fetch long in d0
  19725	fmov.l		%d0, %fp0		# load a long
  19726	fmovm.x		&0x80, FP_SRC(%a6)	# return src op in FP_SRC
  19727	fbeq.w		opd_long_zero		# long is a ZERO
  19728	rts					# STAG keeps the value cleared by op010_dreg
  19729opd_long_zero:
  19730	mov.b		&ZERO, STAG(%a6)	# set ZERO optype flag
  19731	rts
  19732
  19733#
  19734# WORD: can be either NORM or ZERO...
  19735#
  19736opd_word:
       # source is a word held in a data register (reg number in %d1)
  19737	bsr.l		fetch_dreg		# fetch word in d0
  19738	fmov.w		%d0, %fp0		# load a word
  19739	fmovm.x		&0x80, FP_SRC(%a6)	# return src op in FP_SRC
  19740	fbeq.w		opd_word_zero		# WORD is a ZERO
  19741	rts					# STAG keeps the value cleared by op010_dreg
  19742opd_word_zero:
  19743	mov.b		&ZERO, STAG(%a6)	# set ZERO optype flag
  19744	rts
  19745
  19746#
  19747# BYTE: can be either NORM or ZERO...
  19748#
  19749opd_byte:
       # source is a byte held in a data register (reg number in %d1)
  19750	bsr.l		fetch_dreg		# fetch byte in d0
  19751	fmov.b		%d0, %fp0		# load a byte
  19752	fmovm.x		&0x80, FP_SRC(%a6)	# return src op in FP_SRC
  19753	fbeq.w		opd_byte_zero		# byte is a ZERO
  19754	rts					# STAG keeps the value cleared by op010_dreg
  19755opd_byte_zero:
  19756	mov.b		&ZERO, STAG(%a6)	# set ZERO optype flag
  19757	rts
  19758
  19759#
  19760# SGL: can be either NORM, DENORM, ZERO, INF, QNAN or SNAN but not UNNORM
  19761#
  19762# separate SNANs and DENORMs so they can be loaded w/ special care.
  19763# all others can simply be moved "in" using fmove.
  19764#
  19765opd_sgl:
       # source is a single held in a data register; it is spilled to L_SCR1
       # so that set_tag_s and the conversion helpers can address it via %a0
  19766	bsr.l		fetch_dreg		# fetch sgl in d0
  19767	mov.l		%d0,L_SCR1(%a6)		# park the raw sgl on the stack
  19768
  19769	lea		L_SCR1(%a6), %a0	# pass: ptr to the sgl
  19770	bsr.l		set_tag_s		# determine sgl type
  19771	mov.b		%d0, STAG(%a6)		# save the src tag
  19772
  19773	cmpi.b		%d0, &SNAN		# is it an SNAN?
  19774	beq.w		get_sgl_snan		# yes
  19775
  19776	cmpi.b		%d0, &DENORM		# is it a DENORM?
  19777	beq.w		get_sgl_denorm		# yes
  19778
  19779	fmov.s		(%a0), %fp0		# no, so can load it regular
  19780	fmovm.x		&0x80, FP_SRC(%a6)	# return src op in FP_SRC
  19781	rts
  19782
  19783##############################################################################
  19784
  19785#########################################################################
  19786# fetch_from_mem():							#
  19787# - src is out in memory. must:						#
  19788#	(1) calc ea - must read AFTER you know the src type since	#
  19789#		      if the ea is -() or ()+, need to know # of bytes.	#
  19790#	(2) read it in from either user or supervisor space		#
  19791#	(3) if (b || w || l) then simply read in			#
  19792#	    if (s || d || x) then check for SNAN,UNNORM,DENORM		#
  19793#	    if (packed) then punt for now				#
  19794# INPUT:								#
  19795#	%d0 : src type field						#
  19796#########################################################################
  19797fetch_from_mem:
  19798	clr.b		STAG(%a6)		# either NORM or ZERO
  19799
       # two-step dispatch: load the 16-bit offset for this src type, then
       # jump to tbl_fp_type + offset
  19800	mov.w		(tbl_fp_type.b,%pc,%d0.w*2), %d0 # index by src type field
  19801	jmp		(tbl_fp_type.b,%pc,%d0.w*1)
  19802
  19803	swbeg		&0x8
  19804tbl_fp_type:
  19805	short		load_long	- tbl_fp_type
  19806	short		load_sgl	- tbl_fp_type
  19807	short		load_ext	- tbl_fp_type
  19808	short		load_packed	- tbl_fp_type
  19809	short		load_word	- tbl_fp_type
  19810	short		load_dbl	- tbl_fp_type
  19811	short		load_byte	- tbl_fp_type
  19812	short		tbl_fp_type	- tbl_fp_type	# type 7: zero offset, no handler
  19813
  19814#########################################
  19815# load a LONG into %fp0:		#
  19816#	-number can't fault		#
  19817#	(1) calc ea			#
  19818#	(2) read 4 bytes into L_SCR1	#
  19819#	(3) fmov.l into %fp0		#
  19820#########################################
  19821load_long:
       # the long is returned in %d0 by either read routine (data memory or
       # immediate) and converted by fmov.l; a zero result is tagged ZERO
  19822	movq.l		&0x4, %d0		# pass: 4 (bytes)
  19823	bsr.l		_dcalc_ea		# calc <ea>; <ea> in %a0
  19824
  19825	cmpi.b		SPCOND_FLG(%a6),&immed_flg
  19826	beq.b		load_long_immed		# immed_flg set: operand is immediate data
  19827
  19828	bsr.l		_dmem_read_long		# fetch src operand from memory
  19829
  19830	tst.l		%d1			# did dfetch fail?
  19831	bne.l		facc_in_l		# yes
  19832
  19833load_long_cont:
  19834	fmov.l		%d0, %fp0		# read into %fp0;convert to xprec
  19835	fmovm.x		&0x80, FP_SRC(%a6)	# return src op in FP_SRC
  19836
  19837	fbeq.w		load_long_zero		# src op is a ZERO
  19838	rts
  19839load_long_zero:
  19840	mov.b		&ZERO, STAG(%a6)	# set optype tag to ZERO
  19841	rts
  19842
  19843load_long_immed:
  19844	bsr.l		_imem_read_long		# fetch src operand immed data
  19845
  19846	tst.l		%d1			# did ifetch fail?
  19847	bne.l		funimp_iacc		# yes
  19848	bra.b		load_long_cont
  19849
  19850#########################################
  19851# load a WORD into %fp0:		#
  19852#	-number can't fault		#
  19853#	(1) calc ea			#
  19854#	(2) read 2 bytes into L_SCR1	#
  19855#	(3) fmov.w into %fp0		#
  19856#########################################
  19857load_word:
       # the word is returned in %d0 by either read routine (data memory or
       # immediate) and converted by fmov.w; a zero result is tagged ZERO
  19858	movq.l		&0x2, %d0		# pass: 2 (bytes)
  19859	bsr.l		_dcalc_ea		# calc <ea>; <ea> in %a0
  19860
  19861	cmpi.b		SPCOND_FLG(%a6),&immed_flg
  19862	beq.b		load_word_immed		# immed_flg set: operand is immediate data
  19863
  19864	bsr.l		_dmem_read_word		# fetch src operand from memory
  19865
  19866	tst.l		%d1			# did dfetch fail?
  19867	bne.l		facc_in_w		# yes
  19868
  19869load_word_cont:
  19870	fmov.w		%d0, %fp0		# read into %fp0;convert to xprec
  19871	fmovm.x		&0x80, FP_SRC(%a6)	# return src op in FP_SRC
  19872
  19873	fbeq.w		load_word_zero		# src op is a ZERO
  19874	rts
  19875load_word_zero:
  19876	mov.b		&ZERO, STAG(%a6)	# set optype tag to ZERO
  19877	rts
  19878
  19879load_word_immed:
  19880	bsr.l		_imem_read_word		# fetch src operand immed data
  19881
  19882	tst.l		%d1			# did ifetch fail?
  19883	bne.l		funimp_iacc		# yes
  19884	bra.b		load_word_cont
  19885
  19886#########################################
  19887# load a BYTE into %fp0:		#
  19888#	-number can't fault		#
  19889#	(1) calc ea			#
  19890#	(2) read 1 byte into L_SCR1	#
  19891#	(3) fmov.b into %fp0		#
  19892#########################################
  19893load_byte:
       # the byte is returned in %d0 by either read routine and converted by
       # fmov.b; a zero result is tagged ZERO
  19894	movq.l		&0x1, %d0		# pass: 1 (byte)
  19895	bsr.l		_dcalc_ea		# calc <ea>; <ea> in %a0
  19896
  19897	cmpi.b		SPCOND_FLG(%a6),&immed_flg
  19898	beq.b		load_byte_immed		# immed_flg set: operand is immediate data
  19899
  19900	bsr.l		_dmem_read_byte		# fetch src operand from memory
  19901
  19902	tst.l		%d1			# did dfetch fail?
  19903	bne.l		facc_in_b		# yes
  19904
  19905load_byte_cont:
  19906	fmov.b		%d0, %fp0		# read into %fp0;convert to xprec
  19907	fmovm.x		&0x80, FP_SRC(%a6)	# return src op in FP_SRC
  19908
  19909	fbeq.w		load_byte_zero		# src op is a ZERO
  19910	rts
  19911load_byte_zero:
  19912	mov.b		&ZERO, STAG(%a6)	# set optype tag to ZERO
  19913	rts
  19914
  19915load_byte_immed:
       # the immediate is fetched as a whole word; fmov.b at load_byte_cont
       # then consumes only the low byte of %d0
  19916	bsr.l		_imem_read_word		# fetch src operand immed data
  19917
  19918	tst.l		%d1			# did ifetch fail?
  19919	bne.l		funimp_iacc		# yes
  19920	bra.b		load_byte_cont
  19921
  19922#########################################
  19923# load a SGL into %fp0:			#
  19924#	-number can't fault		#
  19925#	(1) calc ea			#
  19926#	(2) read 4 bytes into L_SCR1	#
  19927#	(3) fmov.s into %fp0		#
  19928#########################################
  19929load_sgl:
       # Fetch a single-precision source operand (data memory or immediate),
       # tag it, and return it in FP_SRC in extended precision.
       # load_sgl_cont reads the raw single back from L_SCR1 (set_tag_s takes
       # a pointer, and fmov.s loads from it), so BOTH fetch paths must store
       # %d0 into L_SCR1 before branching there.
  19930	movq.l		&0x4, %d0		# pass: 4 (bytes)
  19931	bsr.l		_dcalc_ea		# calc <ea>; <ea> in %a0
  19932
  19933	cmpi.b		SPCOND_FLG(%a6),&immed_flg
  19934	beq.b		load_sgl_immed		# immed_flg set: operand is immediate data
  19935
  19936	bsr.l		_dmem_read_long		# fetch src operand from memory
  19937	mov.l		%d0, L_SCR1(%a6)	# store src op on stack
  19938
  19939	tst.l		%d1			# did dfetch fail?
  19940	bne.l		facc_in_l		# yes
  19941
  19942load_sgl_cont:
  19943	lea		L_SCR1(%a6), %a0	# pass: ptr to sgl src op
  19944	bsr.l		set_tag_s		# determine src type tag
  19945	mov.b		%d0, STAG(%a6)		# save src optype tag on stack
  19946
  19947	cmpi.b		%d0, &DENORM		# is it a sgl DENORM?
  19948	beq.w		get_sgl_denorm		# yes
  19949
  19950	cmpi.b		%d0, &SNAN		# is it a sgl SNAN?
  19951	beq.w		get_sgl_snan		# yes
  19952
  19953	fmov.s		L_SCR1(%a6), %fp0	# read into %fp0;convert to xprec
  19954	fmovm.x		&0x80, FP_SRC(%a6)	# return src op in FP_SRC
  19955	rts
  19956
  19957load_sgl_immed:
  19958	bsr.l		_imem_read_long		# fetch src operand immed data
  19959	mov.l		%d0, L_SCR1(%a6)	# store src op on stack (was missing: L_SCR1 was stale)
  19960	tst.l		%d1			# did ifetch fail?
  19961	bne.l		funimp_iacc		# yes
  19962	bra.b		load_sgl_cont
  19963
  19964# must convert sgl denorm format to an Xprec denorm fmt suitable for
  19965# normalization...
  19966# %a0 : points to sgl denorm
  19967get_sgl_denorm:
       # builds {sign, zero exponent, left-aligned mantissa} in FP_SRC, lets
       # norm() normalize it, then inserts the exponent 0x3f81 - shift count
  19968	clr.w		FP_SRC_EX(%a6)
  19969	bfextu		(%a0){&9:&23}, %d0	# fetch sgl hi(_mantissa)
  19970	lsl.l		&0x8, %d0		# 23-bit fraction now sits in bits 30..8
  19971	mov.l		%d0, FP_SRC_HI(%a6)	# set ext hi(_mantissa)
  19972	clr.l		FP_SRC_LO(%a6)		# set ext lo(_mantissa)
  19973
  19974	clr.w		FP_SRC_EX(%a6)		# repeats the clr.w above (redundant)
  19975	btst		&0x7, (%a0)		# is sgn bit set?
  19976	beq.b		sgl_dnrm_norm
  19977	bset		&0x7, FP_SRC_EX(%a6)	# set sgn of xprec value
  19978
  19979sgl_dnrm_norm:
  19980	lea		FP_SRC(%a6), %a0
  19981	bsr.l		norm			# normalize number
  19982	mov.w		&0x3f81, %d1		# xprec exp = 0x3f81
  19983	sub.w		%d0, %d1		# exp = 0x3f81 - shft amt.
  19984	or.w		%d1, FP_SRC_EX(%a6)	# {sgn,exp}
  19985
  19986	mov.b		&NORM, STAG(%a6)	# fix src type tag
  19987	rts
  19988
  19989# convert sgl to ext SNAN
  19990# %a0 : points to sgl SNAN
  19991get_sgl_snan:
       # STAG is left as set by the caller; only FP_SRC is built here
  19992	mov.w		&0x7fff, FP_SRC_EX(%a6) # set exp of SNAN
  19993	bfextu		(%a0){&9:&23}, %d0	# 23-bit fraction of the sgl
  19994	lsl.l		&0x8, %d0		# extract and insert hi(man)
  19995	mov.l		%d0, FP_SRC_HI(%a6)
  19996	clr.l		FP_SRC_LO(%a6)
  19997
  19998	btst		&0x7, (%a0)		# see if sign of SNAN is set
  19999	beq.b		no_sgl_snan_sgn
  20000	bset		&0x7, FP_SRC_EX(%a6)	# copy the sign into the xprec value
  20001no_sgl_snan_sgn:
  20002	rts
  20003
  20004#########################################
  20005# load a DBL into %fp0:			#
  20006#	-number can't fault		#
  20007#	(1) calc ea			#
  20008#	(2) read 8 bytes into L_SCR(1,2)#
  20009#	(3) fmov.d into %fp0		#
  20010#########################################
  20011load_dbl:
       # both fetch paths read the 8 bytes into the buffer at L_SCR1 (passed
       # in %a1), so load_dbl_cont can tag and load it from there
  20012	movq.l		&0x8, %d0		# pass: 8 (bytes)
  20013	bsr.l		_dcalc_ea		# calc <ea>; <ea> in %a0
  20014
  20015	cmpi.b		SPCOND_FLG(%a6),&immed_flg
  20016	beq.b		load_dbl_immed		# immed_flg set: operand is immediate data
  20017
  20018	lea		L_SCR1(%a6), %a1	# pass: ptr to input dbl tmp space
  20019	movq.l		&0x8, %d0		# pass: # bytes to read
  20020	bsr.l		_dmem_read		# fetch src operand from memory
  20021
  20022	tst.l		%d1			# did dfetch fail?
  20023	bne.l		facc_in_d		# yes
  20024
  20025load_dbl_cont:
  20026	lea		L_SCR1(%a6), %a0	# pass: ptr to input dbl
  20027	bsr.l		set_tag_d		# determine src type tag
  20028	mov.b		%d0, STAG(%a6)		# set src optype tag
  20029
  20030	cmpi.b		%d0, &DENORM		# is it a dbl DENORM?
  20031	beq.w		get_dbl_denorm		# yes
  20032
  20033	cmpi.b		%d0, &SNAN		# is it a dbl SNAN?
  20034	beq.w		get_dbl_snan		# yes
  20035
  20036	fmov.d		L_SCR1(%a6), %fp0	# read into %fp0;convert to xprec
  20037	fmovm.x		&0x80, FP_SRC(%a6)	# return src op in FP_SRC
  20038	rts
  20039
  20040load_dbl_immed:
  20041	lea		L_SCR1(%a6), %a1	# pass: ptr to input dbl tmp space
  20042	movq.l		&0x8, %d0		# pass: # bytes to read
  20043	bsr.l		_imem_read		# fetch src operand immed data
  20044
  20045	tst.l		%d1			# did ifetch fail?
  20046	bne.l		funimp_iacc		# yes
  20047	bra.b		load_dbl_cont
  20048
  20049# must convert dbl denorm format to an Xprec denorm fmt suitable for
  20050# normalization...
  20051# %a0 : loc. of dbl denorm
  20052get_dbl_denorm:
       # builds {sign, zero exponent, mantissa} in FP_SRC from the 52-bit dbl
       # fraction, lets norm() normalize it, then inserts 0x3c01 - shift count
  20053	clr.w		FP_SRC_EX(%a6)
  20054	bfextu		(%a0){&12:&31}, %d0	# fetch hi(_mantissa)
  20055	mov.l		%d0, FP_SRC_HI(%a6)	# upper 31 fraction bits, bit 31 left clear
  20056	bfextu		4(%a0){&11:&21}, %d0	# fetch lo(_mantissa)
  20057	mov.l		&0xb, %d1
  20058	lsl.l		%d1, %d0		# left-align the remaining 21 fraction bits
  20059	mov.l		%d0, FP_SRC_LO(%a6)
  20060
  20061	btst		&0x7, (%a0)		# is sgn bit set?
  20062	beq.b		dbl_dnrm_norm
  20063	bset		&0x7, FP_SRC_EX(%a6)	# set sgn of xprec value
  20064
  20065dbl_dnrm_norm:
  20066	lea		FP_SRC(%a6), %a0
  20067	bsr.l		norm			# normalize number
  20068	mov.w		&0x3c01, %d1		# xprec exp = 0x3c01
  20069	sub.w		%d0, %d1		# exp = 0x3c01 - shft amt.
  20070	or.w		%d1, FP_SRC_EX(%a6)	# {sgn,exp}
  20071
  20072	mov.b		&NORM, STAG(%a6)	# fix src type tag
  20073	rts
  20074
  20075# convert dbl to ext SNAN
  20076# %a0 : points to dbl SNAN
  20077get_dbl_snan:
       # STAG is left as set by the caller; only FP_SRC is built here
  20078	mov.w		&0x7fff, FP_SRC_EX(%a6) # set exp of SNAN
  20079
  20080	bfextu		(%a0){&12:&31}, %d0	# fetch hi(_mantissa)
  20081	mov.l		%d0, FP_SRC_HI(%a6)	# upper 31 fraction bits, bit 31 left clear
  20082	bfextu		4(%a0){&11:&21}, %d0	# fetch lo(_mantissa)
  20083	mov.l		&0xb, %d1
  20084	lsl.l		%d1, %d0		# left-align the remaining 21 fraction bits
  20085	mov.l		%d0, FP_SRC_LO(%a6)
  20086
  20087	btst		&0x7, (%a0)		# see if sign of SNAN is set
  20088	beq.b		no_dbl_snan_sgn
  20089	bset		&0x7, FP_SRC_EX(%a6)	# copy the sign into the xprec value
  20090no_dbl_snan_sgn:
  20091	rts
  20092
  20093#################################################
  20094# load a Xprec into FP_SRC:			#
  20095#	-number can't fault			#
  20096#	(1) calc ea				#
  20097#	(2) read 12 bytes into FP_SRC		#
  20098#	(3) tag it; fix an UNNORM		#
  20099#################################################
  20100load_ext:
  20101	mov.l		&0xc, %d0		# pass: 12 (bytes)
  20102	bsr.l		_dcalc_ea		# calc <ea>
  20103
       # unlike the other loaders, no immed_flg check is made on this path
  20104	lea		FP_SRC(%a6), %a1	# pass: ptr to input ext tmp space
  20105	mov.l		&0xc, %d0		# pass: # of bytes to read
  20106	bsr.l		_dmem_read		# fetch src operand from memory
  20107
  20108	tst.l		%d1			# did dfetch fail?
  20109	bne.l		facc_in_x		# yes
  20110
  20111	lea		FP_SRC(%a6), %a0	# pass: ptr to src op
  20112	bsr.l		set_tag_x		# determine src type tag
  20113
  20114	cmpi.b		%d0, &UNNORM		# is the src op an UNNORM?
  20115	beq.b		load_ext_unnorm		# yes
  20116
  20117	mov.b		%d0, STAG(%a6)		# store the src optype tag
  20118	rts
  20119
  20120load_ext_unnorm:
  20121	bsr.l		unnorm_fix		# fix the src UNNORM
  20122	mov.b		%d0, STAG(%a6)		# store the src optype tag
  20123	rts
  20124
  20125#################################################
  20126# load a packed into %fp0:			#
  20127#	-number can't fault			#
  20128#	(1) calc ea				#
  20129#	(2) read 12 bytes into L_SCR(1,2,3)	#
  20130#	(3) fmov.x into %fp0			#
  20131#################################################
  20132load_packed:
       # get_packed does the fetch; the code below tags what is then in FP_SRC
       # (presumably the converted xprec value - TODO confirm in get_packed)
  20133	bsr.l		get_packed
  20134
  20135	lea		FP_SRC(%a6),%a0		# pass ptr to src op
  20136	bsr.l		set_tag_x		# determine src type tag
  20137	cmpi.b		%d0,&UNNORM		# is the src op an UNNORM ZERO?
  20138	beq.b		load_packed_unnorm	# yes
  20139
  20140	mov.b		%d0,STAG(%a6)		# store the src optype tag
  20141	rts
  20142
  20143load_packed_unnorm:
  20144	bsr.l		unnorm_fix		# fix the UNNORM ZERO
  20145	mov.b		%d0,STAG(%a6)		# store the src optype tag
  20146	rts
  20147
  20148#########################################################################
  20149# XDEF ****************************************************************	#
  20150#	fout(): move from fp register to memory or data register	#
  20151#									#
  20152# XREF ****************************************************************	#
  20153#	_round() - needed to create EXOP for sgl/dbl precision		#
  20154#	norm() - needed to create EXOP for extended precision		#
  20155#	ovf_res() - create default overflow result for sgl/dbl precision#
  20156#	unf_res() - create default underflow result for sgl/dbl prec.	#
  20157#	dst_dbl() - create rounded dbl precision result.		#
  20158#	dst_sgl() - create rounded sgl precision result.		#
  20159#	fetch_dreg() - fetch dynamic k-factor reg for packed.		#
  20160#	bindec() - convert FP binary number to packed number.		#
  20161#	_mem_write() - write data to memory.				#
  20162#	_mem_write2() - write data to memory unless supv mode -(a7) exc.#
  20163#	_dmem_write_{byte,word,long}() - write data to memory.		#
  20164#	store_dreg_{b,w,l}() - store data to data register file.	#
  20165#	facc_out_{b,w,l,d,x}() - data access error occurred.		#
  20166#									#
  20167# INPUT ***************************************************************	#
  20168#	a0 = pointer to extended precision source operand		#
  20169#	d0 = round prec,mode						#
  20170#									#
  20171# OUTPUT **************************************************************	#
  20172#	fp0 : intermediate underflow or overflow result if		#
  20173#	      OVFL/UNFL occurred for a sgl or dbl operand		#
  20174#									#
  20175# ALGORITHM ***********************************************************	#
  20176#	This routine is accessed by many handlers that need to do an	#
  20177# opclass three move of an operand out to memory.			#
  20178#	Decode an fmove out (opclass 3) instruction to determine if	#
  20179# it's b,w,l,s,d,x, or p in size. b,w,l can be stored to either a data	#
  20180# register or memory. The algorithm uses a standard "fmove" to create	#
  20181# the rounded result. Also, since exceptions are disabled, this also	#
  20182# creates the correct OPERR default result if appropriate.		#
  20183#	For sgl or dbl precision, overflow or underflow can occur. If	#
  20184# either occurs and is enabled, the EXOP must be created.		#
  20185#	For extended precision, the stacked <ea> must be fixed along	#
  20186# w/ the address index register as appropriate w/ _calc_ea_fout(). If	#
  20187# the source is a denorm and if underflow is enabled, an EXOP must be	#
  20188# created.								#
  20189#	For packed, the k-factor must be fetched from the instruction	#
  20190# word or a data register. The <ea> must be fixed as w/ extended	#
  20191# precision. Then, bindec() is called to create the appropriate		#
  20192# packed result.							#
  20193#	If at any time an access error is flagged by one of the move-	#
  20194# to-memory routines, then a special exit must be made so that the	#
  20195# access error can be handled properly.					#
  20196#									#
  20197#########################################################################
  20198
  20199	global		fout
  20200fout:
       # dispatch on the 3-bit dst format field: fetch the 16-bit offset from
       # tbl_fout into %a1, then jump to tbl_fout + offset
  20201	bfextu		EXC_CMDREG(%a6){&3:&3},%d1 # extract dst fmt
  20202	mov.w		(tbl_fout.b,%pc,%d1.w*2),%a1 # use as index
  20203	jmp		(tbl_fout.b,%pc,%a1)	# jump to routine
  20204
  20205	swbeg		&0x8
  20206tbl_fout:
  20207	short		fout_long	-	tbl_fout
  20208	short		fout_sgl	-	tbl_fout
  20209	short		fout_ext	-	tbl_fout
  20210	short		fout_pack	-	tbl_fout
  20211	short		fout_word	-	tbl_fout
  20212	short		fout_dbl	-	tbl_fout
  20213	short		fout_byte	-	tbl_fout
  20214	short		fout_pack	-	tbl_fout	# formats 3 and 7 share fout_pack
  20215
  20216#################################################################
  20217# fmove.b out ###################################################
  20218#################################################################
  20219
  20220# Only "Unimplemented Data Type" exceptions enter here. The operand
  20221# is either a DENORM or a NORM.
  20222fout_byte:
       # in: %a0 = ptr to src operand, %d0 = rnd prec,mode (loaded into FPCR)
  20223	tst.b		STAG(%a6)		# is operand normalized?
  20224	bne.b		fout_byte_denorm	# no
  20225
  20226	fmovm.x		SRC(%a0),&0x80		# load value
  20227
  20228fout_byte_norm:
  20229	fmov.l		%d0,%fpcr		# insert rnd prec,mode
  20230
  20231	fmov.b		%fp0,%d0		# exec move out w/ correct rnd mode
  20232
  20233	fmov.l		&0x0,%fpcr		# clear FPCR
  20234	fmov.l		%fpsr,%d1		# fetch FPSR
  20235	or.w		%d1,2+USER_FPSR(%a6)	# save new exc,accrued bits
  20236
  20237	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
  20238	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
  20239	beq.b		fout_byte_dn		# must save to integer regfile
  20240
  20241	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
  20242	bsr.l		_dmem_write_byte	# write byte
  20243
  20244	tst.l		%d1			# did dstore fail?
  20245	bne.l		facc_out_b		# yes
  20246
  20247	rts
  20248
  20249fout_byte_dn:
  20250	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
  20251	andi.w		&0x7,%d1		# keep only the 3-bit register number
  20252	bsr.l		store_dreg_b
  20253	rts
  20254
  20255fout_byte_denorm:
       # substitute a tiny sgl value carrying the operand's sign for the
       # DENORM, then run the normal move-out path on it
  20256	mov.l		SRC_EX(%a0),%d1
  20257	andi.l		&0x80000000,%d1		# keep DENORM sign
  20258	ori.l		&0x00800000,%d1		# make smallest sgl
  20259	fmov.s		%d1,%fp0
  20260	bra.b		fout_byte_norm
  20261
  20262#################################################################
  20263# fmove.w out ###################################################
  20264#################################################################
  20265
  20266# Only "Unimplemented Data Type" exceptions enter here. The operand
  20267# is either a DENORM or a NORM.
  20268fout_word:
       # in: %a0 = ptr to src operand, %d0 = rnd prec,mode (loaded into FPCR)
  20269	tst.b		STAG(%a6)		# is operand normalized?
  20270	bne.b		fout_word_denorm	# no
  20271
  20272	fmovm.x		SRC(%a0),&0x80		# load value
  20273
  20274fout_word_norm:
  20275	fmov.l		%d0,%fpcr		# insert rnd prec:mode
  20276
  20277	fmov.w		%fp0,%d0		# exec move out w/ correct rnd mode
  20278
  20279	fmov.l		&0x0,%fpcr		# clear FPCR
  20280	fmov.l		%fpsr,%d1		# fetch FPSR
  20281	or.w		%d1,2+USER_FPSR(%a6)	# save new exc,accrued bits
  20282
  20283	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
  20284	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
  20285	beq.b		fout_word_dn		# must save to integer regfile
  20286
  20287	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
  20288	bsr.l		_dmem_write_word	# write word
  20289
  20290	tst.l		%d1			# did dstore fail?
  20291	bne.l		facc_out_w		# yes
  20292
  20293	rts
  20294
  20295fout_word_dn:
  20296	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
  20297	andi.w		&0x7,%d1		# keep only the 3-bit register number
  20298	bsr.l		store_dreg_w
  20299	rts
  20300
  20301fout_word_denorm:
       # substitute a tiny sgl value carrying the operand's sign for the
       # DENORM, then run the normal move-out path on it
  20302	mov.l		SRC_EX(%a0),%d1
  20303	andi.l		&0x80000000,%d1		# keep DENORM sign
  20304	ori.l		&0x00800000,%d1		# make smallest sgl
  20305	fmov.s		%d1,%fp0
  20306	bra.b		fout_word_norm
  20307
  20308#################################################################
  20309# fmove.l out ###################################################
  20310#################################################################
  20311
  20312# Only "Unimplemented Data Type" exceptions enter here. The operand
  20313# is either a DENORM or a NORM.
  20314fout_long:
       # in: %a0 = ptr to src operand, %d0 = rnd prec,mode (loaded into FPCR)
  20315	tst.b		STAG(%a6)		# is operand normalized?
  20316	bne.b		fout_long_denorm	# no
  20317
  20318	fmovm.x		SRC(%a0),&0x80		# load value
  20319
  20320fout_long_norm:
  20321	fmov.l		%d0,%fpcr		# insert rnd prec:mode
  20322
  20323	fmov.l		%fp0,%d0		# exec move out w/ correct rnd mode
  20324
  20325	fmov.l		&0x0,%fpcr		# clear FPCR
  20326	fmov.l		%fpsr,%d1		# fetch FPSR
  20327	or.w		%d1,2+USER_FPSR(%a6)	# save new exc,accrued bits
  20328
  20329fout_long_write:
  20330	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
  20331	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
  20332	beq.b		fout_long_dn		# must save to integer regfile
  20333
  20334	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
  20335	bsr.l		_dmem_write_long	# write long
  20336
  20337	tst.l		%d1			# did dstore fail?
  20338	bne.l		facc_out_l		# yes
  20339
  20340	rts
  20341
  20342fout_long_dn:
  20343	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
  20344	andi.w		&0x7,%d1		# keep only the 3-bit register number
  20345	bsr.l		store_dreg_l
  20346	rts
  20347
  20348fout_long_denorm:
       # substitute a tiny sgl value carrying the operand's sign for the
       # DENORM, then run the normal move-out path on it
  20349	mov.l		SRC_EX(%a0),%d1
  20350	andi.l		&0x80000000,%d1		# keep DENORM sign
  20351	ori.l		&0x00800000,%d1		# make smallest sgl
  20352	fmov.s		%d1,%fp0
  20353	bra.b		fout_long_norm
  20354
  20355#################################################################
  20356# fmove.x out ###################################################
  20357#################################################################
  20358
  20359# Only "Unimplemented Data Type" exceptions enter here. The operand
  20360# is either a DENORM or a NORM.
  20361# The DENORM causes an Underflow exception.
  20362fout_ext:
  20363
  20364# we copy the extended precision result to FP_SCR0 so that the reserved
  20365# 16-bit field gets zeroed. we do this since we promise not to disturb
  20366# what's at SRC(a0).
  20367	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
  20368	clr.w		2+FP_SCR0_EX(%a6)	# clear reserved field
  20369	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
  20370	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
  20371
  20372	fmovm.x		SRC(%a0),&0x80		# return result
  20373
  20374	bsr.l		_calc_ea_fout		# fix stacked <ea>
  20375
  20376	mov.l		%a0,%a1			# pass: dst addr
  20377	lea		FP_SCR0(%a6),%a0	# pass: src addr
  20378	mov.l		&0xc,%d0		# pass: opsize is 12 bytes
  20379
  20380# we must not yet write the extended precision data to the stack
  20381# in the pre-decrement case from supervisor mode or else we'll corrupt
  20382# the stack frame. so, leave it in FP_SRC for now and deal with it later...
  20383	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
  20384	beq.b		fout_ext_a7		# mda7_flg set: use _mem_write2 instead
  20385
  20386	bsr.l		_dmem_write		# write ext prec number to memory
  20387
  20388	tst.l		%d1			# did dstore fail?
  20389	bne.w		fout_ext_err		# yes
  20390
  20391	tst.b		STAG(%a6)		# is operand normalized?
  20392	bne.b		fout_ext_denorm		# no
  20393	rts
  20394
  20395# the number is a DENORM. must set the underflow exception bit
  20396fout_ext_denorm:
  20397	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set underflow exc bit
  20398
  20399	mov.b		FPCR_ENABLE(%a6),%d0
  20400	andi.b		&0x0a,%d0		# is UNFL or INEX enabled?
  20401	bne.b		fout_ext_exc		# yes
  20402	rts
  20403
  20404# we don't want to do the write if the exception occurred in supervisor mode
  20405# so _mem_write2() handles this for us.
  20406fout_ext_a7:
  20407	bsr.l		_mem_write2		# write ext prec number to memory
  20408
  20409	tst.l		%d1			# did dstore fail?
  20410	bne.w		fout_ext_err		# yes
  20411
  20412	tst.b		STAG(%a6)		# is operand normalized?
  20413	bne.b		fout_ext_denorm		# no
  20414	rts
  20415
  20416fout_ext_exc:
       # build the EXOP from the FP_SCR0 copy: normalize the mantissa and use
       # the negated shift count (15 bits) as the exponent, keeping the sign
  20417	lea		FP_SCR0(%a6),%a0
  20418	bsr.l		norm			# normalize the mantissa
  20419	neg.w		%d0			# new exp = -(shft amt)
  20420	andi.w		&0x7fff,%d0
  20421	andi.w		&0x8000,FP_SCR0_EX(%a6)	# keep only old sign
  20422	or.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
  20423	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
  20424	rts
  20425
  20426fout_ext_err:
  20427	mov.l		EXC_A6(%a6),(%a6)	# fix stacked a6
  20428	bra.l		facc_out_x
  20429
  20430#########################################################################
  20431# fmove.s out ###########################################################
  20432#########################################################################
  20433fout_sgl:
       # fmove.s out. as used below: d0 = rounding info (only the bits kept
       # by the 0x30 mask survive; single precision is then forced) and
       # a0 = ptr to the extended precision source operand.
  20434	andi.b		&0x30,%d0		# clear rnd prec
  20435	ori.b		&s_mode*0x10,%d0	# insert sgl prec
  20436	mov.l		%d0,L_SCR3(%a6)		# save rnd prec,mode on stack
  20437
  20438#
  20439# operand is a normalized number. first, we check to see if the move out
  20440# would cause either an underflow or overflow. these cases are handled
  20441# separately. otherwise, set the FPCR to the proper rounding mode and
  20442# execute the move.
  20443#
  20444	mov.w		SRC_EX(%a0),%d0		# extract exponent
  20445	andi.w		&0x7fff,%d0		# strip sign
  20446
  20447	cmpi.w		%d0,&SGL_HI		# will operand overflow?
  20448	bgt.w		fout_sgl_ovfl		# yes; go handle OVFL
  20449	beq.w		fout_sgl_may_ovfl	# maybe; go handle possible OVFL
  20450	cmpi.w		%d0,&SGL_LO		# will operand underflow?
  20451	blt.w		fout_sgl_unfl		# yes; go handle underflow
  20452
  20453#
  20454# NORMs(in range) can be stored out by a simple "fmov.s"
  20455# Unnormalized inputs can come through this point.
  20456# fout_sgl_may_ovfl also branches here when rounding did not overflow.
  20457fout_sgl_exg:
  20458	fmovm.x		SRC(%a0),&0x80		# fetch fop from stack
  20459
  20460	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
  20461	fmov.l		&0x0,%fpsr		# clear FPSR
  20462
  20463	fmov.s		%fp0,%d0		# store does convert and round
  20464
  20465	fmov.l		&0x0,%fpcr		# clear FPCR
  20466	fmov.l		%fpsr,%d1		# save FPSR
  20467
  20468	or.w		%d1,2+USER_FPSR(%a6)	# set possible inex2/ainex
  20469
  20470fout_sgl_exg_write:
  20471	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
  20472	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
  20473	beq.b		fout_sgl_exg_write_dn	# must save to integer regfile
  20474
  20475	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
  20476	bsr.l		_dmem_write_long	# write long
  20477
  20478	tst.l		%d1			# did dstore fail?
  20479	bne.l		facc_out_l		# yes
  20480
  20481	rts
  20482
  20483fout_sgl_exg_write_dn:
  20484	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
  20485	andi.w		&0x7,%d1		# word-sized mask: store_dreg_l indexes with d1.w
  20486	bsr.l		store_dreg_l		# d0 = sgl result, d1 = Dn index
  20487	rts
  20488
  20489#
  20490# here, we know that the operand would UNFL if moved out to single prec,
  20491# so, denorm and round and then use generic store single routine to
  20492# write the value to memory.
  20493#
  20494fout_sgl_unfl:
  20495	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set UNFL
  20496
  20497	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
  20498	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
  20499	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
  20500	mov.l		%a0,-(%sp)		# save src ptr; popped by fout_sd_exc_unfl or discarded at exit
  20501
  20502	clr.l		%d0			# pass: S.F. = 0
  20503
  20504	cmpi.b		STAG(%a6),&DENORM	# fetch src optype tag
  20505	bne.b		fout_sgl_unfl_cont	# not a DENORM; only DENORMs fall through to norm
  20506
  20507	lea		FP_SCR0(%a6),%a0
  20508	bsr.l		norm			# normalize the DENORM
  20509
  20510fout_sgl_unfl_cont:
  20511	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
  20512	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
  20513	bsr.l		unf_res			# calc default underflow result
  20514
  20515	lea		FP_SCR0(%a6),%a0	# pass: ptr to fop
  20516	bsr.l		dst_sgl			# convert to single prec
  20517
  20518	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
  20519	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
  20520	beq.b		fout_sgl_unfl_dn	# must save to integer regfile
  20521
  20522	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
  20523	bsr.l		_dmem_write_long	# write long
  20524
  20525	tst.l		%d1			# did dstore fail?
  20526	bne.l		facc_out_l		# yes
  20527
  20528	bra.b		fout_sgl_unfl_chkexc
  20529
  20530fout_sgl_unfl_dn:
  20531	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
  20532	andi.w		&0x7,%d1		# word-sized mask: store_dreg_l indexes with d1.w
  20533	bsr.l		store_dreg_l		# d0 = sgl result, d1 = Dn index
  20534
  20535fout_sgl_unfl_chkexc:
  20536	mov.b		FPCR_ENABLE(%a6),%d1
  20537	andi.b		&0x0a,%d1		# is UNFL or INEX enabled?
  20538	bne.w		fout_sd_exc_unfl	# yes; it pops the saved src ptr itself
  20539	addq.l		&0x4,%sp		# no; discard the saved src ptr
  20540	rts
  20541
  20542#
  20543# it's definitely an overflow so call ovf_res to get the correct answer
  20544#
  20545fout_sgl_ovfl:
  20546	tst.b		3+SRC_HI(%a0)		# any bits in the low byte of hi(man)? (dropped by sgl)
  20547	bne.b		fout_sgl_ovfl_inex2
  20548	tst.l		SRC_LO(%a0)		# any bits in lo(man)? (dropped by sgl)
  20549	bne.b		fout_sgl_ovfl_inex2
  20550	ori.w		&ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
  20551	bra.b		fout_sgl_ovfl_cont
  20552fout_sgl_ovfl_inex2:
  20553	ori.w		&ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2
  20554
  20555fout_sgl_ovfl_cont:
  20556	mov.l		%a0,-(%sp)		# save src ptr; popped by fout_sd_exc_ovfl or discarded at exit
  20557
  20558# call ovf_res() w/ sgl prec and the correct rnd mode to create the default
  20559# overflow result. DON'T save the returned ccodes from ovf_res() since
  20560# fmove out doesn't alter them.
  20561	tst.b		SRC_EX(%a0)		# is operand negative?
  20562	smi		%d1			# set if so
  20563	mov.l		L_SCR3(%a6),%d0		# pass: sgl prec,rnd mode
  20564	bsr.l		ovf_res			# calc OVFL result
  20565	fmovm.x		(%a0),&0x80		# load default overflow result
  20566	fmov.s		%fp0,%d0		# store to single
  20567
  20568	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
  20569	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
  20570	beq.b		fout_sgl_ovfl_dn	# must save to integer regfile
  20571
  20572	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
  20573	bsr.l		_dmem_write_long	# write long
  20574
  20575	tst.l		%d1			# did dstore fail?
  20576	bne.l		facc_out_l		# yes
  20577
  20578	bra.b		fout_sgl_ovfl_chkexc
  20579
  20580fout_sgl_ovfl_dn:
  20581	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
  20582	andi.w		&0x7,%d1		# word-sized mask: store_dreg_l indexes with d1.w
  20583	bsr.l		store_dreg_l		# d0 = sgl result, d1 = Dn index
  20584
  20585fout_sgl_ovfl_chkexc:
       # NOTE(review): this is the overflow path, yet the mask below is the
       # same UNFL/INEX mask tested on the underflow path; the OVFL enable
       # bit is not tested here. confirm whether that is intended.
  20586	mov.b		FPCR_ENABLE(%a6),%d1
  20587	andi.b		&0x0a,%d1		# is UNFL or INEX enabled?
  20588	bne.w		fout_sd_exc_ovfl	# yes; it pops the saved src ptr itself
  20589	addq.l		&0x4,%sp		# no; discard the saved src ptr
  20590	rts
  20591
  20592#
  20593# move out MAY overflow (exponent is equal to SGL_HI):
  20594# (1) copy the operand to FP_SCR0 with its exponent forced to 0x3fff
  20595# (2) load it w/ the saved rnd prec,mode so that the mantissa is rounded
  20596# (3) if |rounded value| is still < 2, the rounding did not carry into
  20597#	the exponent; store the original operand via fout_sgl_exg.
  20598#     otherwise the exponent increased, so handle it as an overflow.
  20599#
  20600fout_sgl_may_ovfl:
  20601	mov.w		SRC_EX(%a0),%d1		# fetch current sign
  20602	andi.w		&0x8000,%d1		# keep it,clear exp
  20603	ori.w		&0x3fff,%d1		# insert biased exp 0x3fff (true exp = 0)
  20604	mov.w		%d1,FP_SCR0_EX(%a6)	# insert scaled exp
  20605	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
  20606	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)
  20607
  20608	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
  20609
  20610	fmov.x		FP_SCR0(%a6),%fp0	# force fop to be rounded
  20611	fmov.l		&0x0,%fpcr		# clear FPCR
  20612
  20613	fabs.x		%fp0			# need absolute value
  20614	fcmp.b		%fp0,&0x2		# did exponent increase? (|fp0| >= 2)
  20615	fblt.w		fout_sgl_exg		# no; go finish NORM (a0 still -> src)
  20616	bra.w		fout_sgl_ovfl		# yes; go handle overflow
  20617
  20618################
  20619
  20620fout_sd_exc_unfl:
       # shared by the fmove.s and fmove.d out paths: build the EXOP that is
       # returned in fp1. entered with the saved src ptr on top of the stack
       # (pushed by the unfl/ovfl code before branching here).
  20621	mov.l		(%sp)+,%a0		# restore a0 (ptr to src operand)
  20622
  20623	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
  20624	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
  20625	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
  20626
  20627	cmpi.b		STAG(%a6),&DENORM	# was src a DENORM?
  20628	bne.b		fout_sd_exc_cont	# no
  20629
  20630	lea		FP_SCR0(%a6),%a0
  20631	bsr.l		norm			# normalize; d0 is negated below to form the exp
  20632	neg.l		%d0			# new exp = -(d0)
  20633	andi.w		&0x7fff,%d0		# keep 15 exponent bits
  20634	bfins		%d0,FP_SCR0_EX(%a6){&1:&15} # insert exp; bit 0 (sign) untouched
  20635	bra.b		fout_sd_exc_cont
  20636
  20637fout_sd_exc:
  20638fout_sd_exc_ovfl:
  20639	mov.l		(%sp)+,%a0		# restore a0
  20640
  20641	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
  20642	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
  20643	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
  20644
  20645fout_sd_exc_cont:
  20646	bclr		&0x7,FP_SCR0_EX(%a6)	# clear sign bit
  20647	sne.b		2+FP_SCR0_EX(%a6)	# set internal sign bit
  20648	lea		FP_SCR0(%a6),%a0	# pass: ptr to DENORM
  20649
       # build d1 for _round from the low byte of L_SCR3: after the shift by
       # 4, mask 0x0c goes to the hi word and mask 0x03 stays in the lo word.
       # presumably hi = rnd prec and lo = rnd mode; verify against _round.
  20650	mov.b		3+L_SCR3(%a6),%d1
  20651	lsr.b		&0x4,%d1
  20652	andi.w		&0x0c,%d1
  20653	swap		%d1
  20654	mov.b		3+L_SCR3(%a6),%d1
  20655	lsr.b		&0x4,%d1
  20656	andi.w		&0x03,%d1
  20657	clr.l		%d0			# pass: zero g,r,s
  20658	bsr.l		_round			# round the DENORM
  20659
  20660	tst.b		2+FP_SCR0_EX(%a6)	# is EXOP negative?
  20661	beq.b		fout_sd_exc_done	# no
  20662	bset		&0x7,FP_SCR0_EX(%a6)	# yes
  20663
  20664fout_sd_exc_done:
  20665	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
  20666	rts
  20667
  20668#################################################################
  20669# fmove.d out ###################################################
  20670#################################################################
  20671fout_dbl:
       # fmove.d out. as used below: d0 = rounding info (only the bits kept
       # by the 0x30 mask survive; double precision is then forced) and
       # a0 = ptr to the extended precision source operand. the 8-byte
       # result is written to the address held in EXC_EA.
  20672	andi.b		&0x30,%d0		# clear rnd prec
  20673	ori.b		&d_mode*0x10,%d0	# insert dbl prec
  20674	mov.l		%d0,L_SCR3(%a6)		# save rnd prec,mode on stack
  20675
  20676#
  20677# operand is a normalized number. first, we check to see if the move out
  20678# would cause either an underflow or overflow. these cases are handled
  20679# separately. otherwise, set the FPCR to the proper rounding mode and
  20680# execute the move.
  20681#
  20682	mov.w		SRC_EX(%a0),%d0		# extract exponent
  20683	andi.w		&0x7fff,%d0		# strip sign
  20684
  20685	cmpi.w		%d0,&DBL_HI		# will operand overflow?
  20686	bgt.w		fout_dbl_ovfl		# yes; go handle OVFL
  20687	beq.w		fout_dbl_may_ovfl	# maybe; go handle possible OVFL
  20688	cmpi.w		%d0,&DBL_LO		# will operand underflow?
  20689	blt.w		fout_dbl_unfl		# yes; go handle underflow
  20690
  20691#
  20692# NORMs(in range) can be stored out by a simple "fmov.d"
  20693# Unnormalized inputs can come through this point.
  20694# fout_dbl_may_ovfl also branches here when rounding did not overflow.
  20695fout_dbl_exg:
  20696	fmovm.x		SRC(%a0),&0x80		# fetch fop from stack
  20697
  20698	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
  20699	fmov.l		&0x0,%fpsr		# clear FPSR
  20700
  20701	fmov.d		%fp0,L_SCR1(%a6)	# store does convert and round
  20702
  20703	fmov.l		&0x0,%fpcr		# clear FPCR
  20704	fmov.l		%fpsr,%d0		# save FPSR
  20705
  20706	or.w		%d0,2+USER_FPSR(%a6)	# set possible inex2/ainex
  20707
  20708	mov.l		EXC_EA(%a6),%a1		# pass: dst addr
  20709	lea		L_SCR1(%a6),%a0		# pass: src addr
  20710	movq.l		&0x8,%d0		# pass: opsize is 8 bytes
  20711	bsr.l		_dmem_write		# store dbl fop to memory
  20712
  20713	tst.l		%d1			# did dstore fail?
  20714	bne.l		facc_out_d		# yes
  20715
  20716	rts					# no; so we're finished
  20717
  20718#
  20719# here, we know that the operand would UNFL if moved out to double prec,
  20720# so, denorm and round and then use generic store double routine to
  20721# write the value to memory.
  20722#
  20723fout_dbl_unfl:
  20724	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set UNFL
  20725
  20726	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
  20727	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
  20728	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
  20729	mov.l		%a0,-(%sp)		# save src ptr; popped by fout_sd_exc_unfl or discarded at exit
  20730
  20731	clr.l		%d0			# pass: S.F. = 0
  20732
  20733	cmpi.b		STAG(%a6),&DENORM	# fetch src optype tag
  20734	bne.b		fout_dbl_unfl_cont	# not a DENORM; only DENORMs fall through to norm
  20735
  20736	lea		FP_SCR0(%a6),%a0
  20737	bsr.l		norm			# normalize the DENORM
  20738
  20739fout_dbl_unfl_cont:
  20740	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
  20741	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
  20742	bsr.l		unf_res			# calc default underflow result
  20743
  20744	lea		FP_SCR0(%a6),%a0	# pass: ptr to fop
  20745	bsr.l		dst_dbl			# convert to double prec
  20746	mov.l		%d0,L_SCR1(%a6)		# hi lword of dbl result
  20747	mov.l		%d1,L_SCR2(%a6)		# lo lword of dbl result
  20748
  20749	mov.l		EXC_EA(%a6),%a1		# pass: dst addr
  20750	lea		L_SCR1(%a6),%a0		# pass: src addr
  20751	movq.l		&0x8,%d0		# pass: opsize is 8 bytes
  20752	bsr.l		_dmem_write		# store dbl fop to memory
  20753
  20754	tst.l		%d1			# did dstore fail?
  20755	bne.l		facc_out_d		# yes
  20756
  20757	mov.b		FPCR_ENABLE(%a6),%d1
  20758	andi.b		&0x0a,%d1		# is UNFL or INEX enabled?
  20759	bne.w		fout_sd_exc_unfl	# yes; it pops the saved src ptr itself
  20760	addq.l		&0x4,%sp		# no; discard the saved src ptr
  20761	rts
  20762
  20763#
  20764# it's definitely an overflow so call ovf_res to get the correct answer
  20765#
  20766fout_dbl_ovfl:
  20767	mov.w		2+SRC_LO(%a0),%d0	# low word of lo(man)
  20768	andi.w		&0x7ff,%d0		# any of the low 11 bits set? (dropped by dbl)
  20769	bne.b		fout_dbl_ovfl_inex2	# yes; result is inexact
  20770
  20771	ori.w		&ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
  20772	bra.b		fout_dbl_ovfl_cont
  20773fout_dbl_ovfl_inex2:
  20774	ori.w		&ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2
  20775
  20776fout_dbl_ovfl_cont:
  20777	mov.l		%a0,-(%sp)		# save src ptr; popped by fout_sd_exc_ovfl or discarded at exit
  20778
  20779# call ovf_res() w/ dbl prec and the correct rnd mode to create the default
  20780# overflow result. DON'T save the returned ccodes from ovf_res() since
  20781# fmove out doesn't alter them.
  20782	tst.b		SRC_EX(%a0)		# is operand negative?
  20783	smi		%d1			# set if so
  20784	mov.l		L_SCR3(%a6),%d0		# pass: dbl prec,rnd mode
  20785	bsr.l		ovf_res			# calc OVFL result
  20786	fmovm.x		(%a0),&0x80		# load default overflow result
  20787	fmov.d		%fp0,L_SCR1(%a6)	# store to double
  20788
  20789	mov.l		EXC_EA(%a6),%a1		# pass: dst addr
  20790	lea		L_SCR1(%a6),%a0		# pass: src addr
  20791	movq.l		&0x8,%d0		# pass: opsize is 8 bytes
  20792	bsr.l		_dmem_write		# store dbl fop to memory
  20793
  20794	tst.l		%d1			# did dstore fail?
  20795	bne.l		facc_out_d		# yes
  20796
       # NOTE(review): this is the overflow path, yet the mask below is the
       # same UNFL/INEX mask tested on the underflow path; the OVFL enable
       # bit is not tested here. confirm whether that is intended.
  20797	mov.b		FPCR_ENABLE(%a6),%d1
  20798	andi.b		&0x0a,%d1		# is UNFL or INEX enabled?
  20799	bne.w		fout_sd_exc_ovfl	# yes; it pops the saved src ptr itself
  20800	addq.l		&0x4,%sp		# no; discard the saved src ptr
  20801	rts
  20802
  20803#
  20804# move out MAY overflow (exponent is equal to DBL_HI):
  20805# (1) copy the operand to FP_SCR0 with its exponent forced to 0x3fff
  20806# (2) load it w/ the saved rnd prec,mode so that the mantissa is rounded
  20807# (3) if |rounded value| is still < 2, the rounding did not carry into
  20808#	the exponent; store the original operand via fout_dbl_exg.
  20809#     otherwise the exponent increased, so handle it as an overflow.
  20810#
  20811fout_dbl_may_ovfl:
  20812	mov.w		SRC_EX(%a0),%d1		# fetch current sign
  20813	andi.w		&0x8000,%d1		# keep it,clear exp
  20814	ori.w		&0x3fff,%d1		# insert biased exp 0x3fff (true exp = 0)
  20815	mov.w		%d1,FP_SCR0_EX(%a6)	# insert scaled exp
  20816	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
  20817	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)
  20818
  20819	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
  20820
  20821	fmov.x		FP_SCR0(%a6),%fp0	# force fop to be rounded
  20822	fmov.l		&0x0,%fpcr		# clear FPCR
  20823
  20824	fabs.x		%fp0			# need absolute value
  20825	fcmp.b		%fp0,&0x2		# did exponent increase? (|fp0| >= 2)
  20826	fblt.w		fout_dbl_exg		# no; go finish NORM (a0 still -> src)
  20827	bra.w		fout_dbl_ovfl		# yes; go handle overflow
  20828
  20829#########################################################################
  20830# XDEF ****************************************************************	#
  20831#	dst_dbl(): create double precision value from extended prec.	#
  20832#									#
  20833# XREF ****************************************************************	#
  20834#	None								#
  20835#									#
  20836# INPUT ***************************************************************	#
  20837#	a0 = pointer to source operand in extended precision		#
  20838#									#
  20839# OUTPUT **************************************************************	#
  20840#	d0 = hi(double precision result)				#
  20841#	d1 = lo(double precision result)				#
  20842#									#
  20843# ALGORITHM ***********************************************************	#
  20844#									#
  20845#  Changes extended precision to double precision.			#
  20846#  Note: no attempt is made to round the extended value to double.	#
  20847#	dbl_sign = ext_sign						#
  20848#	dbl_exp = ext_exp - $3fff(ext bias) + $7ff(dbl bias)		#
  20849#	get rid of ext integer bit					#
  20850#	dbl_mant = ext_mant{62:11}					#
  20851#									#
  20852#		---------------   ---------------    ---------------	#
  20853#  extended ->  |s|    exp    |   |1| ms mant   |    | ls mant     |	#
  20854#		---------------   ---------------    ---------------	#
  20855#		 95	    64    63 62	      32      31     11	  0	#
  20856#				     |			     |		#
  20857#				     |			     |		#
  20858#				     |			     |		#
  20859#			             v			     v		#
  20860#			      ---------------   ---------------		#
  20861#  double   ->		      |s|exp| mant  |   |  mant       |		#
  20862#			      ---------------   ---------------		#
  20863#			      63     51   32   31	       0	#
  20864#									#
  20865#########################################################################
  20866
  20867dst_dbl:
       # note: L_SCR1(%a6) and L_SCR2(%a6) are overwritten as scratch space.
  20868	clr.l		%d0			# clear d0
  20869	mov.w		FTEMP_EX(%a0),%d0	# get exponent
  20870	subi.w		&EXT_BIAS,%d0		# subtract extended precision bias
  20871	addi.w		&DBL_BIAS,%d0		# add double precision bias
  20872	tst.b		FTEMP_HI(%a0)		# is number a denorm? (msb of mantissa clear)
  20873	bmi.b		dst_get_dupper		# no
  20874	subq.w		&0x1,%d0		# yes; denorm bias = DBL_BIAS - 1
  20875dst_get_dupper:
  20876	swap		%d0			# d0 now in upper word
  20877	lsl.l		&0x4,%d0		# d0 in proper place for dbl prec exp
  20878	tst.b		FTEMP_EX(%a0)		# test sign
  20879	bpl.b		dst_get_dman		# if positive, go process mantissa
  20880	bset		&0x1f,%d0		# if negative, set sign
  20881dst_get_dman:
  20882	mov.l		FTEMP_HI(%a0),%d1	# get ms mantissa
  20883	bfextu		%d1{&1:&20},%d1		# get upper 20 bits of ms (skips the msb)
  20884	or.l		%d1,%d0			# put these bits in ms word of double
  20885	mov.l		%d0,L_SCR1(%a6)		# save finished hi lword in L_SCR1
  20886	mov.l		FTEMP_HI(%a0),%d1	# get ms mantissa
  20887	mov.l		&21,%d0			# load shift count
  20888	lsl.l		%d0,%d1			# put lower 11 bits in upper bits
  20889	mov.l		%d1,L_SCR2(%a6)		# build lower lword in memory
  20890	mov.l		FTEMP_LO(%a0),%d1	# get ls mantissa
  20891	bfextu		%d1{&0:&21},%d0		# upper 21 bits of ls mantissa = ls 21 bits of double
  20892	mov.l		L_SCR2(%a6),%d1
  20893	or.l		%d0,%d1			# put them in double result
  20894	mov.l		L_SCR1(%a6),%d0		# d0 = hi lword, d1 = lo lword
  20895	rts
  20896
  20897#########################################################################
  20898# XDEF ****************************************************************	#
  20899#	dst_sgl(): create single precision value from extended prec	#
  20900#									#
  20901# XREF ****************************************************************	#
  20902#									#
  20903# INPUT ***************************************************************	#
  20904#	a0 = pointer to source operand in extended precision		#
  20905#									#
  20906# OUTPUT **************************************************************	#
  20907#	d0 = single precision result					#
  20908#									#
  20909# ALGORITHM ***********************************************************	#
  20910#									#
  20911# Changes extended precision to single precision.			#
  20912#	sgl_sign = ext_sign						#
  20913#	sgl_exp = ext_exp - $3fff(ext bias) + $7f(sgl bias)		#
  20914#	get rid of ext integer bit					#
  20915#	sgl_mant = ext_mant{62:40}					#
  20916#									#
  20917#		---------------   ---------------    ---------------	#
  20918#  extended ->  |s|    exp    |   |1| ms mant   |    | ls mant     |	#
  20919#		---------------   ---------------    ---------------	#
  20920#		 95	    64    63 62	   40 32      31     12	  0	#
  20921#				     |	   |				#
  20922#				     |	   |				#
  20923#				     |	   |				#
  20924#			             v     v				#
  20925#			      ---------------				#
  20926#  single   ->		      |s|exp| mant  |				#
  20927#			      ---------------				#
  20928#			      31     22     0				#
  20929#									#
  20930#########################################################################
  20931
  20932dst_sgl:
  20933	clr.l		%d0			# clear d0
  20934	mov.w		FTEMP_EX(%a0),%d0	# get exponent
  20935	subi.w		&EXT_BIAS,%d0		# subtract extended precision bias
  20936	addi.w		&SGL_BIAS,%d0		# add single precision bias
  20937	tst.b		FTEMP_HI(%a0)		# is number a denorm? (msb of mantissa clear)
  20938	bmi.b		dst_get_supper		# no
  20939	subq.w		&0x1,%d0		# yes; denorm bias = SGL_BIAS - 1
  20940dst_get_supper:
  20941	swap		%d0			# put exp in upper word of d0
  20942	lsl.l		&0x7,%d0		# shift it into single exp bits
  20943	tst.b		FTEMP_EX(%a0)		# test sign
  20944	bpl.b		dst_get_sman		# if positive, continue
  20945	bset		&0x1f,%d0		# if negative, put in sign first
  20946dst_get_sman:
  20947	mov.l		FTEMP_HI(%a0),%d1	# get ms mantissa
  20948	andi.l		&0x7fffff00,%d1		# drop the msb, keep the next 23 bits
  20949	lsr.l		&0x8,%d1		# and put them flush right
  20950	or.l		%d1,%d0			# merge mantissa into the single result
  20951	rts
  20952
  20953##############################################################################
  20954fout_pack:
       # fmove.p out: for NORM/DENORM inputs call bindec and write the
       # 12-byte result built in FP_SCR0; for all other input types write
       # the operand from FP_SRC instead (see fout_pack_not_norm).
  20955	bsr.l		_calc_ea_fout		# fetch the <ea>
  20956	mov.l		%a0,-(%sp)		# save dst addr; popped into a1 at fout_pack_write
  20957
  20958	mov.b		STAG(%a6),%d0		# fetch input type
  20959	bne.w		fout_pack_not_norm	# input is not NORM
  20960
  20961fout_pack_norm:
  20962	btst		&0x4,EXC_CMDREG(%a6)	# static or dynamic?
  20963	beq.b		fout_pack_s		# static
  20964
  20965fout_pack_d:
  20966	mov.b		1+EXC_CMDREG(%a6),%d1	# fetch dynamic reg
  20967	lsr.b		&0x4,%d1
  20968	andi.w		&0x7,%d1		# d1 = Dn index
  20969
  20970	bsr.l		fetch_dreg		# fetch Dn w/ k-factor
  20971
  20972	bra.b		fout_pack_type
  20973fout_pack_s:
  20974	mov.b		1+EXC_CMDREG(%a6),%d0	# fetch static field
  20975
  20976fout_pack_type:
  20977	bfexts		%d0{&25:&7},%d0		# extract k-factor (low 7 bits, sign-extended)
  20978	mov.l	%d0,-(%sp)			# save k-factor; popped again after bindec
  20979
  20980	lea		FP_SRC(%a6),%a0		# pass: ptr to input
  20981
  20982# bindec is currently scrambling FP_SRC for denorm inputs.
  20983# we'll have to change this, but for now, tough luck!!!
  20984	bsr.l		bindec			# convert xprec to packed
  20985
  20986#	andi.l		&0xcfff000f,FP_SCR0(%a6) # clear unused fields
  20987	andi.l		&0xcffff00f,FP_SCR0(%a6) # clear unused fields
  20988
  20989	mov.l	(%sp)+,%d0			# restore k-factor
  20990
  20991	tst.b		3+FP_SCR0_EX(%a6)	# any bits set in this byte?
  20992	bne.b		fout_pack_set
  20993	tst.l		FP_SCR0_HI(%a6)		# any bits set in the hi lword?
  20994	bne.b		fout_pack_set
  20995	tst.l		FP_SCR0_LO(%a6)		# any bits set in the lo lword?
  20996	bne.b		fout_pack_set
  20997
  20998# add the extra condition that only if the k-factor was zero, too, should
  20999# we zero the exponent
  21000	tst.l		%d0
  21001	bne.b		fout_pack_set
  21002# "mantissa" is all zero which means that the answer is zero. but, the '040
  21003# algorithm allows the exponent to be non-zero. the 881/2 do not. Therefore,
  21004# if the mantissa is zero, I will zero the exponent, too.
  21005# the question now is whether the exponents sign bit is allowed to be non-zero
  21006# for a zero, also...
  21007	andi.w		&0xf000,FP_SCR0(%a6)	# keep the top nibble, clear the low 12 bits
  21008
  21009fout_pack_set:
  21010
  21011	lea		FP_SCR0(%a6),%a0	# pass: src addr
  21012
  21013fout_pack_write:
  21014	mov.l		(%sp)+,%a1		# pass: dst addr
  21015	mov.l		&0xc,%d0		# pass: opsize is 12 bytes
  21016
  21017	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
  21018	beq.b		fout_pack_a7
  21019
  21020	bsr.l		_dmem_write		# write the 12-byte result to memory
  21021
  21022	tst.l		%d1			# did dstore fail?
  21023	bne.w		fout_ext_err		# yes
  21024
  21025	rts
  21026
  21027# we don't want to do the write if the exception occurred in supervisor mode
  21028# so _mem_write2() handles this for us.
  21029fout_pack_a7:
  21030	bsr.l		_mem_write2		# write the 12-byte result to memory
  21031
  21032	tst.l		%d1			# did dstore fail?
  21033	bne.w		fout_ext_err		# yes
  21034
  21035	rts
  21036
  21037fout_pack_not_norm:
  21038	cmpi.b		%d0,&DENORM		# is it a DENORM?
  21039	beq.w		fout_pack_norm		# yes
  21040	lea		FP_SRC(%a6),%a0		# pass: src addr (operand is written from FP_SRC)
  21041	clr.w		2+FP_SRC_EX(%a6)	# clear the word that follows the sign/exponent word
  21042	cmpi.b		%d0,&SNAN		# is it an SNAN?
  21043	beq.b		fout_pack_snan		# yes
  21044	bra.b		fout_pack_write		# no
  21045
  21046fout_pack_snan:
  21047	ori.w		&snaniop2_mask,FPSR_EXCEPT(%a6) # set SNAN/AIOP
  21048	bset		&0x6,FP_SRC_HI(%a6)	# set snan bit
  21049	bra.b		fout_pack_write
  21050
  21051#########################################################################
  21052# XDEF ****************************************************************	#
  21053#	fetch_dreg(): fetch register according to index in d1		#
  21054#									#
  21055# XREF ****************************************************************	#
  21056#	None								#
  21057#									#
  21058# INPUT ***************************************************************	#
  21059#	d1 = index of register to fetch from				#
  21060#									#
  21061# OUTPUT **************************************************************	#
  21062#	d0 = value of register fetched					#
  21063#									#
  21064# ALGORITHM ***********************************************************	#
  21065#	According to the index value in d1 which can range from zero	#
  21066# to fifteen, load the corresponding register file value (where		#
  21067# address register indexes start at 8). D0/D1/A0/A1/A6/A7 are on the	#
  21068# stack. The rest should still be in their original places.		#
  21069#									#
  21070#########################################################################
  21071
  21072# this routine leaves d1 intact for subsequent store_dreg calls.
  21073	global		fetch_dreg
  21074fetch_dreg:
  21075	mov.w		(tbl_fdreg.b,%pc,%d1.w*2),%d0	# d0 = table entry for index d1
  21076	jmp		(tbl_fdreg.b,%pc,%d0.w*1)	# entry is an offset from tbl_fdreg
  21077
       # one 16-bit offset per register index: 0-7 = d0-d7, 8-15 = a0-a7
  21078tbl_fdreg:
  21079	short		fdreg0 - tbl_fdreg
  21080	short		fdreg1 - tbl_fdreg
  21081	short		fdreg2 - tbl_fdreg
  21082	short		fdreg3 - tbl_fdreg
  21083	short		fdreg4 - tbl_fdreg
  21084	short		fdreg5 - tbl_fdreg
  21085	short		fdreg6 - tbl_fdreg
  21086	short		fdreg7 - tbl_fdreg
  21087	short		fdreg8 - tbl_fdreg
  21088	short		fdreg9 - tbl_fdreg
  21089	short		fdrega - tbl_fdreg
  21090	short		fdregb - tbl_fdreg
  21091	short		fdregc - tbl_fdreg
  21092	short		fdregd - tbl_fdreg
  21093	short		fdrege - tbl_fdreg
  21094	short		fdregf - tbl_fdreg
  21095
  21096fdreg0:
  21097	mov.l		EXC_DREGS+0x0(%a6),%d0	# d0: read the copy on the stack
  21098	rts
  21099fdreg1:
  21100	mov.l		EXC_DREGS+0x4(%a6),%d0	# d1: read the copy on the stack
  21101	rts
  21102fdreg2:
  21103	mov.l		%d2,%d0			# d2-d7 are still live in the regfile
  21104	rts
  21105fdreg3:
  21106	mov.l		%d3,%d0
  21107	rts
  21108fdreg4:
  21109	mov.l		%d4,%d0
  21110	rts
  21111fdreg5:
  21112	mov.l		%d5,%d0
  21113	rts
  21114fdreg6:
  21115	mov.l		%d6,%d0
  21116	rts
  21117fdreg7:
  21118	mov.l		%d7,%d0
  21119	rts
  21120fdreg8:
  21121	mov.l		EXC_DREGS+0x8(%a6),%d0	# a0: read the copy on the stack
  21122	rts
  21123fdreg9:
  21124	mov.l		EXC_DREGS+0xc(%a6),%d0	# a1: read the copy on the stack
  21125	rts
  21126fdrega:
  21127	mov.l		%a2,%d0			# a2-a5 are still live in the regfile
  21128	rts
  21129fdregb:
  21130	mov.l		%a3,%d0
  21131	rts
  21132fdregc:
  21133	mov.l		%a4,%d0
  21134	rts
  21135fdregd:
  21136	mov.l		%a5,%d0
  21137	rts
  21138fdrege:
  21139	mov.l		(%a6),%d0		# a6: stacked copy is at (%a6)
  21140	rts
  21141fdregf:
  21142	mov.l		EXC_A7(%a6),%d0		# a7: read the copy on the stack
  21143	rts
  21144
  21145#########################################################################
  21146# XDEF ****************************************************************	#
  21147#	store_dreg_l(): store longword to data register specified by d1	#
  21148#									#
  21149# XREF ****************************************************************	#
  21150#	None								#
  21151#									#
  21152# INPUT ***************************************************************	#
  21153#	d0 = longword value to store					#
  21154#	d1 = index of data register to store to				#
  21155#									#
  21156# OUTPUT **************************************************************	#
  21157#	(data register is updated)					#
  21158#									#
  21159# ALGORITHM ***********************************************************	#
  21160#	According to the index value in d1, store the longword value	#
  21161# in d0 to the corresponding data register. D0/D1 are on the stack	#
  21162# while the rest are in their initial places.				#
  21163#									#
  21164#########################################################################
  21165
  21166	global		store_dreg_l
  21167store_dreg_l:
  21168	mov.w		(tbl_sdregl.b,%pc,%d1.w*2),%d1	# d1 = table entry (index in d1 is overwritten)
  21169	jmp		(tbl_sdregl.b,%pc,%d1.w*1)	# entry is an offset from tbl_sdregl
  21170
       # one 16-bit offset per data register index (0-7)
  21171tbl_sdregl:
  21172	short		sdregl0 - tbl_sdregl
  21173	short		sdregl1 - tbl_sdregl
  21174	short		sdregl2 - tbl_sdregl
  21175	short		sdregl3 - tbl_sdregl
  21176	short		sdregl4 - tbl_sdregl
  21177	short		sdregl5 - tbl_sdregl
  21178	short		sdregl6 - tbl_sdregl
  21179	short		sdregl7 - tbl_sdregl
  21180
  21181sdregl0:
  21182	mov.l		%d0,EXC_DREGS+0x0(%a6)	# d0: update the copy on the stack
  21183	rts
  21184sdregl1:
  21185	mov.l		%d0,EXC_DREGS+0x4(%a6)	# d1: update the copy on the stack
  21186	rts
  21187sdregl2:
  21188	mov.l		%d0,%d2			# d2-d7 are updated in place
  21189	rts
  21190sdregl3:
  21191	mov.l		%d0,%d3
  21192	rts
  21193sdregl4:
  21194	mov.l		%d0,%d4
  21195	rts
  21196sdregl5:
  21197	mov.l		%d0,%d5
  21198	rts
  21199sdregl6:
  21200	mov.l		%d0,%d6
  21201	rts
  21202sdregl7:
  21203	mov.l		%d0,%d7
  21204	rts
  21205
  21206#########################################################################
  21207# XDEF ****************************************************************	#
  21208#	store_dreg_w(): store word to data register specified by d1	#
  21209#									#
  21210# XREF ****************************************************************	#
  21211#	None								#
  21212#									#
  21213# INPUT ***************************************************************	#
  21214#	d0 = word value to store					#
  21215#	d1 = index of data register to store to				#
  21216#									#
  21217# OUTPUT **************************************************************	#
  21218#	(data register is updated)					#
  21219#									#
  21220# ALGORITHM ***********************************************************	#
  21221#	According to the index value in d1, store the word value	#
  21222# in d0 to the corresponding data register. D0/D1 are on the stack	#
  21223# while the rest are in their initial places.				#
  21224#									#
  21225#########################################################################
  21226
  21227	global		store_dreg_w
  21228store_dreg_w:
  21229	mov.w		(tbl_sdregw.b,%pc,%d1.w*2),%d1	# d1 = table entry (index in d1 is overwritten)
  21230	jmp		(tbl_sdregw.b,%pc,%d1.w*1)	# entry is an offset from tbl_sdregw
  21231
       # one 16-bit offset per data register index (0-7)
  21232tbl_sdregw:
  21233	short		sdregw0 - tbl_sdregw
  21234	short		sdregw1 - tbl_sdregw
  21235	short		sdregw2 - tbl_sdregw
  21236	short		sdregw3 - tbl_sdregw
  21237	short		sdregw4 - tbl_sdregw
  21238	short		sdregw5 - tbl_sdregw
  21239	short		sdregw6 - tbl_sdregw
  21240	short		sdregw7 - tbl_sdregw
  21241
  21242sdregw0:
  21243	mov.w		%d0,2+EXC_DREGS+0x0(%a6)	# d0 copy on the stack: write only the word at offset 2
  21244	rts
  21245sdregw1:
  21246	mov.w		%d0,2+EXC_DREGS+0x4(%a6)	# d1 copy on the stack: write only the word at offset 2
  21247	rts
  21248sdregw2:
  21249	mov.w		%d0,%d2			# d2-d7: low word updated in place
  21250	rts
  21251sdregw3:
  21252	mov.w		%d0,%d3
  21253	rts
  21254sdregw4:
  21255	mov.w		%d0,%d4
  21256	rts
  21257sdregw5:
  21258	mov.w		%d0,%d5
  21259	rts
  21260sdregw6:
  21261	mov.w		%d0,%d6
  21262	rts
  21263sdregw7:
  21264	mov.w		%d0,%d7
  21265	rts
  21266
  21267#########################################################################
  21268# XDEF ****************************************************************	#
  21269#	store_dreg_b(): store byte to data register specified by d1	#
  21270#									#
  21271# XREF ****************************************************************	#
  21272#	None								#
  21273#									#
  21274# INPUT ***************************************************************	#
  21275#	d0 = byte value to store					#
  21276#	d1 = index of data register to store to				#
  21277#									#
  21278# OUTPUT **************************************************************	#
  21279#	(data register is updated)					#
  21280#									#
  21281# ALGORITHM ***********************************************************	#
  21282#	According to the index value in d1, store the byte value	#
  21283# in d0 to the corresponding data register. D0/D1 are on the stack	#
  21284# while the rest are in their initial places.				#
  21285#									#
  21286#########################################################################
  21287
  21288	global		store_dreg_b
  21289store_dreg_b:
  21290	mov.w		(tbl_sdregb.b,%pc,%d1.w*2),%d1	# d1 = table entry (index in d1 is overwritten)
  21291	jmp		(tbl_sdregb.b,%pc,%d1.w*1)	# entry is an offset from tbl_sdregb
  21292
       # one 16-bit offset per data register index (0-7)
  21293tbl_sdregb:
  21294	short		sdregb0 - tbl_sdregb
  21295	short		sdregb1 - tbl_sdregb
  21296	short		sdregb2 - tbl_sdregb
  21297	short		sdregb3 - tbl_sdregb
  21298	short		sdregb4 - tbl_sdregb
  21299	short		sdregb5 - tbl_sdregb
  21300	short		sdregb6 - tbl_sdregb
  21301	short		sdregb7 - tbl_sdregb
  21302
  21303sdregb0:
  21304	mov.b		%d0,3+EXC_DREGS+0x0(%a6)	# d0 copy on the stack: write only the byte at offset 3
  21305	rts
  21306sdregb1:
  21307	mov.b		%d0,3+EXC_DREGS+0x4(%a6)	# d1 copy on the stack: write only the byte at offset 3
  21308	rts
  21309sdregb2:
  21310	mov.b		%d0,%d2			# d2-d7: low byte updated in place
  21311	rts
  21312sdregb3:
  21313	mov.b		%d0,%d3
  21314	rts
  21315sdregb4:
  21316	mov.b		%d0,%d4
  21317	rts
  21318sdregb5:
  21319	mov.b		%d0,%d5
  21320	rts
  21321sdregb6:
  21322	mov.b		%d0,%d6
  21323	rts
  21324sdregb7:
  21325	mov.b		%d0,%d7
  21326	rts
  21327
  21328#########################################################################
  21329# XDEF ****************************************************************	#
  21330#	inc_areg(): increment an address register by the value in d0	#
  21331#									#
  21332# XREF ****************************************************************	#
  21333#	None								#
  21334#									#
  21335# INPUT ***************************************************************	#
  21336#	d0 = amount to increment by					#
  21337#	d1 = index of address register to increment			#
  21338#									#
  21339# OUTPUT **************************************************************	#
  21340#	(address register is updated)					#
  21341#									#
  21342# ALGORITHM ***********************************************************	#
  21343#	Typically used for an instruction w/ a post-increment <ea>,	#
  21344# this routine adds the increment value in d0 to the address register	#
  21345# specified by d1. A0/A1/A6/A7 reside on the stack. The rest reside	#
  21346# in their original places.						#
  21347#	For a7, if the increment amount is one, then we have to		#
  21348# increment by two. For any a7 update, set the mia7_flg so that if	#
  21349# an access error exception occurs later in emulation, this address	#
  21350# register update can be undone.					#
  21351#									#
  21352#########################################################################
  21353
  21354	global		inc_areg
  21355inc_areg:
  21356	mov.w		(tbl_iareg.b,%pc,%d1.w*2),%d1 # d1 = table offset of handler for areg index
  21357	jmp		(tbl_iareg.b,%pc,%d1.w*1) # dispatch to iareg0..iareg7
  21358
  21359tbl_iareg:				# 16-bit offsets relative to tbl_iareg
  21360	short		iareg0 - tbl_iareg
  21361	short		iareg1 - tbl_iareg
  21362	short		iareg2 - tbl_iareg
  21363	short		iareg3 - tbl_iareg
  21364	short		iareg4 - tbl_iareg
  21365	short		iareg5 - tbl_iareg
  21366	short		iareg6 - tbl_iareg
  21367	short		iareg7 - tbl_iareg
  21368
  21369iareg0:	add.l		%d0,EXC_DREGS+0x8(%a6)	# a0 image on the stack
  21370	rts
  21371iareg1:	add.l		%d0,EXC_DREGS+0xc(%a6)	# a1 image on the stack
  21372	rts
  21373iareg2:	add.l		%d0,%a2			# a2-a5 are still in the register file
  21374	rts
  21375iareg3:	add.l		%d0,%a3
  21376	rts
  21377iareg4:	add.l		%d0,%a4
  21378	rts
  21379iareg5:	add.l		%d0,%a5
  21380	rts
  21381iareg6:	add.l		%d0,(%a6)		# a6 image is the longword at (a6)
  21382	rts
  21383iareg7:	mov.b		&mia7_flg,SPCOND_FLG(%a6) # record that a7 was post-incremented
  21384	cmpi.b		%d0,&0x1		# is the increment amount one?
  21385	beq.b		iareg7b			# yes; a7 must move by two instead
  21386	add.l		%d0,EXC_A7(%a6)		# no; add amount to a7 image
  21387	rts
  21388iareg7b:
  21389	addq.l		&0x2,EXC_A7(%a6)	# byte access through a7 increments by two
  21390	rts
  21391
  21392#########################################################################
  21393# XDEF ****************************************************************	#
  21394#	dec_areg(): decrement an address register by the value in d0	#
  21395#									#
  21396# XREF ****************************************************************	#
  21397#	None								#
  21398#									#
  21399# INPUT ***************************************************************	#
  21400#	d0 = amount to decrement by					#
  21401#	d1 = index of address register to decrement			#
  21402#									#
  21403# OUTPUT **************************************************************	#
  21404#	(address register is updated)					#
  21405#									#
  21406# ALGORITHM ***********************************************************	#
  21407#	Typically used for an instruction w/ a pre-decrement <ea>,	#
  21408# this routine subtracts the decrement value in d0 from the address	#
  21409# register specified by d1. A0/A1/A6/A7 reside on the stack. The rest	#
  21410# reside in their original places.					#
  21411#	For a7, if the decrement amount is one, then we have to		#
  21412# decrement by two. For any a7 update, set the mda7_flg so that if	#
  21413# an access error exception occurs later in emulation, this address	#
  21414# register update can be undone.					#
  21415#									#
  21416#########################################################################
  21417
  21418	global		dec_areg
  21419dec_areg:
  21420	mov.w		(tbl_dareg.b,%pc,%d1.w*2),%d1 # d1 = table offset of handler for areg index
  21421	jmp		(tbl_dareg.b,%pc,%d1.w*1) # dispatch to dareg0..dareg7
  21422
  21423tbl_dareg:				# 16-bit offsets relative to tbl_dareg
  21424	short		dareg0 - tbl_dareg
  21425	short		dareg1 - tbl_dareg
  21426	short		dareg2 - tbl_dareg
  21427	short		dareg3 - tbl_dareg
  21428	short		dareg4 - tbl_dareg
  21429	short		dareg5 - tbl_dareg
  21430	short		dareg6 - tbl_dareg
  21431	short		dareg7 - tbl_dareg
  21432
  21433dareg0:	sub.l		%d0,EXC_DREGS+0x8(%a6)	# a0 image on the stack
  21434	rts
  21435dareg1:	sub.l		%d0,EXC_DREGS+0xc(%a6)	# a1 image on the stack
  21436	rts
  21437dareg2:	sub.l		%d0,%a2			# a2-a5 are still in the register file
  21438	rts
  21439dareg3:	sub.l		%d0,%a3
  21440	rts
  21441dareg4:	sub.l		%d0,%a4
  21442	rts
  21443dareg5:	sub.l		%d0,%a5
  21444	rts
  21445dareg6:	sub.l		%d0,(%a6)		# a6 image is the longword at (a6)
  21446	rts
  21447dareg7:	mov.b		&mda7_flg,SPCOND_FLG(%a6) # record that a7 was pre-decremented
  21448	cmpi.b		%d0,&0x1		# is the decrement amount one?
  21449	beq.b		dareg7b			# yes; a7 must move by two instead
  21450	sub.l		%d0,EXC_A7(%a6)		# no; subtract amount from a7 image
  21451	rts
  21452dareg7b:
  21453	subq.l		&0x2,EXC_A7(%a6)	# byte access through a7 decrements by two
  21454	rts
  21455
  21456##############################################################################
  21457
  21458#########################################################################
  21459# XDEF ****************************************************************	#
  21460#	load_fpn1(): load FP register value into FP_SRC(a6).		#
  21461#									#
  21462# XREF ****************************************************************	#
  21463#	None								#
  21464#									#
  21465# INPUT ***************************************************************	#
  21466#	d0 = index of FP register to load				#
  21467#									#
  21468# OUTPUT **************************************************************	#
  21469#	FP_SRC(a6) = value loaded from FP register file			#
  21470#									#
  21471# ALGORITHM ***********************************************************	#
  21472#	Using the index in d0, load FP_SRC(a6) with a number from the	#
  21473# FP register file. On return, a0 points to FP_SRC(a6).		#
  21474#									#
  21475#########################################################################
  21476
  21477	global		load_fpn1
  21478load_fpn1:
  21479	mov.w		(tbl_load_fpn1.b,%pc,%d0.w*2), %d0 # d0 = table offset of handler for fp index
  21480	jmp		(tbl_load_fpn1.b,%pc,%d0.w*1) # dispatch to load_fpn1_0..7
  21481
  21482tbl_load_fpn1:				# 16-bit offsets relative to tbl_load_fpn1
  21483	short		load_fpn1_0 - tbl_load_fpn1
  21484	short		load_fpn1_1 - tbl_load_fpn1
  21485	short		load_fpn1_2 - tbl_load_fpn1
  21486	short		load_fpn1_3 - tbl_load_fpn1
  21487	short		load_fpn1_4 - tbl_load_fpn1
  21488	short		load_fpn1_5 - tbl_load_fpn1
  21489	short		load_fpn1_6 - tbl_load_fpn1
  21490	short		load_fpn1_7 - tbl_load_fpn1
  21491
  21492load_fpn1_0:
  21493	mov.l		0+EXC_FP0(%a6), 0+FP_SRC(%a6) # copy 12-byte fp0 image from
  21494	mov.l		4+EXC_FP0(%a6), 4+FP_SRC(%a6) # EXC_FP0(a6) one longword
  21495	mov.l		8+EXC_FP0(%a6), 8+FP_SRC(%a6) # at a time
  21496	lea		FP_SRC(%a6), %a0	# return ptr to the copy in a0
  21497	rts
  21498load_fpn1_1:
  21499	mov.l		0+EXC_FP1(%a6), 0+FP_SRC(%a6) # copy 12-byte fp1 image from
  21500	mov.l		4+EXC_FP1(%a6), 4+FP_SRC(%a6) # EXC_FP1(a6) one longword
  21501	mov.l		8+EXC_FP1(%a6), 8+FP_SRC(%a6) # at a time
  21502	lea		FP_SRC(%a6), %a0	# return ptr to the copy in a0
  21503	rts
  21504load_fpn1_2:
  21505	fmovm.x		&0x20, FP_SRC(%a6)	# mask 0x20 selects fp2
  21506	lea		FP_SRC(%a6), %a0	# return ptr to the copy in a0
  21507	rts
  21508load_fpn1_3:
  21509	fmovm.x		&0x10, FP_SRC(%a6)	# mask 0x10 selects fp3
  21510	lea		FP_SRC(%a6), %a0	# return ptr to the copy in a0
  21511	rts
  21512load_fpn1_4:
  21513	fmovm.x		&0x08, FP_SRC(%a6)	# mask 0x08 selects fp4
  21514	lea		FP_SRC(%a6), %a0	# return ptr to the copy in a0
  21515	rts
  21516load_fpn1_5:
  21517	fmovm.x		&0x04, FP_SRC(%a6)	# mask 0x04 selects fp5
  21518	lea		FP_SRC(%a6), %a0	# return ptr to the copy in a0
  21519	rts
  21520load_fpn1_6:
  21521	fmovm.x		&0x02, FP_SRC(%a6)	# mask 0x02 selects fp6
  21522	lea		FP_SRC(%a6), %a0	# return ptr to the copy in a0
  21523	rts
  21524load_fpn1_7:
  21525	fmovm.x		&0x01, FP_SRC(%a6)	# mask 0x01 selects fp7
  21526	lea		FP_SRC(%a6), %a0	# return ptr to the copy in a0
  21527	rts
  21528
  21529#############################################################################
  21530
  21531#########################################################################
  21532# XDEF ****************************************************************	#
  21533#	load_fpn2(): load FP register value into FP_DST(a6).		#
  21534#									#
  21535# XREF ****************************************************************	#
  21536#	None								#
  21537#									#
  21538# INPUT ***************************************************************	#
  21539#	d0 = index of FP register to load				#
  21540#									#
  21541# OUTPUT **************************************************************	#
  21542#	FP_DST(a6) = value loaded from FP register file			#
  21543#									#
  21544# ALGORITHM ***********************************************************	#
  21545#	Using the index in d0, load FP_DST(a6) with a number from the	#
  21546# FP register file. On return, a0 points to FP_DST(a6).		#
  21547#									#
  21548#########################################################################
  21549
  21550	global		load_fpn2
  21551load_fpn2:
  21552	mov.w		(tbl_load_fpn2.b,%pc,%d0.w*2), %d0 # d0 = table offset of handler for fp index
  21553	jmp		(tbl_load_fpn2.b,%pc,%d0.w*1) # dispatch to load_fpn2_0..7
  21554
  21555tbl_load_fpn2:				# 16-bit offsets relative to tbl_load_fpn2
  21556	short		load_fpn2_0 - tbl_load_fpn2
  21557	short		load_fpn2_1 - tbl_load_fpn2
  21558	short		load_fpn2_2 - tbl_load_fpn2
  21559	short		load_fpn2_3 - tbl_load_fpn2
  21560	short		load_fpn2_4 - tbl_load_fpn2
  21561	short		load_fpn2_5 - tbl_load_fpn2
  21562	short		load_fpn2_6 - tbl_load_fpn2
  21563	short		load_fpn2_7 - tbl_load_fpn2
  21564
  21565load_fpn2_0:
  21566	mov.l		0+EXC_FP0(%a6), 0+FP_DST(%a6) # copy 12-byte fp0 image from
  21567	mov.l		4+EXC_FP0(%a6), 4+FP_DST(%a6) # EXC_FP0(a6) one longword
  21568	mov.l		8+EXC_FP0(%a6), 8+FP_DST(%a6) # at a time
  21569	lea		FP_DST(%a6), %a0	# return ptr to the copy in a0
  21570	rts
  21571load_fpn2_1:
  21572	mov.l		0+EXC_FP1(%a6), 0+FP_DST(%a6) # copy 12-byte fp1 image from
  21573	mov.l		4+EXC_FP1(%a6), 4+FP_DST(%a6) # EXC_FP1(a6) one longword
  21574	mov.l		8+EXC_FP1(%a6), 8+FP_DST(%a6) # at a time
  21575	lea		FP_DST(%a6), %a0	# return ptr to the copy in a0
  21576	rts
  21577load_fpn2_2:
  21578	fmovm.x		&0x20, FP_DST(%a6)	# mask 0x20 selects fp2
  21579	lea		FP_DST(%a6), %a0	# return ptr to the copy in a0
  21580	rts
  21581load_fpn2_3:
  21582	fmovm.x		&0x10, FP_DST(%a6)	# mask 0x10 selects fp3
  21583	lea		FP_DST(%a6), %a0	# return ptr to the copy in a0
  21584	rts
  21585load_fpn2_4:
  21586	fmovm.x		&0x08, FP_DST(%a6)	# mask 0x08 selects fp4
  21587	lea		FP_DST(%a6), %a0	# return ptr to the copy in a0
  21588	rts
  21589load_fpn2_5:
  21590	fmovm.x		&0x04, FP_DST(%a6)	# mask 0x04 selects fp5
  21591	lea		FP_DST(%a6), %a0	# return ptr to the copy in a0
  21592	rts
  21593load_fpn2_6:
  21594	fmovm.x		&0x02, FP_DST(%a6)	# mask 0x02 selects fp6
  21595	lea		FP_DST(%a6), %a0	# return ptr to the copy in a0
  21596	rts
  21597load_fpn2_7:
  21598	fmovm.x		&0x01, FP_DST(%a6)	# mask 0x01 selects fp7
  21599	lea		FP_DST(%a6), %a0	# return ptr to the copy in a0
  21600	rts
  21601
  21602#############################################################################
  21603
  21604#########################################################################
  21605# XDEF ****************************************************************	#
  21606#	store_fpreg(): store an fp value to the fpreg designated d0.	#
  21607#									#
  21608# XREF ****************************************************************	#
  21609#	None								#
  21610#									#
  21611# INPUT ***************************************************************	#
  21612#	fp0 = extended precision value to store				#
  21613#	d0  = index of floating-point register				#
  21614#									#
  21615# OUTPUT **************************************************************	#
  21616#	None								#
  21617#									#
  21618# ALGORITHM ***********************************************************	#
  21619#	Store the value in fp0 to the FP register designated by the	#
  21620# value in d0. The FP number can be DENORM or SNAN so we have to be	#
  21621# careful that we don't take an exception here.				#
  21622#									#
  21623#########################################################################
  21624
  21625	global		store_fpreg
  21626store_fpreg:
  21627	mov.w		(tbl_store_fpreg.b,%pc,%d0.w*2), %d0 # d0 = table offset of handler for fp index
  21628	jmp		(tbl_store_fpreg.b,%pc,%d0.w*1) # dispatch to store_fpreg_0..7
  21629
  21630tbl_store_fpreg:			# 16-bit offsets relative to tbl_store_fpreg
  21631	short		store_fpreg_0 - tbl_store_fpreg
  21632	short		store_fpreg_1 - tbl_store_fpreg
  21633	short		store_fpreg_2 - tbl_store_fpreg
  21634	short		store_fpreg_3 - tbl_store_fpreg
  21635	short		store_fpreg_4 - tbl_store_fpreg
  21636	short		store_fpreg_5 - tbl_store_fpreg
  21637	short		store_fpreg_6 - tbl_store_fpreg
  21638	short		store_fpreg_7 - tbl_store_fpreg
  21639
  21640store_fpreg_0:
  21641	fmovm.x		&0x80, EXC_FP0(%a6)	# fp0 -> fp0 image on the stack
  21642	rts
  21643store_fpreg_1:
  21644	fmovm.x		&0x80, EXC_FP1(%a6)	# fp0 -> fp1 image on the stack
  21645	rts
  21646store_fpreg_2:
  21647	fmovm.x		&0x01, -(%sp)		# push fp0 (mask is reversed for -(sp))
  21648	fmovm.x		(%sp)+, &0x20		# pop it into fp2
  21649	rts
  21650store_fpreg_3:
  21651	fmovm.x		&0x01, -(%sp)		# push fp0 (mask is reversed for -(sp))
  21652	fmovm.x		(%sp)+, &0x10		# pop it into fp3
  21653	rts
  21654store_fpreg_4:
  21655	fmovm.x		&0x01, -(%sp)		# push fp0 (mask is reversed for -(sp))
  21656	fmovm.x		(%sp)+, &0x08		# pop it into fp4
  21657	rts
  21658store_fpreg_5:
  21659	fmovm.x		&0x01, -(%sp)		# push fp0 (mask is reversed for -(sp))
  21660	fmovm.x		(%sp)+, &0x04		# pop it into fp5
  21661	rts
  21662store_fpreg_6:
  21663	fmovm.x		&0x01, -(%sp)		# push fp0 (mask is reversed for -(sp))
  21664	fmovm.x		(%sp)+, &0x02		# pop it into fp6
  21665	rts
  21666store_fpreg_7:
  21667	fmovm.x		&0x01, -(%sp)		# push fp0 (mask is reversed for -(sp))
  21668	fmovm.x		(%sp)+, &0x01		# pop it into fp7
  21669	rts
  21670
  21671#########################################################################
  21672# XDEF ****************************************************************	#
  21673#	_denorm(): denormalize an intermediate result			#
  21674#									#
  21675# XREF ****************************************************************	#
  21676#	None								#
  21677#									#
  21678# INPUT *************************************************************** #
  21679#	a0 = points to the operand to be denormalized			#
  21680#		(in the internal extended format)			#
  21681#									#
  21682#	d0 = rounding precision						#
  21683#									#
  21684# OUTPUT **************************************************************	#
  21685#	a0 = pointer to the denormalized result				#
  21686#		(in the internal extended format)			#
  21687#									#
  21688#	d0 = guard,round,sticky						#
  21689#									#
  21690# ALGORITHM ***********************************************************	#
  21691#	According to the exponent underflow threshold for the given	#
  21692# precision, shift the mantissa bits to the right in order to raise the	#
  21693# exponent of the operand to the threshold value. While shifting the	#
  21694# mantissa bits right, maintain the value of the guard, round, and	#
  21695# sticky bits.								#
  21696# other notes:								#
  21697#	(1) _denorm() is called by the underflow routines		#
  21698#	(2) _denorm() does NOT affect the status register		#
  21699#									#
  21700#########################################################################
  21701
  21702#
  21703# table of exponent threshold values for each precision
  21704#
  21705tbl_thresh:
  21706	short		0x0			# index 0: threshold of zero
  21707	short		sgl_thresh		# index 1: sgl threshold
  21708	short		dbl_thresh		# index 2: dbl threshold
  21709
  21710	global		_denorm
  21711_denorm:
  21712#
  21713# Load the exponent threshold for the precision selected and check
  21714# to see if (threshold - exponent) is > 65, in which case we can
  21715# simply set the sticky bit and zero the mantissa. Otherwise
  21716# we have to call the denormalization routine.
  21717#
  21718	lsr.b		&0x2, %d0		# shift prec to lo bits
  21719	mov.w		(tbl_thresh.b,%pc,%d0.w*2), %d1 # load prec threshold
  21720	mov.w		%d1, %d0		# copy d1 into d0
  21721	sub.w		FTEMP_EX(%a0), %d0	# diff = threshold - exp
  21722	cmpi.w		%d0, &66		# is diff > 65? (mant + g,r bits)
  21723	bpl.b		denorm_set_stky		# yes; just calc sticky
  21724
  21725	clr.l		%d0			# clear g,r,s
  21726	btst		&inex2_bit, FPSR_EXCEPT(%a6) # was INEX2 set?
  21727	beq.b		denorm_call		# no; don't change anything
  21728	bset		&29, %d0		# yes; set sticky bit
  21729
  21730denorm_call:
  21731	bsr.l		dnrm_lp			# denormalize the number
  21732	rts
  21733
  21734#
  21735# all bits would have been shifted off during the denorm so simply
  21736# set the sticky bit and clear the entire mantissa.
  21737#
  21738denorm_set_stky:
  21739	mov.l		&0x20000000, %d0	# set sticky bit in return value
  21740	mov.w		%d1, FTEMP_EX(%a0)	# load exp with threshold
  21741	clr.l		FTEMP_HI(%a0)		# clear hi(mantissa)
  21742	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
  21743	rts
  21744
  21745#									#
  21746# dnrm_lp(): denormalize exponent/mantissa to specified threshold	#
  21747#									#
  21748# INPUT:								#
  21749#	%a0	   : points to the operand to be denormalized		#
  21750#	%d0{31:29} : initial guard,round,sticky				#
  21751#	%d1{15:0}  : denormalization threshold				#
  21752# OUTPUT:								#
  21753#	%a0	   : points to the denormalized operand			#
  21754#	%d0{31:29} : final guard,round,sticky				#
  21755#									#
  21756
  21757# *** Local Equates *** #
  21758set	GRS,		L_SCR2			# g,r,s temp storage (assumed to directly follow FTEMP_LO2 - confirm L_SCR1/L_SCR2 layout)
  21759set	FTEMP_LO2,	L_SCR1			# FTEMP_LO copy
  21760
  21761	global		dnrm_lp
  21762dnrm_lp:
  21763
  21764#
  21765# make a copy of FTEMP_LO and place the g,r,s bits directly after it
  21766# in memory so that one bitfield extraction can span both of them.
  21767#
  21768	mov.l		FTEMP_LO(%a0), FTEMP_LO2(%a6) # make FTEMP_LO copy
  21769	mov.l		%d0, GRS(%a6)		# place g,r,s after it
  21770
  21771#
  21772# check to see how much less than the underflow threshold the operand
  21773# exponent is, and pick the shift case accordingly.
  21774#
  21775	mov.l		%d1, %d0		# copy the denorm threshold
  21776	sub.w		FTEMP_EX(%a0), %d1	# d1 = threshold - uns exponent
  21777	ble.b		dnrm_no_lp		# d1 <= 0; nothing to shift
  21778	cmpi.w		%d1, &0x20		# is ( 0 <= d1 < 32) ?
  21779	blt.b		case_1			# yes
  21780	cmpi.w		%d1, &0x40		# is (32 <= d1 < 64) ?
  21781	blt.b		case_2			# yes
  21782	bra.w		case_3			# (d1 >= 64)
  21783
  21784#
  21785# No denormalization necessary
  21786#
  21787dnrm_no_lp:
  21788	mov.l		GRS(%a6), %d0		# restore original g,r,s
  21789	rts
  21790
  21791#
  21792# case (0<d1<32)
  21793#
  21794# %d0 = denorm threshold
  21795# %d1 = "n" = amt to shift
  21796#
  21797#	---------------------------------------------------------
  21798#	|     FTEMP_HI	  |	FTEMP_LO     |grs000.........000|
  21799#	---------------------------------------------------------
  21800#	<-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
  21801#	\	   \		      \			 \
  21802#	 \	    \		       \		  \
  21803#	  \	     \			\		   \
  21804#	   \	      \			 \		    \
  21805#	    \	       \		  \		     \
  21806#	     \		\		   \		      \
  21807#	      \		 \		    \		       \
  21808#	       \	  \		     \			\
  21809#	<-(n)-><-(32 - n)-><------(32)-------><------(32)------->
  21810#	---------------------------------------------------------
  21811#	|0.....0| NEW_HI  |  NEW_FTEMP_LO     |grs		|
  21812#	---------------------------------------------------------
  21813#
  21814case_1:
  21815	mov.l		%d2, -(%sp)		# create temp storage
  21816
  21817	mov.w		%d0, FTEMP_EX(%a0)	# exponent = denorm threshold
  21818	mov.l		&32, %d0
  21819	sub.w		%d1, %d0		# %d0 = 32 - %d1
  21820
  21821	cmpi.w		%d1, &29		# is shft amt >= 29
  21822	blt.b		case1_extract		# no; no fix needed
  21823	mov.b		GRS(%a6), %d2		# yes; fetch byte holding the old g,r,s
  21824	or.b		%d2, 3+FTEMP_LO2(%a6)	# fold it into low byte of FTEMP_LO copy
  21825
  21826case1_extract:
  21827	bfextu		FTEMP_HI(%a0){&0:%d0}, %d2 # %d2 = new FTEMP_HI
  21828	bfextu		FTEMP_HI(%a0){%d0:&32}, %d1 # %d1 = new FTEMP_LO
  21829	bfextu		FTEMP_LO2(%a6){%d0:&32}, %d0 # %d0 = new G,R,S
  21830
  21831	mov.l		%d2, FTEMP_HI(%a0)	# store new FTEMP_HI
  21832	mov.l		%d1, FTEMP_LO(%a0)	# store new FTEMP_LO
  21833
  21834	bftst		%d0{&2:&30}		# any bits set below G,R?
  21835	beq.b		case1_sticky_clear	# no; go finish
  21836	bset		&rnd_stky_bit, %d0	# yes; set sticky bit
  21837
  21838case1_sticky_clear:
  21839	and.l		&0xe0000000, %d0	# clear all but G,R,S
  21840	mov.l		(%sp)+, %d2		# restore temp register
  21841	rts
  21842
  21843#
  21844# case (32<=d1<64)
  21845#
  21846# %d0 = denorm threshold
  21847# %d1 = "n" = amt to shift
  21848#
  21849#	---------------------------------------------------------
  21850#	|     FTEMP_HI	  |	FTEMP_LO     |grs000.........000|
  21851#	---------------------------------------------------------
  21852#	<-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
  21853#	\	   \		      \
  21854#	 \	    \		       \
  21855#	  \	     \			-------------------
  21856#	   \	      --------------------		   \
  21857#	    -------------------		  \		    \
  21858#			       \	   \		     \
  21859#				\	    \		      \
  21860#				 \	     \		       \
  21861#	<-------(32)------><-(n)-><-(32 - n)-><------(32)------->
  21862#	---------------------------------------------------------
  21863#	|0...............0|0....0| NEW_LO     |grs		|
  21864#	---------------------------------------------------------
  21865#
  21866case_2:
  21867	mov.l		%d2, -(%sp)		# create temp storage
  21868
  21869	mov.w		%d0, FTEMP_EX(%a0)	# exponent = denorm threshold
  21870	subi.w		&0x20, %d1		# %d1 now between 0 and 31
  21871	mov.l		&0x20, %d0
  21872	sub.w		%d1, %d0		# %d0 = 32 - %d1
  21873
  21874# subtle step here; or in the g,r,s at the bottom of FTEMP_LO to minimize
  21875# the number of bits to check for the sticky detect.
  21876# it only plays a role in shift amounts of 61-63.
  21877	mov.b		GRS(%a6), %d2		# fetch byte holding the old g,r,s
  21878	or.b		%d2, 3+FTEMP_LO2(%a6)	# fold it into low byte of FTEMP_LO copy
  21879
  21880	bfextu		FTEMP_HI(%a0){&0:%d0}, %d2 # %d2 = new FTEMP_LO
  21881	bfextu		FTEMP_HI(%a0){%d0:&32}, %d1 # %d1 = new G,R,S
  21882
  21883	bftst		%d1{&2:&30}		# any bits set below G,R?
  21884	bne.b		case2_set_sticky	# yes; set sticky bit
  21885	bftst		FTEMP_LO2(%a6){%d0:&31}	# any bits set in rest of lo copy?
  21886	bne.b		case2_set_sticky	# yes; set sticky bit
  21887
  21888	mov.l		%d1, %d0		# move new G,R,S to %d0
  21889	bra.b		case2_end
  21890
  21891case2_set_sticky:
  21892	mov.l		%d1, %d0		# move new G,R,S to %d0
  21893	bset		&rnd_stky_bit, %d0	# set sticky bit
  21894
  21895case2_end:
  21896	clr.l		FTEMP_HI(%a0)		# store FTEMP_HI = 0
  21897	mov.l		%d2, FTEMP_LO(%a0)	# store FTEMP_LO
  21898	and.l		&0xe0000000, %d0	# clear all but G,R,S
  21899
  21900	mov.l		(%sp)+,%d2		# restore temp register
  21901	rts
  21902
  21903#
  21904# case (d1>=64)
  21905#
  21906# %d0 = denorm threshold
  21907# %d1 = amt to shift
  21908#
  21909case_3:
  21910	mov.w		%d0, FTEMP_EX(%a0)	# exponent = denorm threshold
  21911
  21912	cmpi.w		%d1, &65		# compare shift amt against 65
  21913	blt.b		case3_64		# less; it's == 64
  21914	beq.b		case3_65		# equal; it's == 65
  21915
  21916#
  21917# case (d1>65)
  21918#
  21919# Shift value is > 65 and out of range. All bits are shifted off.
  21920# Return a zero mantissa with only the sticky bit set.
  21921#
  21922	clr.l		FTEMP_HI(%a0)		# clear hi(mantissa)
  21923	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
  21924	mov.l		&0x20000000, %d0	# set sticky bit
  21925	rts
  21926
  21927#
  21928# case (d1 == 64)
  21929#
  21930#	---------------------------------------------------------
  21931#	|     FTEMP_HI	  |	FTEMP_LO     |grs000.........000|
  21932#	---------------------------------------------------------
  21933#	<-------(32)------>
  21934#	\		   \
  21935#	 \		    \
  21936#	  \		     \
  21937#	   \		      ------------------------------
  21938#	    -------------------------------		    \
  21939#					   \		     \
  21940#					    \		      \
  21941#					     \		       \
  21942#					      <-------(32)------>
  21943#	---------------------------------------------------------
  21944#	|0...............0|0................0|grs		|
  21945#	---------------------------------------------------------
  21946#
  21947case3_64:
  21948	mov.l		FTEMP_HI(%a0), %d0	# fetch hi(mantissa)
  21949	mov.l		%d0, %d1		# make a copy
  21950	and.l		&0xc0000000, %d0	# extract G,R
  21951	and.l		&0x3fffffff, %d1	# extract other bits; sets Z if none
  21952
  21953	bra.b		case3_complete		# ccodes from the "and" are used there
  21954
  21955#
  21956# case (d1 == 65)
  21957#
  21958#	---------------------------------------------------------
  21959#	|     FTEMP_HI	  |	FTEMP_LO     |grs000.........000|
  21960#	---------------------------------------------------------
  21961#	<-------(32)------>
  21962#	\		   \
  21963#	 \		    \
  21964#	  \		     \
  21965#	   \		      ------------------------------
  21966#	    --------------------------------		    \
  21967#					    \		     \
  21968#					     \		      \
  21969#					      \		       \
  21970#					       <-------(31)----->
  21971#	---------------------------------------------------------
  21972#	|0...............0|0................0|0rs		|
  21973#	---------------------------------------------------------
  21974#
  21975case3_65:
  21976	mov.l		FTEMP_HI(%a0), %d0	# fetch hi(mantissa)
  21977	mov.l		%d0, %d1		# make a copy (d1 held the shift amt)
  21978	and.l		&0x80000000, %d0	# extract R bit
  21979	lsr.l		&0x1, %d0		# shift high bit into R bit
  21980	and.l		&0x7fffffff, %d1	# extract other bits; sets Z if none
  21981
  21982case3_complete:
  21983# last operation done was an "and" of the bits shifted off so the condition
  21984# codes are already set so branch accordingly.
  21985	bne.b		case3_set_sticky	# yes; go set new sticky
  21986	tst.l		FTEMP_LO(%a0)		# were any bits shifted off?
  21987	bne.b		case3_set_sticky	# yes; go set new sticky
  21988	tst.b		GRS(%a6)		# were any bits shifted off?
  21989	bne.b		case3_set_sticky	# yes; go set new sticky
  21990
  21991#
  21992# no bits were shifted off so don't set the sticky bit. d0 holds the
  21993# new guard/round bits and the entire mantissa is zero.
  21994#
  21995	clr.l		FTEMP_HI(%a0)		# clear hi(mantissa)
  21996	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
  21997	rts
  21998
  21999#
  22000# some bits were shifted off so set the sticky bit.
  22001# the entire mantissa is zero.
  22002#
  22003case3_set_sticky:
  22004	bset		&rnd_stky_bit,%d0	# set new sticky bit
  22005	clr.l		FTEMP_HI(%a0)		# clear hi(mantissa)
  22006	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
  22007	rts
  22008
  22009#########################################################################
  22010# XDEF ****************************************************************	#
  22011#	_round(): round result according to precision/mode		#
  22012#									#
  22013# XREF ****************************************************************	#
  22014#	None								#
  22015#									#
  22016# INPUT ***************************************************************	#
  22017#	a0	  = ptr to input operand in internal extended format	#
  22018#	d1(hi)    = contains rounding precision:			#
  22019#			ext = $0000xxxx					#
  22020#			sgl = $0004xxxx					#
  22021#			dbl = $0008xxxx					#
  22022#	d1(lo)	  = contains rounding mode:				#
  22023#			RN  = $xxxx0000					#
  22024#			RZ  = $xxxx0001					#
  22025#			RM  = $xxxx0002					#
  22026#			RP  = $xxxx0003					#
  22027#	d0{31:29} = contains the g,r,s bits (extended)			#
  22028#									#
  22029# OUTPUT **************************************************************	#
  22030#	a0 = pointer to rounded result					#
  22031#									#
  22032# ALGORITHM ***********************************************************	#
  22033#	On return the value pointed to by a0 is correctly rounded,	#
  22034#	a0 is preserved and the g-r-s bits in d0 are cleared.		#
  22035#	The result is not typed - the tag field is invalid.  The	#
  22036#	result is still in the internal extended format.		#
  22037#									#
  22038#	The INEX bit of USER_FPSR will be set if the rounded result was	#
  22039#	inexact (i.e. if any of the g-r-s bits were set).		#
  22040#									#
  22041#########################################################################
  22042
  22043	global		_round
  22044_round:
  22045#
  22046# ext_grs() looks at the rounding precision and sets the appropriate
  22047# G,R,S bits.
  22048# If (G,R,S == 0) then the result is exact and only needs truncating to
  22049# the precision, else set the inex flag in status reg and continue.
  22050#
  22051	bsr.l		ext_grs			# extract G,R,S
  22052
  22053	tst.l		%d0			# are G,R,S zero?
  22054	beq.w		truncate		# yes; exact, just mask to precision
  22055
  22056	or.w		&inx2a_mask, 2+USER_FPSR(%a6) # set inex2/ainex
  22057
  22058#
  22059# Use rounding mode as an index into a jump table for these modes.
  22060# All of the following assumes grs != 0.
  22061#
  22062	mov.w		(tbl_mode.b,%pc,%d1.w*2), %a1 # load jump offset
  22063	jmp		(tbl_mode.b,%pc,%a1)	# jmp to rnd mode handler
  22064
  22065tbl_mode:
  22066	short		rnd_near - tbl_mode	# mode 0: RN
  22067	short		truncate - tbl_mode	# mode 1: RZ always truncates
  22068	short		rnd_mnus - tbl_mode	# mode 2: RM
  22069	short		rnd_plus - tbl_mode	# mode 3: RP
  22070
  22071#################################################################
  22072#	ROUND PLUS INFINITY					#
  22073#								#
  22074#	If sign of fp number = 0 (positive), then add 1 to l.	#
  22075#################################################################
  22076rnd_plus:
  22077	tst.b		FTEMP_SGN(%a0)		# check for sign
  22078	bmi.w		truncate		# if negative then truncate
  22079
  22080	mov.l		&0xffffffff, %d0	# force g,r,s to be all f's
  22081	swap		%d1			# set up d1 for round prec.
  22082
  22083	cmpi.b		%d1, &s_mode		# is prec = sgl?
  22084	beq.w		add_sgl			# yes
  22085	bgt.w		add_dbl			# no; it's dbl
  22086	bra.w		add_ext			# no; it's ext
  22087
  22088#################################################################
  22089#	ROUND MINUS INFINITY					#
  22090#								#
  22091#	If sign of fp number = 1 (negative), then add 1 to l.	#
  22092#################################################################
  22093rnd_mnus:
  22094	tst.b		FTEMP_SGN(%a0)		# check for sign
  22095	bpl.w		truncate		# if positive then truncate
  22096
  22097	mov.l		&0xffffffff, %d0	# force g,r,s to be all f's
  22098	swap		%d1			# set up d1 for round prec.
  22099
  22100	cmpi.b		%d1, &s_mode		# is prec = sgl?
  22101	beq.w		add_sgl			# yes
  22102	bgt.w		add_dbl			# no; it's dbl
  22103	bra.w		add_ext			# no; it's ext
  22104
  22105#################################################################
  22106#	ROUND NEAREST						#
  22107#								#
  22108#	If (g=1), then add 1 to l and if (r=s=0), then clear l	#
  22109#	Note that this will round to even in case of a tie.	#
  22110#################################################################
  22111rnd_near:
  22112	asl.l		&0x1, %d0		# shift g-bit to c-bit; d0 = r,s
  22113	bcc.w		truncate		# if (g=0) then truncate
  22114
  22115	swap		%d1			# set up d1 for round prec.
  22116
  22117	cmpi.b		%d1, &s_mode		# is prec = sgl?
  22118	beq.w		add_sgl			# yes
  22119	bgt.w		add_dbl			# no; it's dbl
  22120	bra.w		add_ext			# no; it's ext
  22121
  22122# *** LOCAL EQUATES ***
  22123set	ad_1_sgl,	0x00000100	# constant to add 1 to l-bit in sgl prec
  22124set	ad_1_dbl,	0x00000800	# constant to add 1 to l-bit in dbl prec
  22125
  22126#########################
  22127#	ADD SINGLE	#
  22128#########################
  22129add_sgl:
  22130	add.l		&ad_1_sgl, FTEMP_HI(%a0) # add 1 to sgl l-bit
  22131	bcc.b		scc_clr			# no mantissa overflow
  22132	roxr.w		FTEMP_HI(%a0)		# rotate carry-out back into msb
  22133	roxr.w		FTEMP_HI+2(%a0)		# and continue through low word
  22134	add.w		&0x1, FTEMP_EX(%a0)	# and incr exponent
  22135scc_clr:
  22136	tst.l		%d0			# test for rs = 0
  22137	bne.b		sgl_done
  22138	and.w		&0xfe00, FTEMP_HI+2(%a0) # rs = 0: clear the l-bit
  22139sgl_done:
  22140	and.l		&0xffffff00, FTEMP_HI(%a0) # truncate bits beyond sgl limit
  22141	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
  22142	rts
  22143
  22144#########################
  22145#	ADD EXTENDED	#
  22146#########################
  22147add_ext:
  22148	addq.l		&1,FTEMP_LO(%a0)	# add 1 to l-bit
  22149	bcc.b		xcc_clr			# test for carry out
  22150	addq.l		&1,FTEMP_HI(%a0)	# propagate carry
  22151	bcc.b		xcc_clr			# no mantissa overflow
  22152	roxr.w		FTEMP_HI(%a0)		# mant is 0 so restore v-bit
  22153	roxr.w		FTEMP_HI+2(%a0)		# mant is 0 so restore v-bit
  22154	roxr.w		FTEMP_LO(%a0)		# continue rotate through lo(mantissa)
  22155	roxr.w		FTEMP_LO+2(%a0)
  22156	add.w		&0x1,FTEMP_EX(%a0)	# and inc exp
  22157xcc_clr:
  22158	tst.l		%d0			# test rs = 0
  22159	bne.b		add_ext_done
  22160	and.b		&0xfe,FTEMP_LO+3(%a0)	# rs = 0: clear the l bit
  22161add_ext_done:
  22162	rts
  22163
  22164#########################
  22165#	ADD DOUBLE	#
  22166#########################
  22167add_dbl:
  22168	add.l		&ad_1_dbl, FTEMP_LO(%a0) # add 1 to dbl l-bit
  22169	bcc.b		dcc_clr			# no carry
  22170	addq.l		&0x1, FTEMP_HI(%a0)	# propagate carry
  22171	bcc.b		dcc_clr			# no carry
  22172
  22173	roxr.w		FTEMP_HI(%a0)		# mant is 0 so restore v-bit
  22174	roxr.w		FTEMP_HI+2(%a0)		# mant is 0 so restore v-bit
  22175	roxr.w		FTEMP_LO(%a0)		# continue rotate through lo(mantissa)
  22176	roxr.w		FTEMP_LO+2(%a0)
  22177	addq.w		&0x1, FTEMP_EX(%a0)	# incr exponent
  22178dcc_clr:
  22179	tst.l		%d0			# test for rs = 0
  22180	bne.b		dbl_done
  22181	and.w		&0xf000, FTEMP_LO+2(%a0) # rs = 0: clear the l-bit
  22182
  22183dbl_done:
  22184	and.l		&0xfffff800,FTEMP_LO(%a0) # truncate bits beyond dbl limit
  22185	rts
  22186
  22187###########################
  22188# Truncate all other bits #
  22189###########################
  22190truncate:
  22191	swap		%d1			# select rnd prec (NOTE: d1 is not swapped back on any exit below)
  22192
  22193	cmpi.b		%d1, &s_mode		# is prec sgl?
  22194	beq.w		sgl_done		# yes; reuse add_sgl's truncation tail (it does the rts)
  22195	bgt.b		dbl_done		# no; it's dbl; reuse add_dbl's truncation tail (it does the rts)
  22196	rts					# no; it's ext; nothing to truncate
  22197
  22198
  22199#
  22200# ext_grs(): extract guard, round and sticky bits according to
  22201#	     rounding precision.
  22202#
  22203# INPUT
  22204#	d0	   = extended precision g,r,s (in d0{31:29})
  22205#	d1	   = {PREC,ROUND}
  22206# OUTPUT
  22207#	d0{31:29}  = guard, round, sticky
  22208#
  22209# ext_grs extracts the guard/round/sticky bits according to the
  22210# selected rounding precision. It is called by the round subroutine
  22211# only.  All registers except d0 are kept intact. d0 becomes an
  22212# updated guard,round,sticky in d0{31:29}
  22213#
  22214# Notes: the ext_grs uses the round PREC, and therefore has to swap d1
  22215#	 prior to usage, and needs to restore d1 to original. this
  22216#	 routine is tightly tied to the round routine and not meant to
  22217#	 uphold standard subroutine calling practices.
  22218#
  22219
  22220ext_grs:
  22221	swap		%d1			# have d1.w point to round precision
  22222	tst.b		%d1			# is rnd prec = extended?
  22223	bne.b		ext_grs_not_ext		# no; go handle sgl or dbl
  22224
  22225#
  22226# %d0 actually already hold g,r,s since _round() had it before calling
  22227# this function. so, as long as we don't disturb it, we are "returning" it.
  22228#
  22229ext_grs_ext:
  22230	swap		%d1			# yes; return to correct positions
  22231	rts
  22232
  22233ext_grs_not_ext:
  22234	movm.l		&0x3000, -(%sp)		# make some temp registers {d2/d3}
  22235
  22236	cmpi.b		%d1, &s_mode		# is rnd prec = sgl?
  22237	bne.b		ext_grs_dbl		# no; go handle dbl
  22238
  22239#
  22240# sgl:
  22241#	96		64	  40	32		0
  22242#	-----------------------------------------------------
  22243#	| EXP	|XXXXXXX|	  |xx	|		|grs|
  22244#	-----------------------------------------------------
  22245#			<--(24)--->nn\			   /
  22246#				   ee ---------------------
  22247#				   ww		|
  22248#						v
  22249#				   gr	   new sticky
  22250#
  22251ext_grs_sgl:
  22252	bfextu		FTEMP_HI(%a0){&24:&2}, %d3 # sgl prec. g-r are 2 bits right (bits 7-6 of hi(man))
  22253	mov.l		&30, %d2		# of the sgl prec. limits
  22254	lsl.l		%d2, %d3		# shift g-r bits to MSB of d3 (bits 31-30)
  22255	mov.l		FTEMP_HI(%a0), %d2	# get hi(man) for s-bit test
  22256	and.l		&0x0000003f, %d2	# s bit is the or of all other (bits 5-0 of hi(man))
  22257	bne.b		ext_grs_st_stky		# bits to the right of g-r
  22258	tst.l		FTEMP_LO(%a0)		# test lower mantissa
  22259	bne.b		ext_grs_st_stky		# if any are set, set sticky
  22260	tst.l		%d0			# test original g,r,s
  22261	bne.b		ext_grs_st_stky		# if any are set, set sticky
  22262	bra.b		ext_grs_end_sd		# nothing set to the right of g-r: sticky stays clear
  22263
  22264#
  22265# dbl:
  22266#	96		64		32	 11	0
  22267#	-----------------------------------------------------
  22268#	| EXP	|XXXXXXX|		|	 |xx	|grs|
  22269#	-----------------------------------------------------
  22270#						  nn\	    /
  22271#						  ee -------
  22272#						  ww	|
  22273#							v
  22274#						  gr	new sticky
  22275#
  22276ext_grs_dbl:
  22277	bfextu		FTEMP_LO(%a0){&21:&2}, %d3 # dbl-prec. g-r are 2 bits right (bits 10-9 of lo(man))
  22278	mov.l		&30, %d2		# of the dbl prec. limits
  22279	lsl.l		%d2, %d3		# shift g-r bits to the MSB of d3 (bits 31-30)
  22280	mov.l		FTEMP_LO(%a0), %d2	# get lower mantissa  for s-bit test
  22281	and.l		&0x000001ff, %d2	# s bit is the or-ing of all (bits 8-0 of lo(man))
  22282	bne.b		ext_grs_st_stky		# other bits to the right of g-r
  22283	tst.l		%d0			# test word original g,r,s
  22284	bne.b		ext_grs_st_stky		# if any are set, set sticky
  22285	bra.b		ext_grs_end_sd		# if clear, exit
  22286
  22287ext_grs_st_stky:
  22288	bset		&rnd_stky_bit, %d3	# set sticky bit (rnd_stky_bit is defined outside this chunk)
  22289ext_grs_end_sd:
  22290	mov.l		%d3, %d0		# return grs to d0
  22291
  22292	movm.l		(%sp)+, &0xc		# restore scratch registers {d2/d3}
  22293
  22294	swap		%d1			# restore d1 to original
  22295	rts
  22296
  22297#########################################################################
  22298# norm(): normalize the mantissa of an extended precision input. the	#
  22299#	  input operand should not be normalized already.		#
  22300#									#
  22301# XDEF ****************************************************************	#
  22302#	norm()								#
  22303#									#
  22304# XREF **************************************************************** #
  22305#	none								#
  22306#									#
  22307# INPUT *************************************************************** #
  22308#	a0 = pointer fp extended precision operand to normalize		#
  22309#									#
  22310# OUTPUT ************************************************************** #
  22311#	d0 = number of bit positions the mantissa was shifted		#
  22312#	a0 = the input operand's mantissa is normalized; the exponent	#
  22313#	     is unchanged.						#
  22314#									#
  22315#########################################################################
  22316	global		norm
  22317norm:
  22318	mov.l		%d2, -(%sp)		# create some temp regs (d2/d3 are preserved; d1 is not)
  22319	mov.l		%d3, -(%sp)
  22320
  22321	mov.l		FTEMP_HI(%a0), %d0	# load hi(mantissa)
  22322	mov.l		FTEMP_LO(%a0), %d1	# load lo(mantissa)
  22323
  22324	bfffo		%d0{&0:&32}, %d2	# how many places to shift? (d2 = offset of first set bit)
  22325	beq.b		norm_lo			# hi(man) is all zeroes!
  22326
  22327norm_hi:
  22328	lsl.l		%d2, %d0		# left shift hi(man)
  22329	bfextu		%d1{&0:%d2}, %d3	# extract top d2 bits of lo(man); a width of 0 would mean 32, hence "not normalized already"
  22330
  22331	or.l		%d3, %d0		# create hi(man)
  22332	lsl.l		%d2, %d1		# create lo(man)
  22333
  22334	mov.l		%d0, FTEMP_HI(%a0)	# store new hi(man)
  22335	mov.l		%d1, FTEMP_LO(%a0)	# store new lo(man)
  22336
  22337	mov.l		%d2, %d0		# return shift amount
  22338
  22339	mov.l		(%sp)+, %d3		# restore temp regs
  22340	mov.l		(%sp)+, %d2
  22341
  22342	rts
  22343
  22344norm_lo:
  22345	bfffo		%d1{&0:&32}, %d2	# how many places to shift? (within lo(man))
  22346	lsl.l		%d2, %d1		# shift lo(man)
  22347	add.l		&32, %d2		# add 32 to shft amount (hi(man) was entirely zero)
  22348
  22349	mov.l		%d1, FTEMP_HI(%a0)	# shifted lo(man) becomes the new hi(man)
  22350	clr.l		FTEMP_LO(%a0)		# lo(man) is now zero
  22351
  22352	mov.l		%d2, %d0		# return shift amount
  22353
  22354	mov.l		(%sp)+, %d3		# restore temp regs
  22355	mov.l		(%sp)+, %d2
  22356
  22357	rts
  22358
  22359#########################################################################
  22360# unnorm_fix(): - changes an UNNORM to one of NORM, DENORM, or ZERO	#
  22361#		- returns corresponding optype tag			#
  22362#									#
  22363# XDEF ****************************************************************	#
  22364#	unnorm_fix()							#
  22365#									#
  22366# XREF **************************************************************** #
  22367#	norm() - normalize the mantissa					#
  22368#									#
  22369# INPUT *************************************************************** #
  22370#	a0 = pointer to unnormalized extended precision number		#
  22371#									#
  22372# OUTPUT ************************************************************** #
  22373#	d0 = optype tag - is corrected to one of NORM, DENORM, or ZERO	#
  22374#	a0 = input operand has been converted to a norm, denorm, or	#
  22375#	     zero; both the exponent and mantissa are changed.		#
  22376#									#
  22377#########################################################################
  22378
  22379	global		unnorm_fix
  22380unnorm_fix:
  22381	bfffo		FTEMP_HI(%a0){&0:&32}, %d0 # how many shifts are needed?
  22382	bne.b		unnorm_shift		# hi(man) is not all zeroes
  22383
  22384#
  22385# hi(man) is all zeroes so see if any bits in lo(man) are set
  22386#
  22387unnorm_chk_lo:
  22388	bfffo		FTEMP_LO(%a0){&0:&32}, %d0 # is operand really a zero?
  22389	beq.w		unnorm_zero		# yes
  22390
  22391	add.w		&32, %d0		# no; fix shift distance (account for the 32 zero bits of hi(man))
  22392
  22393#
  22394# d0 = # shifts needed for complete normalization
  22395#
  22396unnorm_shift:
  22397	clr.l		%d1			# clear top word
  22398	mov.w		FTEMP_EX(%a0), %d1	# extract exponent
  22399	and.w		&0x7fff, %d1		# strip off sgn
  22400
  22401	cmp.w		%d0, %d1		# would full normalization push exp < 0?
  22402	bgt.b		unnorm_nrm_zero		# yes; shift only until exp = 0 (result is a DENORM)
  22403
  22404#
  22405# exponent would not go < 0. Therefore, number stays normalized
  22406#
  22407	sub.w		%d0, %d1		# shift exponent value
  22408	mov.w		FTEMP_EX(%a0), %d0	# load old exponent
  22409	and.w		&0x8000, %d0		# save old sign
  22410	or.w		%d0, %d1		# {sgn,new exp}
  22411	mov.w		%d1, FTEMP_EX(%a0)	# insert new exponent
  22412
  22413	bsr.l		norm			# normalize UNNORM (norm recomputes the shift itself)
  22414
  22415	mov.b		&NORM, %d0		# return new optype tag
  22416	rts
  22417
  22418#
  22419# exponent would go < 0, so only denormalize until exp = 0
  22420#
  22421unnorm_nrm_zero:
  22422	cmp.b		%d1, &32		# is exp <= 32?
  22423	bgt.b		unnorm_nrm_zero_lrg	# no; go handle large exponent
  22424
  22425	bfextu		FTEMP_HI(%a0){%d1:&32}, %d0 # extract new hi(man): 32 bits starting d1 bits into the mantissa
  22426	mov.l		%d0, FTEMP_HI(%a0)	# save new hi(man)
  22427
  22428	mov.l		FTEMP_LO(%a0), %d0	# fetch old lo(man)
  22429	lsl.l		%d1, %d0		# extract new lo(man)
  22430	mov.l		%d0, FTEMP_LO(%a0)	# save new lo(man)
  22431
  22432	and.w		&0x8000, FTEMP_EX(%a0)	# set exp = 0 (sign bit is kept)
  22433
  22434	mov.b		&DENORM, %d0		# return new optype tag
  22435	rts
  22436
  22437#
  22438# only mantissa bits set are in lo(man)
  22439#
  22440unnorm_nrm_zero_lrg:
  22441	sub.w		&32, %d1		# adjust shft amt by 32
  22442
  22443	mov.l		FTEMP_LO(%a0), %d0	# fetch old lo(man)
  22444	lsl.l		%d1, %d0		# left shift lo(man)
  22445
  22446	mov.l		%d0, FTEMP_HI(%a0)	# store new hi(man)
  22447	clr.l		FTEMP_LO(%a0)		# lo(man) = 0
  22448
  22449	and.w		&0x8000, FTEMP_EX(%a0)	# set exp = 0 (sign bit is kept)
  22450
  22451	mov.b		&DENORM, %d0		# return new optype tag
  22452	rts
  22453
  22454#
  22455# whole mantissa is zero so this UNNORM is actually a zero
  22456#
  22457unnorm_zero:
  22458	and.w		&0x8000, FTEMP_EX(%a0)	# force exponent to zero (sign bit is kept)
  22459
  22460	mov.b		&ZERO, %d0		# fix optype tag
  22461	rts
  22462
  22463#########################################################################
  22464# XDEF ****************************************************************	#
  22465#	set_tag_x(): return the optype of the input ext fp number	#
  22466#									#
  22467# XREF ****************************************************************	#
  22468#	None								#
  22469#									#
  22470# INPUT ***************************************************************	#
  22471#	a0 = pointer to extended precision operand			#
  22472#									#
  22473# OUTPUT **************************************************************	#
  22474#	d0 = value of type tag						#
  22475#		one of: NORM, INF, QNAN, SNAN, DENORM, UNNORM, ZERO	#
  22476#									#
  22477# ALGORITHM ***********************************************************	#
  22478#	Simply test the exponent, j-bit, and mantissa values to		#
  22479# determine the type of operand.					#
  22480#	If it's an unnormalized zero, alter the operand and force it	#
  22481# to be a normal zero.							#
  22482#									#
  22483#########################################################################
  22484
  22485	global		set_tag_x
  22486set_tag_x:
  22487	mov.w		FTEMP_EX(%a0), %d0	# extract exponent
  22488	andi.w		&0x7fff, %d0		# strip off sign
  22489	cmpi.w		%d0, &0x7fff		# is (EXP == MAX)?
  22490	beq.b		inf_or_nan_x		# yes; operand is an INF or some NAN
  22491not_inf_or_nan_x:
  22492	btst		&0x7,FTEMP_HI(%a0)	# test j-bit: bit 7 of the first mantissa byte (msb of hi(man))
  22493	beq.b		not_norm_x		# j-bit clear: DENORM, UNNORM, or ZERO
  22494is_norm_x:
  22495	mov.b		&NORM, %d0
  22496	rts
  22497not_norm_x:
  22498	tst.w		%d0			# is exponent = 0?
  22499	bne.b		is_unnorm_x		# no; j-bit clear with exp != 0 is an UNNORM
  22500not_unnorm_x:
  22501	tst.l		FTEMP_HI(%a0)		# exp = 0: any mantissa bit set => DENORM
  22502	bne.b		is_denorm_x
  22503	tst.l		FTEMP_LO(%a0)
  22504	bne.b		is_denorm_x
  22505is_zero_x:
  22506	mov.b		&ZERO, %d0
  22507	rts
  22508is_denorm_x:
  22509	mov.b		&DENORM, %d0
  22510	rts
  22511# must distinguish now "Unnormalized zeroes" which we
  22512# must convert to zero.
  22513is_unnorm_x:
  22514	tst.l		FTEMP_HI(%a0)		# any mantissa bit set => a regular UNNORM
  22515	bne.b		is_unnorm_reg_x
  22516	tst.l		FTEMP_LO(%a0)
  22517	bne.b		is_unnorm_reg_x
  22518# it's an "unnormalized zero". let's convert it to an actual zero...
  22519	andi.w		&0x8000,FTEMP_EX(%a0)	# clear exponent in the operand itself (sign bit is kept)
  22520	mov.b		&ZERO, %d0
  22521	rts
  22522is_unnorm_reg_x:
  22523	mov.b		&UNNORM, %d0
  22524	rts
  22525inf_or_nan_x:
  22526	tst.l		FTEMP_LO(%a0)		# exp = MAX: any mantissa bit below the msb set => NAN
  22527	bne.b		is_nan_x
  22528	mov.l		FTEMP_HI(%a0), %d0
  22529	and.l		&0x7fffffff, %d0	# msb is a don't care!
  22530	bne.b		is_nan_x
  22531is_inf_x:
  22532	mov.b		&INF, %d0
  22533	rts
  22534is_nan_x:
  22535	btst		&0x6, FTEMP_HI(%a0)	# bit 6 of the first mantissa byte: set => QNAN, clear => SNAN
  22536	beq.b		is_snan_x
  22537	mov.b		&QNAN, %d0
  22538	rts
  22539is_snan_x:
  22540	mov.b		&SNAN, %d0
  22541	rts
  22542
  22543#########################################################################
  22544# XDEF ****************************************************************	#
  22545#	set_tag_d(): return the optype of the input dbl fp number	#
  22546#									#
  22547# XREF ****************************************************************	#
  22548#	None								#
  22549#									#
  22550# INPUT ***************************************************************	#
  22551#	a0 = points to double precision operand				#
  22552#									#
  22553# OUTPUT **************************************************************	#
  22554#	d0 = value of type tag						#
  22555#		one of: NORM, INF, QNAN, SNAN, DENORM, ZERO		#
  22556#									#
  22557# ALGORITHM ***********************************************************	#
  22558#	Simply test the exponent, j-bit, and mantissa values to		#
  22559# determine the type of operand.					#
  22560#									#
  22561#########################################################################
  22562
  22563	global		set_tag_d
  22564set_tag_d:
  22565	mov.l		FTEMP(%a0), %d0		# d0 = first longword of the operand (sign, exponent, top of fraction)
  22566	mov.l		%d0, %d1		# d1 = copy, used below for the fraction bits
  22567
  22568	andi.l		&0x7ff00000, %d0	# isolate the 11-bit exponent field
  22569	beq.b		zero_or_denorm_d	# exponent = 0
  22570
  22571	cmpi.l		%d0, &0x7ff00000	# exponent all ones?
  22572	beq.b		inf_or_nan_d		# yes
  22573
  22574is_norm_d:
  22575	mov.b		&NORM, %d0
  22576	rts
  22577zero_or_denorm_d:
  22578	and.l		&0x000fffff, %d1	# upper 20 fraction bits
  22579	bne		is_denorm_d		# any set => DENORM
  22580	tst.l		4+FTEMP(%a0)		# lower 32 fraction bits (second longword)
  22581	bne		is_denorm_d		# any set => DENORM
  22582is_zero_d:
  22583	mov.b		&ZERO, %d0
  22584	rts
  22585is_denorm_d:
  22586	mov.b		&DENORM, %d0
  22587	rts
  22588inf_or_nan_d:
  22589	and.l		&0x000fffff, %d1	# upper 20 fraction bits
  22590	bne		is_nan_d		# any set => NAN
  22591	tst.l		4+FTEMP(%a0)		# lower 32 fraction bits (second longword)
  22592	bne		is_nan_d		# any set => NAN
  22593is_inf_d:
  22594	mov.b		&INF, %d0
  22595	rts
  22596is_nan_d:
  22597	btst		&19, %d1		# msb of the fraction: set => QNAN, clear => SNAN
  22598	bne		is_qnan_d
  22599is_snan_d:
  22600	mov.b		&SNAN, %d0
  22601	rts
  22602is_qnan_d:
  22603	mov.b		&QNAN, %d0
  22604	rts
  22605
  22606#########################################################################
  22607# XDEF ****************************************************************	#
  22608#	set_tag_s(): return the optype of the input sgl fp number	#
  22609#									#
  22610# XREF ****************************************************************	#
  22611#	None								#
  22612#									#
  22613# INPUT ***************************************************************	#
  22614#	a0 = pointer to single precision operand			#
  22615#									#
  22616# OUTPUT **************************************************************	#
  22617#	d0 = value of type tag						#
  22618#		one of: NORM, INF, QNAN, SNAN, DENORM, ZERO		#
  22619#									#
  22620# ALGORITHM ***********************************************************	#
  22621#	Simply test the exponent, j-bit, and mantissa values to		#
  22622# determine the type of operand.					#
  22623#									#
  22624#########################################################################
  22625
  22626	global		set_tag_s
  22627set_tag_s:
  22628	mov.l		FTEMP(%a0), %d0		# d0 = the whole 32-bit single precision operand
  22629	mov.l		%d0, %d1		# d1 = copy, used below for the fraction bits
  22630
  22631	andi.l		&0x7f800000, %d0	# isolate the 8-bit exponent field
  22632	beq.b		zero_or_denorm_s	# exponent = 0
  22633
  22634	cmpi.l		%d0, &0x7f800000	# exponent all ones?
  22635	beq.b		inf_or_nan_s		# yes
  22636
  22637is_norm_s:
  22638	mov.b		&NORM, %d0
  22639	rts
  22640zero_or_denorm_s:
  22641	and.l		&0x007fffff, %d1	# 23 fraction bits
  22642	bne		is_denorm_s		# any set => DENORM
  22643is_zero_s:
  22644	mov.b		&ZERO, %d0
  22645	rts
  22646is_denorm_s:
  22647	mov.b		&DENORM, %d0
  22648	rts
  22649inf_or_nan_s:
  22650	and.l		&0x007fffff, %d1	# 23 fraction bits
  22651	bne		is_nan_s		# any set => NAN
  22652is_inf_s:
  22653	mov.b		&INF, %d0
  22654	rts
  22655is_nan_s:
  22656	btst		&22, %d1		# msb of the fraction: set => QNAN, clear => SNAN
  22657	bne		is_qnan_s
  22658is_snan_s:
  22659	mov.b		&SNAN, %d0
  22660	rts
  22661is_qnan_s:
  22662	mov.b		&QNAN, %d0
  22663	rts
  22664
  22665#########################################################################
  22666# XDEF ****************************************************************	#
  22667#	unf_res(): routine to produce default underflow result of a	#
  22668#		   scaled extended precision number; this is used by	#
  22669#		   fadd/fdiv/fmul/etc. emulation routines.		#
  22670#	unf_res4(): same as above but for fsglmul/fsgldiv which use	#
  22671#		    single round prec and extended prec mode.		#
  22672#									#
  22673# XREF ****************************************************************	#
  22674#	_denorm() - denormalize according to scale factor		#
  22675#	_round() - round denormalized number according to rnd prec	#
  22676#									#
  22677# INPUT ***************************************************************	#
  22678#	a0 = pointer to extended precision operand			#
  22679#	d0 = scale factor						#
  22680#	d1 = rounding precision/mode					#
  22681#									#
  22682# OUTPUT **************************************************************	#
  22683#	a0 = pointer to default underflow result in extended precision	#
  22684#	d0.b = result FPSR_cc which caller may or may not want to save	#
  22685#									#
  22686# ALGORITHM ***********************************************************	#
  22687#	Convert the input operand to "internal format" which means the	#
  22688# exponent is extended to 16 bits and the sign is stored in the unused	#
  22689# portion of the extended precision operand. Denormalize the number	#
  22690# according to the scale factor passed in d0. Then, round the		#
  22691# denormalized result.							#
  22692#	Set the FPSR_exc bits as appropriate but return the cc bits in	#
  22693# d0 in case the caller doesn't want to save them (as is the case for	#
  22694# fmove out).								#
  22695#	unf_res4() for fsglmul/fsgldiv forces the denorm to extended	#
  22696# precision and the rounding mode to single.				#
  22697#									#
  22698#########################################################################
  22699	global		unf_res
  22700unf_res:
  22701	mov.l		%d1, -(%sp)		# save rnd prec,mode on stack
  22702
  22703	btst		&0x7, FTEMP_EX(%a0)	# make "internal" format: test the operand's sign bit
  22704	sne		FTEMP_SGN(%a0)		# ...and record it as a byte flag in FTEMP_SGN
  22705
  22706	mov.w		FTEMP_EX(%a0), %d1	# extract exponent
  22707	and.w		&0x7fff, %d1		# strip off sign
  22708	sub.w		%d0, %d1		# subtract the scale factor passed in d0
  22709	mov.w		%d1, FTEMP_EX(%a0)	# insert 16 bit exponent
  22710
  22711	mov.l		%a0, -(%sp)		# save operand ptr during calls; now (sp) = a0, 4(sp) = saved d1
  22712
  22713	mov.l		0x4(%sp),%d0		# pass rnd prec.
  22714	andi.w		&0x00c0,%d0		# keep the prec field (bits 7-6)
  22715	lsr.w		&0x4,%d0		# move it down to bits 3-2
  22716	bsr.l		_denorm			# denorm result
  22717
  22718	mov.l		(%sp),%a0		# reload operand ptr
  22719	mov.w		0x6(%sp),%d1		# load prec:mode into %d1 (low word of saved d1)
  22720	andi.w		&0xc0,%d1		# extract rnd prec
  22721	lsr.w		&0x4,%d1
  22722	swap		%d1			# rnd prec goes in hi(d1)
  22723	mov.w		0x6(%sp),%d1		# load prec:mode again
  22724	andi.w		&0x30,%d1		# extract rnd mode (bits 5-4)
  22725	lsr.w		&0x4,%d1		# rnd mode goes in lo(d1), bits 1-0
  22726	bsr.l		_round			# round the denorm
  22727
  22728	mov.l		(%sp)+, %a0		# reload operand ptr and pop it
  22729
  22730# result is now rounded properly. convert back to normal format
  22731	bclr		&0x7, FTEMP_EX(%a0)	# clear sgn first; may have residue
  22732	tst.b		FTEMP_SGN(%a0)		# is "internal result" sign set?
  22733	beq.b		unf_res_chkifzero	# no; result is positive
  22734	bset		&0x7, FTEMP_EX(%a0)	# set result sgn
  22735	clr.b		FTEMP_SGN(%a0)		# clear temp sign
  22736
  22737# the number may have become zero after rounding. set ccodes accordingly.
  22738unf_res_chkifzero:
  22739	clr.l		%d0			# d0 = returned ccodes; start with none set
  22740	tst.l		FTEMP_HI(%a0)		# is value now a zero?
  22741	bne.b		unf_res_cont		# no
  22742	tst.l		FTEMP_LO(%a0)
  22743	bne.b		unf_res_cont		# no
  22744#	bset		&z_bit, FPSR_CC(%a6)	# yes; set zero ccode bit
  22745	bset		&z_bit, %d0		# yes; set zero ccode bit (in d0 only; FPSR_CC is left to the caller)
  22746
  22747unf_res_cont:
  22748
  22749#
  22750# can inex1 also be set along with unfl and inex2???
  22751#
  22752# we know that underflow has occurred. aunfl should be set if INEX2 is also set.
  22753#
  22754	btst		&inex2_bit, FPSR_EXCEPT(%a6) # is INEX2 set?
  22755	beq.b		unf_res_end		# no
  22756	bset		&aunfl_bit, FPSR_AEXCEPT(%a6) # yes; set aunfl
  22757
  22758unf_res_end:
  22759	add.l		&0x4, %sp		# clear stack (drop the saved d1)
  22760	rts
  22761
  22762# unf_res() for fsglmul() and fsgldiv().
  22763	global		unf_res4
  22764unf_res4:
  22765	mov.l		%d1,-(%sp)		# save rnd prec,mode on stack
  22766
  22767	btst		&0x7,FTEMP_EX(%a0)	# make "internal" format: test the operand's sign bit
  22768	sne		FTEMP_SGN(%a0)		# ...and record it as a byte flag in FTEMP_SGN
  22769
  22770	mov.w		FTEMP_EX(%a0),%d1	# extract exponent
  22771	and.w		&0x7fff,%d1		# strip off sign
  22772	sub.w		%d0,%d1			# subtract the scale factor passed in d0
  22773	mov.w		%d1,FTEMP_EX(%a0)	# insert 16 bit exponent
  22774
  22775	mov.l		%a0,-(%sp)		# save operand ptr during calls; now (sp) = a0, 4(sp) = saved d1
  22776
  22777	clr.l		%d0			# force rnd prec = ext
  22778	bsr.l		_denorm			# denorm result
  22779
  22780	mov.l		(%sp),%a0		# reload operand ptr
  22781	mov.w		&s_mode,%d1		# force rnd prec = sgl
  22782	swap		%d1			# rnd prec goes in hi(d1)
  22783	mov.w		0x6(%sp),%d1		# load rnd mode (low word of saved d1)
  22784	andi.w		&0x30,%d1		# extract rnd mode (bits 5-4)
  22785	lsr.w		&0x4,%d1		# rnd mode goes in lo(d1), bits 1-0
  22786	bsr.l		_round			# round the denorm
  22787
  22788	mov.l		(%sp)+,%a0		# reload operand ptr and pop it
  22789
  22790# result is now rounded properly. convert back to normal format
  22791	bclr		&0x7,FTEMP_EX(%a0)	# clear sgn first; may have residue
  22792	tst.b		FTEMP_SGN(%a0)		# is "internal result" sign set?
  22793	beq.b		unf_res4_chkifzero	# no; result is positive
  22794	bset		&0x7,FTEMP_EX(%a0)	# set result sgn
  22795	clr.b		FTEMP_SGN(%a0)		# clear temp sign
  22796
  22797# the number may have become zero after rounding. set ccodes accordingly.
  22798unf_res4_chkifzero:
  22799	clr.l		%d0			# d0 = returned ccodes; start with none set
  22800	tst.l		FTEMP_HI(%a0)		# is value now a zero?
  22801	bne.b		unf_res4_cont		# no
  22802	tst.l		FTEMP_LO(%a0)
  22803	bne.b		unf_res4_cont		# no
  22804#	bset		&z_bit,FPSR_CC(%a6)	# yes; set zero ccode bit
  22805	bset		&z_bit,%d0		# yes; set zero ccode bit (in d0 only; FPSR_CC is left to the caller)
  22806
  22807unf_res4_cont:
  22808
  22809#
  22810# can inex1 also be set along with unfl and inex2???
  22811#
  22812# we know that underflow has occurred. aunfl should be set if INEX2 is also set.
  22813#
  22814	btst		&inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
  22815	beq.b		unf_res4_end		# no
  22816	bset		&aunfl_bit,FPSR_AEXCEPT(%a6) # yes; set aunfl
  22817
  22818unf_res4_end:
  22819	add.l		&0x4,%sp		# clear stack (drop the saved d1)
  22820	rts
  22821
  22822#########################################################################
  22823# XDEF ****************************************************************	#
  22824#	ovf_res(): routine to produce the default overflow result of	#
  22825#		   an overflowing number.				#
  22826#	ovf_res2(): same as above but the rnd mode/prec are passed	#
  22827#		    differently.					#
  22828#									#
  22829# XREF ****************************************************************	#
  22830#	none								#
  22831#									#
  22832# INPUT ***************************************************************	#
  22833#	d1.b	= '-1' => (-); '0' => (+)				#
  22834#   ovf_res():								#
  22835#	d0	= rnd mode/prec						#
  22836#   ovf_res2():								#
  22837#	hi(d0)	= rnd prec						#
  22838#	lo(d0)	= rnd mode						#
  22839#									#
  22840# OUTPUT **************************************************************	#
  22841#	a0	= points to extended precision result			#
  22842#	d0.b	= condition code bits					#
  22843#									#
  22844# ALGORITHM ***********************************************************	#
  22845#	The default overflow result can be determined by the sign of	#
  22846# the result and the rounding mode/prec in effect. These bits are	#
  22847# concatenated together to create an index into the default result	#
  22848# table. A pointer to the correct result is returned in a0. The		#
  22849# resulting condition codes are returned in d0 in case the caller	#
  22850# doesn't want FPSR_cc altered (as is the case for fmove out).		#
  22851#									#
  22852#########################################################################
  22853
  22854	global		ovf_res
  22855ovf_res:
  22856	andi.w		&0x10,%d1		# keep result sign (as bit 4 of the table index)
  22857	lsr.b		&0x4,%d0		# shift prec/mode down: prec -> bits 3-2, mode -> bits 1-0
  22858	or.b		%d0,%d1			# concat the two: d1 = {sign, prec, mode}
  22859	mov.w		%d1,%d0			# make a copy (d0 indexes the ccode table)
  22860	lsl.b		&0x1,%d1		# multiply d1 by 2 (d1 indexes the result table)
  22861	bra.b		ovf_res_load
  22862
  22863	global		ovf_res2
  22864ovf_res2:
  22865	and.w		&0x10, %d1		# keep result sign (as bit 4 of the table index)
  22866	or.b		%d0, %d1		# insert rnd mode
  22867	swap		%d0			# bring rnd prec from hi(d0) down to lo(d0)
  22868	or.b		%d0, %d1		# insert rnd prec
  22869	mov.w		%d1, %d0		# make a copy (d0 indexes the ccode table)
  22870	lsl.b		&0x1, %d1		# shift left by 1 (d1 indexes the result table)
  22871
  22872#
  22873# use the rounding mode, precision, and result sign as an index into the
  22874# two tables below to fetch the default result and the result ccodes.
  22875#
  22876ovf_res_load:
  22877	mov.b		(tbl_ovfl_cc.b,%pc,%d0.w*1), %d0 # fetch result ccodes
  22878	lea		(tbl_ovfl_result.b,%pc,%d1.w*8), %a0 # return result ptr (2*index*8 = 16 bytes per entry)
  22879
  22880	rts
  22881
  22882tbl_ovfl_cc:
  22883	byte		0x2, 0x0, 0x0, 0x2	# (+) ext: RN, RZ, RM, RP; 0x2 marks the entries whose result is INF
  22884	byte		0x2, 0x0, 0x0, 0x2	# (+) sgl: RN, RZ, RM, RP
  22885	byte		0x2, 0x0, 0x0, 0x2	# (+) dbl: RN, RZ, RM, RP
  22886	byte		0x0, 0x0, 0x0, 0x0	# filler for prec index 3 (keeps the negative rows at index 16)
  22887	byte		0x2+0x8, 0x8, 0x2+0x8, 0x8	# (-) ext: RN, RZ, RM, RP; 0x8 marks the negative results
  22888	byte		0x2+0x8, 0x8, 0x2+0x8, 0x8	# (-) sgl: RN, RZ, RM, RP
  22889	byte		0x2+0x8, 0x8, 0x2+0x8, 0x8	# (-) dbl: RN, RZ, RM, RP
  22890
  22891tbl_ovfl_result:
  22892	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN (ext prec group)
  22893	long		0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RZ
  22894	long		0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RM
  22895	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
  22896
  22897	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN (sgl prec group)
  22898	long		0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RZ
  22899	long		0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RM
  22900	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
  22901
  22902	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN (dbl prec group)
  22903	long		0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RZ
  22904	long		0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RM
  22905	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
  22906
  22907	long		0x00000000,0x00000000,0x00000000,0x00000000 # filler; prec index 3, RN slot
  22908	long		0x00000000,0x00000000,0x00000000,0x00000000 # filler; prec index 3, RZ slot
  22909	long		0x00000000,0x00000000,0x00000000,0x00000000 # filler; prec index 3, RM slot
  22910	long		0x00000000,0x00000000,0x00000000,0x00000000 # filler; prec index 3, RP slot
  22911
  22912	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN (ext prec group)
  22913	long		0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RZ
  22914	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
  22915	long		0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RP
  22916
  22917	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN (sgl prec group)
  22918	long		0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RZ
  22919	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
  22920	long		0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RP
  22921
  22922	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN (dbl prec group)
  22923	long		0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RZ
  22924	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
  22925	long		0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RP
  22926
  22927#########################################################################
  22928# XDEF ****************************************************************	#
  22929#	get_packed(): fetch a packed operand from memory and then	#
  22930#		      convert it to a floating-point binary number.	#
  22931#									#
  22932# XREF ****************************************************************	#
  22933#	_dcalc_ea() - calculate the correct <ea>			#
  22934#	_mem_read() - fetch the packed operand from memory		#
  22935#	facc_in_x() - the fetch failed so jump to special exit code	#
  22936#	decbin()    - convert packed to binary extended precision	#
  22937#									#
  22938# INPUT ***************************************************************	#
  22939#	None								#
  22940#									#
  22941# OUTPUT **************************************************************	#
  22942#	If no failure on _mem_read():					#
  22943#	FP_SRC(a6) = packed operand now as a binary FP number		#
  22944#									#
  22945# ALGORITHM ***********************************************************	#
  22946#	Get the correct <ea> which is the value on the exception stack	#
  22947# frame w/ maybe a correction factor if the <ea> is -(an) or (an)+.	#
  22948# Then, fetch the operand from memory. If the fetch fails, exit		#
  22949# through facc_in_x().							#
  22950#	If the packed operand is a ZERO,NAN, or INF, convert it to	#
  22951# its binary representation here. Else, call decbin() which will	#
  22952# convert the packed value to an extended precision binary value.	#
  22953#									#
  22954#########################################################################
  22955
  22956# the stacked <ea> for packed is correct except for -(An).
  22957# the base reg must be updated for both -(An) and (An)+.
  22958	global		get_packed
  22959get_packed:
  22960	mov.l		&0xc,%d0		# packed is 12 bytes
  22961	bsr.l		_dcalc_ea		# fetch <ea>; correct An
  22962
  22963	lea		FP_SRC(%a6),%a1		# pass: ptr to super dst
  22964	mov.l		&0xc,%d0		# pass: 12 bytes
  22965	bsr.l		_dmem_read		# read packed operand into FP_SRC
  22966
  22967	tst.l		%d1			# did dfetch fail?
  22968	bne.l		facc_in_x		# yes; branch (not a call), so control does not come back here
  22969
  22970# The packed operand is an INF or a NAN if the exponent field is all ones.
  22971	bfextu		FP_SRC(%a6){&1:&15},%d0	# get exp (15 bits following the top bit)
  22972	cmpi.w		%d0,&0x7fff		# INF or NAN?
  22973	bne.b		gp_try_zero		# no
  22974	rts					# operand is an INF or NAN; FP_SRC is left as read
  22975
  22976# The packed operand is a zero if the mantissa is all zero, else it's
  22977# a normal packed op.
  22978gp_try_zero:
  22979	mov.b		3+FP_SRC(%a6),%d0	# get byte 4
  22980	andi.b		&0x0f,%d0		# clear all but last nybble
  22981	bne.b		gp_not_spec		# not a zero
  22982	tst.l		FP_SRC_HI(%a6)		# is lw 2 zero?
  22983	bne.b		gp_not_spec		# not a zero
  22984	tst.l		FP_SRC_LO(%a6)		# is lw 3 zero?
  22985	bne.b		gp_not_spec		# not a zero
  22986	rts					# operand is a ZERO; FP_SRC is left as read
  22987gp_not_spec:
  22988	lea		FP_SRC(%a6),%a0		# pass: ptr to packed op
  22989	bsr.l		decbin			# convert to extended (result comes back in fp0)
  22990	fmovm.x		&0x80,FP_SRC(%a6)	# make this the srcop: store fp0 over the packed bytes
  22991	rts
  22992
  22993#########################################################################
  22994# decbin(): Converts normalized packed bcd value pointed to by register	#
  22995#	    a0 to extended-precision value in fp0.			#
  22996#									#
  22997# INPUT ***************************************************************	#
  22998#	a0 = pointer to normalized packed bcd value			#
  22999#									#
  23000# OUTPUT **************************************************************	#
  23001#	fp0 = exact fp representation of the packed bcd value.		#
  23002#									#
  23003# ALGORITHM ***********************************************************	#
  23004#	Expected is a normal bcd (i.e. non-exceptional; all inf, zero,	#
  23005#	and NaN operands are dispatched without entering this routine)	#
  23006#	value in 68881/882 format at location (a0).			#
  23007#									#
  23008#	A1. Convert the bcd exponent to binary by successive adds and	#
  23009#	muls. Set the sign according to SE. Subtract 16 to compensate	#
  23010#	for the mantissa which is to be interpreted as 17 integer	#
  23011#	digits, rather than 1 integer and 16 fraction digits.		#
  23012#	Note: this operation can never overflow.			#
  23013#									#
  23014#	A2. Convert the bcd mantissa to binary by successive		#
  23015#	adds and muls in FP0. Set the sign according to SM.		#
  23016#	The mantissa digits will be converted with the decimal point	#
  23017#	assumed following the least-significant digit.			#
  23018#	Note: this operation can never overflow.			#
  23019#									#
  23020#	A3. Count the number of leading/trailing zeros in the		#
  23021#	bcd string.  If SE is positive, count the leading zeros;	#
  23022#	if negative, count the trailing zeros.  Set the adjusted	#
  23023#	exponent equal to the exponent from A1 and the zero count	#
  23024#	added if SE = 1 and subtracted if SE = 0.  Scale the		#
  23025#	mantissa the equivalent of forcing in the bcd value:		#
  23026#									#
  23027#	SE = 0	a non-zero digit in the integer position		#
  23028#	SE = 1	a non-zero digit in Mant0, lsd of the fraction		#
  23029#									#
  23030#	this will insure that any value, regardless of its		#
  23031#	representation (ex. 0.1E2, 1E1, 10E0, 100E-1), is converted	#
  23032#	consistently.							#
  23033#									#
  23034#	A4. Calculate the factor 10^exp in FP1 using a table of		#
  23035#	10^(2^n) values.  To reduce the error in forming factors	#
  23036#	greater than 10^27, a directed rounding scheme is used with	#
  23037#	tables rounded to RN, RM, and RP, according to the table	#
  23038#	in the comments of the pwrten section.				#
  23039#									#
  23040#	A5. Form the final binary number by scaling the mantissa by	#
  23041#	the exponent factor.  This is done by multiplying the		#
  23042#	mantissa in FP0 by the factor in FP1 if the adjusted		#
  23043#	exponent sign is positive, and dividing FP0 by FP1 if		#
  23044#	it is negative.							#
  23045#									#
  23046#	Clean up and return. Check if the final mul or div was inexact.	#
  23047#	If so, set INEX1 in USER_FPSR.					#
  23048#									#
  23049#########################################################################
  23050
  23051#
  23052#	PTENRN, PTENRM, and PTENRP are arrays of powers of 10 rounded
  23053#	to nearest, minus, and plus, respectively.  The tables include
  23054#	10**{1,2,4,8,16,32,64,128,256,512,1024,2048,4096}.  No rounding
  23055#	is required until the power is greater than 27, however, all
  23056#	tables include the first 5 for ease of indexing.
  23057#
  23058RTABLE:
  23059	byte		0,0,0,0		# user mode RN: always RN (entries: 0=RN, 2=RM, 3=RP)
  23060	byte		2,3,2,3		# user mode RZ; column index within a row is {SM,SE}
  23061	byte		2,3,3,2		# user mode RM
  23062	byte		3,2,2,3		# user mode RP
  23063
  23064	set		FNIBS,7		# dbf count for md2b: 7+1 = 8 digits per mantissa lword
  23065	set		FSTRT,0		# bit-field offset of the first digit in a mantissa lword
  23066
  23067	set		ESTRT,4		# bit-field offset of the first exponent digit in lword 1
  23068	set		EDIGITS,2	# dbf count for e_gd: 2+1 = 3 exponent digits
  23069
  23070	global		decbin
  23071decbin:
  23072	mov.l		0x0(%a0),FP_SCR0_EX(%a6) # make a working copy of the packed input
  23073	mov.l		0x4(%a0),FP_SCR0_HI(%a6) # so the caller's operand is never altered
  23074	mov.l		0x8(%a0),FP_SCR0_LO(%a6)
  23075
  23076	lea		FP_SCR0(%a6),%a0	# a0 = ptr to the working copy from here on
  23077
  23078	movm.l		&0x3c00,-(%sp)		# save d2-d5
  23079	fmovm.x		&0x2,-(%sp)		# save fp1; -(An) mask has bit n = fpn (was &0x1 = fp0), popped by (%sp)+,&0x40
  23080#
  23081# Calculate exponent:
  23082#  1. Copy bcd value in memory for use as a working copy.
  23083#  2. Calculate absolute value of exponent in d1 by mul and add.
  23084#  3. Correct for exponent sign.
  23085#  4. Subtract 16 to compensate for interpreting the mant as all integer digits.
  23086#     (i.e., all digits assumed left of the decimal point.)
  23087#
  23088# Register usage:
  23089#
  23090#  calc_e:
  23091#	(*)  d0: temp digit storage
  23092#	(*)  d1: accumulator for binary exponent
  23093#	(*)  d2: digit count
  23094#	(*)  d3: offset pointer
  23095#	( )  d4: first word of bcd
  23096#	( )  a0: pointer to working bcd value
  23097#	( )  a6: pointer to original bcd value
  23098#	(*)  FP_SCR0: working copy of original bcd value (a0 points here)
  23099#	(*)  (%sp): abs(adjusted exponent), pushed at e_save
  23100#
  23101calc_e:
  23102	mov.l		&EDIGITS,%d2		# dbf count: EDIGITS+1 = 3 exponent digits
  23103	mov.l		&ESTRT,%d3		# bit-field offset of the first exponent digit
  23104	mov.l		(%a0),%d4		# get first lword of bcd (SM, SE, exponent digits, M16)
  23105	clr.l		%d1			# zero d1 for accumulator
  23106e_gd:
  23107	mulu.l		&0xa,%d1		# mul partial product by one digit place
  23108	bfextu		%d4{%d3:&4},%d0		# get the digit and zero extend into d0
  23109	add.l		%d0,%d1			# d1 = d1 + d0
  23110	addq.b		&4,%d3			# advance d3 to the next digit
  23111	dbf.w		%d2,e_gd		# loop until all 3 exponent digits are used
  23112	btst		&30,%d4			# get SE
  23113	beq.b		e_pos			# don't negate if pos
  23114	neg.l		%d1			# negate before subtracting
  23115e_pos:
  23116	sub.l		&16,%d1			# sub to compensate for shift of mant
  23117	bge.b		e_save			# if still pos, do not neg
  23118	neg.l		%d1			# now negative, make pos and set SE
  23119	or.l		&0x40000000,%d4		# set SE in d4,
  23120	or.l		&0x40000000,(%a0)	# and in working bcd
  23121e_save:
  23122	mov.l		%d1,-(%sp)		# push abs(adjusted exp); later read back via (%sp)
  23123#
  23124#
  23125# Calculate mantissa:
  23126#  1. Calculate absolute value of mantissa in fp0 by mul and add.
  23127#  2. Correct for mantissa sign.
  23128#     (i.e., all digits assumed left of the decimal point.)
  23129#
  23130# Register usage:
  23131#
  23132#  calc_m:
  23133#	(*)  d0: temp digit storage
  23134#	(*)  d1: lword counter
  23135#	(*)  d2: digit count
  23136#	(*)  d3: offset pointer
  23137#	( )  d4: words 2 and 3 of bcd
  23138#	( )  a0: pointer to working bcd value
  23139#	( )  a6: pointer to original bcd value
  23140#	(*) fp0: mantissa accumulator
  23141#	( )  FP_SCR0: working copy of original bcd value (a0 points here)
  23142#	( )  (%sp): abs(adjusted exponent) pushed by calc_e
  23143#
  23144calc_m:
  23145	mov.l		&1,%d1			# lword counter, init to 1 (second lword of bcd)
  23146	fmov.s		&0x00000000,%fp0	# accumulator
  23147#
  23148#
  23149#  Since the packed number has a long word between the first & second parts,
  23150#  get the integer digit then skip down & get the rest of the
  23151#  mantissa.  We will unroll the loop once.
  23152#
  23153	bfextu		(%a0){&28:&4},%d0	# integer part is ls digit in long word
  23154	fadd.b		%d0,%fp0		# add digit to sum in fp0
  23155#
  23156#
  23157#  Get the rest of the mantissa.
  23158#
  23159loadlw:
  23160	mov.l		(%a0,%d1.L*4),%d4	# load mantissa longword d1 into d4
  23161	mov.l		&FSTRT,%d3		# bit-field offset of the first digit
  23162	mov.l		&FNIBS,%d2		# reset dbf count: FNIBS+1 = 8 digits per lword
  23163md2b:
  23164	fmul.s		&0x41200000,%fp0	# fp0 = fp0 * 10
  23165	bfextu		%d4{%d3:&4},%d0		# get the digit and zero extend
  23166	fadd.b		%d0,%fp0		# fp0 = fp0 + digit
  23167#
  23168#
  23169#  If all the digits (8) in that long word have been converted (d2=0),
  23170#  then inc d1 (=2) to point to the next long word and reset d3 to 0
  23171#  to initialize the digit offset, and set d2 to 7 for the digit count;
  23172#  else continue with this long word.
  23173#
  23174	addq.b		&4,%d3			# advance d3 to the next digit
  23175	dbf.w		%d2,md2b		# check for last digit in this lw
  23176nextlw:
  23177	addq.l		&1,%d1			# inc lw pointer in mantissa
  23178	cmp.l		%d1,&2			# test for last lw
  23179	ble.b		loadlw			# if d1 <= 2, go convert that lw too
  23180#
  23181#  Check the sign of the mant and make the value in fp0 the same sign.
  23182#
  23183m_sign:
  23184	btst		&31,(%a0)		# test SM; memory btst is byte-wide (bit# mod 8 = bit 7 of byte 0)
  23185	beq.b		ap_st_z			# if clear, go to append/strip zeros
  23186	fneg.x		%fp0			# if set, negate fp0
  23187#
  23188# Append/strip zeros:
  23189#
  23190#  For adjusted exponents which have an absolute value greater than 27*,
  23191#  this routine calculates the amount needed to normalize the mantissa
  23192#  for the adjusted exponent.  That number is subtracted from the exp
  23193#  if the exp was positive, and added if it was negative.  The purpose
  23194#  of this is to reduce the value of the exponent and the possibility
  23195#  of error in calculation of pwrten.
  23196#
  23197#  1. Branch on the sign of the adjusted exponent.
  23198#  2p.(positive exp)
  23199#   2. Check M16 and the digits in lwords 2 and 3 in descending order.
  23200#   3. Add one for each zero encountered until a non-zero digit.
  23201#   4. Subtract the count from the exp.
  23202#   5. Check if the exp has crossed zero in #3 above; make the exp abs
  23203#	   and set SE.
  23204#	6. Multiply the mantissa by 10**count.
  23205#  2n.(negative exp)
  23206#   2. Check the digits in lwords 3 and 2 in descending order.
  23207#   3. Add one for each zero encountered until a non-zero digit.
  23208#   4. Add the count to the exp.
  23209#   5. Check if the exp has crossed zero in #3 above; clear SE.
  23210#   6. Divide the mantissa by 10**count.
  23211#
  23212#  *Why 27?  If the adjusted exponent is within -28 < expA < 28, then
  23213#   any adjustment due to append/strip zeros will drive the resultant
  23214#   exponent towards zero.  Since all pwrten constants with a power
  23215#   of 27 or less are exact, there is no need to use this routine to
  23216#   attempt to lessen the resultant exponent.
  23217#
  23218# Register usage:
  23219#
  23220#  ap_st_z:
  23221#	(*)  d0: temp digit storage
  23222#	(*)  d1: zero count
  23223#	(*)  d2: digit count
  23224#	(*)  d3: offset pointer
  23225#	( )  d4: first word of bcd
  23226#	(*)  d5: lword counter
  23227#	( )  a0: pointer to working bcd value
  23228#	( )  FP_SCR0: working copy of original bcd value (a0 points here)
  23229#	( )  (%sp): abs(adjusted exponent) pushed by calc_e
  23230#
  23231#
  23232# First check the absolute value of the exponent to see if this
  23233# routine is necessary.  If so, then check the sign of the exponent
  23234# and do append (+) or strip (-) zeros accordingly.
  23235# This section handles a positive adjusted exponent.
  23236#
  23237ap_st_z:
  23238	mov.l		(%sp),%d1		# load expA for range test
  23239	cmp.l		%d1,&27			# test is with 27
  23240	ble.w		pwrten			# if abs(expA) <28, skip ap/st zeros
  23241	btst		&30,(%a0)		# check sign of exp (byte-wide btst: bit 6 of byte 0 = SE)
  23242	bne.b		ap_st_n			# if neg, go to neg side
  23243	clr.l		%d1			# zero count reg
  23244	mov.l		(%a0),%d4		# load lword 1 to d4
  23245	bfextu		%d4{&28:&4},%d0		# get M16 in d0
  23246	bne.b		ap_p_fx			# if M16 is non-zero, go fix exp
  23247	addq.l		&1,%d1			# inc zero count
  23248	mov.l		&1,%d5			# init lword counter
  23249	mov.l		(%a0,%d5.L*4),%d4	# get lword 2 to d4
  23250	bne.b		ap_p_cl			# if lw 2 is non-zero, scan its digits; else skip it
  23251	addq.l		&8,%d1			# and inc count by 8
  23252	addq.l		&1,%d5			# inc lword counter
  23253	mov.l		(%a0,%d5.L*4),%d4	# get lword 3 to d4
  23254ap_p_cl:
  23255	clr.l		%d3			# init offset reg
  23256	mov.l		&7,%d2			# init digit counter
  23257ap_p_gd:
  23258	bfextu		%d4{%d3:&4},%d0		# get digit
  23259	bne.b		ap_p_fx			# if non-zero, go to fix exp
  23260	addq.l		&4,%d3			# point to next digit
  23261	addq.l		&1,%d1			# inc zero count
  23262	dbf.w		%d2,ap_p_gd		# get next digit
  23263ap_p_fx:
  23264	mov.l		%d1,%d0			# copy zero count to d0
  23265	mov.l		(%sp),%d1		# get adjusted exp from memory
  23266	sub.l		%d0,%d1			# subtract count from exp
  23267	bge.b		ap_p_fm			# if still pos, go fix mantissa
  23268	neg.l		%d1			# now its neg; get abs
  23269	mov.l		(%a0),%d4		# load lword 1 to d4
  23270	or.l		&0x40000000,%d4		# and set SE in d4
  23271	or.l		&0x40000000,(%a0)	# and in memory
  23272#
  23273# Calculate 10**count in fp1 and multiply the mantissa by it, which
  23274# compensates for the count subtracted from the exponent above.
  23275#
  23276ap_p_fm:
  23277	lea.l		PTENRN(%pc),%a1		# get address of power-of-ten table
  23278	clr.l		%d3			# init table index
  23279	fmov.s		&0x3f800000,%fp1	# init fp1 to 1
  23280	mov.l		&3,%d2			# init d2 to count bits in counter (not read by the loop below)
  23281ap_p_el:
  23282	asr.l		&1,%d0			# shift lsb into carry
  23283	bcc.b		ap_p_en			# if 1, mul fp1 by pwrten factor
  23284	fmul.x		(%a1,%d3),%fp1		# mul by 10**(d3_bit_no)
  23285ap_p_en:
  23286	add.l		&12,%d3			# advance d3 by 12 bytes to the next PTENRN entry
  23287	tst.l		%d0			# check if d0 is zero
  23288	bne.b		ap_p_el			# if not, get next bit
  23289	fmul.x		%fp1,%fp0		# mul mantissa by 10**(zero count)
  23290	bra.b		pwrten			# go calc pwrten
  23291#
  23292# This section handles a negative adjusted exponent.
  23293#
  23294ap_st_n:
  23295	clr.l		%d1			# clr counter
  23296	mov.l		&2,%d5			# set up d5 to point to lword 3
  23297	mov.l		(%a0,%d5.L*4),%d4	# get lword 3
  23298	bne.b		ap_n_cl			# if not zero, check digits
  23299	sub.l		&1,%d5			# dec d5 to point to lword 2
  23300	addq.l		&8,%d1			# inc counter by 8
  23301	mov.l		(%a0,%d5.L*4),%d4	# get lword 2
  23302ap_n_cl:
  23303	mov.l		&28,%d3			# point to last digit
  23304	mov.l		&7,%d2			# init digit counter
  23305ap_n_gd:
  23306	bfextu		%d4{%d3:&4},%d0		# get digit
  23307	bne.b		ap_n_fx			# if non-zero, go to exp fix
  23308	subq.l		&4,%d3			# point to previous digit
  23309	addq.l		&1,%d1			# inc zero count
  23310	dbf.w		%d2,ap_n_gd		# get next digit
  23311ap_n_fx:
  23312	mov.l		%d1,%d0			# copy zero count to d0
  23313	mov.l		(%sp),%d1		# get adjusted exp from memory
  23314	sub.l		%d0,%d1			# subtract count from abs(exp)
  23315	bgt.b		ap_n_fm			# if still pos, go fix mantissa
  23316	neg.l		%d1			# take abs of exp and clr SE
  23317	mov.l		(%a0),%d4		# load lword 1 to d4
  23318	and.l		&0xbfffffff,%d4		# and clr SE in d4
  23319	and.l		&0xbfffffff,(%a0)	# and in memory
  23320#
  23321# Calculate 10**count in fp1 and divide the mantissa by it, which
  23322# strips the trailing zeros counted above.
  23323#
  23324ap_n_fm:
  23325	lea.l		PTENRN(%pc),%a1		# get address of power-of-ten table
  23326	clr.l		%d3			# init table index
  23327	fmov.s		&0x3f800000,%fp1	# init fp1 to 1
  23328	mov.l		&3,%d2			# init d2 to count bits in counter (not read by the loop below)
  23329ap_n_el:
  23330	asr.l		&1,%d0			# shift lsb into carry
  23331	bcc.b		ap_n_en			# if 1, mul fp1 by pwrten factor
  23332	fmul.x		(%a1,%d3),%fp1		# mul by 10**(d3_bit_no)
  23333ap_n_en:
  23334	add.l		&12,%d3			# advance d3 by 12 bytes to the next PTENRN entry
  23335	tst.l		%d0			# check if d0 is zero
  23336	bne.b		ap_n_el			# if not, get next bit
  23337	fdiv.x		%fp1,%fp0		# div mantissa by 10**(zero count)
  23338#
  23339#
  23340# Calculate power-of-ten factor from adjusted and shifted exponent.
  23341#
  23342# Register usage:
  23343#
  23344#  pwrten:
  23345#	(*)  d0: temp
  23346#	( )  d1: exponent
  23347#	(*)  d2: {FPCR[6:5],SM,SE} as index in RTABLE; temp
  23348#	(*)  d3: FPCR work copy
  23349#	( )  d4: first word of bcd
  23350#	(*)  a1: RTABLE pointer
  23351#  calc_p:
  23352#	(*)  d0: temp
  23353#	( )  d1: exponent
  23354#	(*)  d3: PWRTxx table index
  23355#	( )  a0: pointer to working copy of bcd
  23356#	(*)  a1: PWRTxx pointer
  23357#	(*) fp1: power-of-ten accumulator
  23358#
  23359# Pwrten calculates the exponent factor in the selected rounding mode
  23360# according to the following table:
  23361#
  23362#	Sign of Mant  Sign of Exp  Rounding Mode  PWRTEN Rounding Mode
  23363#
  23364#	ANY	  ANY	RN	RN
  23365#
  23366#	 +	   +	RP	RP
  23367#	 -	   +	RP	RM
  23368#	 +	   -	RP	RM
  23369#	 -	   -	RP	RP
  23370#
  23371#	 +	   +	RM	RM
  23372#	 -	   +	RM	RP
  23373#	 +	   -	RM	RP
  23374#	 -	   -	RM	RM
  23375#
  23376#	 +	   +	RZ	RM
  23377#	 -	   +	RZ	RM
  23378#	 +	   -	RZ	RP
  23379#	 -	   -	RZ	RP
  23380#
  23381#
  23382pwrten:
  23383	mov.l		USER_FPCR(%a6),%d3	# get user's FPCR
  23384	bfextu		%d3{&26:&2},%d2		# isolate the 2 rounding mode bits
  23385	mov.l		(%a0),%d4		# reload 1st bcd word to d4
  23386	asl.l		&2,%d2			# d2 = rmode * 4 (selects the RTABLE row)
  23387	bfextu		%d4{&0:&2},%d0		# d0 = {SM,SE} (selects the column)
  23388	add.l		%d0,%d2			# d2 = {rmode,SM,SE} as index into RTABLE
  23389	lea.l		RTABLE(%pc),%a1		# load rtable base
  23390	mov.b		(%a1,%d2),%d0		# load new rounding bits from table
  23391	clr.l		%d3			# clear d3 to force no exc and extended
  23392	bfins		%d0,%d3{&26:&2}		# stuff new rounding bits in FPCR
  23393	fmov.l		%d3,%fpcr		# write new FPCR
  23394	asr.l		&1,%d0			# bit 0 of the table value set means RP;
  23395	bcc.b		not_rp			# pick the matching PTENxx table into a1
  23396	lea.l		PTENRP(%pc),%a1		# it is RP
  23397	bra.b		calc_p			# go to init section
  23398not_rp:
  23399	asr.l		&1,%d0			# bit 1 set (with bit 0 clear) means RM
  23400	bcc.b		not_rm			# neither bit set: RN
  23401	lea.l		PTENRM(%pc),%a1		# it is RM
  23402	bra.b		calc_p			# go to init section
  23403not_rm:
  23404	lea.l		PTENRN(%pc),%a1		# it is RN
  23405calc_p:
  23406	mov.l		%d1,%d0			# copy exp to d0;use d0
  23407	bpl.b		no_neg			# if exp is negative,
  23408	neg.l		%d0			# invert it
  23409	or.l		&0x40000000,(%a0)	# and set SE bit
  23410no_neg:
  23411	clr.l		%d3			# table index
  23412	fmov.s		&0x3f800000,%fp1	# init fp1 to 1
  23413e_loop:
  23414	asr.l		&1,%d0			# shift next bit into carry
  23415	bcc.b		e_next			# if zero, skip the mul
  23416	fmul.x		(%a1,%d3),%fp1		# mul by 10**(d3_bit_no)
  23417e_next:
  23418	add.l		&12,%d3			# advance d3 by 12 bytes to the next PTENxx entry
  23419	tst.l		%d0			# check if d0 is zero
  23420	bne.b		e_loop			# not zero, continue shifting
  23421#
  23422#
  23423#  Check the sign of the adjusted exp and make the value in fp0 the
  23424#  same sign. If the exp was pos then multiply fp1*fp0;
  23425#  else divide fp0/fp1.
  23426#
  23427# Register Usage:
  23428#  pnorm:
  23429#	( )  a0: pointer to working bcd value
  23430#	(*) fp0: mantissa accumulator
  23431#	( ) fp1: scaling factor - 10**(abs(exp))
  23432#
  23433pnorm:
  23434	btst		&30,(%a0)		# test the sign of the exponent (SE)
  23435	beq.b		mul			# if clear, go to multiply
  23436div:
  23437	fdiv.x		%fp1,%fp0		# exp is negative, so divide mant by 10**abs(exp)
  23438	bra.b		end_dec			# go clean up
  23439mul:
  23440	fmul.x		%fp1,%fp0		# exp is positive, so multiply mant by 10**exp
  23441#
  23442#
  23443# Clean up and return with result in fp0.
  23444#
  23445# If the final mul/div in decbin incurred an inex exception,
  23446# it will be inex2, but will be reported as inex1 by get_op.
  23447#
  23448end_dec:
  23449	fmov.l		%fpsr,%d0		# get status register
  23450	bclr		&inex2_bit+8,%d0	# test for inex2 and clear it
  23451	beq.b		no_exc			# skip this if no exc
  23452	ori.w		&inx1a_mask,2+USER_FPSR(%a6) # set INEX1/AINEX
  23453no_exc:
  23454	add.l		&0x4,%sp		# discard the exponent lword pushed at e_save
  23455	fmovm.x		(%sp)+,&0x40		# restore fp1 ((An)+ mask: bit 6 = fp1)
  23456	movm.l		(%sp)+,&0x3c		# restore d2-d5
  23457	fmov.l		&0x0,%fpcr		# return with FPCR zeroed (pwrten rewrote it)
  23458	fmov.l		&0x0,%fpsr		# and FPSR zeroed
  23459	rts					# result is in fp0
  23460
  23461#########################################################################
  23462# bindec(): Converts an input in extended precision format to bcd format#
  23463#									#
  23464# INPUT ***************************************************************	#
  23465#	a0 = pointer to the input extended precision value in memory.	#
  23466#	     the input may be either normalized, unnormalized, or	#
  23467#	     denormalized.						#
  23468#	d0 = contains the k-factor sign-extended to 32-bits.		#
  23469#									#
  23470# OUTPUT **************************************************************	#
  23471#	FP_SCR0(a6) = bcd format result on the stack.			#
  23472#									#
  23473# ALGORITHM ***********************************************************	#
  23474#									#
  23475#	A1.	Set RM and size ext;  Set SIGMA = sign of input.	#
  23476#		The k-factor is saved for use in d7. Clear the		#
  23477#		BINDEC_FLG for separating normalized/denormalized	#
  23478#		input.  If input is unnormalized or denormalized,	#
  23479#		normalize it.						#
  23480#									#
  23481#	A2.	Set X = abs(input).					#
  23482#									#
  23483#	A3.	Compute ILOG.						#
  23484#		ILOG is the log base 10 of the input value.  It is	#
  23485#		approximated by adding e + 0.f when the original	#
  23486#		value is viewed as 2^^e * 1.f in extended precision.	#
  23487#		This value is stored in d6.				#
  23488#									#
  23489#	A4.	Clr INEX bit.						#
  23490#		The operation in A3 above may have set INEX2.		#
  23491#									#
  23492#	A5.	Set ICTR = 0;						#
  23493#		ICTR is a flag used in A13.  It must be set before the	#
  23494#		loop entry A6.						#
  23495#									#
  23496#	A6.	Calculate LEN.						#
  23497#		LEN is the number of digits to be displayed.  The	#
  23498#		k-factor can dictate either the total number of digits,	#
  23499#		if it is a positive number, or the number of digits	#
  23500#		after the decimal point which are to be included as	#
  23501#		significant.  See the 68882 manual for examples.	#
  23502#		If LEN is computed to be greater than 17, set OPERR in	#
  23503#		USER_FPSR.  LEN is stored in d4.			#
  23504#									#
  23505#	A7.	Calculate SCALE.					#
  23506#		SCALE is equal to 10^ISCALE, where ISCALE is the number	#
  23507#		of decimal places needed to insure LEN integer digits	#
  23508#		in the output before conversion to bcd. LAMBDA is the	#
  23509#		sign of ISCALE, used in A9. Fp1 contains		#
  23510#		10^^(abs(ISCALE)) using a rounding mode which is a	#
  23511#		function of the original rounding mode and the signs	#
  23512#		of ISCALE and X.  A table is given in the code.		#
  23513#									#
  23514#	A8.	Clr INEX; Force RZ.					#
  23515#		The operation in A3 above may have set INEX2.		#
  23516#		RZ mode is forced for the scaling operation to insure	#
  23517#		only one rounding error.  The grs bits are collected in #
  23518#		the INEX flag for use in A10.				#
  23519#									#
  23520#	A9.	Scale X -> Y.						#
  23521#		The mantissa is scaled to the desired number of		#
  23522#		significant digits.  The excess digits are collected	#
  23523#		in INEX2.						#
  23524#									#
  23525#	A10.	Or in INEX.						#
  23526#		If INEX is set, round error occurred.  This is		#
  23527#		compensated for by 'or-ing' in the INEX2 flag to	#
  23528#		the lsb of Y.						#
  23529#									#
  23530#	A11.	Restore original FPCR; set size ext.			#
  23531#		Perform FINT operation in the user's rounding mode.	#
  23532#		Keep the size to extended.				#
  23533#									#
  23534#	A12.	Calculate YINT = FINT(Y) according to user's rounding	#
  23535#		mode.  The FPSP routine sintd0 is used.  The output	#
  23536#		is in fp0.						#
  23537#									#
  23538#	A13.	Check for LEN digits.					#
  23539#		If the int operation results in more than LEN digits,	#
  23540#		or less than LEN -1 digits, adjust ILOG and repeat from	#
  23541#		A6.  This test occurs only on the first pass.  If the	#
  23542#		result is exactly 10^LEN, decrement ILOG and divide	#
  23543#		the mantissa by 10.					#
  23544#									#
  23545#	A14.	Convert the mantissa to bcd.				#
  23546#		The binstr routine is used to convert the LEN digit	#
  23547#		mantissa to bcd in memory.  The input to binstr is	#
  23548#		to be a fraction; i.e. (mantissa)/10^LEN and adjusted	#
  23549#		such that the decimal point is to the left of bit 63.	#
  23550#		The bcd digits are stored in the correct position in	#
  23551#		the final string area in memory.			#
  23552#									#
  23553#	A15.	Convert the exponent to bcd.				#
  23554#		As in A14 above, the exp is converted to bcd and the	#
  23555#		digits are stored in the final string.			#
  23556#		Test the length of the final exponent string.  If the	#
  23557#		length is 4, set operr.					#
  23558#									#
  23559#	A16.	Write sign bits to final string.			#
  23560#									#
  23561#########################################################################
  23562
  23563set	BINDEC_FLG,	EXC_TEMP	# DENORM flag byte: cleared in A1, set if the input is still a denorm
  23564
  23565# Constants in extended precision (12-byte value padded with a zero lword)
  23566PLOG2:
  23567	long		0x3FFD0000,0x9A209A84,0xFBCFF798,0x00000000	# ~0.30103 = log10(2)
  23568PLOG2UP1:
  23569	long		0x3FFD0000,0x9A209A84,0xFBCFF799,0x00000000	# PLOG2 with the mantissa lsb one higher
  23570
  23571# Constants in single precision (value in the first lword, padded to 16 bytes)
  23572FONE:
  23573	long		0x3F800000,0x00000000,0x00000000,0x00000000	# 1.0
  23574FTWO:
  23575	long		0x40000000,0x00000000,0x00000000,0x00000000	# 2.0
  23576FTEN:
  23577	long		0x41200000,0x00000000,0x00000000,0x00000000	# 10.0
  23578F4933:
  23579	long		0x459A2800,0x00000000,0x00000000,0x00000000	# 4933.0
  23580
  23581RBDTBL:
  23582	byte		0,0,0,0		# user mode RN: always RN (entries: 0=RN, 2=RM, 3=RP)
  23583	byte		3,3,2,2		# user mode RZ; column index within a row is {LAMBDA,SIGN(X)}
  23584	byte		3,2,2,3		# user mode RM
  23585	byte		2,3,3,2		# user mode RP
  23586
  23587#	Implementation Notes:
  23588#
  23589#	The registers are used as follows:
  23590#
  23591#		d0: scratch; LEN input to binstr
  23592#		d1: scratch
  23593#		d2: upper 32-bits of mantissa for binstr
  23594#		d3: scratch;lower 32-bits of mantissa for binstr
  23595#		d4: LEN
  23596#		d5: LAMBDA/ICTR
  23597#		d6: ILOG
  23598#		d7: k-factor
  23599#		a0: ptr for original operand/final result
  23600#		a1: scratch pointer
  23601#		a2: pointer to FP_X; abs(original value) in ext
  23602#		fp0: scratch
  23603#		fp1: scratch
  23604#		fp2: scratch
  23605#		F_SCR1:
  23606#		F_SCR2:
  23607#		L_SCR1:
  23608#		L_SCR2:
  23609
  23610	global		bindec
  23611bindec:
  23612	movm.l		&0x3f20,-(%sp)	#  {%d2-%d7/%a2}
  23613	fmovm.x		&0x7,-(%sp)	#  {%fp0-%fp2}
  23614
  23615# A1. Set RM and size ext. Set SIGMA = sign input;
  23616#     The k-factor is saved for use in d7.  Clear BINDEC_FLG for
  23617#     separating  normalized/denormalized input.  If the input
  23618#     is a denormalized number, set the BINDEC_FLG memory word
  23619#     to signal denorm.  If the input is unnormalized, normalize
  23620#     the input and test for denormalized result.
  23621#
  23622	fmov.l		&rm_mode*0x10,%fpcr	# set RM and ext
  23623	mov.l		(%a0),L_SCR2(%a6)	# save exponent for sign check
  23624	mov.l		%d0,%d7		# move k-factor to d7
  23625
  23626	clr.b		BINDEC_FLG(%a6)	# clr norm/denorm flag
  23627	cmpi.b		STAG(%a6),&DENORM # is input a DENORM?
  23628	bne.w		A2_str		# no; input is a NORM
  23629
  23630#
  23631# Normalize the denorm
  23632#
  23633un_de_norm:
  23634	mov.w		(%a0),%d0
  23635	and.w		&0x7fff,%d0	# strip sign of normalized exp
  23636	mov.l		4(%a0),%d1
  23637	mov.l		8(%a0),%d2
  23638norm_loop:
  23639	sub.w		&1,%d0
  23640	lsl.l		&1,%d2
  23641	roxl.l		&1,%d1
  23642	tst.l		%d1
  23643	bge.b		norm_loop
  23644#
  23645# Test if the normalized input is denormalized
  23646#
  23647	tst.w		%d0
  23648	bgt.b		pos_exp		# if greater than zero, it is a norm
  23649	st		BINDEC_FLG(%a6)	# set flag for denorm
  23650pos_exp:
  23651	and.w		&0x7fff,%d0	# strip sign of normalized exp
  23652	mov.w		%d0,(%a0)
  23653	mov.l		%d1,4(%a0)
  23654	mov.l		%d2,8(%a0)
  23655
  23656# A2. Set X = abs(input).
  23657#
  23658A2_str:
  23659	mov.l		(%a0),FP_SCR1(%a6)	# move input to work space
  23660	mov.l		4(%a0),FP_SCR1+4(%a6)	# move input to work space
  23661	mov.l		8(%a0),FP_SCR1+8(%a6)	# move input to work space
  23662	and.l		&0x7fffffff,FP_SCR1(%a6)	# create abs(X)
  23663
  23664# A3. Compute ILOG.
  23665#     ILOG is the log base 10 of the input value.  It is approx-
  23666#     imated by adding e + 0.f when the original value is viewed
  23667#     as 2^^e * 1.f in extended precision.  This value is stored
  23668#     in d6.
  23669#
  23670# Register usage:
  23671#	Input/Output
  23672#	d0: k-factor/exponent
  23673#	d2: x/x
  23674#	d3: x/x
  23675#	d4: x/x
  23676#	d5: x/x
  23677#	d6: x/ILOG
  23678#	d7: k-factor/Unchanged
  23679#	a0: ptr for original operand/final result
  23680#	a1: x/x
  23681#	a2: x/x
  23682#	fp0: x/float(ILOG)
  23683#	fp1: x/x
  23684#	fp2: x/x
  23685#	F_SCR1:x/x
  23686#	F_SCR2:Abs(X)/Abs(X) with $3fff exponent
  23687#	L_SCR1:x/x
  23688#	L_SCR2:first word of X packed/Unchanged
  23689
  23690	tst.b		BINDEC_FLG(%a6)	# check for denorm
  23691	beq.b		A3_cont		# if clr, continue with norm
  23692	mov.l		&-4933,%d6	# force ILOG = -4933
  23693	bra.b		A4_str
  23694A3_cont:
  23695	mov.w		FP_SCR1(%a6),%d0	# move exp to d0
  23696	mov.w		&0x3fff,FP_SCR1(%a6)	# replace exponent with 0x3fff
  23697	fmov.x		FP_SCR1(%a6),%fp0	# now fp0 has 1.f
  23698	sub.w		&0x3fff,%d0	# strip off bias
  23699	fadd.w		%d0,%fp0	# add in exp
  23700	fsub.s		FONE(%pc),%fp0	# subtract off 1.0
  23701	fbge.w		pos_res		# if pos, branch
  23702	fmul.x		PLOG2UP1(%pc),%fp0	# if neg, mul by LOG2UP1
  23703	fmov.l		%fp0,%d6	# put ILOG in d6 as a lword
  23704	bra.b		A4_str		# go move out ILOG
  23705pos_res:
  23706	fmul.x		PLOG2(%pc),%fp0	# if pos, mul by LOG2
  23707	fmov.l		%fp0,%d6	# put ILOG in d6 as a lword
  23708
  23709
  23710# A4. Clr INEX bit.
  23711#     The operation in A3 above may have set INEX2.
  23712
  23713A4_str:
  23714	fmov.l		&0,%fpsr	# zero all of fpsr - nothing needed
  23715
  23716
  23717# A5. Set ICTR = 0;
  23718#     ICTR is a flag used in A13.  It must be set before the
  23719#     loop entry A6. The lower word of d5 is used for ICTR.
  23720
  23721	clr.w		%d5		# clear ICTR
  23722
  23723# A6. Calculate LEN.
  23724#     LEN is the number of digits to be displayed.  The k-factor
  23725#     can dictate either the total number of digits, if it is
  23726#     a positive number, or the number of digits after the
  23727#     original decimal point which are to be included as
  23728#     significant.  See the 68882 manual for examples.
  23729#     If LEN is computed to be greater than 17, set OPERR in
  23730#     USER_FPSR.  LEN is stored in d4.
  23731#
  23732# Register usage:
  23733#	Input/Output
  23734#	d0: exponent/Unchanged
  23735#	d2: x/x/scratch
  23736#	d3: x/x
  23737#	d4: exc picture/LEN
  23738#	d5: ICTR/Unchanged
  23739#	d6: ILOG/Unchanged
  23740#	d7: k-factor/Unchanged
  23741#	a0: ptr for original operand/final result
  23742#	a1: x/x
  23743#	a2: x/x
  23744#	fp0: float(ILOG)/Unchanged
  23745#	fp1: x/x
  23746#	fp2: x/x
  23747#	F_SCR1:x/x
  23748#	F_SCR2:Abs(X) with $3fff exponent/Unchanged
  23749#	L_SCR1:x/x
  23750#	L_SCR2:first word of X packed/Unchanged
  23751
  23752A6_str:
  23753	tst.l		%d7		# branch on sign of k
  23754	ble.b		k_neg		# if k <= 0, LEN = ILOG + 1 - k
  23755	mov.l		%d7,%d4		# if k > 0, LEN = k
  23756	bra.b		len_ck		# skip to LEN check
  23757k_neg:
  23758	mov.l		%d6,%d4		# first load ILOG to d4
  23759	sub.l		%d7,%d4		# subtract off k
  23760	addq.l		&1,%d4		# add in the 1
  23761len_ck:
  23762	tst.l		%d4		# LEN check: branch on sign of LEN
  23763	ble.b		LEN_ng		# if neg, set LEN = 1
  23764	cmp.l		%d4,&17		# test if LEN > 17
  23765	ble.b		A7_str		# if not, forget it
  23766	mov.l		&17,%d4		# set max LEN = 17
  23767	tst.l		%d7		# if negative, never set OPERR
  23768	ble.b		A7_str		# if positive, continue
  23769	or.l		&opaop_mask,USER_FPSR(%a6)	# set OPERR & AIOP in USER_FPSR
  23770	bra.b		A7_str		# finished here
  23771LEN_ng:
  23772	mov.l		&1,%d4		# min LEN is 1
  23773
  23774
  23775# A7. Calculate SCALE.
  23776#     SCALE is equal to 10^ISCALE, where ISCALE is the number
  23777#     of decimal places needed to insure LEN integer digits
  23778#     in the output before conversion to bcd. LAMBDA is the sign
  23779#     of ISCALE, used in A9.  Fp1 contains 10^^(abs(ISCALE)) using
  23780#     the rounding mode as given in the following table (see
  23781#     Coonen, p. 7.23 as ref.; however, the SCALE variable is
  23782#     of opposite sign in bindec.sa from Coonen).
  23783#
  23784#	Initial					USE
  23785#	FPCR[6:5]	LAMBDA	SIGN(X)		FPCR[6:5]
  23786#	----------------------------------------------
  23787#	 RN	00	   0	   0		00/0	RN
  23788#	 RN	00	   0	   1		00/0	RN
  23789#	 RN	00	   1	   0		00/0	RN
  23790#	 RN	00	   1	   1		00/0	RN
  23791#	 RZ	01	   0	   0		11/3	RP
  23792#	 RZ	01	   0	   1		11/3	RP
  23793#	 RZ	01	   1	   0		10/2	RM
  23794#	 RZ	01	   1	   1		10/2	RM
  23795#	 RM	10	   0	   0		11/3	RP
  23796#	 RM	10	   0	   1		10/2	RM
  23797#	 RM	10	   1	   0		10/2	RM
  23798#	 RM	10	   1	   1		11/3	RP
  23799#	 RP	11	   0	   0		10/2	RM
  23800#	 RP	11	   0	   1		11/3	RP
  23801#	 RP	11	   1	   0		11/3	RP
  23802#	 RP	11	   1	   1		10/2	RM
  23803#
  23804# Register usage:
  23805#	Input/Output
  23806#	d0: exponent/scratch - final is 0
  23807#	d2: x/0 or 24 for A9
  23808#	d3: x/scratch - offset ptr into PTENRM array
  23809#	d4: LEN/Unchanged
  23810#	d5: 0/ICTR:LAMBDA
  23811#	d6: ILOG/ILOG or k if ((k<=0)&(ILOG<k))
  23812#	d7: k-factor/Unchanged
  23813#	a0: ptr for original operand/final result
  23814#	a1: x/ptr to PTENRM array
  23815#	a2: x/x
  23816#	fp0: float(ILOG)/Unchanged
  23817#	fp1: x/10^ISCALE
  23818#	fp2: x/x
  23819#	F_SCR1:x/x
  23820#	F_SCR2:Abs(X) with $3fff exponent/Unchanged
  23821#	L_SCR1:x/x
  23822#	L_SCR2:first word of X packed/Unchanged
  23823
  23824A7_str:
  23825	tst.l		%d7		# test sign of k
  23826	bgt.b		k_pos		# if pos and > 0, skip this
  23827	cmp.l		%d7,%d6		# test k - ILOG
  23828	blt.b		k_pos		# if ILOG >= k, skip this
  23829	mov.l		%d7,%d6		# if ((k<0) & (ILOG < k)) ILOG = k
  23830k_pos:
  23831	mov.l		%d6,%d0		# calc ILOG + 1 - LEN in d0
  23832	addq.l		&1,%d0		# add the 1
  23833	sub.l		%d4,%d0		# sub off LEN
  23834	swap		%d5		# use upper word of d5 for LAMBDA
  23835	clr.w		%d5		# set it zero initially
  23836	clr.w		%d2		# set up d2 for very small case
  23837	tst.l		%d0		# test sign of ISCALE
  23838	bge.b		iscale		# if pos, skip next inst
  23839	addq.w		&1,%d5		# if neg, set LAMBDA true
  23840	cmp.l		%d0,&0xffffecd4	# test iscale <= -4908
  23841	bgt.b		no_inf		# if false, skip rest
  23842	add.l		&24,%d0		# add in 24 to iscale
  23843	mov.l		&24,%d2		# put 24 in d2 for A9
  23844no_inf:
  23845	neg.l		%d0		# and take abs of ISCALE
  23846iscale:
  23847	fmov.s		FONE(%pc),%fp1	# init fp1 to 1
  23848	bfextu		USER_FPCR(%a6){&26:&2},%d1	# get initial rmode bits
  23849	lsl.w		&1,%d1		# put them in bits 2:1
  23850	add.w		%d5,%d1		# add in LAMBDA
  23851	lsl.w		&1,%d1		# put them in bits 3:1
  23852	tst.l		L_SCR2(%a6)	# test sign of original x
  23853	bge.b		x_pos		# if pos, don't set bit 0
  23854	addq.l		&1,%d1		# if neg, set bit 0
  23855x_pos:
  23856	lea.l		RBDTBL(%pc),%a2	# load rbdtbl base
  23857	mov.b		(%a2,%d1),%d3	# load d3 with new rmode
  23858	lsl.l		&4,%d3		# put bits in proper position
  23859	fmov.l		%d3,%fpcr	# load bits into fpu
  23860	lsr.l		&4,%d3		# put bits in proper position
  23861	tst.b		%d3		# decode new rmode for pten table
  23862	bne.b		not_rn		# if zero, it is RN
  23863	lea.l		PTENRN(%pc),%a1	# load a1 with RN table base
  23864	bra.b		rmode		# exit decode
  23865not_rn:
  23866	lsr.b		&1,%d3		# get lsb in carry
  23867	bcc.b		not_rp2		# if carry clear, it is RM
  23868	lea.l		PTENRP(%pc),%a1	# load a1 with RP table base
  23869	bra.b		rmode		# exit decode
  23870not_rp2:
  23871	lea.l		PTENRM(%pc),%a1	# load a1 with RM table base
  23872rmode:
  23873	clr.l		%d3		# clr table index
  23874e_loop2:
  23875	lsr.l		&1,%d0		# shift next bit into carry
  23876	bcc.b		e_next2		# if zero, skip the mul
  23877	fmul.x		(%a1,%d3),%fp1	# mul by 10**(d3_bit_no)
  23878e_next2:
  23879	add.l		&12,%d3		# inc d3 to next pwrten table entry
  23880	tst.l		%d0		# test if ISCALE is zero
  23881	bne.b		e_loop2		# if not, loop
  23882
  23883# A8. Clr INEX; Force RZ.
  23884#     The operation in A3 above may have set INEX2.
  23885#     RZ mode is forced for the scaling operation to insure
  23886#     only one rounding error.  The grs bits are collected in
  23887#     the INEX flag for use in A10.
  23888#
  23889# Register usage:
  23890#	Input/Output
  23891
  23892	fmov.l		&0,%fpsr	# clr INEX
  23893	fmov.l		&rz_mode*0x10,%fpcr	# set RZ rounding mode
  23894
  23895# A9. Scale X -> Y.
  23896#     The mantissa is scaled to the desired number of significant
  23897#     digits.  The excess digits are collected in INEX2. If mul,
  23898#     Check d2 for excess 10 exponential value.  If not zero,
  23899#     the iscale value would have caused the pwrten calculation
  23900#     to overflow.  Only a negative iscale can cause this, so
  23901#     multiply by 10^(d2), which is now only allowed to be 24,
  23902#     with a multiply by 10^8 and 10^16, which is exact since
  23903#     10^24 is exact.  If the input was denormalized, we must
  23904#     create a busy stack frame with the mul command and the
  23905#     two operands, and allow the fpu to complete the multiply.
  23906#
  23907# Register usage:
  23908#	Input/Output
  23909#	d0: FPCR with RZ mode/Unchanged
  23910#	d2: 0 or 24/unchanged
  23911#	d3: x/x
  23912#	d4: LEN/Unchanged
  23913#	d5: ICTR:LAMBDA
  23914#	d6: ILOG/Unchanged
  23915#	d7: k-factor/Unchanged
  23916#	a0: ptr for original operand/final result
  23917#	a1: ptr to PTENRM array/Unchanged
  23918#	a2: x/x
  23919#	fp0: float(ILOG)/X adjusted for SCALE (Y)
  23920#	fp1: 10^ISCALE/Unchanged
  23921#	fp2: x/x
  23922#	F_SCR1:x/x
  23923#	F_SCR2:Abs(X) with $3fff exponent/Unchanged
  23924#	L_SCR1:x/x
  23925#	L_SCR2:first word of X packed/Unchanged
  23926
  23927A9_str:
  23928	fmov.x		(%a0),%fp0	# load X from memory
  23929	fabs.x		%fp0		# use abs(X)
  23930	tst.w		%d5		# LAMBDA is in lower word of d5
  23931	bne.b		sc_mul		# if neg (LAMBDA = 1), scale by mul
  23932	fdiv.x		%fp1,%fp0	# calculate X / SCALE -> Y to fp0
  23933	bra.w		A10_st		# branch to A10
  23934
  23935sc_mul:
  23936	tst.b		BINDEC_FLG(%a6)	# check for denorm
  23937	beq.w		A9_norm		# if norm, continue with mul
  23938
  23939# for DENORM, we must calculate:
  23940#	fp0 = input_op * 10^ISCALE * 10^24
  23941# since the input operand is a DENORM, we can't multiply it directly.
  23942# so, we do the multiplication of the exponents and mantissas separately.
  23943# in this way, we avoid underflow on intermediate stages of the
  23944# multiplication and guarantee a result without exception.
  23945	fmovm.x		&0x2,-(%sp)	# save 10^ISCALE to stack
  23946
  23947	mov.w		(%sp),%d3	# grab exponent
  23948	andi.w		&0x7fff,%d3	# clear sign
  23949	ori.w		&0x8000,(%a0)	# make DENORM exp negative
  23950	add.w		(%a0),%d3	# add DENORM exp to 10^ISCALE exp
  23951	subi.w		&0x3fff,%d3	# subtract BIAS
  23952	add.w		36(%a1),%d3
  23953	subi.w		&0x3fff,%d3	# subtract BIAS
  23954	add.w		48(%a1),%d3
  23955	subi.w		&0x3fff,%d3	# subtract BIAS
  23956
  23957	bmi.w		sc_mul_err	# is result is DENORM, punt!!!
  23958
  23959	andi.w		&0x8000,(%sp)	# keep sign
  23960	or.w		%d3,(%sp)	# insert new exponent
  23961	andi.w		&0x7fff,(%a0)	# clear sign bit on DENORM again
  23962	mov.l		0x8(%a0),-(%sp) # put input op mantissa on stk
  23963	mov.l		0x4(%a0),-(%sp)
  23964	mov.l		&0x3fff0000,-(%sp) # force exp to zero
  23965	fmovm.x		(%sp)+,&0x80	# load normalized DENORM into fp0
  23966	fmul.x		(%sp)+,%fp0
  23967
  23968#	fmul.x	36(%a1),%fp0	# multiply fp0 by 10^8
  23969#	fmul.x	48(%a1),%fp0	# multiply fp0 by 10^16
  23970	mov.l		36+8(%a1),-(%sp) # get 10^8 mantissa
  23971	mov.l		36+4(%a1),-(%sp)
  23972	mov.l		&0x3fff0000,-(%sp) # force exp to zero
  23973	mov.l		48+8(%a1),-(%sp) # get 10^16 mantissa
  23974	mov.l		48+4(%a1),-(%sp)
  23975	mov.l		&0x3fff0000,-(%sp)# force exp to zero
  23976	fmul.x		(%sp)+,%fp0	# multiply fp0 by 10^8
  23977	fmul.x		(%sp)+,%fp0	# multiply fp0 by 10^16
  23978	bra.b		A10_st
  23979
  23980sc_mul_err:
  23981	bra.b		sc_mul_err
  23982
  23983A9_norm:
  23984	tst.w		%d2		# test for small exp case
  23985	beq.b		A9_con		# if zero, continue as normal
  23986	fmul.x		36(%a1),%fp0	# multiply fp0 by 10^8
  23987	fmul.x		48(%a1),%fp0	# multiply fp0 by 10^16
  23988A9_con:
  23989	fmul.x		%fp1,%fp0	# calculate X * SCALE -> Y to fp0
  23990
  23991# A10. Or in INEX.
  23992#      If INEX is set, round error occurred.  This is compensated
  23993#      for by 'or-ing' in the INEX2 flag to the lsb of Y.
  23994#
  23995# Register usage:
  23996#	Input/Output
  23997#	d0: FPCR with RZ mode/FPSR with INEX2 isolated
  23998#	d2: x/x
  23999#	d3: x/x
  24000#	d4: LEN/Unchanged
  24001#	d5: ICTR:LAMBDA
  24002#	d6: ILOG/Unchanged
  24003#	d7: k-factor/Unchanged
  24004#	a0: ptr for original operand/final result
  24005#	a1: ptr to PTENxx array/Unchanged
  24006#	a2: x/ptr to FP_SCR1(a6)
  24007#	fp0: Y/Y with lsb adjusted
  24008#	fp1: 10^ISCALE/Unchanged
  24009#	fp2: x/x
  24010
  24011A10_st:
  24012	fmov.l		%fpsr,%d0	# get FPSR
  24013	fmov.x		%fp0,FP_SCR1(%a6)	# move Y to memory
  24014	lea.l		FP_SCR1(%a6),%a2	# load a2 with ptr to FP_SCR1
  24015	btst		&9,%d0		# check if INEX2 set
  24016	beq.b		A11_st		# if clear, skip rest
  24017	or.l		&1,8(%a2)	# or in 1 to lsb of mantissa
  24018	fmov.x		FP_SCR1(%a6),%fp0	# write adjusted Y back to fpu
  24019
  24020
  24021# A11. Restore original FPCR; set size ext.
  24022#      Perform FINT operation in the user's rounding mode.  Keep
  24023#      the size to extended.  The sintdo entry point in the sint
  24024#      routine expects the FPCR value to be in USER_FPCR for
  24025#      mode and precision.  The original FPCR is saved in L_SCR1.
  24026
  24027A11_st:
  24028	mov.l		USER_FPCR(%a6),L_SCR1(%a6)	# save it for later
  24029	and.l		&0x00000030,USER_FPCR(%a6)	# set size to ext,
  24030#					;block exceptions
  24031
  24032
  24033# A12. Calculate YINT = FINT(Y) according to user's rounding mode.
  24034#      fint.x is executed directly (the sintdo call is disabled).  The output is in fp0.
  24035#
  24036# Register usage:
  24037#	Input/Output
  24038#	d0: FPSR with AINEX cleared/FPCR with size set to ext
  24039#	d2: x/x/scratch
  24040#	d3: x/x
  24041#	d4: LEN/Unchanged
  24042#	d5: ICTR:LAMBDA/Unchanged
  24043#	d6: ILOG/Unchanged
  24044#	d7: k-factor/Unchanged
  24045#	a0: ptr for original operand/src ptr for sintdo
  24046#	a1: ptr to PTENxx array/Unchanged
  24047#	a2: ptr to FP_SCR1(a6)/Unchanged
  24048#	a6: temp pointer to FP_SCR1(a6) - orig value saved and restored
  24049#	fp0: Y/YINT
  24050#	fp1: 10^ISCALE/Unchanged
  24051#	fp2: x/x
  24052#	F_SCR1:x/x
  24053#	F_SCR2:Y adjusted for inex/Y with original exponent
  24054#	L_SCR1:x/original USER_FPCR
  24055#	L_SCR2:first word of X packed/Unchanged
  24056
  24057A12_st:
  24058	movm.l	&0xc0c0,-(%sp)	# save regs used by sintd0	 {%d0-%d1/%a0-%a1}
  24059	mov.l	L_SCR1(%a6),-(%sp)
  24060	mov.l	L_SCR2(%a6),-(%sp)
  24061
  24062	lea.l		FP_SCR1(%a6),%a0	# a0 is ptr to FP_SCR1(a6)
  24063	fmov.x		%fp0,(%a0)	# move Y to memory at FP_SCR1(a6)
  24064	tst.l		L_SCR2(%a6)	# test sign of original operand
  24065	bge.b		do_fint12		# if pos, use Y
  24066	or.l		&0x80000000,(%a0)	# if neg, use -Y
  24067do_fint12:
  24068	mov.l	USER_FPSR(%a6),-(%sp)
  24069#	bsr	sintdo		# sint routine returns int in fp0
  24070
  24071	fmov.l	USER_FPCR(%a6),%fpcr
  24072	fmov.l	&0x0,%fpsr			# clear the AEXC bits!!!
  24073##	mov.l		USER_FPCR(%a6),%d0	# ext prec/keep rnd mode
  24074##	andi.l		&0x00000030,%d0
  24075##	fmov.l		%d0,%fpcr
  24076	fint.x		FP_SCR1(%a6),%fp0	# do fint()
  24077	fmov.l	%fpsr,%d0
  24078	or.w	%d0,FPSR_EXCEPT(%a6)
  24079##	fmov.l		&0x0,%fpcr
  24080##	fmov.l		%fpsr,%d0		# don't keep ccodes
  24081##	or.w		%d0,FPSR_EXCEPT(%a6)
  24082
  24083	mov.b	(%sp),USER_FPSR(%a6)
  24084	add.l	&4,%sp
  24085
  24086	mov.l	(%sp)+,L_SCR2(%a6)
  24087	mov.l	(%sp)+,L_SCR1(%a6)
  24088	movm.l	(%sp)+,&0x303	# restore regs used by sint	 {%d0-%d1/%a0-%a1}
  24089
  24090	mov.l	L_SCR2(%a6),FP_SCR1(%a6)	# restore original exponent
  24091	mov.l	L_SCR1(%a6),USER_FPCR(%a6)	# restore user's FPCR
  24092
  24093# A13. Check for LEN digits.
  24094#      If the int operation results in more than LEN digits,
  24095#      or less than LEN -1 digits, adjust ILOG and repeat from
  24096#      A6.  This test occurs only on the first pass.  If the
  24097#      result is exactly 10^LEN, decrement ILOG and divide
  24098#      the mantissa by 10.  The calculation of 10^LEN cannot
  24099#      be inexact, since all powers of ten up to 10^27 are exact
  24100#      in extended precision, so the use of a previous power-of-ten
  24101#      table will introduce no error.
  24102#
  24103#
  24104# Register usage:
  24105#	Input/Output
  24106#	d0: FPCR with size set to ext/scratch final = 0
  24107#	d2: x/x
  24108#	d3: x/scratch final = x
  24109#	d4: LEN/LEN adjusted
  24110#	d5: ICTR:LAMBDA/LAMBDA:ICTR
  24111#	d6: ILOG/ILOG adjusted
  24112#	d7: k-factor/Unchanged
  24113#	a0: pointer into memory for packed bcd string formation
  24114#	a1: ptr to PTENxx array/Unchanged
  24115#	a2: ptr to FP_SCR1(a6)/Unchanged
  24116#	fp0: int portion of Y/abs(YINT) adjusted
  24117#	fp1: 10^ISCALE/Unchanged
  24118#	fp2: x/10^LEN
  24119#	F_SCR1:x/x
  24120#	F_SCR2:Y with original exponent/Unchanged
  24121#	L_SCR1:original USER_FPCR/Unchanged
  24122#	L_SCR2:first word of X packed/Unchanged
  24123
  24124A13_st:
  24125	swap		%d5		# put ICTR in lower word of d5
  24126	tst.w		%d5		# check if ICTR = 0
  24127	bne		not_zr		# if non-zero, go to second test
  24128#
  24129# Compute 10^(LEN-1)
  24130#
  24131	fmov.s		FONE(%pc),%fp2	# init fp2 to 1.0
  24132	mov.l		%d4,%d0		# put LEN in d0
  24133	subq.l		&1,%d0		# d0 = LEN -1
  24134	clr.l		%d3		# clr table index
  24135l_loop:
  24136	lsr.l		&1,%d0		# shift next bit into carry
  24137	bcc.b		l_next		# if zero, skip the mul
  24138	fmul.x		(%a1,%d3),%fp2	# mul by 10**(d3_bit_no)
  24139l_next:
  24140	add.l		&12,%d3		# inc d3 to next pwrten table entry
  24141	tst.l		%d0		# test if LEN is zero
  24142	bne.b		l_loop		# if not, loop
  24143#
  24144# 10^(LEN-1) is computed for this test and A14.  If the input was
  24145# denormalized, check only the case in which YINT > 10^LEN.
  24146#
  24147	tst.b		BINDEC_FLG(%a6)	# check if input was norm
  24148	beq.b		A13_con		# if norm, continue with checking
  24149	fabs.x		%fp0		# take abs of YINT
  24150	bra		test_2
  24151#
  24152# Compare abs(YINT) to 10^(LEN-1) and 10^LEN
  24153#
  24154A13_con:
  24155	fabs.x		%fp0		# take abs of YINT
  24156	fcmp.x		%fp0,%fp2	# compare abs(YINT) with 10^(LEN-1)
  24157	fbge.w		test_2		# if greater, do next test
  24158	subq.l		&1,%d6		# subtract 1 from ILOG
  24159	mov.w		&1,%d5		# set ICTR
  24160	fmov.l		&rm_mode*0x10,%fpcr	# set rmode to RM
  24161	fmul.s		FTEN(%pc),%fp2	# compute 10^LEN
  24162	bra.w		A6_str		# return to A6 and recompute YINT
  24163test_2:
  24164	fmul.s		FTEN(%pc),%fp2	# compute 10^LEN
  24165	fcmp.x		%fp0,%fp2	# compare abs(YINT) with 10^LEN
  24166	fblt.w		A14_st		# if less, all is ok, go to A14
  24167	fbgt.w		fix_ex		# if greater, fix and redo
  24168	fdiv.s		FTEN(%pc),%fp0	# if equal, divide by 10
  24169	addq.l		&1,%d6		# and inc ILOG
  24170	bra.b		A14_st		# and continue elsewhere
  24171fix_ex:
  24172	addq.l		&1,%d6		# increment ILOG by 1
  24173	mov.w		&1,%d5		# set ICTR
  24174	fmov.l		&rm_mode*0x10,%fpcr	# set rmode to RM
  24175	bra.w		A6_str		# return to A6 and recompute YINT
  24176#
  24177# Since ICTR <> 0, we have already been through one adjustment,
  24178# and shouldn't have another; this is to check if abs(YINT) = 10^LEN
  24179# 10^LEN is again computed using whatever table is in a1 since the
  24180# value calculated cannot be inexact.
  24181#
  24182not_zr:
  24183	fmov.s		FONE(%pc),%fp2	# init fp2 to 1.0
  24184	mov.l		%d4,%d0		# put LEN in d0
  24185	clr.l		%d3		# clr table index
  24186z_loop:
  24187	lsr.l		&1,%d0		# shift next bit into carry
  24188	bcc.b		z_next		# if zero, skip the mul
  24189	fmul.x		(%a1,%d3),%fp2	# mul by 10**(d3_bit_no)
  24190z_next:
  24191	add.l		&12,%d3		# inc d3 to next pwrten table entry
  24192	tst.l		%d0		# test if LEN is zero
  24193	bne.b		z_loop		# if not, loop
  24194	fabs.x		%fp0		# get abs(YINT)
  24195	fcmp.x		%fp0,%fp2	# check if abs(YINT) = 10^LEN
  24196	fbneq.w		A14_st		# if not, skip this
  24197	fdiv.s		FTEN(%pc),%fp0	# divide abs(YINT) by 10
  24198	addq.l		&1,%d6		# and inc ILOG by 1
  24199	addq.l		&1,%d4		# and inc LEN
  24200	fmul.s		FTEN(%pc),%fp2	# if LEN++, the get 10^^LEN
  24201
  24202# A14. Convert the mantissa to bcd.
  24203#      The binstr routine is used to convert the LEN digit
  24204#      mantissa to bcd in memory.  The input to binstr is
  24205#      to be a fraction; i.e. (mantissa)/10^LEN and adjusted
  24206#      such that the decimal point is to the left of bit 63.
  24207#      The bcd digits are stored in the correct position in
  24208#      the final string area in memory.
  24209#
  24210#
  24211# Register usage:
  24212#	Input/Output
  24213#	d0: x/LEN call to binstr - final is 0
  24214#	d1: x/0
  24215#	d2: x/ms 32-bits of mant of abs(YINT)
  24216#	d3: x/ls 32-bits of mant of abs(YINT)
  24217#	d4: LEN/Unchanged
  24218#	d5: ICTR:LAMBDA/LAMBDA:ICTR
  24219#	d6: ILOG
  24220#	d7: k-factor/Unchanged
  24221#	a0: pointer into memory for packed bcd string formation
  24222#	    /ptr to first mantissa byte in result string
  24223#	a1: ptr to PTENxx array/Unchanged
  24224#	a2: ptr to FP_SCR1(a6)/Unchanged
  24225#	fp0: int portion of Y/abs(YINT) adjusted
  24226#	fp1: 10^ISCALE/Unchanged
  24227#	fp2: 10^LEN/Unchanged
  24228#	F_SCR1:x/Work area for final result
  24229#	F_SCR2:Y with original exponent/Unchanged
  24230#	L_SCR1:original USER_FPCR/Unchanged
  24231#	L_SCR2:first word of X packed/Unchanged
  24232
  24233A14_st:
  24234	fmov.l		&rz_mode*0x10,%fpcr	# force rz for conversion
  24235	fdiv.x		%fp2,%fp0	# divide abs(YINT) by 10^LEN
  24236	lea.l		FP_SCR0(%a6),%a0
  24237	fmov.x		%fp0,(%a0)	# move abs(YINT)/10^LEN to memory
  24238	mov.l		4(%a0),%d2	# move 2nd word of FP_RES to d2
  24239	mov.l		8(%a0),%d3	# move 3rd word of FP_RES to d3
  24240	clr.l		4(%a0)		# zero word 2 of FP_RES
  24241	clr.l		8(%a0)		# zero word 3 of FP_RES
  24242	mov.l		(%a0),%d0	# move exponent to d0
  24243	swap		%d0		# put exponent in lower word
  24244	beq.b		no_sft		# if zero, don't shift
  24245	sub.l		&0x3ffd,%d0	# sub bias less 2 to make fract
  24246	tst.l		%d0		# check if > 1
  24247	bgt.b		no_sft		# if so, don't shift
  24248	neg.l		%d0		# make exp positive
  24249m_loop:
  24250	lsr.l		&1,%d2		# shift d2:d3 right, add 0s
  24251	roxr.l		&1,%d3		# the number of places
  24252	dbf.w		%d0,m_loop	# given in d0
  24253no_sft:
  24254	tst.l		%d2		# check for mantissa of zero
  24255	bne.b		no_zr		# if not, go on
  24256	tst.l		%d3		# continue zero check
  24257	beq.b		zer_m		# if zero, go directly to binstr
  24258no_zr:
  24259	clr.l		%d1		# put zero in d1 for addx
  24260	add.l		&0x00000080,%d3	# inc at bit 7
  24261	addx.l		%d1,%d2		# continue inc
  24262	and.l		&0xffffff80,%d3	# strip off lsb not used by 882
  24263zer_m:
  24264	mov.l		%d4,%d0		# put LEN in d0 for binstr call
  24265	addq.l		&3,%a0		# a0 points to M16 byte in result
  24266	bsr		binstr		# call binstr to convert mant
  24267
  24268
  24269# A15. Convert the exponent to bcd.
  24270#      As in A14 above, the exp is converted to bcd and the
  24271#      digits are stored in the final string.
  24272#
  24273#      Digits are stored in L_SCR1(a6) on return from BINDEC as:
  24274#
  24275#	 31               16 15                0
  24276#	-----------------------------------------
  24277#	|  0 | e3 | e2 | e1 | e4 |  X |  X |  X |
  24278#	-----------------------------------------
  24279#
  24280# And are moved into their proper places in FP_SCR0.  If digit e4
  24281# is non-zero, OPERR is signaled.  In all cases, all 4 digits are
  24282# written as specified in the 881/882 manual for packed decimal.
  24283#
  24284# Register usage:
  24285#	Input/Output
  24286#	d0: x/LEN call to binstr - final is 0
  24287#	d1: x/scratch (0);shift count for final exponent packing
  24288#	d2: x/ms 32-bits of exp fraction/scratch
  24289#	d3: x/ls 32-bits of exp fraction
  24290#	d4: LEN/Unchanged
  24291#	d5: ICTR:LAMBDA/LAMBDA:ICTR
  24292#	d6: ILOG
  24293#	d7: k-factor/Unchanged
  24294#	a0: ptr to result string/ptr to L_SCR1(a6)
  24295#	a1: ptr to PTENxx array/Unchanged
  24296#	a2: ptr to FP_SCR1(a6)/Unchanged
  24297#	fp0: abs(YINT) adjusted/float(ILOG)
  24298#	fp1: 10^ISCALE/Unchanged
  24299#	fp2: 10^LEN/Unchanged
  24300#	F_SCR1:Work area for final result/BCD result
  24301#	F_SCR2:Y with original exponent/ILOG/10^4
  24302#	L_SCR1:original USER_FPCR/Exponent digits on return from binstr
  24303#	L_SCR2:first word of X packed/Unchanged
  24304
  24305A15_st:
  24306	tst.b		BINDEC_FLG(%a6)	# check for denorm
  24307	beq.b		not_denorm
  24308	ftest.x		%fp0		# test for zero
  24309	fbeq.w		den_zero	# if zero, use k-factor or 4933
  24310	fmov.l		%d6,%fp0	# float ILOG
  24311	fabs.x		%fp0		# get abs of ILOG
  24312	bra.b		convrt
  24313den_zero:
  24314	tst.l		%d7		# check sign of the k-factor
  24315	blt.b		use_ilog	# if negative, use ILOG
  24316	fmov.s		F4933(%pc),%fp0	# force exponent to 4933
  24317	bra.b		convrt		# do it
  24318use_ilog:
  24319	fmov.l		%d6,%fp0	# float ILOG
  24320	fabs.x		%fp0		# get abs of ILOG
  24321	bra.b		convrt
  24322not_denorm:
  24323	ftest.x		%fp0		# test for zero
  24324	fbneq.w		not_zero	# if zero, force exponent
  24325	fmov.s		FONE(%pc),%fp0	# force exponent to 1
  24326	bra.b		convrt		# do it
  24327not_zero:
  24328	fmov.l		%d6,%fp0	# float ILOG
  24329	fabs.x		%fp0		# get abs of ILOG
  24330convrt:
  24331	fdiv.x		24(%a1),%fp0	# compute ILOG/10^4
  24332	fmov.x		%fp0,FP_SCR1(%a6)	# store fp0 in memory
  24333	mov.l		4(%a2),%d2	# move word 2 to d2
  24334	mov.l		8(%a2),%d3	# move word 3 to d3
  24335	mov.w		(%a2),%d0	# move exp to d0
  24336	beq.b		x_loop_fin	# if zero, skip the shift
  24337	sub.w		&0x3ffd,%d0	# subtract off bias
  24338	neg.w		%d0		# make exp positive
  24339x_loop:
  24340	lsr.l		&1,%d2		# shift d2:d3 right
  24341	roxr.l		&1,%d3		# the number of places
  24342	dbf.w		%d0,x_loop	# given in d0
  24343x_loop_fin:
  24344	clr.l		%d1		# put zero in d1 for addx
  24345	add.l		&0x00000080,%d3	# inc at bit 6
  24346	addx.l		%d1,%d2		# continue inc
  24347	and.l		&0xffffff80,%d3	# strip off lsb not used by 882
  24348	mov.l		&4,%d0		# put 4 in d0 for binstr call
  24349	lea.l		L_SCR1(%a6),%a0	# a0 is ptr to L_SCR1 for exp digits
  24350	bsr		binstr		# call binstr to convert exp
  24351	mov.l		L_SCR1(%a6),%d0	# load L_SCR1 lword to d0
  24352	mov.l		&12,%d1		# use d1 for shift count
  24353	lsr.l		%d1,%d0		# shift d0 right by 12
  24354	bfins		%d0,FP_SCR0(%a6){&4:&12}	# put e3:e2:e1 in FP_SCR0
  24355	lsr.l		%d1,%d0		# shift d0 right by 12
  24356	bfins		%d0,FP_SCR0(%a6){&16:&4}	# put e4 in FP_SCR0
  24357	tst.b		%d0		# check if e4 is zero
  24358	beq.b		A16_st		# if zero, skip rest
  24359	or.l		&opaop_mask,USER_FPSR(%a6)	# set OPERR & AIOP in USER_FPSR
  24360
  24361
  24362# A16. Write sign bits to final string.
  24363#	   Sigma is bit 31 of initial value; RHO is bit 31 of d6 (ILOG).
  24364#
  24365# Register usage:
  24366#	Input/Output
  24367#	d0: x/scratch - final is x
  24368#	d2: x/x
  24369#	d3: x/x
  24370#	d4: LEN/Unchanged
  24371#	d5: ICTR:LAMBDA/LAMBDA:ICTR
  24372#	d6: ILOG/ILOG adjusted
  24373#	d7: k-factor/Unchanged
  24374#	a0: ptr to L_SCR1(a6)/Unchanged
  24375#	a1: ptr to PTENxx array/Unchanged
  24376#	a2: ptr to FP_SCR1(a6)/Unchanged
  24377#	fp0: float(ILOG)/Unchanged
  24378#	fp1: 10^ISCALE/Unchanged
  24379#	fp2: 10^LEN/Unchanged
  24380#	F_SCR1:BCD result with correct signs
  24381#	F_SCR2:ILOG/10^4
  24382#	L_SCR1:Exponent digits on return from binstr
  24383#	L_SCR2:first word of X packed/Unchanged
  24384
  24385A16_st:
  24386	clr.l		%d0		# clr d0 for collection of signs
  24387	and.b		&0x0f,FP_SCR0(%a6)	# clear first nibble of FP_SCR0
  24388	tst.l		L_SCR2(%a6)	# check sign of original mantissa
  24389	bge.b		mant_p		# if pos, don't set SM
  24390	mov.l		&2,%d0		# move 2 in to d0 for SM
  24391mant_p:
  24392	tst.l		%d6		# check sign of ILOG
  24393	bge.b		wr_sgn		# if pos, don't set SE
  24394	addq.l		&1,%d0		# set bit 0 in d0 for SE
  24395wr_sgn:
  24396	bfins		%d0,FP_SCR0(%a6){&0:&2}	# insert SM and SE into FP_SCR0
  24397
  24398# Clean up and restore all registers used.
  24399
  24400	fmov.l		&0,%fpsr	# clear possible inex2/ainex bits
  24401	fmovm.x		(%sp)+,&0xe0	#  {%fp0-%fp2}
  24402	movm.l		(%sp)+,&0x4fc	#  {%d2-%d7/%a2}
  24403	rts
  24404
  24405	global		PTENRN		# powers of ten used when the decoded rounding mode is RN
  24406PTENRN:				# 13 entries of 3 longs (12 bytes) each: 10^(2^n), n = 0..12, at offset n*12
  24407	long		0x40020000,0xA0000000,0x00000000	# 10 ^ 1
  24408	long		0x40050000,0xC8000000,0x00000000	# 10 ^ 2
  24409	long		0x400C0000,0x9C400000,0x00000000	# 10 ^ 4     (offset 24)
  24410	long		0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8     (offset 36)
  24411	long		0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16    (offset 48)
  24412	long		0x40690000,0x9DC5ADA8,0x2B70B59E	# 10 ^ 32
  24413	long		0x40D30000,0xC2781F49,0xFFCFA6D5	# 10 ^ 64
  24414	long		0x41A80000,0x93BA47C9,0x80E98CE0	# 10 ^ 128
  24415	long		0x43510000,0xAA7EEBFB,0x9DF9DE8E	# 10 ^ 256
  24416	long		0x46A30000,0xE319A0AE,0xA60E91C7	# 10 ^ 512
  24417	long		0x4D480000,0xC9767586,0x81750C17	# 10 ^ 1024
  24418	long		0x5A920000,0x9E8B3B5D,0xC53D5DE5	# 10 ^ 2048
  24419	long		0x75250000,0xC4605202,0x8A20979B	# 10 ^ 4096
  24420
  24421	global		PTENRP		# powers of ten used when the decoded rounding mode is RP
  24422PTENRP:				# same layout as PTENRN: 10^(2^n), n = 0..12, 12 bytes per entry
  24423	long		0x40020000,0xA0000000,0x00000000	# 10 ^ 1
  24424	long		0x40050000,0xC8000000,0x00000000	# 10 ^ 2
  24425	long		0x400C0000,0x9C400000,0x00000000	# 10 ^ 4
  24426	long		0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8
  24427	long		0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16
  24428	long		0x40690000,0x9DC5ADA8,0x2B70B59E	# 10 ^ 32
  24429	long		0x40D30000,0xC2781F49,0xFFCFA6D6	# 10 ^ 64    (lsb one above the PTENRN entry)
  24430	long		0x41A80000,0x93BA47C9,0x80E98CE0	# 10 ^ 128
  24431	long		0x43510000,0xAA7EEBFB,0x9DF9DE8E	# 10 ^ 256
  24432	long		0x46A30000,0xE319A0AE,0xA60E91C7	# 10 ^ 512
  24433	long		0x4D480000,0xC9767586,0x81750C18	# 10 ^ 1024  (lsb one above the PTENRN entry)
  24434	long		0x5A920000,0x9E8B3B5D,0xC53D5DE5	# 10 ^ 2048
  24435	long		0x75250000,0xC4605202,0x8A20979B	# 10 ^ 4096
  24436
  24437	global		PTENRM		# powers of ten used when the decoded rounding mode is RM
  24438PTENRM:				# same layout as PTENRN: 10^(2^n), n = 0..12, 12 bytes per entry
  24439	long		0x40020000,0xA0000000,0x00000000	# 10 ^ 1
  24440	long		0x40050000,0xC8000000,0x00000000	# 10 ^ 2
  24441	long		0x400C0000,0x9C400000,0x00000000	# 10 ^ 4
  24442	long		0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8
  24443	long		0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16
  24444	long		0x40690000,0x9DC5ADA8,0x2B70B59D	# 10 ^ 32    (lsb one below the PTENRN entry)
  24445	long		0x40D30000,0xC2781F49,0xFFCFA6D5	# 10 ^ 64
  24446	long		0x41A80000,0x93BA47C9,0x80E98CDF	# 10 ^ 128   (lsb one below the PTENRN entry)
  24447	long		0x43510000,0xAA7EEBFB,0x9DF9DE8D	# 10 ^ 256   (lsb one below the PTENRN entry)
  24448	long		0x46A30000,0xE319A0AE,0xA60E91C6	# 10 ^ 512   (lsb one below the PTENRN entry)
  24449	long		0x4D480000,0xC9767586,0x81750C17	# 10 ^ 1024
  24450	long		0x5A920000,0x9E8B3B5D,0xC53D5DE4	# 10 ^ 2048  (lsb one below the PTENRN entry)
  24451	long		0x75250000,0xC4605202,0x8A20979A	# 10 ^ 4096  (lsb one below the PTENRN entry)
  24452
  24453#########################################################################
  24454# binstr(): Converts a 64-bit binary integer to bcd.			#
  24455#									#
  24456# INPUT *************************************************************** #
  24457#	d2:d3 = 64-bit binary integer					#
  24458#	d0    = desired length (LEN)					#
  24459#	a0    = pointer to start in memory for bcd characters		#
  24460#		(This pointer must point to byte 4 of the first		#
  24461#		 lword of the packed decimal memory string.)		#
  24462#									#
  24463# OUTPUT ************************************************************** #
  24464#	a0 = pointer to LEN bcd digits representing the 64-bit integer.	#
  24465#									#
  24466# ALGORITHM ***********************************************************	#
  24467#	The 64-bit binary is assumed to have a decimal point before	#
  24468#	bit 63.  The fraction is multiplied by 10 using a mul by 2	#
  24469#	shift and a mul by 8 shift.  The bits shifted out of the	#
  24470#	msb form a decimal digit.  This process is iterated until	#
  24471#	LEN digits are formed.						#
  24472#									#
  24473# A1. Init d7 to 1.  D7 is the byte digit counter, and if 1, the	#
  24474#     digit formed will be assumed the least significant.  This is	#
  24475#     to force the first byte formed to have a 0 in the upper 4 bits.	#
  24476#									#
  24477# A2. Beginning of the loop:						#
  24478#     Copy the fraction in d2:d3 to d4:d5.				#
  24479#									#
  24480# A3. Multiply the fraction in d2:d3 by 8 using bit-field		#
  24481#     extracts and shifts.  The three msbs from d2 will go into d1.	#
  24482#									#
  24483# A4. Multiply the fraction in d4:d5 by 2 using shifts.  The msb	#
  24484#     will be collected by the carry.					#
  24485#									#
  24486# A5. Add using the carry the 64-bit quantities in d2:d3 and d4:d5	#
  24487#     into d2:d3.  D1 will contain the bcd digit formed.		#
  24488#									#
  24489# A6. Test d7.  If zero, the digit formed is the ms digit.  If non-	#
  24490#     zero, it is the ls digit.  Put the digit in its place in the	#
  24491#     upper word of d7.  If it is the ls digit, write the byte		#
  24492#     from d7 to memory.						#
  24493#									#
  24494# A7. Decrement d0 (LEN counter) and repeat the loop until done.	#
  24495#									#
  24496#########################################################################
  24497
  24498#	Implementation Notes:
  24499#
  24500#	The registers are used as follows:
  24501#
  24502#		d0: LEN counter
  24503#		d1: temp used to form the digit
  24504#		d2: upper 32-bits of fraction for mul by 8
  24505#		d3: lower 32-bits of fraction for mul by 8
  24506#		d4: upper 32-bits of fraction for mul by 2
  24507#		d5: lower 32-bits of fraction for mul by 2
  24508#		d6: temp for bit-field extracts
  24509#		d7: byte digit formation word;digit count {0,1}
  24510#		a0: pointer into memory for packed bcd string formation
  24511#
  24512
  24513	global		binstr
  24514binstr:				# emit d0 bcd digits of the 64-bit fraction d2:d3 to memory through a0
  24515	movm.l		&0xff00,-(%sp)	# save {%d0-%d7}; they are restored at end_bstr, a0 is not
  24516
  24517#
  24518# A1: Init d7 (low word = digit flag, high word = saved ms digit)
  24519#
  24520	mov.l		&1,%d7		# flag = 1, saved digit = 0: first byte gets a 0 upper nibble
  24521	subq.l		&1,%d0		# dbf makes count+1 passes, so loop on LEN-1
  24522#
  24523# A2. Copy d2:d3 to d4:d5.  Start loop.
  24524#
  24525loop:
  24526	mov.l		%d2,%d4		# copy the fraction before the multiplies
  24527	mov.l		%d3,%d5		# d4:d5 = d2:d3
  24528#
  24529# A3. Multiply d2:d3 by 8; extract msbs into d1.
  24530#
  24531	bfextu		%d2{&0:&3},%d1	# d1 = 3 msbs of d2 (bits shifted out by the *8)
  24532	asl.l		&3,%d2		# shift d2 left by 3 places
  24533	bfextu		%d3{&0:&3},%d6	# d6 = 3 msbs of d3 (they carry into d2)
  24534	asl.l		&3,%d3		# shift d3 left by 3 places
  24535	or.l		%d6,%d2		# d2:d3 = fraction * 8
  24536#
  24537# A4. Multiply d4:d5 by 2; add carry out to d1.
  24538#
  24539	asl.l		&1,%d5		# low half * 2, msb goes to X
  24540	roxl.l		&1,%d4		# high half * 2 with X in; msb out goes to X
  24541	swap		%d6		# d6 was 0..7, so its low word is now 0
  24542	addx.w		%d6,%d1		# d1 += 0 + X (bit shifted out by the *2)
  24543#
  24544# A5. Add mul by 8 to mul by 2.  D1 contains the digit formed.
  24545#
  24546	add.l		%d5,%d3		# add lower 32 bits, carry to X
  24547	nop				# ERRATA FIX #13 (Rev. 1.2 6/6/90)
  24548	addx.l		%d4,%d2		# add upper 32 bits plus X; d2:d3 = fraction * 10
  24549	nop				# ERRATA FIX #13 (Rev. 1.2 6/6/90)
  24550	addx.w		%d6,%d1		# d1 += 0 + X (carry out of the 64-bit add)
  24551	swap		%d6		# undo the earlier swap of d6
  24552#
  24553# A6. Test d7 and branch.
  24554#
  24555	tst.w		%d7		# flag = 0: this digit starts a new byte
  24556	beq.b		first_d		# if so, just save it; else pack and write
  24557sec_d:
  24558	swap		%d7		# low word = saved ms digit (0 on the first pass)
  24559	asl.w		&4,%d7		# move saved digit to the upper nibble
  24560	add.w		%d1,%d7		# new digit becomes the lower nibble
  24561	mov.b		%d7,(%a0)+	# store the packed byte, advance a0
  24562	swap		%d7		# bring the digit flag back to the low word
  24563	clr.w		%d7		# flag = 0: no digit pending
  24564	dbf.w		%d0,loop	# next digit unless all LEN are done
  24565	bra.b		end_bstr	# finished on a complete byte, so exit
  24566first_d:
  24567	swap		%d7		# expose the high word as a digit holder
  24568	mov.w		%d1,%d7		# save the new digit there
  24569	swap		%d7		# digit in high word, flag back in low word
  24570	addq.w		&1,%d7		# flag = 1: next digit completes the byte
  24571	dbf.w		%d0,loop	# next digit unless all LEN are done
  24572	swap		%d7		# LEN exhausted with one digit still pending
  24573	lsl.w		&4,%d7		# move it to the upper nibble, lower nibble 0
  24574	mov.b		%d7,(%a0)+	# store the final half-filled byte
  24575#
  24576# Restore d0-d7 and return; the digits have been written through a0.
  24577#
  24578end_bstr:
  24579	movm.l		(%sp)+,&0xff	#  {%d0-%d7}
  24580	rts
  24581
  24582#########################################################################
  24583# XDEF ****************************************************************	#
  24584#	facc_in_b(): dmem_read_byte failed				#
  24585#	facc_in_w(): dmem_read_word failed				#
  24586#	facc_in_l(): dmem_read_long failed				#
  24587#	facc_in_d(): dmem_read of dbl prec failed			#
  24588#	facc_in_x(): dmem_read of ext prec failed			#
  24589#									#
  24590#	facc_out_b(): dmem_write_byte failed				#
  24591#	facc_out_w(): dmem_write_word failed				#
  24592#	facc_out_l(): dmem_write_long failed				#
  24593#	facc_out_d(): dmem_write of dbl prec failed			#
  24594#	facc_out_x(): dmem_write of ext prec failed			#
  24595#									#
  24596# XREF ****************************************************************	#
  24597#	_real_access() - exit through access error handler		#
  24598#									#
  24599# INPUT ***************************************************************	#
  24600#	None								#
  24601#									#
  24602# OUTPUT **************************************************************	#
  24603#	None								#
  24604#									#
  24605# ALGORITHM ***********************************************************	#
  24606#	Flow jumps here when an FP data fetch call gets an error	#
  24607# result. This means the operating system wants an access error frame	#
  24608# made out of the current exception stack frame.			#
  24609#	So, we first call restore() which makes sure that any updated	#
  24610# -(an)+ register gets returned to its pre-exception value and then	#
  24611# we change the stack to an access error stack frame.			#
  24612#									#
  24613#########################################################################
  24614
  24615facc_in_b:
       # facc_in_b/w/l/d/x: entry points for a failed operand READ, one
       # per operand size. Every stub does the same three things:
       #   1. d0 = operand size in bytes (the adjustment restore() uses),
       #   2. call restore() to undo an (An)+ / -(An) register update,
       #   3. park the size-specific FSLW word in EXC_VOFF(%a6) and branch
       #      to facc_finish, which builds the access error frame.
       # restore() must run BEFORE EXC_VOFF is overwritten: its a7 case
       # (ri_a7) still tests the byte at EXC_VOFF(%a6).
  24616	movq.l		&0x1,%d0			# one byte
  24617	bsr.w		restore				# fix An
  24618
  24619	mov.w		&0x0121,EXC_VOFF(%a6)		# set FSLW
  24620	bra.w		facc_finish
  24621
  24622facc_in_w:
  24623	movq.l		&0x2,%d0			# two bytes
  24624	bsr.w		restore				# fix An
  24625
  24626	mov.w		&0x0141,EXC_VOFF(%a6)		# set FSLW
  24627	bra.b		facc_finish
  24628
  24629facc_in_l:
  24630	movq.l		&0x4,%d0			# four bytes
  24631	bsr.w		restore				# fix An
  24632
  24633	mov.w		&0x0101,EXC_VOFF(%a6)		# set FSLW
  24634	bra.b		facc_finish
  24635
  24636facc_in_d:
  24637	movq.l		&0x8,%d0			# eight bytes
  24638	bsr.w		restore				# fix An
  24639
  24640	mov.w		&0x0161,EXC_VOFF(%a6)		# set FSLW
  24641	bra.b		facc_finish
  24642
  24643facc_in_x:
       # extended precision stores the same FSLW word as facc_in_d (0x0161);
       # only the An adjustment in d0 (12 vs. 8 bytes) differs.
  24644	movq.l		&0xc,%d0			# twelve bytes
  24645	bsr.w		restore				# fix An
  24646
  24647	mov.w		&0x0161,EXC_VOFF(%a6)		# set FSLW
  24648	bra.b		facc_finish
  24649
  24650################################################################
  24651
  24652facc_out_b:
       # facc_out_b/w/l/d: entry points for a failed operand WRITE, one per
       # operand size. Same shape as the read stubs: d0 = operand size in
       # bytes, call restore() to undo an (An)+ / -(An) update, then park
       # the size-specific FSLW word in EXC_VOFF(%a6) and branch to
       # facc_finish. restore() has to run first because its a7 case
       # (ri_a7) still tests the byte at EXC_VOFF(%a6).
  24653	movq.l		&0x1,%d0			# one byte
  24654	bsr.w		restore				# restore An
  24655
  24656	mov.w		&0x00a1,EXC_VOFF(%a6)		# set FSLW
  24657	bra.b		facc_finish
  24658
  24659facc_out_w:
  24660	movq.l		&0x2,%d0			# two bytes
  24661	bsr.w		restore				# restore An
  24662
  24663	mov.w		&0x00c1,EXC_VOFF(%a6)		# set FSLW
  24664	bra.b		facc_finish
  24665
  24666facc_out_l:
  24667	movq.l		&0x4,%d0			# four bytes
  24668	bsr.w		restore				# restore An
  24669
  24670	mov.w		&0x0081,EXC_VOFF(%a6)		# set FSLW
  24671	bra.b		facc_finish
  24672
  24673facc_out_d:
  24674	movq.l		&0x8,%d0			# eight bytes
  24675	bsr.w		restore				# restore An
  24676
  24677	mov.w		&0x00e1,EXC_VOFF(%a6)		# set FSLW
  24678	bra.b		facc_finish
  24679
  24680facc_out_x:
       # Failed extended-precision operand WRITE. d0 = 12 is the byte count
       # restore() uses to undo an (An)+ / -(An) update; the FSLW word is
       # the same one facc_out_d stores (0x00e1). restore() must be called
       # before EXC_VOFF(%a6) is overwritten because ri_a7 still tests it.
       # Execution falls through into facc_finish (no branch needed).
       # The count is loaded with movq.l, like every other facc_* stub:
       # 0xc fits the 8-bit quick immediate, so d0 receives the same value.
  24681	movq.l		&0xc,%d0			# twelve bytes
  24682	bsr.w		restore				# restore An
  24683
  24684	mov.w		&0x00e1,EXC_VOFF(%a6)		# set FSLW
  24685
  24686# here's where we actually create the access error frame from the
  24687# current exception stack frame.
       # Reached from every facc_* stub with the size-specific FSLW word
       # already parked in EXC_VOFF(%a6). Restores the saved FP/integer
       # state, drops the a6 frame, rebuilds the exception frame in place
       # one longword larger, and exits through _real_access.
  24688facc_finish:
  24689	mov.l		USER_FPIAR(%a6),EXC_PC(%a6) # store current PC
  24690
       # d0/d1/a0 were used as scratch by the stubs and restore(); the
       # reload from EXC_DREGS discards that and picks up any a0/a1 fix
       # that restore() applied to the stacked copies.
  24691	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
  24692	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
  24693	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
  24694
  24695	unlk		%a6
  24696
       # Grow the frame by 4 bytes and shuffle it so that, relative to the
       # new %sp, it reads: 0x0 SR, 0x2 PC, 0x6 voff, 0x8 EA, 0xc FSLW.
       # The long move to 0x4(%sp) also drags the word that followed the
       # PC into 0x6(%sp) -- presumably the EXC_VOFF slot the stub wrote;
       # the "fix FSLW (size)" step copies that word into the upper half
       # of the FSLW before 0x6(%sp) is overwritten with the real voff.
       # The FSLW therefore ends up as <stub word>:0x0001.
  24697	mov.l		(%sp),-(%sp)		# store SR, hi(PC)
  24698	mov.l		0x8(%sp),0x4(%sp)	# store lo(PC)
  24699	mov.l		0xc(%sp),0x8(%sp)	# store EA
  24700	mov.l		&0x00000001,0xc(%sp)	# store FSLW
  24701	mov.w		0x6(%sp),0xc(%sp)	# fix FSLW (size)
  24702	mov.w		&0x4008,0x6(%sp)	# store voff
  24703
       # bit 5 of the first frame byte (high byte of the stacked SR) selects
       # the mode; for supervisor, bit 2 of the second FSLW byte is set too.
  24704	btst		&0x5,(%sp)		# supervisor or user mode?
  24705	beq.b		facc_out2		# user
  24706	bset		&0x2,0xd(%sp)		# set supervisor TM bit
  24707
  24708facc_out2:
  24709	bra.l		_real_access
  24710
  24711##################################################################
  24712
  24713# if the effective addressing mode was predecrement or postincrement,
  24714# the emulation has already changed its value to the correct post-
  24715# instruction value. but since we're exiting to the access error
  24716# handler, then AN must be returned to its pre-instruction value.
  24717# we do that here.
       #
       # In:   d0 = operand size in bytes (loaded by the facc_* stubs)
       #       a6 = exception frame pointer (EXC_* offsets)
       # Uses: d1 as scratch; d0 is negated on the predecrement path;
       #       a0 is used as a temporary on the a7/USP path.
       # For any other addressing mode the routine returns without
       # touching a register.
  24718restore:
  24719	mov.b		EXC_OPWORD+0x1(%a6),%d1
  24720	andi.b		&0x38,%d1		# extract <ea> mode field
  24721	cmpi.b		%d1,&0x18		# postinc?
  24722	beq.w		rest_inc
  24723	cmpi.b		%d1,&0x20		# predec?
  24724	beq.w		rest_dec
  24725	rts
  24726
       # rest_inc: subtract d0 from the An named by the low three bits of
       # the opword, dispatching through a table of 16-bit offsets that are
       # relative to tbl_rest_inc.
  24727rest_inc:
  24728	mov.b		EXC_OPWORD+0x1(%a6),%d1
  24729	andi.w		&0x0007,%d1		# fetch An
  24730
  24731	mov.w		(tbl_rest_inc.b,%pc,%d1.w*2),%d1
  24732	jmp		(tbl_rest_inc.b,%pc,%d1.w*1)
  24733
  24734tbl_rest_inc:
  24735	short		ri_a0 - tbl_rest_inc
  24736	short		ri_a1 - tbl_rest_inc
  24737	short		ri_a2 - tbl_rest_inc
  24738	short		ri_a3 - tbl_rest_inc
  24739	short		ri_a4 - tbl_rest_inc
  24740	short		ri_a5 - tbl_rest_inc
  24741	short		ri_a6 - tbl_rest_inc
  24742	short		ri_a7 - tbl_rest_inc
  24743
       # a0/a1 are fixed in their stacked copies (EXC_DREGS), a2-a5 are
       # fixed in the live registers, a6 is fixed in the longword at (%a6).
  24744ri_a0:
  24745	sub.l		%d0,EXC_DREGS+0x8(%a6)	# fix stacked a0
  24746	rts
  24747ri_a1:
  24748	sub.l		%d0,EXC_DREGS+0xc(%a6)	# fix stacked a1
  24749	rts
  24750ri_a2:
  24751	sub.l		%d0,%a2			# fix a2
  24752	rts
  24753ri_a3:
  24754	sub.l		%d0,%a3			# fix a3
  24755	rts
  24756ri_a4:
  24757	sub.l		%d0,%a4			# fix a4
  24758	rts
  24759ri_a5:
  24760	sub.l		%d0,%a5			# fix a5
  24761	rts
  24762ri_a6:
  24763	sub.l		%d0,(%a6)		# fix stacked a6
  24764	rts
  24765# if it's a fmove out instruction, we don't have to fix a7
  24766# because we hadn't changed it yet. if it's an opclass two
  24767# instruction (data moved in) and the exception was in supervisor
  24768# mode, then a7 also wasn't updated. if it was user mode, then
  24769# restore the correct a7 which is in the USP currently.
       # note: this reads EXC_VOFF(%a6), so it must run before the caller
       # overwrites that slot with the FSLW word.
  24770ri_a7:
  24771	cmpi.b		EXC_VOFF(%a6),&0x30	# move in or out?
  24772	bne.b		ri_a7_done		# out
  24773
  24774	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
  24775	bne.b		ri_a7_done		# supervisor
  24776	movc		%usp,%a0		# restore USP
  24777	sub.l		%d0,%a0
  24778	movc		%a0,%usp
  24779ri_a7_done:
  24780	rts
  24781
  24782# need to invert adjustment value if the <ea> was predec
       # (negating d0 turns the subtraction done in rest_inc into an add)
  24783rest_dec:
  24784	neg.l		%d0
  24785	bra.b		rest_inc