cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

res_func.S (53177B)


      1|
      2|	res_func.sa 3.9 7/29/91
      3|
      4| Normalizes denormalized numbers if necessary and updates the
      5| stack frame.  The function is then restored back into the
      6| machine and the 040 completes the operation.  This routine
      7| is only used by the unsupported data type/format handler.
      8| (Exception vector 55).
      9|
     10| For packed move out (fmove.p fpm,<ea>) the operation is
     11| completed here; data is packed and moved to user memory.
     12| The stack is restored to the 040 only in the case of a
     13| reportable exception in the conversion.
     14|
     15|
     16|		Copyright (C) Motorola, Inc. 1990
     17|			All Rights Reserved
     18|
     19|       For details on the license for this file, please see the
     20|       file, README, in this same directory.
     21
     22RES_FUNC:    |idnt    2,1 | Motorola 040 Floating Point Software Package
     23
     24	|section	8
     25
     26#include "fpsp.h"
     27
     28sp_bnds:	.short	0x3f81,0x407e
     29		.short	0x3f6a,0x0000
     30dp_bnds:	.short	0x3c01,0x43fe
     31		.short	0x3bcd,0x0000
     32
     33	|xref	mem_write
     34	|xref	bindec
     35	|xref	get_fline
     36	|xref	round
     37	|xref	denorm
     38	|xref	dest_ext
     39	|xref	dest_dbl
     40	|xref	dest_sgl
     41	|xref	unf_sub
     42	|xref	nrm_set
     43	|xref	dnrm_lp
     44	|xref	ovf_res
     45	|xref	reg_dest
     46	|xref	t_ovfl
     47	|xref	t_unfl
     48
     49	.global	res_func
     50	.global	p_move
     51
     52res_func:
     53	clrb	DNRM_FLG(%a6)
     54	clrb	RES_FLG(%a6)
     55	clrb	CU_ONLY(%a6)
     56	tstb	DY_MO_FLG(%a6)
     57	beqs	monadic
     58dyadic:
     59	btstb	#7,DTAG(%a6)	|if dop = norm=000, zero=001,
     60|				;inf=010 or nan=011
     61	beqs	monadic		|then branch
     62|				;else denorm
     63| HANDLE DESTINATION DENORM HERE
     64|				;set dtag to norm
     65|				;write the tag & fpte15 to the fstack
     66	leal	FPTEMP(%a6),%a0
     67
     68	bclrb	#sign_bit,LOCAL_EX(%a0)
     69	sne	LOCAL_SGN(%a0)
     70
     71	bsr	nrm_set		|normalize number (exp will go negative)
     72	bclrb	#sign_bit,LOCAL_EX(%a0) |get rid of false sign
     73	bfclr	LOCAL_SGN(%a0){#0:#8}	|change back to IEEE ext format
     74	beqs	dpos
     75	bsetb	#sign_bit,LOCAL_EX(%a0)
     76dpos:
     77	bfclr	DTAG(%a6){#0:#4}	|set tag to normalized, FPTE15 = 0
     78	bsetb	#4,DTAG(%a6)	|set FPTE15
     79	orb	#0x0f,DNRM_FLG(%a6)
     80monadic:
     81	leal	ETEMP(%a6),%a0
     82	btstb	#direction_bit,CMDREG1B(%a6)	|check direction
     83	bne	opclass3			|it is a mv out
     84|
     85| At this point, only opclass 0 and 2 possible
     86|
     87	btstb	#7,STAG(%a6)	|if sop = norm=000, zero=001,
     88|				;inf=010 or nan=011
     89	bne	mon_dnrm	|else denorm
     90	tstb	DY_MO_FLG(%a6)	|all cases of dyadic instructions would
     91	bne	normal		|require normalization of denorm
     92
     93| At this point:
     94|	monadic instructions:	fabs  = $18  fneg   = $1a  ftst   = $3a
     95|				fmove = $00  fsmove = $40  fdmove = $44
     96|				fsqrt = $05* fssqrt = $41  fdsqrt = $45
     97|				(*fsqrt reencoded to $05)
     98|
     99	movew	CMDREG1B(%a6),%d0	|get command register
    100	andil	#0x7f,%d0			|strip to only command word
    101|
    102| At this point, fabs, fneg, fsmove, fdmove, ftst, fsqrt, fssqrt, and
    103| fdsqrt are possible.
    104| For cases fabs, fneg, fsmove, and fdmove goto spos (do not normalize)
    105| For cases fsqrt, fssqrt, and fdsqrt goto nrm_src (do normalize)
    106|
    107	btstl	#0,%d0
    108	bne	normal			|weed out fsqrt instructions
    109|
    110| cu_norm handles fmove in instructions with normalized inputs.
    111| The routine round is used to correctly round the input for the
    112| destination precision and mode.
    113|
    114cu_norm:
    115	st	CU_ONLY(%a6)		|set cu-only inst flag
    116	movew	CMDREG1B(%a6),%d0
    117	andib	#0x3b,%d0		|isolate bits to select inst
    118	tstb	%d0
    119	beql	cu_nmove	|if zero, it is an fmove
    120	cmpib	#0x18,%d0
    121	beql	cu_nabs		|if $18, it is fabs
    122	cmpib	#0x1a,%d0
    123	beql	cu_nneg		|if $1a, it is fneg
    124|
    125| Inst is ftst.  Check the source operand and set the cc's accordingly.
    126| No write is done, so simply rts.
    127|
    128cu_ntst:
    129	movew	LOCAL_EX(%a0),%d0
    130	bclrl	#15,%d0
    131	sne	LOCAL_SGN(%a0)
    132	beqs	cu_ntpo
    133	orl	#neg_mask,USER_FPSR(%a6) |set N
    134cu_ntpo:
    135	cmpiw	#0x7fff,%d0	|test for inf/nan
    136	bnes	cu_ntcz
    137	tstl	LOCAL_HI(%a0)
    138	bnes	cu_ntn
    139	tstl	LOCAL_LO(%a0)
    140	bnes	cu_ntn
    141	orl	#inf_mask,USER_FPSR(%a6)
    142	rts
    143cu_ntn:
    144	orl	#nan_mask,USER_FPSR(%a6)
    145	movel	ETEMP_EX(%a6),FPTEMP_EX(%a6)	|set up fptemp sign for
    146|						;snan handler
    147
    148	rts
    149cu_ntcz:
    150	tstl	LOCAL_HI(%a0)
    151	bnel	cu_ntsx
    152	tstl	LOCAL_LO(%a0)
    153	bnel	cu_ntsx
    154	orl	#z_mask,USER_FPSR(%a6)
    155cu_ntsx:
    156	rts
    157|
    158| Inst is fabs.  Execute the absolute value function on the input.
    159| Branch to the fmove code.  If the operand is NaN, do nothing.
    160|
    161cu_nabs:
    162	moveb	STAG(%a6),%d0
    163	btstl	#5,%d0			|test for NaN or zero
    164	bne	wr_etemp		|if either, simply write it
    165	bclrb	#7,LOCAL_EX(%a0)		|do abs
    166	bras	cu_nmove		|fmove code will finish
    167|
    168| Inst is fneg.  Execute the negate value function on the input.
    169| Fall though to the fmove code.  If the operand is NaN, do nothing.
    170|
    171cu_nneg:
    172	moveb	STAG(%a6),%d0
    173	btstl	#5,%d0			|test for NaN or zero
    174	bne	wr_etemp		|if either, simply write it
    175	bchgb	#7,LOCAL_EX(%a0)		|do neg
    176|
    177| Inst is fmove.  This code also handles all result writes.
    178| If bit 2 is set, round is forced to double.  If it is clear,
    179| and bit 6 is set, round is forced to single.  If both are clear,
    180| the round precision is found in the fpcr.  If the rounding precision
    181| is double or single, round the result before the write.
    182|
    183cu_nmove:
    184	moveb	STAG(%a6),%d0
    185	andib	#0xe0,%d0			|isolate stag bits
    186	bne	wr_etemp		|if not norm, simply write it
    187	btstb	#2,CMDREG1B+1(%a6)	|check for rd
    188	bne	cu_nmrd
    189	btstb	#6,CMDREG1B+1(%a6)	|check for rs
    190	bne	cu_nmrs
    191|
    192| The move or operation is not with forced precision.  Test for
    193| nan or inf as the input; if so, simply write it to FPn.  Use the
    194| FPCR_MODE byte to get rounding on norms and zeros.
    195|
    196cu_nmnr:
    197	bfextu	FPCR_MODE(%a6){#0:#2},%d0
    198	tstb	%d0			|check for extended
    199	beq	cu_wrexn		|if so, just write result
    200	cmpib	#1,%d0			|check for single
    201	beq	cu_nmrs			|fall through to double
    202|
    203| The move is fdmove or round precision is double.
    204|
    205cu_nmrd:
    206	movel	#2,%d0			|set up the size for denorm
    207	movew	LOCAL_EX(%a0),%d1		|compare exponent to double threshold
    208	andw	#0x7fff,%d1
    209	cmpw	#0x3c01,%d1
    210	bls	cu_nunfl
    211	bfextu	FPCR_MODE(%a6){#2:#2},%d1	|get rmode
    212	orl	#0x00020000,%d1		|or in rprec (double)
    213	clrl	%d0			|clear g,r,s for round
    214	bclrb	#sign_bit,LOCAL_EX(%a0)	|convert to internal format
    215	sne	LOCAL_SGN(%a0)
    216	bsrl	round
    217	bfclr	LOCAL_SGN(%a0){#0:#8}
    218	beqs	cu_nmrdc
    219	bsetb	#sign_bit,LOCAL_EX(%a0)
    220cu_nmrdc:
    221	movew	LOCAL_EX(%a0),%d1		|check for overflow
    222	andw	#0x7fff,%d1
    223	cmpw	#0x43ff,%d1
    224	bge	cu_novfl		|take care of overflow case
    225	bra	cu_wrexn
    226|
    227| The move is fsmove or round precision is single.
    228|
    229cu_nmrs:
    230	movel	#1,%d0
    231	movew	LOCAL_EX(%a0),%d1
    232	andw	#0x7fff,%d1
    233	cmpw	#0x3f81,%d1
    234	bls	cu_nunfl
    235	bfextu	FPCR_MODE(%a6){#2:#2},%d1
    236	orl	#0x00010000,%d1
    237	clrl	%d0
    238	bclrb	#sign_bit,LOCAL_EX(%a0)
    239	sne	LOCAL_SGN(%a0)
    240	bsrl	round
    241	bfclr	LOCAL_SGN(%a0){#0:#8}
    242	beqs	cu_nmrsc
    243	bsetb	#sign_bit,LOCAL_EX(%a0)
    244cu_nmrsc:
    245	movew	LOCAL_EX(%a0),%d1
    246	andw	#0x7FFF,%d1
    247	cmpw	#0x407f,%d1
    248	blt	cu_wrexn
    249|
    250| The operand is above precision boundaries.  Use t_ovfl to
    251| generate the correct value.
    252|
    253cu_novfl:
    254	bsr	t_ovfl
    255	bra	cu_wrexn
    256|
    257| The operand is below precision boundaries.  Use denorm to
    258| generate the correct value.
    259|
    260cu_nunfl:
    261	bclrb	#sign_bit,LOCAL_EX(%a0)
    262	sne	LOCAL_SGN(%a0)
    263	bsr	denorm
    264	bfclr	LOCAL_SGN(%a0){#0:#8}	|change back to IEEE ext format
    265	beqs	cu_nucont
    266	bsetb	#sign_bit,LOCAL_EX(%a0)
    267cu_nucont:
    268	bfextu	FPCR_MODE(%a6){#2:#2},%d1
    269	btstb	#2,CMDREG1B+1(%a6)	|check for rd
    270	bne	inst_d
    271	btstb	#6,CMDREG1B+1(%a6)	|check for rs
    272	bne	inst_s
    273	swap	%d1
    274	moveb	FPCR_MODE(%a6),%d1
    275	lsrb	#6,%d1
    276	swap	%d1
    277	bra	inst_sd
    278inst_d:
    279	orl	#0x00020000,%d1
    280	bra	inst_sd
    281inst_s:
    282	orl	#0x00010000,%d1
    283inst_sd:
    284	bclrb	#sign_bit,LOCAL_EX(%a0)
    285	sne	LOCAL_SGN(%a0)
    286	bsrl	round
    287	bfclr	LOCAL_SGN(%a0){#0:#8}
    288	beqs	cu_nuflp
    289	bsetb	#sign_bit,LOCAL_EX(%a0)
    290cu_nuflp:
    291	btstb	#inex2_bit,FPSR_EXCEPT(%a6)
    292	beqs	cu_nuninx
    293	orl	#aunfl_mask,USER_FPSR(%a6) |if the round was inex, set AUNFL
    294cu_nuninx:
    295	tstl	LOCAL_HI(%a0)		|test for zero
    296	bnes	cu_nunzro
    297	tstl	LOCAL_LO(%a0)
    298	bnes	cu_nunzro
    299|
    300| The mantissa is zero from the denorm loop.  Check sign and rmode
    301| to see if rounding should have occurred which would leave the lsb.
    302|
    303	movel	USER_FPCR(%a6),%d0
    304	andil	#0x30,%d0		|isolate rmode
    305	cmpil	#0x20,%d0
    306	blts	cu_nzro
    307	bnes	cu_nrp
    308cu_nrm:
    309	tstw	LOCAL_EX(%a0)	|if positive, set lsb
    310	bges	cu_nzro
    311	btstb	#7,FPCR_MODE(%a6) |check for double
    312	beqs	cu_nincs
    313	bras	cu_nincd
    314cu_nrp:
    315	tstw	LOCAL_EX(%a0)	|if positive, set lsb
    316	blts	cu_nzro
    317	btstb	#7,FPCR_MODE(%a6) |check for double
    318	beqs	cu_nincs
    319cu_nincd:
    320	orl	#0x800,LOCAL_LO(%a0) |inc for double
    321	bra	cu_nunzro
    322cu_nincs:
    323	orl	#0x100,LOCAL_HI(%a0) |inc for single
    324	bra	cu_nunzro
    325cu_nzro:
    326	orl	#z_mask,USER_FPSR(%a6)
    327	moveb	STAG(%a6),%d0
    328	andib	#0xe0,%d0
    329	cmpib	#0x40,%d0		|check if input was tagged zero
    330	beqs	cu_numv
    331cu_nunzro:
    332	orl	#unfl_mask,USER_FPSR(%a6) |set unfl
    333cu_numv:
    334	movel	(%a0),ETEMP(%a6)
    335	movel	4(%a0),ETEMP_HI(%a6)
    336	movel	8(%a0),ETEMP_LO(%a6)
    337|
    338| Write the result to memory, setting the fpsr cc bits.  NaN and Inf
    339| bypass cu_wrexn.
    340|
    341cu_wrexn:
    342	tstw	LOCAL_EX(%a0)		|test for zero
    343	beqs	cu_wrzero
    344	cmpw	#0x8000,LOCAL_EX(%a0)	|test for zero
    345	bnes	cu_wreon
    346cu_wrzero:
    347	orl	#z_mask,USER_FPSR(%a6)	|set Z bit
    348cu_wreon:
    349	tstw	LOCAL_EX(%a0)
    350	bpl	wr_etemp
    351	orl	#neg_mask,USER_FPSR(%a6)
    352	bra	wr_etemp
    353
    354|
    355| HANDLE SOURCE DENORM HERE
    356|
    357|				;clear denorm stag to norm
    358|				;write the new tag & ete15 to the fstack
    359mon_dnrm:
    360|
    361| At this point, check for the cases in which normalizing the
    362| denorm produces incorrect results.
    363|
    364	tstb	DY_MO_FLG(%a6)	|all cases of dyadic instructions would
    365	bnes	nrm_src		|require normalization of denorm
    366
    367| At this point:
    368|	monadic instructions:	fabs  = $18  fneg   = $1a  ftst   = $3a
    369|				fmove = $00  fsmove = $40  fdmove = $44
    370|				fsqrt = $05* fssqrt = $41  fdsqrt = $45
    371|				(*fsqrt reencoded to $05)
    372|
    373	movew	CMDREG1B(%a6),%d0	|get command register
    374	andil	#0x7f,%d0			|strip to only command word
    375|
    376| At this point, fabs, fneg, fsmove, fdmove, ftst, fsqrt, fssqrt, and
    377| fdsqrt are possible.
    378| For cases fabs, fneg, fsmove, and fdmove goto spos (do not normalize)
    379| For cases fsqrt, fssqrt, and fdsqrt goto nrm_src (do normalize)
    380|
    381	btstl	#0,%d0
    382	bnes	nrm_src		|weed out fsqrt instructions
    383	st	CU_ONLY(%a6)	|set cu-only inst flag
    384	bra	cu_dnrm		|fmove, fabs, fneg, ftst
    385|				;cases go to cu_dnrm
    386nrm_src:
    387	bclrb	#sign_bit,LOCAL_EX(%a0)
    388	sne	LOCAL_SGN(%a0)
    389	bsr	nrm_set		|normalize number (exponent will go
    390|				; negative)
    391	bclrb	#sign_bit,LOCAL_EX(%a0) |get rid of false sign
    392
    393	bfclr	LOCAL_SGN(%a0){#0:#8}	|change back to IEEE ext format
    394	beqs	spos
    395	bsetb	#sign_bit,LOCAL_EX(%a0)
    396spos:
    397	bfclr	STAG(%a6){#0:#4}	|set tag to normalized, FPTE15 = 0
    398	bsetb	#4,STAG(%a6)	|set ETE15
    399	orb	#0xf0,DNRM_FLG(%a6)
    400normal:
    401	tstb	DNRM_FLG(%a6)	|check if any of the ops were denorms
    402	bne	ck_wrap		|if so, check if it is a potential
    403|				;wrap-around case
    404fix_stk:
    405	moveb	#0xfe,CU_SAVEPC(%a6)
    406	bclrb	#E1,E_BYTE(%a6)
    407
    408	clrw	NMNEXC(%a6)
    409
    410	st	RES_FLG(%a6)	|indicate that a restore is needed
    411	rts
    412
    413|
    414| cu_dnrm handles all cu-only instructions (fmove, fabs, fneg, and
    415| ftst) completely in software without an frestore to the 040.
    416|
    417cu_dnrm:
    418	st	CU_ONLY(%a6)
    419	movew	CMDREG1B(%a6),%d0
    420	andib	#0x3b,%d0		|isolate bits to select inst
    421	tstb	%d0
    422	beql	cu_dmove	|if zero, it is an fmove
    423	cmpib	#0x18,%d0
    424	beql	cu_dabs		|if $18, it is fabs
    425	cmpib	#0x1a,%d0
    426	beql	cu_dneg		|if $1a, it is fneg
    427|
    428| Inst is ftst.  Check the source operand and set the cc's accordingly.
    429| No write is done, so simply rts.
    430|
    431cu_dtst:
    432	movew	LOCAL_EX(%a0),%d0
    433	bclrl	#15,%d0
    434	sne	LOCAL_SGN(%a0)
    435	beqs	cu_dtpo
    436	orl	#neg_mask,USER_FPSR(%a6) |set N
    437cu_dtpo:
    438	cmpiw	#0x7fff,%d0	|test for inf/nan
    439	bnes	cu_dtcz
    440	tstl	LOCAL_HI(%a0)
    441	bnes	cu_dtn
    442	tstl	LOCAL_LO(%a0)
    443	bnes	cu_dtn
    444	orl	#inf_mask,USER_FPSR(%a6)
    445	rts
    446cu_dtn:
    447	orl	#nan_mask,USER_FPSR(%a6)
    448	movel	ETEMP_EX(%a6),FPTEMP_EX(%a6)	|set up fptemp sign for
    449|						;snan handler
    450	rts
    451cu_dtcz:
    452	tstl	LOCAL_HI(%a0)
    453	bnel	cu_dtsx
    454	tstl	LOCAL_LO(%a0)
    455	bnel	cu_dtsx
    456	orl	#z_mask,USER_FPSR(%a6)
    457cu_dtsx:
    458	rts
    459|
    460| Inst is fabs.  Execute the absolute value function on the input.
    461| Branch to the fmove code.
    462|
    463cu_dabs:
    464	bclrb	#7,LOCAL_EX(%a0)		|do abs
    465	bras	cu_dmove		|fmove code will finish
    466|
    467| Inst is fneg.  Execute the negate value function on the input.
    468| Fall though to the fmove code.
    469|
    470cu_dneg:
    471	bchgb	#7,LOCAL_EX(%a0)		|do neg
    472|
    473| Inst is fmove.  This code also handles all result writes.
    474| If bit 2 is set, round is forced to double.  If it is clear,
    475| and bit 6 is set, round is forced to single.  If both are clear,
    476| the round precision is found in the fpcr.  If the rounding precision
    477| is double or single, the result is zero, and the mode is checked
    478| to determine if the lsb of the result should be set.
    479|
    480cu_dmove:
    481	btstb	#2,CMDREG1B+1(%a6)	|check for rd
    482	bne	cu_dmrd
    483	btstb	#6,CMDREG1B+1(%a6)	|check for rs
    484	bne	cu_dmrs
    485|
    486| The move or operation is not with forced precision.  Use the
    487| FPCR_MODE byte to get rounding.
    488|
    489cu_dmnr:
    490	bfextu	FPCR_MODE(%a6){#0:#2},%d0
    491	tstb	%d0			|check for extended
    492	beq	cu_wrexd		|if so, just write result
    493	cmpib	#1,%d0			|check for single
    494	beq	cu_dmrs			|fall through to double
    495|
    496| The move is fdmove or round precision is double.  Result is zero.
    497| Check rmode for rp or rm and set lsb accordingly.
    498|
    499cu_dmrd:
    500	bfextu	FPCR_MODE(%a6){#2:#2},%d1	|get rmode
    501	tstw	LOCAL_EX(%a0)		|check sign
    502	blts	cu_dmdn
    503	cmpib	#3,%d1			|check for rp
    504	bne	cu_dpd			|load double pos zero
    505	bra	cu_dpdr			|load double pos zero w/lsb
    506cu_dmdn:
    507	cmpib	#2,%d1			|check for rm
    508	bne	cu_dnd			|load double neg zero
    509	bra	cu_dndr			|load double neg zero w/lsb
    510|
    511| The move is fsmove or round precision is single.  Result is zero.
    512| Check for rp or rm and set lsb accordingly.
    513|
    514cu_dmrs:
    515	bfextu	FPCR_MODE(%a6){#2:#2},%d1	|get rmode
    516	tstw	LOCAL_EX(%a0)		|check sign
    517	blts	cu_dmsn
    518	cmpib	#3,%d1			|check for rp
    519	bne	cu_spd			|load single pos zero
    520	bra	cu_spdr			|load single pos zero w/lsb
    521cu_dmsn:
    522	cmpib	#2,%d1			|check for rm
    523	bne	cu_snd			|load single neg zero
    524	bra	cu_sndr			|load single neg zero w/lsb
    525|
    526| The precision is extended, so the result in etemp is correct.
    527| Simply set unfl (not inex2 or aunfl) and write the result to
    528| the correct fp register.
    529cu_wrexd:
    530	orl	#unfl_mask,USER_FPSR(%a6)
    531	tstw	LOCAL_EX(%a0)
    532	beq	wr_etemp
    533	orl	#neg_mask,USER_FPSR(%a6)
    534	bra	wr_etemp
    535|
    536| These routines write +/- zero in double format.  The routines
    537| cu_dpdr and cu_dndr set the double lsb.
    538|
    539cu_dpd:
    540	movel	#0x3c010000,LOCAL_EX(%a0)	|force pos double zero
    541	clrl	LOCAL_HI(%a0)
    542	clrl	LOCAL_LO(%a0)
    543	orl	#z_mask,USER_FPSR(%a6)
    544	orl	#unfinx_mask,USER_FPSR(%a6)
    545	bra	wr_etemp
    546cu_dpdr:
    547	movel	#0x3c010000,LOCAL_EX(%a0)	|force pos double zero
    548	clrl	LOCAL_HI(%a0)
    549	movel	#0x800,LOCAL_LO(%a0)	|with lsb set
    550	orl	#unfinx_mask,USER_FPSR(%a6)
    551	bra	wr_etemp
    552cu_dnd:
    553	movel	#0xbc010000,LOCAL_EX(%a0)	|force pos double zero
    554	clrl	LOCAL_HI(%a0)
    555	clrl	LOCAL_LO(%a0)
    556	orl	#z_mask,USER_FPSR(%a6)
    557	orl	#neg_mask,USER_FPSR(%a6)
    558	orl	#unfinx_mask,USER_FPSR(%a6)
    559	bra	wr_etemp
    560cu_dndr:
    561	movel	#0xbc010000,LOCAL_EX(%a0)	|force pos double zero
    562	clrl	LOCAL_HI(%a0)
    563	movel	#0x800,LOCAL_LO(%a0)	|with lsb set
    564	orl	#neg_mask,USER_FPSR(%a6)
    565	orl	#unfinx_mask,USER_FPSR(%a6)
    566	bra	wr_etemp
    567|
    568| These routines write +/- zero in single format.  The routines
    569| cu_dpdr and cu_dndr set the single lsb.
    570|
    571cu_spd:
    572	movel	#0x3f810000,LOCAL_EX(%a0)	|force pos single zero
    573	clrl	LOCAL_HI(%a0)
    574	clrl	LOCAL_LO(%a0)
    575	orl	#z_mask,USER_FPSR(%a6)
    576	orl	#unfinx_mask,USER_FPSR(%a6)
    577	bra	wr_etemp
    578cu_spdr:
    579	movel	#0x3f810000,LOCAL_EX(%a0)	|force pos single zero
    580	movel	#0x100,LOCAL_HI(%a0)	|with lsb set
    581	clrl	LOCAL_LO(%a0)
    582	orl	#unfinx_mask,USER_FPSR(%a6)
    583	bra	wr_etemp
    584cu_snd:
    585	movel	#0xbf810000,LOCAL_EX(%a0)	|force pos single zero
    586	clrl	LOCAL_HI(%a0)
    587	clrl	LOCAL_LO(%a0)
    588	orl	#z_mask,USER_FPSR(%a6)
    589	orl	#neg_mask,USER_FPSR(%a6)
    590	orl	#unfinx_mask,USER_FPSR(%a6)
    591	bra	wr_etemp
    592cu_sndr:
    593	movel	#0xbf810000,LOCAL_EX(%a0)	|force pos single zero
    594	movel	#0x100,LOCAL_HI(%a0)	|with lsb set
    595	clrl	LOCAL_LO(%a0)
    596	orl	#neg_mask,USER_FPSR(%a6)
    597	orl	#unfinx_mask,USER_FPSR(%a6)
    598	bra	wr_etemp
    599
    600|
    601| This code checks for 16-bit overflow conditions on dyadic
    602| operations which are not restorable into the floating-point
    603| unit and must be completed in software.  Basically, this
    604| condition exists with a very large norm and a denorm.  One
    605| of the operands must be denormalized to enter this code.
    606|
    607| Flags used:
    608|	DY_MO_FLG contains 0 for monadic op, $ff for dyadic
    609|	DNRM_FLG contains $00 for neither op denormalized
    610|	                  $0f for the destination op denormalized
    611|	                  $f0 for the source op denormalized
    612|	                  $ff for both ops denormalized
    613|
    614| The wrap-around condition occurs for add, sub, div, and cmp
    615| when
    616|
    617|	abs(dest_exp - src_exp) >= $8000
    618|
    619| and for mul when
    620|
    621|	(dest_exp + src_exp) < $0
    622|
    623| we must process the operation here if this case is true.
    624|
    625| The rts following the frcfpn routine is the exit from res_func
    626| for this condition.  The restore flag (RES_FLG) is left clear.
    627| No frestore is done unless an exception is to be reported.
    628|
    629| For fadd:
    630|	if(sign_of(dest) != sign_of(src))
    631|		replace exponent of src with $3fff (keep sign)
    632|		use fpu to perform dest+new_src (user's rmode and X)
    633|		clr sticky
    634|	else
    635|		set sticky
    636|	call round with user's precision and mode
    637|	move result to fpn and wbtemp
    638|
    639| For fsub:
    640|	if(sign_of(dest) == sign_of(src))
    641|		replace exponent of src with $3fff (keep sign)
    642|		use fpu to perform dest+new_src (user's rmode and X)
    643|		clr sticky
    644|	else
    645|		set sticky
    646|	call round with user's precision and mode
    647|	move result to fpn and wbtemp
    648|
    649| For fdiv/fsgldiv:
    650|	if(both operands are denorm)
    651|		restore_to_fpu;
    652|	if(dest is norm)
    653|		force_ovf;
    654|	else(dest is denorm)
    655|		force_unf:
    656|
    657| For fcmp:
    658|	if(dest is norm)
    659|		N = sign_of(dest);
    660|	else(dest is denorm)
    661|		N = sign_of(src);
    662|
    663| For fmul:
    664|	if(both operands are denorm)
    665|		force_unf;
    666|	if((dest_exp + src_exp) < 0)
    667|		force_unf:
    668|	else
    669|		restore_to_fpu;
    670|
    671| local equates:
    672	.set	addcode,0x22
    673	.set	subcode,0x28
    674	.set	mulcode,0x23
    675	.set	divcode,0x20
    676	.set	cmpcode,0x38
    677ck_wrap:
    678	| tstb	DY_MO_FLG(%a6)	;check for fsqrt
    679	beq	fix_stk		|if zero, it is fsqrt
    680	movew	CMDREG1B(%a6),%d0
    681	andiw	#0x3b,%d0		|strip to command bits
    682	cmpiw	#addcode,%d0
    683	beq	wrap_add
    684	cmpiw	#subcode,%d0
    685	beq	wrap_sub
    686	cmpiw	#mulcode,%d0
    687	beq	wrap_mul
    688	cmpiw	#cmpcode,%d0
    689	beq	wrap_cmp
    690|
    691| Inst is fdiv.
    692|
    693wrap_div:
    694	cmpb	#0xff,DNRM_FLG(%a6) |if both ops denorm,
    695	beq	fix_stk		 |restore to fpu
    696|
    697| One of the ops is denormalized.  Test for wrap condition
    698| and force the result.
    699|
    700	cmpb	#0x0f,DNRM_FLG(%a6) |check for dest denorm
    701	bnes	div_srcd
    702div_destd:
    703	bsrl	ckinf_ns
    704	bne	fix_stk
    705	bfextu	ETEMP_EX(%a6){#1:#15},%d0	|get src exp (always pos)
    706	bfexts	FPTEMP_EX(%a6){#1:#15},%d1	|get dest exp (always neg)
    707	subl	%d1,%d0			|subtract dest from src
    708	cmpl	#0x7fff,%d0
    709	blt	fix_stk			|if less, not wrap case
    710	clrb	WBTEMP_SGN(%a6)
    711	movew	ETEMP_EX(%a6),%d0		|find the sign of the result
    712	movew	FPTEMP_EX(%a6),%d1
    713	eorw	%d1,%d0
    714	andiw	#0x8000,%d0
    715	beq	force_unf
    716	st	WBTEMP_SGN(%a6)
    717	bra	force_unf
    718
    719ckinf_ns:
    720	moveb	STAG(%a6),%d0		|check source tag for inf or nan
    721	bra	ck_in_com
    722ckinf_nd:
    723	moveb	DTAG(%a6),%d0		|check destination tag for inf or nan
    724ck_in_com:
    725	andib	#0x60,%d0			|isolate tag bits
    726	cmpb	#0x40,%d0			|is it inf?
    727	beq	nan_or_inf		|not wrap case
    728	cmpb	#0x60,%d0			|is it nan?
    729	beq	nan_or_inf		|yes, not wrap case?
    730	cmpb	#0x20,%d0			|is it a zero?
    731	beq	nan_or_inf		|yes
    732	clrl	%d0
    733	rts				|then ; it is either a zero of norm,
    734|					;check wrap case
    735nan_or_inf:
    736	moveql	#-1,%d0
    737	rts
    738
    739
    740
    741div_srcd:
    742	bsrl	ckinf_nd
    743	bne	fix_stk
    744	bfextu	FPTEMP_EX(%a6){#1:#15},%d0	|get dest exp (always pos)
    745	bfexts	ETEMP_EX(%a6){#1:#15},%d1	|get src exp (always neg)
    746	subl	%d1,%d0			|subtract src from dest
    747	cmpl	#0x8000,%d0
    748	blt	fix_stk			|if less, not wrap case
    749	clrb	WBTEMP_SGN(%a6)
    750	movew	ETEMP_EX(%a6),%d0		|find the sign of the result
    751	movew	FPTEMP_EX(%a6),%d1
    752	eorw	%d1,%d0
    753	andiw	#0x8000,%d0
    754	beqs	force_ovf
    755	st	WBTEMP_SGN(%a6)
    756|
    757| This code handles the case of the instruction resulting in
    758| an overflow condition.
    759|
    760force_ovf:
    761	bclrb	#E1,E_BYTE(%a6)
    762	orl	#ovfl_inx_mask,USER_FPSR(%a6)
    763	clrw	NMNEXC(%a6)
    764	leal	WBTEMP(%a6),%a0		|point a0 to memory location
    765	movew	CMDREG1B(%a6),%d0
    766	btstl	#6,%d0			|test for forced precision
    767	beqs	frcovf_fpcr
    768	btstl	#2,%d0			|check for double
    769	bnes	frcovf_dbl
    770	movel	#0x1,%d0			|inst is forced single
    771	bras	frcovf_rnd
    772frcovf_dbl:
    773	movel	#0x2,%d0			|inst is forced double
    774	bras	frcovf_rnd
    775frcovf_fpcr:
    776	bfextu	FPCR_MODE(%a6){#0:#2},%d0	|inst not forced - use fpcr prec
    777frcovf_rnd:
    778
    779| The 881/882 does not set inex2 for the following case, so the
    780| line is commented out to be compatible with 881/882
    781|	tst.b	%d0
    782|	beq.b	frcovf_x
    783|	or.l	#inex2_mask,USER_FPSR(%a6) ;if prec is s or d, set inex2
    784
    785|frcovf_x:
    786	bsrl	ovf_res			|get correct result based on
    787|					;round precision/mode.  This
    788|					;sets FPSR_CC correctly
    789|					;returns in external format
    790	bfclr	WBTEMP_SGN(%a6){#0:#8}
    791	beq	frcfpn
    792	bsetb	#sign_bit,WBTEMP_EX(%a6)
    793	bra	frcfpn
    794|
    795| Inst is fadd.
    796|
    797wrap_add:
    798	cmpb	#0xff,DNRM_FLG(%a6) |if both ops denorm,
    799	beq	fix_stk		 |restore to fpu
    800|
    801| One of the ops is denormalized.  Test for wrap condition
    802| and complete the instruction.
    803|
    804	cmpb	#0x0f,DNRM_FLG(%a6) |check for dest denorm
    805	bnes	add_srcd
    806add_destd:
    807	bsrl	ckinf_ns
    808	bne	fix_stk
    809	bfextu	ETEMP_EX(%a6){#1:#15},%d0	|get src exp (always pos)
    810	bfexts	FPTEMP_EX(%a6){#1:#15},%d1	|get dest exp (always neg)
    811	subl	%d1,%d0			|subtract dest from src
    812	cmpl	#0x8000,%d0
    813	blt	fix_stk			|if less, not wrap case
    814	bra	add_wrap
    815add_srcd:
    816	bsrl	ckinf_nd
    817	bne	fix_stk
    818	bfextu	FPTEMP_EX(%a6){#1:#15},%d0	|get dest exp (always pos)
    819	bfexts	ETEMP_EX(%a6){#1:#15},%d1	|get src exp (always neg)
    820	subl	%d1,%d0			|subtract src from dest
    821	cmpl	#0x8000,%d0
    822	blt	fix_stk			|if less, not wrap case
    823|
    824| Check the signs of the operands.  If they are unlike, the fpu
    825| can be used to add the norm and 1.0 with the sign of the
    826| denorm and it will correctly generate the result in extended
    827| precision.  We can then call round with no sticky and the result
    828| will be correct for the user's rounding mode and precision.  If
    829| the signs are the same, we call round with the sticky bit set
    830| and the result will be correct for the user's rounding mode and
    831| precision.
    832|
    833add_wrap:
    834	movew	ETEMP_EX(%a6),%d0
    835	movew	FPTEMP_EX(%a6),%d1
    836	eorw	%d1,%d0
    837	andiw	#0x8000,%d0
    838	beq	add_same
    839|
    840| The signs are unlike.
    841|
    842	cmpb	#0x0f,DNRM_FLG(%a6) |is dest the denorm?
    843	bnes	add_u_srcd
    844	movew	FPTEMP_EX(%a6),%d0
    845	andiw	#0x8000,%d0
    846	orw	#0x3fff,%d0	|force the exponent to +/- 1
    847	movew	%d0,FPTEMP_EX(%a6) |in the denorm
    848	movel	USER_FPCR(%a6),%d0
    849	andil	#0x30,%d0
    850	fmovel	%d0,%fpcr		|set up users rmode and X
    851	fmovex	ETEMP(%a6),%fp0
    852	faddx	FPTEMP(%a6),%fp0
    853	leal	WBTEMP(%a6),%a0	|point a0 to wbtemp in frame
    854	fmovel	%fpsr,%d1
    855	orl	%d1,USER_FPSR(%a6) |capture cc's and inex from fadd
    856	fmovex	%fp0,WBTEMP(%a6)	|write result to memory
    857	lsrl	#4,%d0		|put rmode in lower 2 bits
    858	movel	USER_FPCR(%a6),%d1
    859	andil	#0xc0,%d1
    860	lsrl	#6,%d1		|put precision in upper word
    861	swap	%d1
    862	orl	%d0,%d1		|set up for round call
    863	clrl	%d0		|force sticky to zero
    864	bclrb	#sign_bit,WBTEMP_EX(%a6)
    865	sne	WBTEMP_SGN(%a6)
    866	bsrl	round		|round result to users rmode & prec
    867	bfclr	WBTEMP_SGN(%a6){#0:#8}	|convert back to IEEE ext format
    868	beq	frcfpnr
    869	bsetb	#sign_bit,WBTEMP_EX(%a6)
    870	bra	frcfpnr
    871add_u_srcd:
    872	movew	ETEMP_EX(%a6),%d0
    873	andiw	#0x8000,%d0
    874	orw	#0x3fff,%d0	|force the exponent to +/- 1
    875	movew	%d0,ETEMP_EX(%a6) |in the denorm
    876	movel	USER_FPCR(%a6),%d0
    877	andil	#0x30,%d0
    878	fmovel	%d0,%fpcr		|set up users rmode and X
    879	fmovex	ETEMP(%a6),%fp0
    880	faddx	FPTEMP(%a6),%fp0
    881	fmovel	%fpsr,%d1
    882	orl	%d1,USER_FPSR(%a6) |capture cc's and inex from fadd
    883	leal	WBTEMP(%a6),%a0	|point a0 to wbtemp in frame
    884	fmovex	%fp0,WBTEMP(%a6)	|write result to memory
    885	lsrl	#4,%d0		|put rmode in lower 2 bits
    886	movel	USER_FPCR(%a6),%d1
    887	andil	#0xc0,%d1
    888	lsrl	#6,%d1		|put precision in upper word
    889	swap	%d1
    890	orl	%d0,%d1		|set up for round call
    891	clrl	%d0		|force sticky to zero
    892	bclrb	#sign_bit,WBTEMP_EX(%a6)
    893	sne	WBTEMP_SGN(%a6)	|use internal format for round
    894	bsrl	round		|round result to users rmode & prec
    895	bfclr	WBTEMP_SGN(%a6){#0:#8}	|convert back to IEEE ext format
    896	beq	frcfpnr
    897	bsetb	#sign_bit,WBTEMP_EX(%a6)
    898	bra	frcfpnr
    899|
    900| Signs are alike:
    901|
    902add_same:
    903	cmpb	#0x0f,DNRM_FLG(%a6) |is dest the denorm?
    904	bnes	add_s_srcd
    905add_s_destd:
    906	leal	ETEMP(%a6),%a0
    907	movel	USER_FPCR(%a6),%d0
    908	andil	#0x30,%d0
    909	lsrl	#4,%d0		|put rmode in lower 2 bits
    910	movel	USER_FPCR(%a6),%d1
    911	andil	#0xc0,%d1
    912	lsrl	#6,%d1		|put precision in upper word
    913	swap	%d1
    914	orl	%d0,%d1		|set up for round call
    915	movel	#0x20000000,%d0	|set sticky for round
    916	bclrb	#sign_bit,ETEMP_EX(%a6)
    917	sne	ETEMP_SGN(%a6)
    918	bsrl	round		|round result to users rmode & prec
    919	bfclr	ETEMP_SGN(%a6){#0:#8}	|convert back to IEEE ext format
    920	beqs	add_s_dclr
    921	bsetb	#sign_bit,ETEMP_EX(%a6)
    922add_s_dclr:
    923	leal	WBTEMP(%a6),%a0
    924	movel	ETEMP(%a6),(%a0)	|write result to wbtemp
    925	movel	ETEMP_HI(%a6),4(%a0)
    926	movel	ETEMP_LO(%a6),8(%a0)
    927	tstw	ETEMP_EX(%a6)
    928	bgt	add_ckovf
    929	orl	#neg_mask,USER_FPSR(%a6)
    930	bra	add_ckovf
    931add_s_srcd:
    932	leal	FPTEMP(%a6),%a0
    933	movel	USER_FPCR(%a6),%d0
    934	andil	#0x30,%d0
    935	lsrl	#4,%d0		|put rmode in lower 2 bits
    936	movel	USER_FPCR(%a6),%d1
    937	andil	#0xc0,%d1
    938	lsrl	#6,%d1		|put precision in upper word
    939	swap	%d1
    940	orl	%d0,%d1		|set up for round call
    941	movel	#0x20000000,%d0	|set sticky for round
    942	bclrb	#sign_bit,FPTEMP_EX(%a6)
    943	sne	FPTEMP_SGN(%a6)
    944	bsrl	round		|round result to users rmode & prec
    945	bfclr	FPTEMP_SGN(%a6){#0:#8}	|convert back to IEEE ext format
    946	beqs	add_s_sclr
    947	bsetb	#sign_bit,FPTEMP_EX(%a6)
    948add_s_sclr:
    949	leal	WBTEMP(%a6),%a0
    950	movel	FPTEMP(%a6),(%a0)	|write result to wbtemp
    951	movel	FPTEMP_HI(%a6),4(%a0)
    952	movel	FPTEMP_LO(%a6),8(%a0)
    953	tstw	FPTEMP_EX(%a6)
    954	bgt	add_ckovf
    955	orl	#neg_mask,USER_FPSR(%a6)
    956add_ckovf:
    957	movew	WBTEMP_EX(%a6),%d0
    958	andiw	#0x7fff,%d0
    959	cmpiw	#0x7fff,%d0
    960	bne	frcfpnr
    961|
    962| The result has overflowed to $7fff exponent.  Set I, ovfl,
    963| and aovfl, and clr the mantissa (incorrectly set by the
    964| round routine.)
    965|
    966	orl	#inf_mask+ovfl_inx_mask,USER_FPSR(%a6)
    967	clrl	4(%a0)
    968	bra	frcfpnr
    969|
    970| Inst is fsub.
    971|
    972wrap_sub:
    973	cmpb	#0xff,DNRM_FLG(%a6) |if both ops denorm,
    974	beq	fix_stk		 |restore to fpu
    975|
    976| One of the ops is denormalized.  Test for wrap condition
    977| and complete the instruction.
    978|
    979	cmpb	#0x0f,DNRM_FLG(%a6) |check for dest denorm
    980	bnes	sub_srcd
    981sub_destd:
    982	bsrl	ckinf_ns
    983	bne	fix_stk
    984	bfextu	ETEMP_EX(%a6){#1:#15},%d0	|get src exp (always pos)
    985	bfexts	FPTEMP_EX(%a6){#1:#15},%d1	|get dest exp (always neg)
    986	subl	%d1,%d0			|subtract src from dest
    987	cmpl	#0x8000,%d0
    988	blt	fix_stk			|if less, not wrap case
    989	bra	sub_wrap
    990sub_srcd:
    991	bsrl	ckinf_nd
    992	bne	fix_stk
    993	bfextu	FPTEMP_EX(%a6){#1:#15},%d0	|get dest exp (always pos)
    994	bfexts	ETEMP_EX(%a6){#1:#15},%d1	|get src exp (always neg)
    995	subl	%d1,%d0			|subtract dest from src
    996	cmpl	#0x8000,%d0
    997	blt	fix_stk			|if less, not wrap case
    998|
    999| Check the signs of the operands.  If they are alike, the fpu
   1000| can be used to subtract from the norm 1.0 with the sign of the
   1001| denorm and it will correctly generate the result in extended
   1002| precision.  We can then call round with no sticky and the result
   1003| will be correct for the user's rounding mode and precision.  If
   1004| the signs are unlike, we call round with the sticky bit set
   1005| and the result will be correct for the user's rounding mode and
   1006| precision.
   1007|
   1008sub_wrap:
   1009	movew	ETEMP_EX(%a6),%d0
   1010	movew	FPTEMP_EX(%a6),%d1
   1011	eorw	%d1,%d0
   1012	andiw	#0x8000,%d0
   1013	bne	sub_diff
   1014|
   1015| The signs are alike.
   1016|
   1017	cmpb	#0x0f,DNRM_FLG(%a6) |is dest the denorm?
   1018	bnes	sub_u_srcd
   1019	movew	FPTEMP_EX(%a6),%d0
   1020	andiw	#0x8000,%d0
   1021	orw	#0x3fff,%d0	|force the exponent to +/- 1
   1022	movew	%d0,FPTEMP_EX(%a6) |in the denorm
   1023	movel	USER_FPCR(%a6),%d0
   1024	andil	#0x30,%d0
   1025	fmovel	%d0,%fpcr		|set up users rmode and X
   1026	fmovex	FPTEMP(%a6),%fp0
   1027	fsubx	ETEMP(%a6),%fp0
   1028	fmovel	%fpsr,%d1
   1029	orl	%d1,USER_FPSR(%a6) |capture cc's and inex from fadd
   1030	leal	WBTEMP(%a6),%a0	|point a0 to wbtemp in frame
   1031	fmovex	%fp0,WBTEMP(%a6)	|write result to memory
   1032	lsrl	#4,%d0		|put rmode in lower 2 bits
   1033	movel	USER_FPCR(%a6),%d1
   1034	andil	#0xc0,%d1
   1035	lsrl	#6,%d1		|put precision in upper word
   1036	swap	%d1
   1037	orl	%d0,%d1		|set up for round call
   1038	clrl	%d0		|force sticky to zero
   1039	bclrb	#sign_bit,WBTEMP_EX(%a6)
   1040	sne	WBTEMP_SGN(%a6)
   1041	bsrl	round		|round result to users rmode & prec
   1042	bfclr	WBTEMP_SGN(%a6){#0:#8}	|convert back to IEEE ext format
   1043	beq	frcfpnr
   1044	bsetb	#sign_bit,WBTEMP_EX(%a6)
   1045	bra	frcfpnr
   1046sub_u_srcd:
   1047	movew	ETEMP_EX(%a6),%d0
   1048	andiw	#0x8000,%d0
   1049	orw	#0x3fff,%d0	|force the exponent to +/- 1
   1050	movew	%d0,ETEMP_EX(%a6) |in the denorm
   1051	movel	USER_FPCR(%a6),%d0
   1052	andil	#0x30,%d0
   1053	fmovel	%d0,%fpcr		|set up users rmode and X
   1054	fmovex	FPTEMP(%a6),%fp0
   1055	fsubx	ETEMP(%a6),%fp0
   1056	fmovel	%fpsr,%d1
   1057	orl	%d1,USER_FPSR(%a6) |capture cc's and inex from fadd
   1058	leal	WBTEMP(%a6),%a0	|point a0 to wbtemp in frame
   1059	fmovex	%fp0,WBTEMP(%a6)	|write result to memory
   1060	lsrl	#4,%d0		|put rmode in lower 2 bits
   1061	movel	USER_FPCR(%a6),%d1
   1062	andil	#0xc0,%d1
   1063	lsrl	#6,%d1		|put precision in upper word
   1064	swap	%d1
   1065	orl	%d0,%d1		|set up for round call
   1066	clrl	%d0		|force sticky to zero
   1067	bclrb	#sign_bit,WBTEMP_EX(%a6)
   1068	sne	WBTEMP_SGN(%a6)
   1069	bsrl	round		|round result to users rmode & prec
   1070	bfclr	WBTEMP_SGN(%a6){#0:#8}	|convert back to IEEE ext format
   1071	beq	frcfpnr
   1072	bsetb	#sign_bit,WBTEMP_EX(%a6)
   1073	bra	frcfpnr
   1074|
   1075| Signs are unlike:
   1076|
   1077sub_diff:
   1078	cmpb	#0x0f,DNRM_FLG(%a6) |is dest the denorm?
   1079	bnes	sub_s_srcd
   1080sub_s_destd:
   1081	leal	ETEMP(%a6),%a0
   1082	movel	USER_FPCR(%a6),%d0
   1083	andil	#0x30,%d0
   1084	lsrl	#4,%d0		|put rmode in lower 2 bits
   1085	movel	USER_FPCR(%a6),%d1
   1086	andil	#0xc0,%d1
   1087	lsrl	#6,%d1		|put precision in upper word
   1088	swap	%d1
   1089	orl	%d0,%d1		|set up for round call
   1090	movel	#0x20000000,%d0	|set sticky for round
   1091|
   1092| Since the dest is the denorm, the sign is the opposite of the
   1093| norm sign.
   1094|
   1095	eoriw	#0x8000,ETEMP_EX(%a6)	|flip sign on result
   1096	tstw	ETEMP_EX(%a6)
   1097	bgts	sub_s_dwr
   1098	orl	#neg_mask,USER_FPSR(%a6)
   1099sub_s_dwr:
   1100	bclrb	#sign_bit,ETEMP_EX(%a6)
   1101	sne	ETEMP_SGN(%a6)
   1102	bsrl	round		|round result to users rmode & prec
   1103	bfclr	ETEMP_SGN(%a6){#0:#8}	|convert back to IEEE ext format
   1104	beqs	sub_s_dclr
   1105	bsetb	#sign_bit,ETEMP_EX(%a6)
   1106sub_s_dclr:
   1107	leal	WBTEMP(%a6),%a0
   1108	movel	ETEMP(%a6),(%a0)	|write result to wbtemp
   1109	movel	ETEMP_HI(%a6),4(%a0)
   1110	movel	ETEMP_LO(%a6),8(%a0)
   1111	bra	sub_ckovf
   1112sub_s_srcd:
   1113	leal	FPTEMP(%a6),%a0
   1114	movel	USER_FPCR(%a6),%d0
   1115	andil	#0x30,%d0
   1116	lsrl	#4,%d0		|put rmode in lower 2 bits
   1117	movel	USER_FPCR(%a6),%d1
   1118	andil	#0xc0,%d1
   1119	lsrl	#6,%d1		|put precision in upper word
   1120	swap	%d1
   1121	orl	%d0,%d1		|set up for round call
   1122	movel	#0x20000000,%d0	|set sticky for round
   1123	bclrb	#sign_bit,FPTEMP_EX(%a6)
   1124	sne	FPTEMP_SGN(%a6)
   1125	bsrl	round		|round result to users rmode & prec
   1126	bfclr	FPTEMP_SGN(%a6){#0:#8}	|convert back to IEEE ext format
   1127	beqs	sub_s_sclr
   1128	bsetb	#sign_bit,FPTEMP_EX(%a6)
   1129sub_s_sclr:
   1130	leal	WBTEMP(%a6),%a0
   1131	movel	FPTEMP(%a6),(%a0)	|write result to wbtemp
   1132	movel	FPTEMP_HI(%a6),4(%a0)
   1133	movel	FPTEMP_LO(%a6),8(%a0)
   1134	tstw	FPTEMP_EX(%a6)
   1135	bgt	sub_ckovf
   1136	orl	#neg_mask,USER_FPSR(%a6)
   1137sub_ckovf:
   1138	movew	WBTEMP_EX(%a6),%d0
   1139	andiw	#0x7fff,%d0
   1140	cmpiw	#0x7fff,%d0
   1141	bne	frcfpnr
   1142|
   1143| The result has overflowed to $7fff exponent.  Set I, ovfl,
   1144| and aovfl, and clr the mantissa (incorrectly set by the
   1145| round routine.)
   1146|
   1147	orl	#inf_mask+ovfl_inx_mask,USER_FPSR(%a6)
   1148	clrl	4(%a0)
   1149	bra	frcfpnr
   1150|
   1151| Inst is fcmp.
   1152|
   1153wrap_cmp:
   1154	cmpb	#0xff,DNRM_FLG(%a6) |if both ops denorm,
   1155	beq	fix_stk		 |restore to fpu
   1156|
   1157| One of the ops is denormalized.  Test for wrap condition
   1158| and complete the instruction.
   1159|
   1160	cmpb	#0x0f,DNRM_FLG(%a6) |check for dest denorm
   1161	bnes	cmp_srcd
   1162cmp_destd:
   1163	bsrl	ckinf_ns
   1164	bne	fix_stk
   1165	bfextu	ETEMP_EX(%a6){#1:#15},%d0	|get src exp (always pos)
   1166	bfexts	FPTEMP_EX(%a6){#1:#15},%d1	|get dest exp (always neg)
   1167	subl	%d1,%d0			|subtract dest from src
   1168	cmpl	#0x8000,%d0
   1169	blt	fix_stk			|if less, not wrap case
   1170	tstw	ETEMP_EX(%a6)		|set N to ~sign_of(src)
   1171	bge	cmp_setn
   1172	rts
   1173cmp_srcd:
   1174	bsrl	ckinf_nd
   1175	bne	fix_stk
   1176	bfextu	FPTEMP_EX(%a6){#1:#15},%d0	|get dest exp (always pos)
   1177	bfexts	ETEMP_EX(%a6){#1:#15},%d1	|get src exp (always neg)
   1178	subl	%d1,%d0			|subtract src from dest
   1179	cmpl	#0x8000,%d0
   1180	blt	fix_stk			|if less, not wrap case
   1181	tstw	FPTEMP_EX(%a6)		|set N to sign_of(dest)
   1182	blt	cmp_setn
   1183	rts
   1184cmp_setn:
   1185	orl	#neg_mask,USER_FPSR(%a6)
   1186	rts
   1187
   1188|
   1189| Inst is fmul.
   1190|
   1191wrap_mul:
   1192	cmpb	#0xff,DNRM_FLG(%a6) |if both ops denorm,
   1193	beq	force_unf	|force an underflow (really!)
   1194|
   1195| One of the ops is denormalized.  Test for wrap condition
   1196| and complete the instruction.
   1197|
   1198	cmpb	#0x0f,DNRM_FLG(%a6) |check for dest denorm
   1199	bnes	mul_srcd
   1200mul_destd:
   1201	bsrl	ckinf_ns
   1202	bne	fix_stk
   1203	bfextu	ETEMP_EX(%a6){#1:#15},%d0	|get src exp (always pos)
   1204	bfexts	FPTEMP_EX(%a6){#1:#15},%d1	|get dest exp (always neg)
   1205	addl	%d1,%d0			|subtract dest from src
   1206	bgt	fix_stk
   1207	bra	force_unf
   1208mul_srcd:
   1209	bsrl	ckinf_nd
   1210	bne	fix_stk
   1211	bfextu	FPTEMP_EX(%a6){#1:#15},%d0	|get dest exp (always pos)
   1212	bfexts	ETEMP_EX(%a6){#1:#15},%d1	|get src exp (always neg)
   1213	addl	%d1,%d0			|subtract src from dest
   1214	bgt	fix_stk
   1215
   1216|
   1217| This code handles the case of the instruction resulting in
   1218| an underflow condition.
   1219|
   1220force_unf:
   1221	bclrb	#E1,E_BYTE(%a6)
   1222	orl	#unfinx_mask,USER_FPSR(%a6)
   1223	clrw	NMNEXC(%a6)
   1224	clrb	WBTEMP_SGN(%a6)
   1225	movew	ETEMP_EX(%a6),%d0		|find the sign of the result
   1226	movew	FPTEMP_EX(%a6),%d1
   1227	eorw	%d1,%d0
   1228	andiw	#0x8000,%d0
   1229	beqs	frcunfcont
   1230	st	WBTEMP_SGN(%a6)
   1231frcunfcont:
   1232	lea	WBTEMP(%a6),%a0		|point a0 to memory location
   1233	movew	CMDREG1B(%a6),%d0
   1234	btstl	#6,%d0			|test for forced precision
   1235	beqs	frcunf_fpcr
   1236	btstl	#2,%d0			|check for double
   1237	bnes	frcunf_dbl
   1238	movel	#0x1,%d0			|inst is forced single
   1239	bras	frcunf_rnd
   1240frcunf_dbl:
   1241	movel	#0x2,%d0			|inst is forced double
   1242	bras	frcunf_rnd
   1243frcunf_fpcr:
   1244	bfextu	FPCR_MODE(%a6){#0:#2},%d0	|inst not forced - use fpcr prec
   1245frcunf_rnd:
   1246	bsrl	unf_sub			|get correct result based on
   1247|					;round precision/mode.  This
   1248|					;sets FPSR_CC correctly
   1249	bfclr	WBTEMP_SGN(%a6){#0:#8}	|convert back to IEEE ext format
   1250	beqs	frcfpn
   1251	bsetb	#sign_bit,WBTEMP_EX(%a6)
   1252	bra	frcfpn
   1253
   1254|
   1255| Write the result to the user's fpn.  All results must be HUGE to be
   1256| written; otherwise the results would have overflowed or underflowed.
   1257| If the rounding precision is single or double, the ovf_res routine
   1258| is needed to correctly supply the max value.
   1259|
   1260frcfpnr:
   1261	movew	CMDREG1B(%a6),%d0
   1262	btstl	#6,%d0			|test for forced precision
   1263	beqs	frcfpn_fpcr
   1264	btstl	#2,%d0			|check for double
   1265	bnes	frcfpn_dbl
   1266	movel	#0x1,%d0			|inst is forced single
   1267	bras	frcfpn_rnd
   1268frcfpn_dbl:
   1269	movel	#0x2,%d0			|inst is forced double
   1270	bras	frcfpn_rnd
   1271frcfpn_fpcr:
   1272	bfextu	FPCR_MODE(%a6){#0:#2},%d0	|inst not forced - use fpcr prec
   1273	tstb	%d0
   1274	beqs	frcfpn			|if extended, write what you got
   1275frcfpn_rnd:
   1276	bclrb	#sign_bit,WBTEMP_EX(%a6)
   1277	sne	WBTEMP_SGN(%a6)
   1278	bsrl	ovf_res			|get correct result based on
   1279|					;round precision/mode.  This
   1280|					;sets FPSR_CC correctly
   1281	bfclr	WBTEMP_SGN(%a6){#0:#8}	|convert back to IEEE ext format
   1282	beqs	frcfpn_clr
   1283	bsetb	#sign_bit,WBTEMP_EX(%a6)
   1284frcfpn_clr:
   1285	orl	#ovfinx_mask,USER_FPSR(%a6)
   1286|
   1287| Perform the write.
   1288|
   1289frcfpn:
   1290	bfextu	CMDREG1B(%a6){#6:#3},%d0	|extract fp destination register
   1291	cmpib	#3,%d0
   1292	bles	frc0123			|check if dest is fp0-fp3
   1293	movel	#7,%d1
   1294	subl	%d0,%d1
   1295	clrl	%d0
   1296	bsetl	%d1,%d0
   1297	fmovemx WBTEMP(%a6),%d0
   1298	rts
   1299frc0123:
   1300	cmpib	#0,%d0
   1301	beqs	frc0_dst
   1302	cmpib	#1,%d0
   1303	beqs	frc1_dst
   1304	cmpib	#2,%d0
   1305	beqs	frc2_dst
   1306frc3_dst:
   1307	movel	WBTEMP_EX(%a6),USER_FP3(%a6)
   1308	movel	WBTEMP_HI(%a6),USER_FP3+4(%a6)
   1309	movel	WBTEMP_LO(%a6),USER_FP3+8(%a6)
   1310	rts
   1311frc2_dst:
   1312	movel	WBTEMP_EX(%a6),USER_FP2(%a6)
   1313	movel	WBTEMP_HI(%a6),USER_FP2+4(%a6)
   1314	movel	WBTEMP_LO(%a6),USER_FP2+8(%a6)
   1315	rts
   1316frc1_dst:
   1317	movel	WBTEMP_EX(%a6),USER_FP1(%a6)
   1318	movel	WBTEMP_HI(%a6),USER_FP1+4(%a6)
   1319	movel	WBTEMP_LO(%a6),USER_FP1+8(%a6)
   1320	rts
   1321frc0_dst:
   1322	movel	WBTEMP_EX(%a6),USER_FP0(%a6)
   1323	movel	WBTEMP_HI(%a6),USER_FP0+4(%a6)
   1324	movel	WBTEMP_LO(%a6),USER_FP0+8(%a6)
   1325	rts
   1326
   1327|
   1328| Write etemp to fpn.
   1329| A check is made on enabled and signalled snan exceptions,
   1330| and the destination is not overwritten if this condition exists.
   1331| This code is designed to make fmoveins of unsupported data types
   1332| faster.
   1333|
   1334wr_etemp:
   1335	btstb	#snan_bit,FPSR_EXCEPT(%a6)	|if snan is set, and
   1336	beqs	fmoveinc		|enabled, force restore
   1337	btstb	#snan_bit,FPCR_ENABLE(%a6) |and don't overwrite
   1338	beqs	fmoveinc		|the dest
   1339	movel	ETEMP_EX(%a6),FPTEMP_EX(%a6)	|set up fptemp sign for
   1340|						;snan handler
   1341	tstb	ETEMP(%a6)		|check for negative
   1342	blts	snan_neg
   1343	rts
   1344snan_neg:
   1345	orl	#neg_bit,USER_FPSR(%a6)	|snan is negative; set N
   1346	rts
   1347fmoveinc:
   1348	clrw	NMNEXC(%a6)
   1349	bclrb	#E1,E_BYTE(%a6)
   1350	moveb	STAG(%a6),%d0		|check if stag is inf
   1351	andib	#0xe0,%d0
   1352	cmpib	#0x40,%d0
   1353	bnes	fminc_cnan
   1354	orl	#inf_mask,USER_FPSR(%a6) |if inf, nothing yet has set I
   1355	tstw	LOCAL_EX(%a0)		|check sign
   1356	bges	fminc_con
   1357	orl	#neg_mask,USER_FPSR(%a6)
   1358	bra	fminc_con
   1359fminc_cnan:
   1360	cmpib	#0x60,%d0			|check if stag is NaN
   1361	bnes	fminc_czero
   1362	orl	#nan_mask,USER_FPSR(%a6) |if nan, nothing yet has set NaN
   1363	movel	ETEMP_EX(%a6),FPTEMP_EX(%a6)	|set up fptemp sign for
   1364|						;snan handler
   1365	tstw	LOCAL_EX(%a0)		|check sign
   1366	bges	fminc_con
   1367	orl	#neg_mask,USER_FPSR(%a6)
   1368	bra	fminc_con
   1369fminc_czero:
   1370	cmpib	#0x20,%d0			|check if zero
   1371	bnes	fminc_con
   1372	orl	#z_mask,USER_FPSR(%a6)	|if zero, set Z
   1373	tstw	LOCAL_EX(%a0)		|check sign
   1374	bges	fminc_con
   1375	orl	#neg_mask,USER_FPSR(%a6)
   1376fminc_con:
   1377	bfextu	CMDREG1B(%a6){#6:#3},%d0	|extract fp destination register
   1378	cmpib	#3,%d0
   1379	bles	fp0123			|check if dest is fp0-fp3
   1380	movel	#7,%d1
   1381	subl	%d0,%d1
   1382	clrl	%d0
   1383	bsetl	%d1,%d0
   1384	fmovemx ETEMP(%a6),%d0
   1385	rts
   1386
   1387fp0123:
   1388	cmpib	#0,%d0
   1389	beqs	fp0_dst
   1390	cmpib	#1,%d0
   1391	beqs	fp1_dst
   1392	cmpib	#2,%d0
   1393	beqs	fp2_dst
   1394fp3_dst:
   1395	movel	ETEMP_EX(%a6),USER_FP3(%a6)
   1396	movel	ETEMP_HI(%a6),USER_FP3+4(%a6)
   1397	movel	ETEMP_LO(%a6),USER_FP3+8(%a6)
   1398	rts
   1399fp2_dst:
   1400	movel	ETEMP_EX(%a6),USER_FP2(%a6)
   1401	movel	ETEMP_HI(%a6),USER_FP2+4(%a6)
   1402	movel	ETEMP_LO(%a6),USER_FP2+8(%a6)
   1403	rts
   1404fp1_dst:
   1405	movel	ETEMP_EX(%a6),USER_FP1(%a6)
   1406	movel	ETEMP_HI(%a6),USER_FP1+4(%a6)
   1407	movel	ETEMP_LO(%a6),USER_FP1+8(%a6)
   1408	rts
   1409fp0_dst:
   1410	movel	ETEMP_EX(%a6),USER_FP0(%a6)
   1411	movel	ETEMP_HI(%a6),USER_FP0+4(%a6)
   1412	movel	ETEMP_LO(%a6),USER_FP0+8(%a6)
   1413	rts
   1414
   1415opclass3:
   1416	st	CU_ONLY(%a6)
   1417	movew	CMDREG1B(%a6),%d0	|check if packed moveout
   1418	andiw	#0x0c00,%d0	|isolate last 2 bits of size field
   1419	cmpiw	#0x0c00,%d0	|if size is 011 or 111, it is packed
   1420	beq	pack_out	|else it is norm or denorm
   1421	bra	mv_out
   1422
   1423
   1424|
   1425|	MOVE OUT
   1426|
   1427
   1428mv_tbl:
   1429	.long	li
   1430	.long	sgp
   1431	.long	xp
   1432	.long	mvout_end	|should never be taken
   1433	.long	wi
   1434	.long	dp
   1435	.long	bi
   1436	.long	mvout_end	|should never be taken
   1437mv_out:
   1438	bfextu	CMDREG1B(%a6){#3:#3},%d1	|put source specifier in d1
   1439	leal	mv_tbl,%a0
   1440	movel	%a0@(%d1:l:4),%a0
   1441	jmp	(%a0)
   1442
   1443|
   1444| This exit is for move-out to memory.  The aunfl bit is
   1445| set if the result is inex and unfl is signalled.
   1446|
   1447mvout_end:
   1448	btstb	#inex2_bit,FPSR_EXCEPT(%a6)
   1449	beqs	no_aufl
   1450	btstb	#unfl_bit,FPSR_EXCEPT(%a6)
   1451	beqs	no_aufl
   1452	bsetb	#aunfl_bit,FPSR_AEXCEPT(%a6)
   1453no_aufl:
   1454	clrw	NMNEXC(%a6)
   1455	bclrb	#E1,E_BYTE(%a6)
   1456	fmovel	#0,%FPSR			|clear any cc bits from res_func
   1457|
   1458| Return ETEMP to extended format from internal extended format so
   1459| that gen_except will have a correctly signed value for ovfl/unfl
   1460| handlers.
   1461|
   1462	bfclr	ETEMP_SGN(%a6){#0:#8}
   1463	beqs	mvout_con
   1464	bsetb	#sign_bit,ETEMP_EX(%a6)
   1465mvout_con:
   1466	rts
   1467|
   1468| This exit is for move-out to int register.  The aunfl bit is
   1469| not set in any case for this move.
   1470|
   1471mvouti_end:
   1472	clrw	NMNEXC(%a6)
   1473	bclrb	#E1,E_BYTE(%a6)
   1474	fmovel	#0,%FPSR			|clear any cc bits from res_func
   1475|
   1476| Return ETEMP to extended format from internal extended format so
   1477| that gen_except will have a correctly signed value for ovfl/unfl
   1478| handlers.
   1479|
   1480	bfclr	ETEMP_SGN(%a6){#0:#8}
   1481	beqs	mvouti_con
   1482	bsetb	#sign_bit,ETEMP_EX(%a6)
   1483mvouti_con:
   1484	rts
   1485|
   1486| li is used to handle a long integer source specifier
   1487|
   1488
   1489li:
   1490	moveql	#4,%d0		|set byte count
   1491
   1492	btstb	#7,STAG(%a6)	|check for extended denorm
   1493	bne	int_dnrm	|if so, branch
   1494
   1495	fmovemx ETEMP(%a6),%fp0-%fp0
   1496	fcmpd	#0x41dfffffffc00000,%fp0
   1497| 41dfffffffc00000 in dbl prec = 401d0000fffffffe00000000 in ext prec
   1498	fbge	lo_plrg
   1499	fcmpd	#0xc1e0000000000000,%fp0
   1500| c1e0000000000000 in dbl prec = c01e00008000000000000000 in ext prec
   1501	fble	lo_nlrg
   1502|
   1503| at this point, the answer is between the largest pos and neg values
   1504|
   1505	movel	USER_FPCR(%a6),%d1	|use user's rounding mode
   1506	andil	#0x30,%d1
   1507	fmovel	%d1,%fpcr
   1508	fmovel	%fp0,L_SCR1(%a6)	|let the 040 perform conversion
   1509	fmovel %fpsr,%d1
   1510	orl	%d1,USER_FPSR(%a6)	|capture inex2/ainex if set
   1511	bra	int_wrt
   1512
   1513
   1514lo_plrg:
   1515	movel	#0x7fffffff,L_SCR1(%a6)	|answer is largest positive int
   1516	fbeq	int_wrt			|exact answer
   1517	fcmpd	#0x41dfffffffe00000,%fp0
   1518| 41dfffffffe00000 in dbl prec = 401d0000ffffffff00000000 in ext prec
   1519	fbge	int_operr		|set operr
   1520	bra	int_inx			|set inexact
   1521
   1522lo_nlrg:
   1523	movel	#0x80000000,L_SCR1(%a6)
   1524	fbeq	int_wrt			|exact answer
   1525	fcmpd	#0xc1e0000000100000,%fp0
   1526| c1e0000000100000 in dbl prec = c01e00008000000080000000 in ext prec
   1527	fblt	int_operr		|set operr
   1528	bra	int_inx			|set inexact
   1529
   1530|
   1531| wi is used to handle a word integer source specifier
   1532|
   1533
   1534wi:
   1535	moveql	#2,%d0		|set byte count
   1536
   1537	btstb	#7,STAG(%a6)	|check for extended denorm
   1538	bne	int_dnrm	|branch if so
   1539
   1540	fmovemx ETEMP(%a6),%fp0-%fp0
   1541	fcmps	#0x46fffe00,%fp0
   1542| 46fffe00 in sgl prec = 400d0000fffe000000000000 in ext prec
   1543	fbge	wo_plrg
   1544	fcmps	#0xc7000000,%fp0
   1545| c7000000 in sgl prec = c00e00008000000000000000 in ext prec
   1546	fble	wo_nlrg
   1547
   1548|
   1549| at this point, the answer is between the largest pos and neg values
   1550|
   1551	movel	USER_FPCR(%a6),%d1	|use user's rounding mode
   1552	andil	#0x30,%d1
   1553	fmovel	%d1,%fpcr
   1554	fmovew	%fp0,L_SCR1(%a6)	|let the 040 perform conversion
   1555	fmovel %fpsr,%d1
   1556	orl	%d1,USER_FPSR(%a6)	|capture inex2/ainex if set
   1557	bra	int_wrt
   1558
   1559wo_plrg:
   1560	movew	#0x7fff,L_SCR1(%a6)	|answer is largest positive int
   1561	fbeq	int_wrt			|exact answer
   1562	fcmps	#0x46ffff00,%fp0
   1563| 46ffff00 in sgl prec = 400d0000ffff000000000000 in ext prec
   1564	fbge	int_operr		|set operr
   1565	bra	int_inx			|set inexact
   1566
   1567wo_nlrg:
   1568	movew	#0x8000,L_SCR1(%a6)
   1569	fbeq	int_wrt			|exact answer
   1570	fcmps	#0xc7000080,%fp0
   1571| c7000080 in sgl prec = c00e00008000800000000000 in ext prec
   1572	fblt	int_operr		|set operr
   1573	bra	int_inx			|set inexact
   1574
   1575|
   1576| bi is used to handle a byte integer source specifier
   1577|
   1578
   1579bi:
   1580	moveql	#1,%d0		|set byte count
   1581
   1582	btstb	#7,STAG(%a6)	|check for extended denorm
   1583	bne	int_dnrm	|branch if so
   1584
   1585	fmovemx ETEMP(%a6),%fp0-%fp0
   1586	fcmps	#0x42fe0000,%fp0
   1587| 42fe0000 in sgl prec = 40050000fe00000000000000 in ext prec
   1588	fbge	by_plrg
   1589	fcmps	#0xc3000000,%fp0
   1590| c3000000 in sgl prec = c00600008000000000000000 in ext prec
   1591	fble	by_nlrg
   1592
   1593|
   1594| at this point, the answer is between the largest pos and neg values
   1595|
   1596	movel	USER_FPCR(%a6),%d1	|use user's rounding mode
   1597	andil	#0x30,%d1
   1598	fmovel	%d1,%fpcr
   1599	fmoveb	%fp0,L_SCR1(%a6)	|let the 040 perform conversion
   1600	fmovel %fpsr,%d1
   1601	orl	%d1,USER_FPSR(%a6)	|capture inex2/ainex if set
   1602	bra	int_wrt
   1603
   1604by_plrg:
   1605	moveb	#0x7f,L_SCR1(%a6)		|answer is largest positive int
   1606	fbeq	int_wrt			|exact answer
   1607	fcmps	#0x42ff0000,%fp0
   1608| 42ff0000 in sgl prec = 40050000ff00000000000000 in ext prec
   1609	fbge	int_operr		|set operr
   1610	bra	int_inx			|set inexact
   1611
   1612by_nlrg:
   1613	moveb	#0x80,L_SCR1(%a6)
   1614	fbeq	int_wrt			|exact answer
   1615	fcmps	#0xc3008000,%fp0
   1616| c3008000 in sgl prec = c00600008080000000000000 in ext prec
   1617	fblt	int_operr		|set operr
   1618	bra	int_inx			|set inexact
   1619
   1620|
   1621| Common integer routines
   1622|
   1623| int_drnrm---account for possible nonzero result for round up with positive
   1624| operand and round down for negative answer.  In the first case (result = 1)
   1625| byte-width (store in d0) of result must be honored.  In the second case,
   1626| -1 in L_SCR1(a6) will cover all contingencies (FMOVE.B/W/L out).
   1627
   1628int_dnrm:
   1629	movel	#0,L_SCR1(%a6)	| initialize result to 0
   1630	bfextu	FPCR_MODE(%a6){#2:#2},%d1	| d1 is the rounding mode
   1631	cmpb	#2,%d1
   1632	bmis	int_inx		| if RN or RZ, done
   1633	bnes	int_rp		| if RP, continue below
   1634	tstw	ETEMP(%a6)	| RM: store -1 in L_SCR1 if src is negative
   1635	bpls	int_inx		| otherwise result is 0
   1636	movel	#-1,L_SCR1(%a6)
   1637	bras	int_inx
   1638int_rp:
   1639	tstw	ETEMP(%a6)	| RP: store +1 of proper width in L_SCR1 if
   1640|				; source is greater than 0
   1641	bmis	int_inx		| otherwise, result is 0
   1642	lea	L_SCR1(%a6),%a1	| a1 is address of L_SCR1
   1643	addal	%d0,%a1		| offset by destination width -1
   1644	subal	#1,%a1
   1645	bsetb	#0,(%a1)		| set low bit at a1 address
   1646int_inx:
   1647	oril	#inx2a_mask,USER_FPSR(%a6)
   1648	bras	int_wrt
   1649int_operr:
   1650	fmovemx %fp0-%fp0,FPTEMP(%a6)	|FPTEMP must contain the extended
   1651|				;precision source that needs to be
   1652|				;converted to integer this is required
   1653|				;if the operr exception is enabled.
   1654|				;set operr/aiop (no inex2 on int ovfl)
   1655
   1656	oril	#opaop_mask,USER_FPSR(%a6)
   1657|				;fall through to perform int_wrt
   1658int_wrt:
   1659	movel	EXC_EA(%a6),%a1	|load destination address
   1660	tstl	%a1		|check to see if it is a dest register
   1661	beqs	wrt_dn		|write data register
   1662	lea	L_SCR1(%a6),%a0	|point to supervisor source address
   1663	bsrl	mem_write
   1664	bra	mvouti_end
   1665
   1666wrt_dn:
   1667	movel	%d0,-(%sp)	|d0 currently contains the size to write
   1668	bsrl	get_fline	|get_fline returns Dn in d0
   1669	andiw	#0x7,%d0		|isolate register
   1670	movel	(%sp)+,%d1	|get size
   1671	cmpil	#4,%d1		|most frequent case
   1672	beqs	sz_long
   1673	cmpil	#2,%d1
   1674	bnes	sz_con
   1675	orl	#8,%d0		|add 'word' size to register#
   1676	bras	sz_con
   1677sz_long:
   1678	orl	#0x10,%d0		|add 'long' size to register#
   1679sz_con:
   1680	movel	%d0,%d1		|reg_dest expects size:reg in d1
   1681	bsrl	reg_dest	|load proper data register
   1682	bra	mvouti_end
   1683xp:
   1684	lea	ETEMP(%a6),%a0
   1685	bclrb	#sign_bit,LOCAL_EX(%a0)
   1686	sne	LOCAL_SGN(%a0)
   1687	btstb	#7,STAG(%a6)	|check for extended denorm
   1688	bne	xdnrm
   1689	clrl	%d0
   1690	bras	do_fp		|do normal case
   1691sgp:
   1692	lea	ETEMP(%a6),%a0
   1693	bclrb	#sign_bit,LOCAL_EX(%a0)
   1694	sne	LOCAL_SGN(%a0)
   1695	btstb	#7,STAG(%a6)	|check for extended denorm
   1696	bne	sp_catas	|branch if so
   1697	movew	LOCAL_EX(%a0),%d0
   1698	lea	sp_bnds,%a1
   1699	cmpw	(%a1),%d0
   1700	blt	sp_under
   1701	cmpw	2(%a1),%d0
   1702	bgt	sp_over
   1703	movel	#1,%d0		|set destination format to single
   1704	bras	do_fp		|do normal case
   1705dp:
   1706	lea	ETEMP(%a6),%a0
   1707	bclrb	#sign_bit,LOCAL_EX(%a0)
   1708	sne	LOCAL_SGN(%a0)
   1709
   1710	btstb	#7,STAG(%a6)	|check for extended denorm
   1711	bne	dp_catas	|branch if so
   1712
   1713	movew	LOCAL_EX(%a0),%d0
   1714	lea	dp_bnds,%a1
   1715
   1716	cmpw	(%a1),%d0
   1717	blt	dp_under
   1718	cmpw	2(%a1),%d0
   1719	bgt	dp_over
   1720
   1721	movel	#2,%d0		|set destination format to double
   1722|				;fall through to do_fp
   1723|
   1724do_fp:
   1725	bfextu	FPCR_MODE(%a6){#2:#2},%d1	|rnd mode in d1
   1726	swap	%d0			|rnd prec in upper word
   1727	addl	%d0,%d1			|d1 has PREC/MODE info
   1728
   1729	clrl	%d0			|clear g,r,s
   1730
   1731	bsrl	round			|round
   1732
   1733	movel	%a0,%a1
   1734	movel	EXC_EA(%a6),%a0
   1735
   1736	bfextu	CMDREG1B(%a6){#3:#3},%d1	|extract destination format
   1737|					;at this point only the dest
   1738|					;formats sgl, dbl, ext are
   1739|					;possible
   1740	cmpb	#2,%d1
   1741	bgts	ddbl			|double=5, extended=2, single=1
   1742	bnes	dsgl
   1743|					;fall through to dext
   1744dext:
   1745	bsrl	dest_ext
   1746	bra	mvout_end
   1747dsgl:
   1748	bsrl	dest_sgl
   1749	bra	mvout_end
   1750ddbl:
   1751	bsrl	dest_dbl
   1752	bra	mvout_end
   1753
   1754|
   1755| Handle possible denorm or catastrophic underflow cases here
   1756|
   1757xdnrm:
   1758	bsr	set_xop		|initialize WBTEMP
   1759	bsetb	#wbtemp15_bit,WB_BYTE(%a6) |set wbtemp15
   1760
   1761	movel	%a0,%a1
   1762	movel	EXC_EA(%a6),%a0	|a0 has the destination pointer
   1763	bsrl	dest_ext	|store to memory
   1764	bsetb	#unfl_bit,FPSR_EXCEPT(%a6)
   1765	bra	mvout_end
   1766
   1767sp_under:
   1768	bsetb	#etemp15_bit,STAG(%a6)
   1769
   1770	cmpw	4(%a1),%d0
   1771	blts	sp_catas	|catastrophic underflow case
   1772
   1773	movel	#1,%d0		|load in round precision
   1774	movel	#sgl_thresh,%d1	|load in single denorm threshold
   1775	bsrl	dpspdnrm	|expects d1 to have the proper
   1776|				;denorm threshold
   1777	bsrl	dest_sgl	|stores value to destination
   1778	bsetb	#unfl_bit,FPSR_EXCEPT(%a6)
   1779	bra	mvout_end	|exit
   1780
   1781dp_under:
   1782	bsetb	#etemp15_bit,STAG(%a6)
   1783
   1784	cmpw	4(%a1),%d0
   1785	blts	dp_catas	|catastrophic underflow case
   1786
   1787	movel	#dbl_thresh,%d1	|load in double precision threshold
   1788	movel	#2,%d0
   1789	bsrl	dpspdnrm	|expects d1 to have proper
   1790|				;denorm threshold
   1791|				;expects d0 to have round precision
   1792	bsrl	dest_dbl	|store value to destination
   1793	bsetb	#unfl_bit,FPSR_EXCEPT(%a6)
   1794	bra	mvout_end	|exit
   1795
   1796|
   1797| Handle catastrophic underflow cases here
   1798|
   1799sp_catas:
   1800| Temp fix for z bit set in unf_sub
   1801	movel	USER_FPSR(%a6),-(%a7)
   1802
   1803	movel	#1,%d0		|set round precision to sgl
   1804
   1805	bsrl	unf_sub		|a0 points to result
   1806
   1807	movel	(%a7)+,USER_FPSR(%a6)
   1808
   1809	movel	#1,%d0
   1810	subw	%d0,LOCAL_EX(%a0) |account for difference between
   1811|				;denorm/norm bias
   1812
   1813	movel	%a0,%a1		|a1 has the operand input
   1814	movel	EXC_EA(%a6),%a0	|a0 has the destination pointer
   1815
   1816	bsrl	dest_sgl	|store the result
   1817	oril	#unfinx_mask,USER_FPSR(%a6)
   1818	bra	mvout_end
   1819
   1820dp_catas:
   1821| Temp fix for z bit set in unf_sub
   1822	movel	USER_FPSR(%a6),-(%a7)
   1823
   1824	movel	#2,%d0		|set round precision to dbl
   1825	bsrl	unf_sub		|a0 points to result
   1826
   1827	movel	(%a7)+,USER_FPSR(%a6)
   1828
   1829	movel	#1,%d0
   1830	subw	%d0,LOCAL_EX(%a0) |account for difference between
   1831|				;denorm/norm bias
   1832
   1833	movel	%a0,%a1		|a1 has the operand input
   1834	movel	EXC_EA(%a6),%a0	|a0 has the destination pointer
   1835
   1836	bsrl	dest_dbl	|store the result
   1837	oril	#unfinx_mask,USER_FPSR(%a6)
   1838	bra	mvout_end
   1839
   1840|
   1841| Handle catastrophic overflow cases here
   1842|
   1843sp_over:
   1844| Temp fix for z bit set in unf_sub
   1845	movel	USER_FPSR(%a6),-(%a7)
   1846
   1847	movel	#1,%d0
   1848	leal	FP_SCR1(%a6),%a0	|use FP_SCR1 for creating result
   1849	movel	ETEMP_EX(%a6),(%a0)
   1850	movel	ETEMP_HI(%a6),4(%a0)
   1851	movel	ETEMP_LO(%a6),8(%a0)
   1852	bsrl	ovf_res
   1853
   1854	movel	(%a7)+,USER_FPSR(%a6)
   1855
   1856	movel	%a0,%a1
   1857	movel	EXC_EA(%a6),%a0
   1858	bsrl	dest_sgl
   1859	orl	#ovfinx_mask,USER_FPSR(%a6)
   1860	bra	mvout_end
   1861
   1862dp_over:
   1863| Temp fix for z bit set in ovf_res
   1864	movel	USER_FPSR(%a6),-(%a7)
   1865
   1866	movel	#2,%d0
   1867	leal	FP_SCR1(%a6),%a0	|use FP_SCR1 for creating result
   1868	movel	ETEMP_EX(%a6),(%a0)
   1869	movel	ETEMP_HI(%a6),4(%a0)
   1870	movel	ETEMP_LO(%a6),8(%a0)
   1871	bsrl	ovf_res
   1872
   1873	movel	(%a7)+,USER_FPSR(%a6)
   1874
   1875	movel	%a0,%a1
   1876	movel	EXC_EA(%a6),%a0
   1877	bsrl	dest_dbl
   1878	orl	#ovfinx_mask,USER_FPSR(%a6)
   1879	bra	mvout_end
   1880
   1881|
   1882|	DPSPDNRM
   1883|
   1884| This subroutine takes an extended normalized number and denormalizes
   1885| it to the given round precision. This subroutine also decrements
   1886| the input operand's exponent by 1 to account for the fact that
   1887| dest_sgl or dest_dbl expects a normalized number's bias.
   1888|
   1889| Input: a0  points to a normalized number in internal extended format
   1890|	 d0  is the round precision (=1 for sgl; =2 for dbl)
   1891|	 d1  is the single precision or double precision
   1892|	     denorm threshold
   1893|
   1894| Output: (In the format for dest_sgl or dest_dbl)
   1895|	 a0   points to the destination
   1896|	 a1   points to the operand
   1897|
   1898| Exceptions: Reports inexact 2 exception by setting USER_FPSR bits
   1899|
   1900dpspdnrm:
   1901	movel	%d0,-(%a7)	|save round precision
   1902	clrl	%d0		|clear initial g,r,s
   1903	bsrl	dnrm_lp		|careful with d0, it's needed by round
   1904
   1905	bfextu	FPCR_MODE(%a6){#2:#2},%d1 |get rounding mode
   1906	swap	%d1
   1907	movew	2(%a7),%d1	|set rounding precision
   1908	swap	%d1		|at this point d1 has PREC/MODE info
   1909	bsrl	round		|round result, sets the inex bit in
   1910|				;USER_FPSR if needed
   1911
   1912	movew	#1,%d0
   1913	subw	%d0,LOCAL_EX(%a0) |account for difference in denorm
   1914|				;vs norm bias
   1915
   1916	movel	%a0,%a1		|a1 has the operand input
   1917	movel	EXC_EA(%a6),%a0	|a0 has the destination pointer
   1918	addw	#4,%a7		|pop stack
   1919	rts
   1920|
   1921| SET_XOP initialized WBTEMP with the value pointed to by a0
   1922| input: a0 points to input operand in the internal extended format
   1923|
   1924set_xop:
   1925	movel	LOCAL_EX(%a0),WBTEMP_EX(%a6)
   1926	movel	LOCAL_HI(%a0),WBTEMP_HI(%a6)
   1927	movel	LOCAL_LO(%a0),WBTEMP_LO(%a6)
   1928	bfclr	WBTEMP_SGN(%a6){#0:#8}
   1929	beqs	sxop
   1930	bsetb	#sign_bit,WBTEMP_EX(%a6)
   1931sxop:
   1932	bfclr	STAG(%a6){#5:#4}	|clear wbtm66,wbtm1,wbtm0,sbit
   1933	rts
   1934|
   1935|	P_MOVE
   1936|
   1937p_movet:
   1938	.long	p_move
   1939	.long	p_movez
   1940	.long	p_movei
   1941	.long	p_moven
   1942	.long	p_move
   1943p_regd:
   1944	.long	p_dyd0
   1945	.long	p_dyd1
   1946	.long	p_dyd2
   1947	.long	p_dyd3
   1948	.long	p_dyd4
   1949	.long	p_dyd5
   1950	.long	p_dyd6
   1951	.long	p_dyd7
   1952
   1953pack_out:
   1954	leal	p_movet,%a0	|load jmp table address
   1955	movew	STAG(%a6),%d0	|get source tag
   1956	bfextu	%d0{#16:#3},%d0	|isolate source bits
   1957	movel	(%a0,%d0.w*4),%a0	|load a0 with routine label for tag
   1958	jmp	(%a0)		|go to the routine
   1959
   1960p_write:
   1961	movel	#0x0c,%d0	|get byte count
   1962	movel	EXC_EA(%a6),%a1	|get the destination address
   1963	bsr	mem_write	|write the user's destination
   1964	moveb	#0,CU_SAVEPC(%a6) |set the cu save pc to all 0's
   1965
   1966|
   1967| Also note that the dtag must be set to norm here - this is because
   1968| the 040 uses the dtag to execute the correct microcode.
   1969|
   1970        bfclr    DTAG(%a6){#0:#3}  |set dtag to norm
   1971
   1972	rts
   1973
   1974| Notes on handling of special case (zero, inf, and nan) inputs:
   1975|	1. Operr is not signalled if the k-factor is greater than 18.
   1976|	2. Per the manual, status bits are not set.
   1977|
   1978
   1979p_move:
   1980	movew	CMDREG1B(%a6),%d0
   1981	btstl	#kfact_bit,%d0	|test for dynamic k-factor
   1982	beqs	statick		|if clear, k-factor is static
   1983dynamick:
   1984	bfextu	%d0{#25:#3},%d0	|isolate register for dynamic k-factor
   1985	lea	p_regd,%a0
   1986	movel	%a0@(%d0:l:4),%a0
   1987	jmp	(%a0)
   1988statick:
   1989	andiw	#0x007f,%d0	|get k-factor
   1990	bfexts	%d0{#25:#7},%d0	|sign extend d0 for bindec
   1991	leal	ETEMP(%a6),%a0	|a0 will point to the packed decimal
   1992	bsrl	bindec		|perform the convert; data at a6
   1993	leal	FP_SCR1(%a6),%a0	|load a0 with result address
   1994	bral	p_write
   1995p_movez:
   1996	leal	ETEMP(%a6),%a0	|a0 will point to the packed decimal
   1997	clrw	2(%a0)		|clear lower word of exp
   1998	clrl	4(%a0)		|load second lword of ZERO
   1999	clrl	8(%a0)		|load third lword of ZERO
   2000	bra	p_write		|go write results
   2001p_movei:
   2002	fmovel	#0,%FPSR		|clear aiop
   2003	leal	ETEMP(%a6),%a0	|a0 will point to the packed decimal
   2004	clrw	2(%a0)		|clear lower word of exp
   2005	bra	p_write		|go write the result
   2006p_moven:
   2007	leal	ETEMP(%a6),%a0	|a0 will point to the packed decimal
   2008	clrw	2(%a0)		|clear lower word of exp
   2009	bra	p_write		|go write the result
   2010
   2011|
   2012| Routines to read the dynamic k-factor from Dn.
   2013|
   2014p_dyd0:
   2015	movel	USER_D0(%a6),%d0
   2016	bras	statick
   2017p_dyd1:
   2018	movel	USER_D1(%a6),%d0
   2019	bras	statick
   2020p_dyd2:
   2021	movel	%d2,%d0
   2022	bras	statick
   2023p_dyd3:
   2024	movel	%d3,%d0
   2025	bras	statick
   2026p_dyd4:
   2027	movel	%d4,%d0
   2028	bras	statick
   2029p_dyd5:
   2030	movel	%d5,%d0
   2031	bras	statick
   2032p_dyd6:
   2033	movel	%d6,%d0
   2034	bra	statick
   2035p_dyd7:
   2036	movel	%d7,%d0
   2037	bra	statick
   2038
   2039	|end