fp_util.S - cachepc-linux - Fork of AMDESE/linux with modifications for CachePC side-channel attack

	cachepc-linux Fork of AMDESE/linux with modifications for CachePC side-channel attack
	git clone https://git.sinitax.com/sinitax/cachepc-linux
	Log \| Files \| Refs \| README \| LICENSE \| sfeed.txt
fp_util.S (36342B)
      1/*
      2 * fp_util.S
      3 *
      4 * Copyright Roman Zippel, 1997.  All rights reserved.
      5 *
      6 * Redistribution and use in source and binary forms, with or without
      7 * modification, are permitted provided that the following conditions
      8 * are met:
      9 * 1. Redistributions of source code must retain the above copyright
     10 *    notice, and the entire permission notice in its entirety,
     11 *    including the disclaimer of warranties.
     12 * 2. Redistributions in binary form must reproduce the above copyright
     13 *    notice, this list of conditions and the following disclaimer in the
     14 *    documentation and/or other materials provided with the distribution.
     15 * 3. The name of the author may not be used to endorse or promote
     16 *    products derived from this software without specific prior
     17 *    written permission.
     18 *
     19 * ALTERNATIVELY, this product may be distributed under the terms of
     20 * the GNU General Public License, in which case the provisions of the GPL are
     21 * required INSTEAD OF the above restrictions.  (This clause is
     22 * necessary due to a potential bad interaction between the GPL and
     23 * the restrictions contained in a BSD-style copyright.)
     24 *
     25 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
     26 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     27 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     28 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
     29 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
     30 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
     31 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
     33 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
     35 * OF THE POSSIBILITY OF SUCH DAMAGE.
     36 */
     37
     38#include "fp_emu.h"
     39
     40/*
     41 * Here are lots of conversion and normalization functions mainly
     42 * used by fp_scan.S
     43 * Note that these functions are optimized for "normal" numbers,
     44 * these are handled first and exit as fast as possible, this is
     45 * especially important for fp_normalize_ext/fp_conv_ext2ext, as
     46 * it's called very often.
     47 * The register usage is optimized for fp_scan.S and which register
     48 * is currently at that time unused, be careful if you want change
     49 * something here. %d0 and %d1 is always usable, sometimes %d2 (or
     50 * only the lower half) most function have to return the %a0
     51 * unmodified, so that the caller can immediately reuse it.
     52 */
     53
     54	.globl	fp_ill, fp_end
     55
     56	| exits from fp_scan:
     57	| illegal instruction
     58fp_ill:
     59	printf	,"fp_illegal\n"
     60	rts
     61	| completed instruction
     62fp_end:
     63	tst.l	(TASK_MM-8,%a2)
     64	jmi	1f
     65	tst.l	(TASK_MM-4,%a2)
     66	jmi	1f
     67	tst.l	(TASK_MM,%a2)
     68	jpl	2f
     691:	printf	,"oops:%p,%p,%p\n",3,%a2@(TASK_MM-8),%a2@(TASK_MM-4),%a2@(TASK_MM)
     702:	clr.l	%d0
     71	rts
     72
     73	.globl	fp_conv_long2ext, fp_conv_single2ext
     74	.globl	fp_conv_double2ext, fp_conv_ext2ext
     75	.globl	fp_normalize_ext, fp_normalize_double
     76	.globl	fp_normalize_single, fp_normalize_single_fast
     77	.globl	fp_conv_ext2double, fp_conv_ext2single
     78	.globl	fp_conv_ext2long, fp_conv_ext2short
     79	.globl	fp_conv_ext2byte
     80	.globl	fp_finalrounding_single, fp_finalrounding_single_fast
     81	.globl	fp_finalrounding_double
     82	.globl	fp_finalrounding, fp_finaltest, fp_final
     83
     84/*
     85 * First several conversion functions from a source operand
     86 * into the extended format. Note, that only fp_conv_ext2ext
     87 * normalizes the number and is always called after the other
     88 * conversion functions, which only move the information into
     89 * fp_ext structure.
     90 */
     91
     92	| fp_conv_long2ext:
     93	|
     94	| args:	%d0 = source (32-bit long)
     95	|	%a0 = destination (ptr to struct fp_ext)
     96
     97fp_conv_long2ext:
     98	printf	PCONV,"l2e: %p -> %p(",2,%d0,%a0
     99	clr.l	%d1			| sign defaults to zero
    100	tst.l	%d0
    101	jeq	fp_l2e_zero		| is source zero?
    102	jpl	1f			| positive?
    103	moveq	#1,%d1
    104	neg.l	%d0
    1051:	swap	%d1
    106	move.w	#0x3fff+31,%d1
    107	move.l	%d1,(%a0)+		| set sign / exp
    108	move.l	%d0,(%a0)+		| set mantissa
    109	clr.l	(%a0)
    110	subq.l	#8,%a0			| restore %a0
    111	printx	PCONV,%a0@
    112	printf	PCONV,")\n"
    113	rts
    114	| source is zero
    115fp_l2e_zero:
    116	clr.l	(%a0)+
    117	clr.l	(%a0)+
    118	clr.l	(%a0)
    119	subq.l	#8,%a0
    120	printx	PCONV,%a0@
    121	printf	PCONV,")\n"
    122	rts
    123
    124	| fp_conv_single2ext
    125	| args:	%d0 = source (single-precision fp value)
    126	|	%a0 = dest (struct fp_ext *)
    127
    128fp_conv_single2ext:
    129	printf	PCONV,"s2e: %p -> %p(",2,%d0,%a0
    130	move.l	%d0,%d1
    131	lsl.l	#8,%d0			| shift mantissa
    132	lsr.l	#8,%d1			| exponent / sign
    133	lsr.l	#7,%d1
    134	lsr.w	#8,%d1
    135	jeq	fp_s2e_small		| zero / denormal?
    136	cmp.w	#0xff,%d1		| NaN / Inf?
    137	jeq	fp_s2e_large
    138	bset	#31,%d0			| set explizit bit
    139	add.w	#0x3fff-0x7f,%d1	| re-bias the exponent.
    1409:	move.l	%d1,(%a0)+		| fp_ext.sign, fp_ext.exp
    141	move.l	%d0,(%a0)+		| high lword of fp_ext.mant
    142	clr.l	(%a0)			| low lword = 0
    143	subq.l	#8,%a0
    144	printx	PCONV,%a0@
    145	printf	PCONV,")\n"
    146	rts
    147	| zeros and denormalized
    148fp_s2e_small:
    149	| exponent is zero, so explizit bit is already zero too
    150	tst.l	%d0
    151	jeq	9b
    152	move.w	#0x4000-0x7f,%d1
    153	jra	9b
    154	| infinities and NAN
    155fp_s2e_large:
    156	bclr	#31,%d0			| clear explizit bit
    157	move.w	#0x7fff,%d1
    158	jra	9b
    159
    160fp_conv_double2ext:
    161#ifdef FPU_EMU_DEBUG
    162	getuser.l %a1@(0),%d0,fp_err_ua2,%a1
    163	getuser.l %a1@(4),%d1,fp_err_ua2,%a1
    164	printf	PCONV,"d2e: %p%p -> %p(",3,%d0,%d1,%a0
    165#endif
    166	getuser.l (%a1)+,%d0,fp_err_ua2,%a1
    167	move.l	%d0,%d1
    168	lsl.l	#8,%d0			| shift high mantissa
    169	lsl.l	#3,%d0
    170	lsr.l	#8,%d1			| exponent / sign
    171	lsr.l	#7,%d1
    172	lsr.w	#5,%d1
    173	jeq	fp_d2e_small		| zero / denormal?
    174	cmp.w	#0x7ff,%d1		| NaN / Inf?
    175	jeq	fp_d2e_large
    176	bset	#31,%d0			| set explizit bit
    177	add.w	#0x3fff-0x3ff,%d1	| re-bias the exponent.
    1789:	move.l	%d1,(%a0)+		| fp_ext.sign, fp_ext.exp
    179	move.l	%d0,(%a0)+
    180	getuser.l (%a1)+,%d0,fp_err_ua2,%a1
    181	move.l	%d0,%d1
    182	lsl.l	#8,%d0
    183	lsl.l	#3,%d0
    184	move.l	%d0,(%a0)
    185	moveq	#21,%d0
    186	lsr.l	%d0,%d1
    187	or.l	%d1,-(%a0)
    188	subq.l	#4,%a0
    189	printx	PCONV,%a0@
    190	printf	PCONV,")\n"
    191	rts
    192	| zeros and denormalized
    193fp_d2e_small:
    194	| exponent is zero, so explizit bit is already zero too
    195	tst.l	%d0
    196	jeq	9b
    197	move.w	#0x4000-0x3ff,%d1
    198	jra	9b
    199	| infinities and NAN
    200fp_d2e_large:
    201	bclr	#31,%d0			| clear explizit bit
    202	move.w	#0x7fff,%d1
    203	jra	9b
    204
    205	| fp_conv_ext2ext:
    206	| originally used to get longdouble from userspace, now it's
    207	| called before arithmetic operations to make sure the number
    208	| is normalized [maybe rename it?].
    209	| args:	%a0 = dest (struct fp_ext *)
    210	| returns 0 in %d0 for a NaN, otherwise 1
    211
    212fp_conv_ext2ext:
    213	printf	PCONV,"e2e: %p(",1,%a0
    214	printx	PCONV,%a0@
    215	printf	PCONV,"), "
    216	move.l	(%a0)+,%d0
    217	cmp.w	#0x7fff,%d0		| Inf / NaN?
    218	jeq	fp_e2e_large
    219	move.l	(%a0),%d0
    220	jpl	fp_e2e_small		| zero / denorm?
    221	| The high bit is set, so normalization is irrelevant.
    222fp_e2e_checkround:
    223	subq.l	#4,%a0
    224#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
    225	move.b	(%a0),%d0
    226	jne	fp_e2e_round
    227#endif
    228	printf	PCONV,"%p(",1,%a0
    229	printx	PCONV,%a0@
    230	printf	PCONV,")\n"
    231	moveq	#1,%d0
    232	rts
    233#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
    234fp_e2e_round:
    235	fp_set_sr FPSR_EXC_INEX2
    236	clr.b	(%a0)
    237	move.w	(FPD_RND,FPDATA),%d2
    238	jne	fp_e2e_roundother	| %d2 == 0, round to nearest
    239	tst.b	%d0			| test guard bit
    240	jpl	9f			| zero is closer
    241	btst	#0,(11,%a0)		| test lsb bit
    242	jne	fp_e2e_doroundup	| round to infinity
    243	lsl.b	#1,%d0			| check low bits
    244	jeq	9f			| round to zero
    245fp_e2e_doroundup:
    246	addq.l	#1,(8,%a0)
    247	jcc	9f
    248	addq.l	#1,(4,%a0)
    249	jcc	9f
    250	move.w	#0x8000,(4,%a0)
    251	addq.w	#1,(2,%a0)
    2529:	printf	PNORM,"%p(",1,%a0
    253	printx	PNORM,%a0@
    254	printf	PNORM,")\n"
    255	rts
    256fp_e2e_roundother:
    257	subq.w	#2,%d2
    258	jcs	9b			| %d2 < 2, round to zero
    259	jhi	1f			| %d2 > 2, round to +infinity
    260	tst.b	(1,%a0)			| to -inf
    261	jne	fp_e2e_doroundup	| negative, round to infinity
    262	jra	9b			| positive, round to zero
    2631:	tst.b	(1,%a0)			| to +inf
    264	jeq	fp_e2e_doroundup	| positive, round to infinity
    265	jra	9b			| negative, round to zero
    266#endif
    267	| zeros and subnormals:
    268	| try to normalize these anyway.
    269fp_e2e_small:
    270	jne	fp_e2e_small1		| high lword zero?
    271	move.l	(4,%a0),%d0
    272	jne	fp_e2e_small2
    273#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
    274	clr.l	%d0
    275	move.b	(-4,%a0),%d0
    276	jne	fp_e2e_small3
    277#endif
    278	| Genuine zero.
    279	clr.w	-(%a0)
    280	subq.l	#2,%a0
    281	printf	PNORM,"%p(",1,%a0
    282	printx	PNORM,%a0@
    283	printf	PNORM,")\n"
    284	moveq	#1,%d0
    285	rts
    286	| definitely subnormal, need to shift all 64 bits
    287fp_e2e_small1:
    288	bfffo	%d0{#0,#32},%d1
    289	move.w	-(%a0),%d2
    290	sub.w	%d1,%d2
    291	jcc	1f
    292	| Pathologically small, denormalize.
    293	add.w	%d2,%d1
    294	clr.w	%d2
    2951:	move.w	%d2,(%a0)+
    296	move.w	%d1,%d2
    297	jeq	fp_e2e_checkround
    298	| fancy 64-bit double-shift begins here
    299	lsl.l	%d2,%d0
    300	move.l	%d0,(%a0)+
    301	move.l	(%a0),%d0
    302	move.l	%d0,%d1
    303	lsl.l	%d2,%d0
    304	move.l	%d0,(%a0)
    305	neg.w	%d2
    306	and.w	#0x1f,%d2
    307	lsr.l	%d2,%d1
    308	or.l	%d1,-(%a0)
    309#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
    310fp_e2e_extra1:
    311	clr.l	%d0
    312	move.b	(-4,%a0),%d0
    313	neg.w	%d2
    314	add.w	#24,%d2
    315	jcc	1f
    316	clr.b	(-4,%a0)
    317	lsl.l	%d2,%d0
    318	or.l	%d0,(4,%a0)
    319	jra	fp_e2e_checkround
    3201:	addq.w	#8,%d2
    321	lsl.l	%d2,%d0
    322	move.b	%d0,(-4,%a0)
    323	lsr.l	#8,%d0
    324	or.l	%d0,(4,%a0)
    325#endif
    326	jra	fp_e2e_checkround
    327	| pathologically small subnormal
    328fp_e2e_small2:
    329	bfffo	%d0{#0,#32},%d1
    330	add.w	#32,%d1
    331	move.w	-(%a0),%d2
    332	sub.w	%d1,%d2
    333	jcc	1f
    334	| Beyond pathologically small, denormalize.
    335	add.w	%d2,%d1
    336	clr.w	%d2
    3371:	move.w	%d2,(%a0)+
    338	ext.l	%d1
    339	jeq	fp_e2e_checkround
    340	clr.l	(4,%a0)
    341	sub.w	#32,%d2
    342	jcs	1f
    343	lsl.l	%d1,%d0			| lower lword needs only to be shifted
    344	move.l	%d0,(%a0)		| into the higher lword
    345#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
    346	clr.l	%d0
    347	move.b	(-4,%a0),%d0
    348	clr.b	(-4,%a0)
    349	neg.w	%d1
    350	add.w	#32,%d1
    351	bfins	%d0,(%a0){%d1,#8}
    352#endif
    353	jra	fp_e2e_checkround
    3541:	neg.w	%d1			| lower lword is splitted between
    355	bfins	%d0,(%a0){%d1,#32}	| higher and lower lword
    356#ifndef CONFIG_M68KFPU_EMU_EXTRAPREC
    357	jra	fp_e2e_checkround
    358#else
    359	move.w	%d1,%d2
    360	jra	fp_e2e_extra1
    361	| These are extremely small numbers, that will mostly end up as zero
    362	| anyway, so this is only important for correct rounding.
    363fp_e2e_small3:
    364	bfffo	%d0{#24,#8},%d1
    365	add.w	#40,%d1
    366	move.w	-(%a0),%d2
    367	sub.w	%d1,%d2
    368	jcc	1f
    369	| Pathologically small, denormalize.
    370	add.w	%d2,%d1
    371	clr.w	%d2
    3721:	move.w	%d2,(%a0)+
    373	ext.l	%d1
    374	jeq	fp_e2e_checkround
    375	cmp.w	#8,%d1
    376	jcs	2f
    3771:	clr.b	(-4,%a0)
    378	sub.w	#64,%d1
    379	jcs	1f
    380	add.w	#24,%d1
    381	lsl.l	%d1,%d0
    382	move.l	%d0,(%a0)
    383	jra	fp_e2e_checkround
    3841:	neg.w	%d1
    385	bfins	%d0,(%a0){%d1,#8}
    386	jra	fp_e2e_checkround
    3872:	lsl.l	%d1,%d0
    388	move.b	%d0,(-4,%a0)
    389	lsr.l	#8,%d0
    390	move.b	%d0,(7,%a0)
    391	jra	fp_e2e_checkround
    392#endif
    3931:	move.l	%d0,%d1			| lower lword is splitted between
    394	lsl.l	%d2,%d0			| higher and lower lword
    395	move.l	%d0,(%a0)
    396	move.l	%d1,%d0
    397	neg.w	%d2
    398	add.w	#32,%d2
    399	lsr.l	%d2,%d0
    400	move.l	%d0,-(%a0)
    401	jra	fp_e2e_checkround
    402	| Infinities and NaNs
    403fp_e2e_large:
    404	move.l	(%a0)+,%d0
    405	jne	3f
    4061:	tst.l	(%a0)
    407	jne	4f
    408	moveq	#1,%d0
    4092:	subq.l	#8,%a0
    410	printf	PCONV,"%p(",1,%a0
    411	printx	PCONV,%a0@
    412	printf	PCONV,")\n"
    413	rts
    414	| we have maybe a NaN, shift off the highest bit
    4153:	lsl.l	#1,%d0
    416	jeq	1b
    417	| we have a NaN, clear the return value
    4184:	clrl	%d0
    419	jra	2b
    420
    421
    422/*
    423 * Normalization functions.  Call these on the output of general
    424 * FP operators, and before any conversion into the destination
    425 * formats. fp_normalize_ext has always to be called first, the
    426 * following conversion functions expect an already normalized
    427 * number.
    428 */
    429
    430	| fp_normalize_ext:
    431	| normalize an extended in extended (unpacked) format, basically
    432	| it does the same as fp_conv_ext2ext, additionally it also does
    433	| the necessary postprocessing checks.
    434	| args:	%a0 (struct fp_ext *)
    435	| NOTE: it does _not_ modify %a0/%a1 and the upper word of %d2
    436
    437fp_normalize_ext:
    438	printf	PNORM,"ne: %p(",1,%a0
    439	printx	PNORM,%a0@
    440	printf	PNORM,"), "
    441	move.l	(%a0)+,%d0
    442	cmp.w	#0x7fff,%d0		| Inf / NaN?
    443	jeq	fp_ne_large
    444	move.l	(%a0),%d0
    445	jpl	fp_ne_small		| zero / denorm?
    446	| The high bit is set, so normalization is irrelevant.
    447fp_ne_checkround:
    448	subq.l	#4,%a0
    449#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
    450	move.b	(%a0),%d0
    451	jne	fp_ne_round
    452#endif
    453	printf	PNORM,"%p(",1,%a0
    454	printx	PNORM,%a0@
    455	printf	PNORM,")\n"
    456	rts
    457#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
    458fp_ne_round:
    459	fp_set_sr FPSR_EXC_INEX2
    460	clr.b	(%a0)
    461	move.w	(FPD_RND,FPDATA),%d2
    462	jne	fp_ne_roundother	| %d2 == 0, round to nearest
    463	tst.b	%d0			| test guard bit
    464	jpl	9f			| zero is closer
    465	btst	#0,(11,%a0)		| test lsb bit
    466	jne	fp_ne_doroundup		| round to infinity
    467	lsl.b	#1,%d0			| check low bits
    468	jeq	9f			| round to zero
    469fp_ne_doroundup:
    470	addq.l	#1,(8,%a0)
    471	jcc	9f
    472	addq.l	#1,(4,%a0)
    473	jcc	9f
    474	addq.w	#1,(2,%a0)
    475	move.w	#0x8000,(4,%a0)
    4769:	printf	PNORM,"%p(",1,%a0
    477	printx	PNORM,%a0@
    478	printf	PNORM,")\n"
    479	rts
    480fp_ne_roundother:
    481	subq.w	#2,%d2
    482	jcs	9b			| %d2 < 2, round to zero
    483	jhi	1f			| %d2 > 2, round to +infinity
    484	tst.b	(1,%a0)			| to -inf
    485	jne	fp_ne_doroundup		| negative, round to infinity
    486	jra	9b			| positive, round to zero
    4871:	tst.b	(1,%a0)			| to +inf
    488	jeq	fp_ne_doroundup		| positive, round to infinity
    489	jra	9b			| negative, round to zero
    490#endif
    491	| Zeros and subnormal numbers
    492	| These are probably merely subnormal, rather than "denormalized"
    493	|  numbers, so we will try to make them normal again.
    494fp_ne_small:
    495	jne	fp_ne_small1		| high lword zero?
    496	move.l	(4,%a0),%d0
    497	jne	fp_ne_small2
    498#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
    499	clr.l	%d0
    500	move.b	(-4,%a0),%d0
    501	jne	fp_ne_small3
    502#endif
    503	| Genuine zero.
    504	clr.w	-(%a0)
    505	subq.l	#2,%a0
    506	printf	PNORM,"%p(",1,%a0
    507	printx	PNORM,%a0@
    508	printf	PNORM,")\n"
    509	rts
    510	| Subnormal.
    511fp_ne_small1:
    512	bfffo	%d0{#0,#32},%d1
    513	move.w	-(%a0),%d2
    514	sub.w	%d1,%d2
    515	jcc	1f
    516	| Pathologically small, denormalize.
    517	add.w	%d2,%d1
    518	clr.w	%d2
    519	fp_set_sr FPSR_EXC_UNFL
    5201:	move.w	%d2,(%a0)+
    521	move.w	%d1,%d2
    522	jeq	fp_ne_checkround
    523	| This is exactly the same 64-bit double shift as seen above.
    524	lsl.l	%d2,%d0
    525	move.l	%d0,(%a0)+
    526	move.l	(%a0),%d0
    527	move.l	%d0,%d1
    528	lsl.l	%d2,%d0
    529	move.l	%d0,(%a0)
    530	neg.w	%d2
    531	and.w	#0x1f,%d2
    532	lsr.l	%d2,%d1
    533	or.l	%d1,-(%a0)
    534#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
    535fp_ne_extra1:
    536	clr.l	%d0
    537	move.b	(-4,%a0),%d0
    538	neg.w	%d2
    539	add.w	#24,%d2
    540	jcc	1f
    541	clr.b	(-4,%a0)
    542	lsl.l	%d2,%d0
    543	or.l	%d0,(4,%a0)
    544	jra	fp_ne_checkround
    5451:	addq.w	#8,%d2
    546	lsl.l	%d2,%d0
    547	move.b	%d0,(-4,%a0)
    548	lsr.l	#8,%d0
    549	or.l	%d0,(4,%a0)
    550#endif
    551	jra	fp_ne_checkround
    552	| May or may not be subnormal, if so, only 32 bits to shift.
    553fp_ne_small2:
    554	bfffo	%d0{#0,#32},%d1
    555	add.w	#32,%d1
    556	move.w	-(%a0),%d2
    557	sub.w	%d1,%d2
    558	jcc	1f
    559	| Beyond pathologically small, denormalize.
    560	add.w	%d2,%d1
    561	clr.w	%d2
    562	fp_set_sr FPSR_EXC_UNFL
    5631:	move.w	%d2,(%a0)+
    564	ext.l	%d1
    565	jeq	fp_ne_checkround
    566	clr.l	(4,%a0)
    567	sub.w	#32,%d1
    568	jcs	1f
    569	lsl.l	%d1,%d0			| lower lword needs only to be shifted
    570	move.l	%d0,(%a0)		| into the higher lword
    571#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
    572	clr.l	%d0
    573	move.b	(-4,%a0),%d0
    574	clr.b	(-4,%a0)
    575	neg.w	%d1
    576	add.w	#32,%d1
    577	bfins	%d0,(%a0){%d1,#8}
    578#endif
    579	jra	fp_ne_checkround
    5801:	neg.w	%d1			| lower lword is splitted between
    581	bfins	%d0,(%a0){%d1,#32}	| higher and lower lword
    582#ifndef CONFIG_M68KFPU_EMU_EXTRAPREC
    583	jra	fp_ne_checkround
    584#else
    585	move.w	%d1,%d2
    586	jra	fp_ne_extra1
    587	| These are extremely small numbers, that will mostly end up as zero
    588	| anyway, so this is only important for correct rounding.
    589fp_ne_small3:
    590	bfffo	%d0{#24,#8},%d1
    591	add.w	#40,%d1
    592	move.w	-(%a0),%d2
    593	sub.w	%d1,%d2
    594	jcc	1f
    595	| Pathologically small, denormalize.
    596	add.w	%d2,%d1
    597	clr.w	%d2
    5981:	move.w	%d2,(%a0)+
    599	ext.l	%d1
    600	jeq	fp_ne_checkround
    601	cmp.w	#8,%d1
    602	jcs	2f
    6031:	clr.b	(-4,%a0)
    604	sub.w	#64,%d1
    605	jcs	1f
    606	add.w	#24,%d1
    607	lsl.l	%d1,%d0
    608	move.l	%d0,(%a0)
    609	jra	fp_ne_checkround
    6101:	neg.w	%d1
    611	bfins	%d0,(%a0){%d1,#8}
    612	jra	fp_ne_checkround
    6132:	lsl.l	%d1,%d0
    614	move.b	%d0,(-4,%a0)
    615	lsr.l	#8,%d0
    616	move.b	%d0,(7,%a0)
    617	jra	fp_ne_checkround
    618#endif
    619	| Infinities and NaNs, again, same as above.
    620fp_ne_large:
    621	move.l	(%a0)+,%d0
    622	jne	3f
    6231:	tst.l	(%a0)
    624	jne	4f
    6252:	subq.l	#8,%a0
    626	printf	PNORM,"%p(",1,%a0
    627	printx	PNORM,%a0@
    628	printf	PNORM,")\n"
    629	rts
    630	| we have maybe a NaN, shift off the highest bit
    6313:	move.l	%d0,%d1
    632	lsl.l	#1,%d1
    633	jne	4f
    634	clr.l	(-4,%a0)
    635	jra	1b
    636	| we have a NaN, test if it is signaling
    6374:	bset	#30,%d0
    638	jne	2b
    639	fp_set_sr FPSR_EXC_SNAN
    640	move.l	%d0,(-4,%a0)
    641	jra	2b
    642
    643	| these next two do rounding as per the IEEE standard.
    644	| values for the rounding modes appear to be:
    645	| 0:	Round to nearest
    646	| 1:	Round to zero
    647	| 2:	Round to -Infinity
    648	| 3:	Round to +Infinity
    649	| both functions expect that fp_normalize was already
    650	| called (and extended argument is already normalized
    651	| as far as possible), these are used if there is different
    652	| rounding precision is selected and before converting
    653	| into single/double
    654
    655	| fp_normalize_double:
    656	| normalize an extended with double (52-bit) precision
    657	| args:	 %a0 (struct fp_ext *)
    658
    659fp_normalize_double:
    660	printf	PNORM,"nd: %p(",1,%a0
    661	printx	PNORM,%a0@
    662	printf	PNORM,"), "
    663	move.l	(%a0)+,%d2
    664	tst.w	%d2
    665	jeq	fp_nd_zero		| zero / denormalized
    666	cmp.w	#0x7fff,%d2
    667	jeq	fp_nd_huge		| NaN / infinitive.
    668	sub.w	#0x4000-0x3ff,%d2	| will the exponent fit?
    669	jcs	fp_nd_small		| too small.
    670	cmp.w	#0x7fe,%d2
    671	jcc	fp_nd_large		| too big.
    672	addq.l	#4,%a0
    673	move.l	(%a0),%d0		| low lword of mantissa
    674	| now, round off the low 11 bits.
    675fp_nd_round:
    676	moveq	#21,%d1
    677	lsl.l	%d1,%d0			| keep 11 low bits.
    678	jne	fp_nd_checkround	| Are they non-zero?
    679	| nothing to do here
    6809:	subq.l	#8,%a0
    681	printf	PNORM,"%p(",1,%a0
    682	printx	PNORM,%a0@
    683	printf	PNORM,")\n"
    684	rts
    685	| Be careful with the X bit! It contains the lsb
    686	| from the shift above, it is needed for round to nearest.
    687fp_nd_checkround:
    688	fp_set_sr FPSR_EXC_INEX2	| INEX2 bit
    689	and.w	#0xf800,(2,%a0)		| clear bits 0-10
    690	move.w	(FPD_RND,FPDATA),%d2	| rounding mode
    691	jne	2f			| %d2 == 0, round to nearest
    692	tst.l	%d0			| test guard bit
    693	jpl	9b			| zero is closer
    694	| here we test the X bit by adding it to %d2
    695	clr.w	%d2			| first set z bit, addx only clears it
    696	addx.w	%d2,%d2			| test lsb bit
    697	| IEEE754-specified "round to even" behaviour.  If the guard
    698	| bit is set, then the number is odd, so rounding works like
    699	| in grade-school arithmetic (i.e. 1.5 rounds to 2.0)
    700	| Otherwise, an equal distance rounds towards zero, so as not
    701	| to produce an odd number.  This is strange, but it is what
    702	| the standard says.
    703	jne	fp_nd_doroundup		| round to infinity
    704	lsl.l	#1,%d0			| check low bits
    705	jeq	9b			| round to zero
    706fp_nd_doroundup:
    707	| round (the mantissa, that is) towards infinity
    708	add.l	#0x800,(%a0)
    709	jcc	9b			| no overflow, good.
    710	addq.l	#1,-(%a0)		| extend to high lword
    711	jcc	1f			| no overflow, good.
    712	| Yow! we have managed to overflow the mantissa.  Since this
    713	| only happens when %d1 was 0xfffff800, it is now zero, so
    714	| reset the high bit, and increment the exponent.
    715	move.w	#0x8000,(%a0)
    716	addq.w	#1,-(%a0)
    717	cmp.w	#0x43ff,(%a0)+		| exponent now overflown?
    718	jeq	fp_nd_large		| yes, so make it infinity.
    7191:	subq.l	#4,%a0
    720	printf	PNORM,"%p(",1,%a0
    721	printx	PNORM,%a0@
    722	printf	PNORM,")\n"
    723	rts
    7242:	subq.w	#2,%d2
    725	jcs	9b			| %d2 < 2, round to zero
    726	jhi	3f			| %d2 > 2, round to +infinity
    727	| Round to +Inf or -Inf.  High word of %d2 contains the
    728	| sign of the number, by the way.
    729	swap	%d2			| to -inf
    730	tst.b	%d2
    731	jne	fp_nd_doroundup		| negative, round to infinity
    732	jra	9b			| positive, round to zero
    7333:	swap	%d2			| to +inf
    734	tst.b	%d2
    735	jeq	fp_nd_doroundup		| positive, round to infinity
    736	jra	9b			| negative, round to zero
    737	| Exponent underflow.  Try to make a denormal, and set it to
    738	| the smallest possible fraction if this fails.
    739fp_nd_small:
    740	fp_set_sr FPSR_EXC_UNFL		| set UNFL bit
    741	move.w	#0x3c01,(-2,%a0)	| 2**-1022
    742	neg.w	%d2			| degree of underflow
    743	cmp.w	#32,%d2			| single or double shift?
    744	jcc	1f
    745	| Again, another 64-bit double shift.
    746	move.l	(%a0),%d0
    747	move.l	%d0,%d1
    748	lsr.l	%d2,%d0
    749	move.l	%d0,(%a0)+
    750	move.l	(%a0),%d0
    751	lsr.l	%d2,%d0
    752	neg.w	%d2
    753	add.w	#32,%d2
    754	lsl.l	%d2,%d1
    755	or.l	%d1,%d0
    756	move.l	(%a0),%d1
    757	move.l	%d0,(%a0)
    758	| Check to see if we shifted off any significant bits
    759	lsl.l	%d2,%d1
    760	jeq	fp_nd_round		| Nope, round.
    761	bset	#0,%d0			| Yes, so set the "sticky bit".
    762	jra	fp_nd_round		| Now, round.
    763	| Another 64-bit single shift and store
    7641:	sub.w	#32,%d2
    765	cmp.w	#32,%d2			| Do we really need to shift?
    766	jcc	2f			| No, the number is too small.
    767	move.l	(%a0),%d0
    768	clr.l	(%a0)+
    769	move.l	%d0,%d1
    770	lsr.l	%d2,%d0
    771	neg.w	%d2
    772	add.w	#32,%d2
    773	| Again, check to see if we shifted off any significant bits.
    774	tst.l	(%a0)
    775	jeq	1f
    776	bset	#0,%d0			| Sticky bit.
    7771:	move.l	%d0,(%a0)
    778	lsl.l	%d2,%d1
    779	jeq	fp_nd_round
    780	bset	#0,%d0
    781	jra	fp_nd_round
    782	| Sorry, the number is just too small.
    7832:	clr.l	(%a0)+
    784	clr.l	(%a0)
    785	moveq	#1,%d0			| Smallest possible fraction,
    786	jra	fp_nd_round		| round as desired.
    787	| zero and denormalized
    788fp_nd_zero:
    789	tst.l	(%a0)+
    790	jne	1f
    791	tst.l	(%a0)
    792	jne	1f
    793	subq.l	#8,%a0
    794	printf	PNORM,"%p(",1,%a0
    795	printx	PNORM,%a0@
    796	printf	PNORM,")\n"
    797	rts				| zero.  nothing to do.
    798	| These are not merely subnormal numbers, but true denormals,
    799	| i.e. pathologically small (exponent is 2**-16383) numbers.
    800	| It is clearly impossible for even a normal extended number
    801	| with that exponent to fit into double precision, so just
    802	| write these ones off as "too darn small".
    8031:	fp_set_sr FPSR_EXC_UNFL		| Set UNFL bit
    804	clr.l	(%a0)
    805	clr.l	-(%a0)
    806	move.w	#0x3c01,-(%a0)		| i.e. 2**-1022
    807	addq.l	#6,%a0
    808	moveq	#1,%d0
    809	jra	fp_nd_round		| round.
    810	| Exponent overflow.  Just call it infinity.
    811fp_nd_large:
    812	move.w	#0x7ff,%d0
    813	and.w	(6,%a0),%d0
    814	jeq	1f
    815	fp_set_sr FPSR_EXC_INEX2
    8161:	fp_set_sr FPSR_EXC_OVFL
    817	move.w	(FPD_RND,FPDATA),%d2
    818	jne	3f			| %d2 = 0 round to nearest
    8191:	move.w	#0x7fff,(-2,%a0)
    820	clr.l	(%a0)+
    821	clr.l	(%a0)
    8222:	subq.l	#8,%a0
    823	printf	PNORM,"%p(",1,%a0
    824	printx	PNORM,%a0@
    825	printf	PNORM,")\n"
    826	rts
    8273:	subq.w	#2,%d2
    828	jcs	5f			| %d2 < 2, round to zero
    829	jhi	4f			| %d2 > 2, round to +infinity
    830	tst.b	(-3,%a0)		| to -inf
    831	jne	1b
    832	jra	5f
    8334:	tst.b	(-3,%a0)		| to +inf
    834	jeq	1b
    8355:	move.w	#0x43fe,(-2,%a0)
    836	moveq	#-1,%d0
    837	move.l	%d0,(%a0)+
    838	move.w	#0xf800,%d0
    839	move.l	%d0,(%a0)
    840	jra	2b
    841	| Infinities or NaNs
    842fp_nd_huge:
    843	subq.l	#4,%a0
    844	printf	PNORM,"%p(",1,%a0
    845	printx	PNORM,%a0@
    846	printf	PNORM,")\n"
    847	rts
    848
    849	| fp_normalize_single:
    850	| normalize an extended with single (23-bit) precision
    851	| args:	 %a0 (struct fp_ext *)
    852
    853fp_normalize_single:
    854	printf	PNORM,"ns: %p(",1,%a0
    855	printx	PNORM,%a0@
    856	printf	PNORM,") "
    857	addq.l	#2,%a0
    858	move.w	(%a0)+,%d2
    859	jeq	fp_ns_zero		| zero / denormalized
    860	cmp.w	#0x7fff,%d2
    861	jeq	fp_ns_huge		| NaN / infinitive.
    862	sub.w	#0x4000-0x7f,%d2	| will the exponent fit?
    863	jcs	fp_ns_small		| too small.
    864	cmp.w	#0xfe,%d2
    865	jcc	fp_ns_large		| too big.
    866	move.l	(%a0)+,%d0		| get high lword of mantissa
    867fp_ns_round:
    868	tst.l	(%a0)			| check the low lword
    869	jeq	1f
    870	| Set a sticky bit if it is non-zero.  This should only
    871	| affect the rounding in what would otherwise be equal-
    872	| distance situations, which is what we want it to do.
    873	bset	#0,%d0
    8741:	clr.l	(%a0)			| zap it from memory.
    875	| now, round off the low 8 bits of the hi lword.
    876	tst.b	%d0			| 8 low bits.
    877	jne	fp_ns_checkround	| Are they non-zero?
    878	| nothing to do here
    879	subq.l	#8,%a0
    880	printf	PNORM,"%p(",1,%a0
    881	printx	PNORM,%a0@
    882	printf	PNORM,")\n"
    883	rts
    884fp_ns_checkround:
    885	fp_set_sr FPSR_EXC_INEX2	| INEX2 bit
    886	clr.b	-(%a0)			| clear low byte of high lword
    887	subq.l	#3,%a0
    888	move.w	(FPD_RND,FPDATA),%d2	| rounding mode
    889	jne	2f			| %d2 == 0, round to nearest
    890	tst.b	%d0			| test guard bit
    891	jpl	9f			| zero is closer
    892	btst	#8,%d0			| test lsb bit
    893	| round to even behaviour, see above.
    894	jne	fp_ns_doroundup		| round to infinity
    895	lsl.b	#1,%d0			| check low bits
    896	jeq	9f			| round to zero
    897fp_ns_doroundup:
    898	| round (the mantissa, that is) towards infinity
    899	add.l	#0x100,(%a0)
    900	jcc	9f			| no overflow, good.
    901	| Overflow.  This means that the %d1 was 0xffffff00, so it
    902	| is now zero.  We will set the mantissa to reflect this, and
    903	| increment the exponent (checking for overflow there too)
    904	move.w	#0x8000,(%a0)
    905	addq.w	#1,-(%a0)
    906	cmp.w	#0x407f,(%a0)+		| exponent now overflown?
    907	jeq	fp_ns_large		| yes, so make it infinity.
    9089:	subq.l	#4,%a0
    909	printf	PNORM,"%p(",1,%a0
    910	printx	PNORM,%a0@
    911	printf	PNORM,")\n"
    912	rts
    913	| check nondefault rounding modes
    9142:	subq.w	#2,%d2
    915	jcs	9b			| %d2 < 2, round to zero
    916	jhi	3f			| %d2 > 2, round to +infinity
    917	tst.b	(-3,%a0)		| to -inf
    918	jne	fp_ns_doroundup		| negative, round to infinity
    919	jra	9b			| positive, round to zero
    9203:	tst.b	(-3,%a0)		| to +inf
    921	jeq	fp_ns_doroundup		| positive, round to infinity
    922	jra	9b			| negative, round to zero
    923	| Exponent underflow.  Try to make a denormal, and set it to
    924	| the smallest possible fraction if this fails.
    925fp_ns_small:
    926	fp_set_sr FPSR_EXC_UNFL		| set UNFL bit
    927	move.w	#0x3f81,(-2,%a0)	| 2**-126
    928	neg.w	%d2			| degree of underflow
    929	cmp.w	#32,%d2			| single or double shift?
    930	jcc	2f
    931	| a 32-bit shift.
    932	move.l	(%a0),%d0
    933	move.l	%d0,%d1
    934	lsr.l	%d2,%d0
    935	move.l	%d0,(%a0)+
    936	| Check to see if we shifted off any significant bits.
    937	neg.w	%d2
    938	add.w	#32,%d2
    939	lsl.l	%d2,%d1
    940	jeq	1f
    941	bset	#0,%d0			| Sticky bit.
    942	| Check the lower lword
    9431:	tst.l	(%a0)
    944	jeq	fp_ns_round
    945	clr	(%a0)
    946	bset	#0,%d0			| Sticky bit.
    947	jra	fp_ns_round
    948	| Sorry, the number is just too small.
    9492:	clr.l	(%a0)+
    950	clr.l	(%a0)
    951	moveq	#1,%d0			| Smallest possible fraction,
    952	jra	fp_ns_round		| round as desired.
    953	| Exponent overflow.  Just call it infinity.
    954fp_ns_large:
    955	tst.b	(3,%a0)
    956	jeq	1f
    957	fp_set_sr FPSR_EXC_INEX2
    9581:	fp_set_sr FPSR_EXC_OVFL
    959	move.w	(FPD_RND,FPDATA),%d2
    960	jne	3f			| %d2 = 0 round to nearest
    9611:	move.w	#0x7fff,(-2,%a0)
    962	clr.l	(%a0)+
    963	clr.l	(%a0)
    9642:	subq.l	#8,%a0
    965	printf	PNORM,"%p(",1,%a0
    966	printx	PNORM,%a0@
    967	printf	PNORM,")\n"
    968	rts
    9693:	subq.w	#2,%d2
    970	jcs	5f			| %d2 < 2, round to zero
    971	jhi	4f			| %d2 > 2, round to +infinity
    972	tst.b	(-3,%a0)		| to -inf
    973	jne	1b
    974	jra	5f
    9754:	tst.b	(-3,%a0)		| to +inf
    976	jeq	1b
    9775:	move.w	#0x407e,(-2,%a0)
    978	move.l	#0xffffff00,(%a0)+
    979	clr.l	(%a0)
    980	jra	2b
    981	| zero and denormalized
    982fp_ns_zero:
    983	tst.l	(%a0)+
    984	jne	1f
    985	tst.l	(%a0)
    986	jne	1f
    987	subq.l	#8,%a0
    988	printf	PNORM,"%p(",1,%a0
    989	printx	PNORM,%a0@
    990	printf	PNORM,")\n"
    991	rts				| zero.  nothing to do.
    992	| These are not merely subnormal numbers, but true denormals,
    993	| i.e. pathologically small (exponent is 2**-16383) numbers.
    994	| It is clearly impossible for even a normal extended number
    995	| with that exponent to fit into single precision, so just
    996	| write these ones off as "too darn small".
    9971:	fp_set_sr FPSR_EXC_UNFL		| Set UNFL bit
    998	clr.l	(%a0)
    999	clr.l	-(%a0)
   1000	move.w	#0x3f81,-(%a0)		| i.e. 2**-126
   1001	addq.l	#6,%a0
   1002	moveq	#1,%d0
   1003	jra	fp_ns_round		| round.
   1004	| Infinities or NaNs
   1005fp_ns_huge:
   1006	subq.l	#4,%a0
   1007	printf	PNORM,"%p(",1,%a0
   1008	printx	PNORM,%a0@
   1009	printf	PNORM,")\n"
   1010	rts
   1011
   1012	| fp_normalize_single_fast:
   1013	| normalize an extended with single (23-bit) precision
   1014	| this is only used by fsgldiv/fsgdlmul, where the
   1015	| operand is not completly normalized.
   1016	| args:	 %a0 (struct fp_ext *)
   1017
   1018fp_normalize_single_fast:
   1019	printf	PNORM,"nsf: %p(",1,%a0
   1020	printx	PNORM,%a0@
   1021	printf	PNORM,") "
   1022	addq.l	#2,%a0
   1023	move.w	(%a0)+,%d2
   1024	cmp.w	#0x7fff,%d2
   1025	jeq	fp_nsf_huge		| NaN / infinitive.
   1026	move.l	(%a0)+,%d0		| get high lword of mantissa
   1027fp_nsf_round:
   1028	tst.l	(%a0)			| check the low lword
   1029	jeq	1f
   1030	| Set a sticky bit if it is non-zero.  This should only
   1031	| affect the rounding in what would otherwise be equal-
   1032	| distance situations, which is what we want it to do.
   1033	bset	#0,%d0
   10341:	clr.l	(%a0)			| zap it from memory.
   1035	| now, round off the low 8 bits of the hi lword.
   1036	tst.b	%d0			| 8 low bits.
   1037	jne	fp_nsf_checkround	| Are they non-zero?
   1038	| nothing to do here
   1039	subq.l	#8,%a0
   1040	printf	PNORM,"%p(",1,%a0
   1041	printx	PNORM,%a0@
   1042	printf	PNORM,")\n"
   1043	rts
   1044fp_nsf_checkround:
   1045	fp_set_sr FPSR_EXC_INEX2	| INEX2 bit
   1046	clr.b	-(%a0)			| clear low byte of high lword
   1047	subq.l	#3,%a0
   1048	move.w	(FPD_RND,FPDATA),%d2	| rounding mode
   1049	jne	2f			| %d2 == 0, round to nearest
   1050	tst.b	%d0			| test guard bit
   1051	jpl	9f			| zero is closer
   1052	btst	#8,%d0			| test lsb bit
   1053	| round to even behaviour, see above.
   1054	jne	fp_nsf_doroundup		| round to infinity
   1055	lsl.b	#1,%d0			| check low bits
   1056	jeq	9f			| round to zero
   1057fp_nsf_doroundup:
   1058	| round (the mantissa, that is) towards infinity
   1059	add.l	#0x100,(%a0)
   1060	jcc	9f			| no overflow, good.
   1061	| Overflow.  This means that the %d1 was 0xffffff00, so it
   1062	| is now zero.  We will set the mantissa to reflect this, and
   1063	| increment the exponent (checking for overflow there too)
   1064	move.w	#0x8000,(%a0)
   1065	addq.w	#1,-(%a0)
   1066	cmp.w	#0x407f,(%a0)+		| exponent now overflown?
   1067	jeq	fp_nsf_large		| yes, so make it infinity.
   10689:	subq.l	#4,%a0
   1069	printf	PNORM,"%p(",1,%a0
   1070	printx	PNORM,%a0@
   1071	printf	PNORM,")\n"
   1072	rts
   1073	| check nondefault rounding modes
   10742:	subq.w	#2,%d2
   1075	jcs	9b			| %d2 < 2, round to zero
   1076	jhi	3f			| %d2 > 2, round to +infinity
   1077	tst.b	(-3,%a0)		| to -inf
   1078	jne	fp_nsf_doroundup	| negative, round to infinity
   1079	jra	9b			| positive, round to zero
   10803:	tst.b	(-3,%a0)		| to +inf
   1081	jeq	fp_nsf_doroundup		| positive, round to infinity
   1082	jra	9b			| negative, round to zero
   1083	| Exponent overflow.  Just call it infinity.
   1084fp_nsf_large:
   1085	tst.b	(3,%a0)
   1086	jeq	1f
   1087	fp_set_sr FPSR_EXC_INEX2
   10881:	fp_set_sr FPSR_EXC_OVFL
   1089	move.w	(FPD_RND,FPDATA),%d2
   1090	jne	3f			| %d2 = 0 round to nearest
   10911:	move.w	#0x7fff,(-2,%a0)
   1092	clr.l	(%a0)+
   1093	clr.l	(%a0)
   10942:	subq.l	#8,%a0
   1095	printf	PNORM,"%p(",1,%a0
   1096	printx	PNORM,%a0@
   1097	printf	PNORM,")\n"
   1098	rts
   10993:	subq.w	#2,%d2
   1100	jcs	5f			| %d2 < 2, round to zero
   1101	jhi	4f			| %d2 > 2, round to +infinity
   1102	tst.b	(-3,%a0)		| to -inf
   1103	jne	1b
   1104	jra	5f
   11054:	tst.b	(-3,%a0)		| to +inf
   1106	jeq	1b
   11075:	move.w	#0x407e,(-2,%a0)
   1108	move.l	#0xffffff00,(%a0)+
   1109	clr.l	(%a0)
   1110	jra	2b
   1111	| Infinities or NaNs
   1112fp_nsf_huge:
   1113	subq.l	#4,%a0
   1114	printf	PNORM,"%p(",1,%a0
   1115	printx	PNORM,%a0@
   1116	printf	PNORM,")\n"
   1117	rts
   1118
   1119	| conv_ext2int (macro):
   1120	| Generates a subroutine that converts an extended value to an
   1121	| integer of a given size, again, with the appropriate type of
   1122	| rounding.
   1123
   1124	| Macro arguments:
   1125	| s:	size, as given in an assembly instruction.
   1126	| b:	number of bits in that size.
   1127
   1128	| Subroutine arguments:
   1129	| %a0:	source (struct fp_ext *)
   1130
   1131	| Returns the integer in %d0 (like it should)
   1132
   1133.macro conv_ext2int s,b
   1134	.set	inf,(1<<(\b-1))-1	| i.e. MAXINT
   1135	printf	PCONV,"e2i%d: %p(",2,#\b,%a0
   1136	printx	PCONV,%a0@
   1137	printf	PCONV,") "
   1138	addq.l	#2,%a0
   1139	move.w	(%a0)+,%d2		| exponent
   1140	jeq	fp_e2i_zero\b		| zero / denorm (== 0, here)
   1141	cmp.w	#0x7fff,%d2
   1142	jeq	fp_e2i_huge\b		| Inf / NaN
   1143	sub.w	#0x3ffe,%d2
   1144	jcs	fp_e2i_small\b
   1145	cmp.w	#\b,%d2
   1146	jhi	fp_e2i_large\b
   1147	move.l	(%a0),%d0
   1148	move.l	%d0,%d1
   1149	lsl.l	%d2,%d1
   1150	jne	fp_e2i_round\b
   1151	tst.l	(4,%a0)
   1152	jne	fp_e2i_round\b
   1153	neg.w	%d2
   1154	add.w	#32,%d2
   1155	lsr.l	%d2,%d0
   11569:	tst.w	(-4,%a0)
   1157	jne	1f
   1158	tst.\s	%d0
   1159	jmi	fp_e2i_large\b
   1160	printf	PCONV,"-> %p\n",1,%d0
   1161	rts
   11621:	neg.\s	%d0
   1163	jeq	1f
   1164	jpl	fp_e2i_large\b
   11651:	printf	PCONV,"-> %p\n",1,%d0
   1166	rts
   1167fp_e2i_round\b:
   1168	fp_set_sr FPSR_EXC_INEX2	| INEX2 bit
   1169	neg.w	%d2
   1170	add.w	#32,%d2
   1171	.if	\b>16
   1172	jeq	5f
   1173	.endif
   1174	lsr.l	%d2,%d0
   1175	move.w	(FPD_RND,FPDATA),%d2	| rounding mode
   1176	jne	2f			| %d2 == 0, round to nearest
   1177	tst.l	%d1			| test guard bit
   1178	jpl	9b			| zero is closer
   1179	btst	%d2,%d0			| test lsb bit (%d2 still 0)
   1180	jne	fp_e2i_doroundup\b
   1181	lsl.l	#1,%d1			| check low bits
   1182	jne	fp_e2i_doroundup\b
   1183	tst.l	(4,%a0)
   1184	jeq	9b
   1185fp_e2i_doroundup\b:
   1186	addq.l	#1,%d0
   1187	jra	9b
   1188	| check nondefault rounding modes
   11892:	subq.w	#2,%d2
   1190	jcs	9b			| %d2 < 2, round to zero
   1191	jhi	3f			| %d2 > 2, round to +infinity
   1192	tst.w	(-4,%a0)		| to -inf
   1193	jne	fp_e2i_doroundup\b	| negative, round to infinity
   1194	jra	9b			| positive, round to zero
   11953:	tst.w	(-4,%a0)		| to +inf
   1196	jeq	fp_e2i_doroundup\b	| positive, round to infinity
   1197	jra	9b	| negative, round to zero
   1198	| we are only want -2**127 get correctly rounded here,
   1199	| since the guard bit is in the lower lword.
   1200	| everything else ends up anyway as overflow.
   1201	.if	\b>16
   12025:	move.w	(FPD_RND,FPDATA),%d2	| rounding mode
   1203	jne	2b			| %d2 == 0, round to nearest
   1204	move.l	(4,%a0),%d1		| test guard bit
   1205	jpl	9b			| zero is closer
   1206	lsl.l	#1,%d1			| check low bits
   1207	jne	fp_e2i_doroundup\b
   1208	jra	9b
   1209	.endif
   1210fp_e2i_zero\b:
   1211	clr.l	%d0
   1212	tst.l	(%a0)+
   1213	jne	1f
   1214	tst.l	(%a0)
   1215	jeq	3f
   12161:	subq.l	#4,%a0
   1217	fp_clr_sr FPSR_EXC_UNFL		| fp_normalize_ext has set this bit
   1218fp_e2i_small\b:
   1219	fp_set_sr FPSR_EXC_INEX2
   1220	clr.l	%d0
   1221	move.w	(FPD_RND,FPDATA),%d2	| rounding mode
   1222	subq.w	#2,%d2
   1223	jcs	3f			| %d2 < 2, round to nearest/zero
   1224	jhi	2f			| %d2 > 2, round to +infinity
   1225	tst.w	(-4,%a0)		| to -inf
   1226	jeq	3f
   1227	subq.\s	#1,%d0
   1228	jra	3f
   12292:	tst.w	(-4,%a0)		| to +inf
   1230	jne	3f
   1231	addq.\s	#1,%d0
   12323:	printf	PCONV,"-> %p\n",1,%d0
   1233	rts
   1234fp_e2i_large\b:
   1235	fp_set_sr FPSR_EXC_OPERR
   1236	move.\s	#inf,%d0
   1237	tst.w	(-4,%a0)
   1238	jeq	1f
   1239	addq.\s	#1,%d0
   12401:	printf	PCONV,"-> %p\n",1,%d0
   1241	rts
   1242fp_e2i_huge\b:
   1243	move.\s	(%a0),%d0
   1244	tst.l	(%a0)
   1245	jne	1f
   1246	tst.l	(%a0)
   1247	jeq	fp_e2i_large\b
   1248	| fp_normalize_ext has set this bit already
   1249	| and made the number nonsignaling
   12501:	fp_tst_sr FPSR_EXC_SNAN
   1251	jne	1f
   1252	fp_set_sr FPSR_EXC_OPERR
   12531:	printf	PCONV,"-> %p\n",1,%d0
   1254	rts
   1255.endm
   1256
   1257fp_conv_ext2long:
   1258	conv_ext2int l,32
   1259
   1260fp_conv_ext2short:
   1261	conv_ext2int w,16
   1262
   1263fp_conv_ext2byte:
   1264	conv_ext2int b,8
   1265
   1266fp_conv_ext2double:
   1267	jsr	fp_normalize_double
   1268	printf	PCONV,"e2d: %p(",1,%a0
   1269	printx	PCONV,%a0@
   1270	printf	PCONV,"), "
   1271	move.l	(%a0)+,%d2
   1272	cmp.w	#0x7fff,%d2
   1273	jne	1f
   1274	move.w	#0x7ff,%d2
   1275	move.l	(%a0)+,%d0
   1276	jra	2f
   12771:	sub.w	#0x3fff-0x3ff,%d2
   1278	move.l	(%a0)+,%d0
   1279	jmi	2f
   1280	clr.w	%d2
   12812:	lsl.w	#5,%d2
   1282	lsl.l	#7,%d2
   1283	lsl.l	#8,%d2
   1284	move.l	%d0,%d1
   1285	lsl.l	#1,%d0
   1286	lsr.l	#4,%d0
   1287	lsr.l	#8,%d0
   1288	or.l	%d2,%d0
   1289	putuser.l %d0,(%a1)+,fp_err_ua2,%a1
   1290	moveq	#21,%d0
   1291	lsl.l	%d0,%d1
   1292	move.l	(%a0),%d0
   1293	lsr.l	#4,%d0
   1294	lsr.l	#7,%d0
   1295	or.l	%d1,%d0
   1296	putuser.l %d0,(%a1),fp_err_ua2,%a1
   1297#ifdef FPU_EMU_DEBUG
   1298	getuser.l %a1@(-4),%d0,fp_err_ua2,%a1
   1299	getuser.l %a1@(0),%d1,fp_err_ua2,%a1
   1300	printf	PCONV,"%p(%08x%08x)\n",3,%a1,%d0,%d1
   1301#endif
   1302	rts
   1303
   1304fp_conv_ext2single:
   1305	jsr	fp_normalize_single
   1306	printf	PCONV,"e2s: %p(",1,%a0
   1307	printx	PCONV,%a0@
   1308	printf	PCONV,"), "
   1309	move.l	(%a0)+,%d1
   1310	cmp.w	#0x7fff,%d1
   1311	jne	1f
   1312	move.w	#0xff,%d1
   1313	move.l	(%a0)+,%d0
   1314	jra	2f
   13151:	sub.w	#0x3fff-0x7f,%d1
   1316	move.l	(%a0)+,%d0
   1317	jmi	2f
   1318	clr.w	%d1
   13192:	lsl.w	#8,%d1
   1320	lsl.l	#7,%d1
   1321	lsl.l	#8,%d1
   1322	bclr	#31,%d0
   1323	lsr.l	#8,%d0
   1324	or.l	%d1,%d0
   1325	printf	PCONV,"%08x\n",1,%d0
   1326	rts
   1327
   1328	| special return addresses for instr that
   1329	| encode the rounding precision in the opcode
   1330	| (e.g. fsmove,fdmove)
   1331
   1332fp_finalrounding_single:
   1333	addq.l	#8,%sp
   1334	jsr	fp_normalize_ext
   1335	jsr	fp_normalize_single
   1336	jra	fp_finaltest
   1337
   1338fp_finalrounding_single_fast:
   1339	addq.l	#8,%sp
   1340	jsr	fp_normalize_ext
   1341	jsr	fp_normalize_single_fast
   1342	jra	fp_finaltest
   1343
   1344fp_finalrounding_double:
   1345	addq.l	#8,%sp
   1346	jsr	fp_normalize_ext
   1347	jsr	fp_normalize_double
   1348	jra	fp_finaltest
   1349
   1350	| fp_finaltest:
   1351	| set the emulated status register based on the outcome of an
   1352	| emulated instruction.
   1353
   1354fp_finalrounding:
   1355	addq.l	#8,%sp
   1356|	printf	,"f: %p\n",1,%a0
   1357	jsr	fp_normalize_ext
   1358	move.w	(FPD_PREC,FPDATA),%d0
   1359	subq.w	#1,%d0
   1360	jcs	fp_finaltest
   1361	jne	1f
   1362	jsr	fp_normalize_single
   1363	jra	2f
   13641:	jsr	fp_normalize_double
   13652:|	printf	,"f: %p\n",1,%a0
   1366fp_finaltest:
   1367	| First, we do some of the obvious tests for the exception
   1368	| status byte and condition code bytes of fp_sr here, so that
   1369	| they do not have to be handled individually by every
   1370	| emulated instruction.
   1371	clr.l	%d0
   1372	addq.l	#1,%a0
   1373	tst.b	(%a0)+			| sign
   1374	jeq	1f
   1375	bset	#FPSR_CC_NEG-24,%d0	| N bit
   13761:	cmp.w	#0x7fff,(%a0)+		| exponent
   1377	jeq	2f
   1378	| test for zero
   1379	moveq	#FPSR_CC_Z-24,%d1
   1380	tst.l	(%a0)+
   1381	jne	9f
   1382	tst.l	(%a0)
   1383	jne	9f
   1384	jra	8f
   1385	| infinitiv and NAN
   13862:	moveq	#FPSR_CC_NAN-24,%d1
   1387	move.l	(%a0)+,%d2
   1388	lsl.l	#1,%d2			| ignore high bit
   1389	jne	8f
   1390	tst.l	(%a0)
   1391	jne	8f
   1392	moveq	#FPSR_CC_INF-24,%d1
   13938:	bset	%d1,%d0
   13949:	move.b	%d0,(FPD_FPSR+0,FPDATA)	| set condition test result
   1395	| move instructions enter here
   1396	| Here, we test things in the exception status byte, and set
   1397	| other things in the accrued exception byte accordingly.
   1398	| Emulated instructions can set various things in the former,
   1399	| as defined in fp_emu.h.
   1400fp_final:
   1401	move.l	(FPD_FPSR,FPDATA),%d0
   1402#if 0
   1403	btst	#FPSR_EXC_SNAN,%d0	| EXC_SNAN
   1404	jne	1f
   1405	btst	#FPSR_EXC_OPERR,%d0	| EXC_OPERR
   1406	jeq	2f
   14071:	bset	#FPSR_AEXC_IOP,%d0	| set IOP bit
   14082:	btst	#FPSR_EXC_OVFL,%d0	| EXC_OVFL
   1409	jeq	1f
   1410	bset	#FPSR_AEXC_OVFL,%d0	| set OVFL bit
   14111:	btst	#FPSR_EXC_UNFL,%d0	| EXC_UNFL
   1412	jeq	1f
   1413	btst	#FPSR_EXC_INEX2,%d0	| EXC_INEX2
   1414	jeq	1f
   1415	bset	#FPSR_AEXC_UNFL,%d0	| set UNFL bit
   14161:	btst	#FPSR_EXC_DZ,%d0	| EXC_INEX1
   1417	jeq	1f
   1418	bset	#FPSR_AEXC_DZ,%d0	| set DZ bit
   14191:	btst	#FPSR_EXC_OVFL,%d0	| EXC_OVFL
   1420	jne	1f
   1421	btst	#FPSR_EXC_INEX2,%d0	| EXC_INEX2
   1422	jne	1f
   1423	btst	#FPSR_EXC_INEX1,%d0	| EXC_INEX1
   1424	jeq	2f
   14251:	bset	#FPSR_AEXC_INEX,%d0	| set INEX bit
   14262:	move.l	%d0,(FPD_FPSR,FPDATA)
   1427#else
   1428	| same as above, greatly optimized, but untested (yet)
   1429	move.l	%d0,%d2
   1430	lsr.l	#5,%d0
   1431	move.l	%d0,%d1
   1432	lsr.l	#4,%d1
   1433	or.l	%d0,%d1
   1434	and.b	#0x08,%d1
   1435	move.l	%d2,%d0
   1436	lsr.l	#6,%d0
   1437	or.l	%d1,%d0
   1438	move.l	%d2,%d1
   1439	lsr.l	#4,%d1
   1440	or.b	#0xdf,%d1
   1441	and.b	%d1,%d0
   1442	move.l	%d2,%d1
   1443	lsr.l	#7,%d1
   1444	and.b	#0x80,%d1
   1445	or.b	%d1,%d0
   1446	and.b	#0xf8,%d0
   1447	or.b	%d0,%d2
   1448	move.l	%d2,(FPD_FPSR,FPDATA)
   1449#endif
   1450	move.b	(FPD_FPSR+2,FPDATA),%d0
   1451	and.b	(FPD_FPCR+2,FPDATA),%d0
   1452	jeq	1f
   1453	printf	,"send signal!!!\n"
   14541:	jra	fp_end