cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

x_unfl.S (6961B)


      1|
      2|	x_unfl.sa 3.4 7/1/91
      3|
      4|	fpsp_unfl --- FPSP handler for underflow exception
      5|
      6| Trap disabled results
      7|	For 881/2 compatibility, sw must denormalize the intermediate
      8| result, then store the result.  Denormalization is accomplished
      9| by taking the intermediate result (which is always normalized) and
     10| shifting the mantissa right while incrementing the exponent until
     11| it is equal to the denormalized exponent for the destination
     12| format.  After denormalization, the result is rounded to the
     13| destination format.
     14|
     15| Trap enabled results
     16|	All trap disabled code applies.	In addition the exceptional
     17| operand needs to be made available to the user with a bias of $6000
     18| added to the exponent.
     19|
     20
     21|		Copyright (C) Motorola, Inc. 1990
     22|			All Rights Reserved
     23|
     24|       For details on the license for this file, please see the
     25|       file, README, in this same directory.
     26
     27X_UNFL:	|idnt    2,1 | Motorola 040 Floating Point Software Package
     28
     29	|section	8
     30
     31#include "fpsp.h"
     32
     33	|xref	denorm
     34	|xref	round
     35	|xref	store
     36	|xref	g_rndpr
     37	|xref	g_opcls
     38	|xref	g_dfmtou
     39	|xref	real_unfl
     40	|xref	real_inex
     41	|xref	fpsp_done
     42	|xref	b1238_fix
     43
     44	.global	fpsp_unfl
     45fpsp_unfl:
     46	link		%a6,#-LOCAL_SIZE	|a6 = frame pointer, LOCAL_SIZE bytes of locals
     47	fsave		-(%a7)	|push the FPU internal state frame
     48	moveml		%d0-%d1/%a0-%a1,USER_DA(%a6)	|save d0-d1/a0-a1
     49	fmovemx	%fp0-%fp3,USER_FP0(%a6)	|save fp0-fp3
     50	fmoveml	%fpcr/%fpsr/%fpiar,USER_FPCR(%a6)	|save FPU control regs
     51
     52| The trap-disabled result is produced first; it applies on every path.
     53	bsrl		unf_res	|denormalize, round & store interm op
     54|
     55| If underflow exceptions are not enabled, check for inexact
     56| exception
     57|
     58	btstb		#unfl_bit,FPCR_ENABLE(%a6)
     59	beqs		ck_inex	|unfl trap not enabled
     60
     61	btstb		#E3,E_BYTE(%a6)
     62	beqs		no_e3_1	|E3 clear: skip the E3-only fixup
     63|
     64| Clear dirty bit on dest register in the frame before branching
     65| to b1238_fix.
     66|
     67	bfextu		CMDREG3B(%a6){#6:#3},%d0	|get dest reg no
     68	bclrb		%d0,FPR_DIRTY_BITS(%a6)	|clr dest dirty bit
     69	bsrl		b1238_fix		|test for bug1238 case
     70	movel		USER_FPSR(%a6),FPSR_SHADOW(%a6)	|copy saved FPSR into FPSR_SHADOW
     71	orl		#sx_mask,E_BYTE(%a6)
     72no_e3_1:
     73	moveml		USER_DA(%a6),%d0-%d1/%a0-%a1	|restore d0-d1/a0-a1
     74	fmovemx	USER_FP0(%a6),%fp0-%fp3	|restore fp0-fp3
     75	fmoveml	USER_FPCR(%a6),%fpcr/%fpsr/%fpiar	|restore FPU control regs
     76	frestore	(%a7)+	|reload the FPU state frame pushed by fsave
     77	unlk		%a6
     78	bral		real_unfl	|unfl trap enabled: continue at real_unfl
     79|
     80| It is possible to have either inex2 or inex1 exceptions with the
     81| unfl.  If the inex enable bit is set in the FPCR, and either
     82| inex2 or inex1 occurred, we must clean up and branch to the
     83| real inex handler.
     84|
     85ck_inex:
     86	moveb		FPCR_ENABLE(%a6),%d0	|d0 = exception enable byte
     87	andb		FPSR_EXCEPT(%a6),%d0	|keep only enabled AND reported
     88	andib		#0x3,%d0	|isolate bits 1:0 (the inex2/inex1 pair)
     89	beqs		unfl_done	|no enabled inexact is pending
     90
     91|
     92| Inexact enabled and reported, and we must take an inexact exception
     93|
     94take_inex:
     95	btstb		#E3,E_BYTE(%a6)
     96	beqs		no_e3_2	|E3 clear: skip the E3-only fixup
     97|
     98| Clear dirty bit on dest register in the frame before branching
     99| to b1238_fix.
    100|
    101	bfextu		CMDREG3B(%a6){#6:#3},%d0	|get dest reg no
    102	bclrb		%d0,FPR_DIRTY_BITS(%a6)	|clr dest dirty bit
    103	bsrl		b1238_fix		|test for bug1238 case
    104	movel		USER_FPSR(%a6),FPSR_SHADOW(%a6)	|copy saved FPSR into FPSR_SHADOW
    105	orl		#sx_mask,E_BYTE(%a6)
    106no_e3_2:
    107	moveb		#INEX_VEC,EXC_VEC+1(%a6)	|overwrite byte at EXC_VEC+1 with INEX_VEC
    108	moveml         USER_DA(%a6),%d0-%d1/%a0-%a1
    109	fmovemx        USER_FP0(%a6),%fp0-%fp3
    110	fmoveml        USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
    111	frestore        (%a7)+
    112	unlk            %a6
    113	bral		real_inex	|continue at real_inex
    114
    115unfl_done:
    116	bclrb		#E3,E_BYTE(%a6)	|test E3, then clear it in the frame
    117	beqs		e1_set		|E3 was clear (E1 case): branch
    118|
    119| Clear dirty bit on dest register in the frame before branching
    120| to b1238_fix.
    121|
    122	bfextu		CMDREG3B(%a6){#6:#3},%d0		|get dest reg no
    123	bclrb		%d0,FPR_DIRTY_BITS(%a6)	|clr dest dirty bit
    124	bsrl		b1238_fix		|test for bug1238 case
    125	movel		USER_FPSR(%a6),FPSR_SHADOW(%a6)	|copy saved FPSR into FPSR_SHADOW
    126	orl		#sx_mask,E_BYTE(%a6)
    127	moveml		USER_DA(%a6),%d0-%d1/%a0-%a1
    128	fmovemx	USER_FP0(%a6),%fp0-%fp3
    129	fmoveml	USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
    130	frestore	(%a7)+	|E3 path reloads the (modified) state frame
    131	unlk		%a6
    132	bral		fpsp_done
    133e1_set:
    134	moveml		USER_DA(%a6),%d0-%d1/%a0-%a1
    135	fmovemx	USER_FP0(%a6),%fp0-%fp3
    136	fmoveml	USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
    137	unlk		%a6	|no frestore here: unlk reloads a7 from a6, dropping the fsave frame (NOTE(review): presumably intentional - confirm)
    138	bral		fpsp_done
    139|
    140|	unf_res --- underflow result calculation
    141|	Called via bsrl from fpsp_unfl with a6 = local frame; returns via rts at no_aunfl.
    142unf_res:
    143	bsrl		g_rndpr		|returns RND_PREC in d0 0=ext,
    144|					;1=sgl, 2=dbl
    145|					;we need the RND_PREC in the
    146|					;upper word for round
    147	movew		#0,-(%a7)	|low word of the pushed longword = 0
    148	movew		%d0,-(%a7)	|copy RND_PREC to stack
    149|
    150|
    151| If the exception bit set is E3, the exceptional operand from the
    152| fpu is in WBTEMP; else it is in FPTEMP.
    153|
    154	btstb		#E3,E_BYTE(%a6)
    155	beqs		unf_E1	|E3 clear: operand is in FPTEMP
    156unf_E3:
    157	lea		WBTEMP(%a6),%a0	|a0 now points to operand
    158|
    159| Test for fsgldiv and fsglmul.  If the inst was one of these, then
    160| force the precision to extended for the denorm routine.  Use
    161| the user's precision for the round routine.
    162|
    163	movew		CMDREG3B(%a6),%d1	|check for fsgldiv or fsglmul
    164	andiw		#0x7f,%d1	|keep the low 7 bits of the command word
    165	cmpiw		#0x30,%d1		|check for sgldiv
    166	beqs		unf_sgl
    167	cmpiw		#0x33,%d1		|check for sglmul
    168	bnes		unf_cont	|if not, use fpcr prec in round
    169unf_sgl:
    170	clrl		%d0	|d0 = 0 (ext) for the denorm call
    171	movew		#0x1,(%a7)	|override g_rndpr precision
    172|					;force single
    173	bras		unf_cont
    174unf_E1:
    175	lea		FPTEMP(%a6),%a0	|a0 now points to operand
    176unf_cont:
    177	bclrb		#sign_bit,LOCAL_EX(%a0)	|clear sign bit
    178	sne		LOCAL_SGN(%a0)		|store sign
    179
    180	bsrl		denorm		|returns denorm, a0 points to it
    181|
    182| WARNING:
    183|				;d0 has guard,round sticky bit
    184|				;make sure that it is not corrupted
    185|				;before it reaches the round subroutine
    186|				;also ensure that a0 isn't corrupted
    187
    188|
    189| Set up d1 for round subroutine d1 contains the PREC/MODE
    190| information respectively on upper/lower register halves.
    191|
    192	bfextu		FPCR_MODE(%a6){#2:#2},%d1	|get mode from FPCR
    193|						;mode in lower d1
    194	addl		(%a7)+,%d1		|merge PREC/MODE
    195|
    196| WARNING: a0 and d0 are assumed to be intact between the denorm and
    197| round subroutines. All code between these two subroutines
    198| must not corrupt a0 and d0.
    199|
    200|
    201| Perform Round
    202|	Input:		a0 points to input operand
    203|			d0{31:29} has guard, round, sticky
    204|			d1{01:00} has rounding mode
    205|			d1{17:16} has rounding precision
    206|	Output:		a0 points to rounded operand
    207|
    208
    209	bsrl		round		|returns rounded denorm at (a0)
    210|
    211| Differentiate between store to memory vs. store to register
    212|
    213unf_store:
    214	bsrl		g_opcls		|returns opclass in d0{2:0}
    215	cmpib		#0x3,%d0	|opclass 3 = store to memory pending
    216	bnes		not_opc011
    217|
    218| At this point, a store to memory is pending
    219|
    220opc011:
    221	bsrl		g_dfmtou
    222	tstb		%d0	|d0 = 0 means extended destination format
    223	beqs		ext_opc011	|If extended, do not subtract
    224|				;If destination format is sgl/dbl,
    225	tstb		LOCAL_HI(%a0)	|If rounded result is normal,don't
    226|					;subtract
    227	bmis		ext_opc011	|top mantissa bit set: result is normalized
    228	subqw		#1,LOCAL_EX(%a0)	|account for denorm bias vs.
    229|				;normalized bias
    230|				;          normalized   denormalized
    231|				;single       $7f           $7e
    232|				;double       $3ff          $3fe
    233|
    234ext_opc011:
    235	bsrl		store		|stores to memory
    236	bras		unf_done	|finish up
    237
    238|
    239| At this point, a store to a float register is pending
    240|
    241not_opc011:
    242	bsrl		store	|stores to float register
    243|				;a0 is not corrupted on a store to a
    244|				;float register.
    245|
    246| Set the condition codes according to result
    247|
    248	tstl		LOCAL_HI(%a0)	|check upper mantissa
    249	bnes		ck_sgn
    250	tstl		LOCAL_LO(%a0)	|check lower mantissa
    251	bnes		ck_sgn
    252	bsetb		#z_bit,FPSR_CC(%a6) |set condition codes if zero
    253ck_sgn:
    254	btstb		#sign_bit,LOCAL_EX(%a0)	|check the sign bit
    255	beqs		unf_done	|sign clear: leave neg_bit alone
    256	bsetb		#neg_bit,FPSR_CC(%a6)	|sign set: flag result as negative
    257
    258|
    259| Finish.
    260|
    261unf_done:
    262	btstb		#inex2_bit,FPSR_EXCEPT(%a6)
    263	beqs		no_aunfl	|inex2 not reported: leave aunfl_bit untouched
    264	bsetb		#aunfl_bit,FPSR_AEXCEPT(%a6)	|inex2 reported: set aunfl_bit in FPSR_AEXCEPT
    265no_aunfl:
    266	rts
    267
    268	|end