decbin.S - cachepc-linux - Fork of AMDESE/linux with modifications for CachePC side-channel attack

	cachepc-linux Fork of AMDESE/linux with modifications for CachePC side-channel attack
	git clone https://git.sinitax.com/sinitax/cachepc-linux
	Log \| Files \| Refs \| README \| LICENSE \| sfeed.txt
decbin.S (15728B)
      1|
      2|	decbin.sa 3.3 12/19/90
      3|
      4|	Description: Converts normalized packed bcd value pointed to by
      5|	register A6 to extended-precision value in FP0.
      6|
      7|	Input: Normalized packed bcd value in ETEMP(a6).
      8|
      9|	Output:	Exact floating-point representation of the packed bcd value.
     10|
     11|	Saves and Modifies: D2-D5
     12|
     13|	Speed: The program decbin takes ??? cycles to execute.
     14|
     15|	Object Size:
     16|
     17|	External Reference(s): None.
     18|
     19|	Algorithm:
     20|	Expected is a normal bcd (i.e. non-exceptional; all inf, zero,
     21|	and NaN operands are dispatched without entering this routine)
     22|	value in 68881/882 format at location ETEMP(A6).
     23|
     24|	A1.	Convert the bcd exponent to binary by successive adds and muls.
     25|	Set the sign according to SE. Subtract 16 to compensate
     26|	for the mantissa which is to be interpreted as 17 integer
     27|	digits, rather than 1 integer and 16 fraction digits.
     28|	Note: this operation can never overflow.
     29|
     30|	A2. Convert the bcd mantissa to binary by successive
     31|	adds and muls in FP0. Set the sign according to SM.
     32|	The mantissa digits will be converted with the decimal point
     33|	assumed following the least-significant digit.
     34|	Note: this operation can never overflow.
     35|
     36|	A3. Count the number of leading/trailing zeros in the
     37|	bcd string.  If SE is positive, count the leading zeros;
     38|	if negative, count the trailing zeros.  Set the adjusted
     39|	exponent equal to the exponent from A1 and the zero count
     40|	added if SE = 1 and subtracted if SE = 0.  Scale the
     41|	mantissa the equivalent of forcing in the bcd value:
     42|
     43|	SE = 0	a non-zero digit in the integer position
     44|	SE = 1	a non-zero digit in Mant0, lsd of the fraction
     45|
     46|	this will ensure that any value, regardless of its
     47|	representation (ex. 0.1E2, 1E1, 10E0, 100E-1), is converted
     48|	consistently.
     49|
     50|	A4. Calculate the factor 10^exp in FP1 using a table of
     51|	10^(2^n) values.  To reduce the error in forming factors
     52|	greater than 10^27, a directed rounding scheme is used with
     53|	tables rounded to RN, RM, and RP, according to the table
     54|	in the comments of the pwrten section.
     55|
     56|	A5. Form the final binary number by scaling the mantissa by
     57|	the exponent factor.  This is done by multiplying the
     58|	mantissa in FP0 by the factor in FP1 if the adjusted
     59|	exponent sign is positive, and dividing FP0 by FP1 if
     60|	it is negative.
     61|
     62|	Clean up and return.  Check if the final mul or div resulted
     63|	in an inex2 exception.  If so, set inex1 in the fpsr and
     64|	check if the inex1 exception is enabled.  If so, set d7 upper
     65|	word to $0100.  This will signal unimp.sa that an enabled inex1
     66|	exception occurred.  Unimp will fix the stack.
     67|
     68
     69|		Copyright (C) Motorola, Inc. 1990
     70|			All Rights Reserved
     71|
     72|       For details on the license for this file, please see the
     73|       file, README, in this same directory.
     74
     75|DECBIN    idnt    2,1 | Motorola 040 Floating Point Software Package
     76
     77	|section	8
     78
     79#include "fpsp.h"
     80
     81|
     82|	PTENRN, PTENRM, and PTENRP are arrays of powers of 10 rounded
     83|	to nearest, minus, and plus, respectively.  The tables include
     84|	10**{1,2,4,8,16,32,64,128,256,512,1024,2048,4096}.  No rounding
     85|	is required until the power is greater than 27, however, all
     86|	tables include the first 5 for ease of indexing.
     87|
     88	|xref	PTENRN
     89	|xref	PTENRM
     90	|xref	PTENRP
     91
     92RTABLE:	.byte	0,0,0,0
     93	.byte	2,3,2,3
     94	.byte	2,3,3,2
     95	.byte	3,2,2,3
     96
     97	.global	decbin
     98	.global	calc_e
     99	.global	pwrten
    100	.global	calc_m
    101	.global	norm
    102	.global	ap_st_z
    103	.global	ap_st_n
    104|
    105	.set	FNIBS,7
    106	.set	FSTRT,0
    107|
    108	.set	ESTRT,4
    109	.set	EDIGITS,2	|
    110|
    111| Constants in single precision
    112FZERO:	.long	0x00000000
    113FONE:	.long	0x3F800000
    114FTEN:	.long	0x41200000
    115
    116	.set	TEN,10
    117
    118|
    119decbin:
    120	| fmovel	#0,FPCR		;clr real fpcr
    121	moveml	%d2-%d5,-(%a7)
    122|
    123| Calculate exponent:
    124|  1. Copy bcd value in memory for use as a working copy.
    125|  2. Calculate absolute value of exponent in d1 by mul and add.
    126|  3. Correct for exponent sign.
    127|  4. Subtract 16 to compensate for interpreting the mant as all integer digits.
    128|     (i.e., all digits assumed left of the decimal point.)
    129|
    130| Register usage:
    131|
    132|  calc_e:
    133|	(*)  d0: temp digit storage
    134|	(*)  d1: accumulator for binary exponent
    135|	(*)  d2: digit count
    136|	(*)  d3: offset pointer
    137|	( )  d4: first word of bcd
    138|	( )  a0: pointer to working bcd value
    139|	( )  a6: pointer to original bcd value
    140|	(*)  FP_SCR1: working copy of original bcd value
    141|	(*)  L_SCR1: copy of original exponent word
    142|
    143calc_e:
    144	movel	#EDIGITS,%d2	|# of nibbles (digits) in fraction part
    145	moveql	#ESTRT,%d3	|counter to pick up digits
    146	leal	FP_SCR1(%a6),%a0	|load tmp bcd storage address
    147	movel	ETEMP(%a6),(%a0)	|save input bcd value
    148	movel	ETEMP_HI(%a6),4(%a0) |save words 2 and 3
    149	movel	ETEMP_LO(%a6),8(%a0) |and work with these
    150	movel	(%a0),%d4	|get first word of bcd
    151	clrl	%d1		|zero d1 for accumulator
    152e_gd:
    153	mulul	#TEN,%d1	|mul partial product by one digit place
    154	bfextu	%d4{%d3:#4},%d0	|get the digit and zero extend into d0
    155	addl	%d0,%d1		|d1 = d1 + d0
    156	addqb	#4,%d3		|advance d3 to the next digit
    157	dbf	%d2,e_gd	|if we have used all 3 digits, exit loop
    158	btst	#30,%d4		|get SE
    159	beqs	e_pos		|don't negate if pos
    160	negl	%d1		|negate before subtracting
    161e_pos:
    162	subl	#16,%d1		|sub to compensate for shift of mant
    163	bges	e_save		|if still pos, do not neg
    164	negl	%d1		|now negative, make pos and set SE
    165	orl	#0x40000000,%d4	|set SE in d4,
    166	orl	#0x40000000,(%a0)	|and in working bcd
    167e_save:
    168	movel	%d1,L_SCR1(%a6)	|save exp in memory
    169|
    170|
    171| Calculate mantissa:
    172|  1. Calculate absolute value of mantissa in fp0 by mul and add.
    173|  2. Correct for mantissa sign.
    174|     (i.e., all digits assumed left of the decimal point.)
    175|
    176| Register usage:
    177|
    178|  calc_m:
    179|	(*)  d0: temp digit storage
    180|	(*)  d1: lword counter
    181|	(*)  d2: digit count
    182|	(*)  d3: offset pointer
    183|	( )  d4: words 2 and 3 of bcd
    184|	( )  a0: pointer to working bcd value
    185|	( )  a6: pointer to original bcd value
    186|	(*) fp0: mantissa accumulator
    187|	( )  FP_SCR1: working copy of original bcd value
    188|	( )  L_SCR1: copy of original exponent word
    189|
    190calc_m:
    191	moveql	#1,%d1		|word counter, init to 1
    192	fmoves	FZERO,%fp0	|accumulator
    193|
    194|
    195|  Since the packed number has a long word between the first & second parts,
    196|  get the integer digit then skip down & get the rest of the
    197|  mantissa.  We will unroll the loop once.
    198|
    199	bfextu	(%a0){#28:#4},%d0	|integer part is ls digit in long word
    200	faddb	%d0,%fp0		|add digit to sum in fp0
    201|
    202|
    203|  Get the rest of the mantissa.
    204|
    205loadlw:
    206	movel	(%a0,%d1.L*4),%d4	|load mantissa longword into d4
    207	moveql	#FSTRT,%d3	|counter to pick up digits
    208	moveql	#FNIBS,%d2	|reset number of digits per a0 ptr
    209md2b:
    210	fmuls	FTEN,%fp0	|fp0 = fp0 * 10
    211	bfextu	%d4{%d3:#4},%d0	|get the digit and zero extend
    212	faddb	%d0,%fp0	|fp0 = fp0 + digit
    213|
    214|
    215|  If all the digits (8) in that long word have been converted (d2=0),
    216|  then inc d1 (=2) to point to the next long word and reset d3 to 0
    217|  to initialize the digit offset, and set d2 to 7 for the digit count;
    218|  else continue with this long word.
    219|
    220	addqb	#4,%d3		|advance d3 to the next digit
    221	dbf	%d2,md2b		|check for last digit in this lw
    222nextlw:
    223	addql	#1,%d1		|inc lw pointer in mantissa
    224	cmpl	#2,%d1		|test for last lw
    225	ble	loadlw		|if not, get last one
    226
    227|
    228|  Check the sign of the mant and make the value in fp0 the same sign.
    229|
    230m_sign:
    231	btst	#31,(%a0)	|test sign of the mantissa
    232	beq	ap_st_z		|if clear, go to append/strip zeros
    233	fnegx	%fp0		|if set, negate fp0
    234
    235|
    236| Append/strip zeros:
    237|
    238|  For adjusted exponents which have an absolute value greater than 27*,
    239|  this routine calculates the amount needed to normalize the mantissa
    240|  for the adjusted exponent.  That number is subtracted from the exp
    241|  if the exp was positive, and added if it was negative.  The purpose
    242|  of this is to reduce the value of the exponent and the possibility
    243|  of error in calculation of pwrten.
    244|
    245|  1. Branch on the sign of the adjusted exponent.
    246|  2p.(positive exp)
    247|   2. Check M16 and the digits in lwords 2 and 3 in descending order.
    248|   3. Add one for each zero encountered until a non-zero digit.
    249|   4. Subtract the count from the exp.
    250|   5. Check if the exp has crossed zero in #4 above; make the exp abs
    251|	   and set SE.
    252|	6. Multiply the mantissa by 10**count.
    253|  2n.(negative exp)
    254|   2. Check the digits in lwords 3 and 2 in descending order.
    255|   3. Add one for each zero encountered until a non-zero digit.
    256|   4. Add the count to the exp.
    257|   5. Check if the exp has crossed zero in #4 above; clear SE.
    258|   6. Divide the mantissa by 10**count.
    259|
    260|  *Why 27?  If the adjusted exponent is within -28 < expA < 28, then
    261|   any adjustment due to append/strip zeros will drive the resultant
    262|   exponent towards zero.  Since all pwrten constants with a power
    263|   of 27 or less are exact, there is no need to use this routine to
    264|   attempt to lessen the resultant exponent.
    265|
    266| Register usage:
    267|
    268|  ap_st_z:
    269|	(*)  d0: temp digit storage
    270|	(*)  d1: zero count
    271|	(*)  d2: digit count
    272|	(*)  d3: offset pointer
    273|	( )  d4: first word of bcd
    274|	(*)  d5: lword counter
    275|	( )  a0: pointer to working bcd value
    276|	( )  FP_SCR1: working copy of original bcd value
    277|	( )  L_SCR1: copy of original exponent word
    278|
    279|
    280| First check the absolute value of the exponent to see if this
    281| routine is necessary.  If so, then check the sign of the exponent
    282| and do append (+) or strip (-) zeros accordingly.
    283| This section handles a positive adjusted exponent.
    284|
    285ap_st_z:
    286	movel	L_SCR1(%a6),%d1	|load expA for range test
    287	cmpl	#27,%d1		|test is with 27
    288	ble	pwrten		|if abs(expA) <28, skip ap/st zeros
    289	btst	#30,(%a0)	|check sign of exp
    290	bne	ap_st_n		|if neg, go to neg side
    291	clrl	%d1		|zero count reg
    292	movel	(%a0),%d4		|load lword 1 to d4
    293	bfextu	%d4{#28:#4},%d0	|get M16 in d0
    294	bnes	ap_p_fx		|if M16 is non-zero, go fix exp
    295	addql	#1,%d1		|inc zero count
    296	moveql	#1,%d5		|init lword counter
    297	movel	(%a0,%d5.L*4),%d4	|get lword 2 to d4
    298	bnes	ap_p_cl		|if lw 2 is zero, skip it
    299	addql	#8,%d1		|and inc count by 8
    300	addql	#1,%d5		|inc lword counter
    301	movel	(%a0,%d5.L*4),%d4	|get lword 3 to d4
    302ap_p_cl:
    303	clrl	%d3		|init offset reg
    304	moveql	#7,%d2		|init digit counter
    305ap_p_gd:
    306	bfextu	%d4{%d3:#4},%d0	|get digit
    307	bnes	ap_p_fx		|if non-zero, go to fix exp
    308	addql	#4,%d3		|point to next digit
    309	addql	#1,%d1		|inc digit counter
    310	dbf	%d2,ap_p_gd	|get next digit
    311ap_p_fx:
    312	movel	%d1,%d0		|copy counter to d2
    313	movel	L_SCR1(%a6),%d1	|get adjusted exp from memory
    314	subl	%d0,%d1		|subtract count from exp
    315	bges	ap_p_fm		|if still pos, go to pwrten
    316	negl	%d1		|now its neg; get abs
    317	movel	(%a0),%d4		|load lword 1 to d4
    318	orl	#0x40000000,%d4	| and set SE in d4
    319	orl	#0x40000000,(%a0)	| and in memory
    320|
    321| Calculate the mantissa multiplier to compensate for the striping of
    322| zeros from the mantissa.
    323|
    324ap_p_fm:
    325	movel	#PTENRN,%a1	|get address of power-of-ten table
    326	clrl	%d3		|init table index
    327	fmoves	FONE,%fp1	|init fp1 to 1
    328	moveql	#3,%d2		|init d2 to count bits in counter
    329ap_p_el:
    330	asrl	#1,%d0		|shift lsb into carry
    331	bccs	ap_p_en		|if 1, mul fp1 by pwrten factor
    332	fmulx	(%a1,%d3),%fp1	|mul by 10**(d3_bit_no)
    333ap_p_en:
    334	addl	#12,%d3		|inc d3 to next rtable entry
    335	tstl	%d0		|check if d0 is zero
    336	bnes	ap_p_el		|if not, get next bit
    337	fmulx	%fp1,%fp0		|mul mantissa by 10**(no_bits_shifted)
    338	bra	pwrten		|go calc pwrten
    339|
    340| This section handles a negative adjusted exponent.
    341|
    342ap_st_n:
    343	clrl	%d1		|clr counter
    344	moveql	#2,%d5		|set up d5 to point to lword 3
    345	movel	(%a0,%d5.L*4),%d4	|get lword 3
    346	bnes	ap_n_cl		|if not zero, check digits
    347	subl	#1,%d5		|dec d5 to point to lword 2
    348	addql	#8,%d1		|inc counter by 8
    349	movel	(%a0,%d5.L*4),%d4	|get lword 2
    350ap_n_cl:
    351	movel	#28,%d3		|point to last digit
    352	moveql	#7,%d2		|init digit counter
    353ap_n_gd:
    354	bfextu	%d4{%d3:#4},%d0	|get digit
    355	bnes	ap_n_fx		|if non-zero, go to exp fix
    356	subql	#4,%d3		|point to previous digit
    357	addql	#1,%d1		|inc digit counter
    358	dbf	%d2,ap_n_gd	|get next digit
    359ap_n_fx:
    360	movel	%d1,%d0		|copy counter to d0
    361	movel	L_SCR1(%a6),%d1	|get adjusted exp from memory
    362	subl	%d0,%d1		|subtract count from exp
    363	bgts	ap_n_fm		|if still pos, go fix mantissa
    364	negl	%d1		|take abs of exp and clr SE
    365	movel	(%a0),%d4		|load lword 1 to d4
    366	andl	#0xbfffffff,%d4	| and clr SE in d4
    367	andl	#0xbfffffff,(%a0)	| and in memory
    368|
    369| Calculate the mantissa multiplier to compensate for the appending of
    370| zeros to the mantissa.
    371|
    372ap_n_fm:
    373	movel	#PTENRN,%a1	|get address of power-of-ten table
    374	clrl	%d3		|init table index
    375	fmoves	FONE,%fp1	|init fp1 to 1
    376	moveql	#3,%d2		|init d2 to count bits in counter
    377ap_n_el:
    378	asrl	#1,%d0		|shift lsb into carry
    379	bccs	ap_n_en		|if 1, mul fp1 by pwrten factor
    380	fmulx	(%a1,%d3),%fp1	|mul by 10**(d3_bit_no)
    381ap_n_en:
    382	addl	#12,%d3		|inc d3 to next rtable entry
    383	tstl	%d0		|check if d0 is zero
    384	bnes	ap_n_el		|if not, get next bit
    385	fdivx	%fp1,%fp0		|div mantissa by 10**(no_bits_shifted)
    386|
    387|
    388| Calculate power-of-ten factor from adjusted and shifted exponent.
    389|
    390| Register usage:
    391|
    392|  pwrten:
    393|	(*)  d0: temp
    394|	( )  d1: exponent
    395|	(*)  d2: {FPCR[6:5],SM,SE} as index in RTABLE; temp
    396|	(*)  d3: FPCR work copy
    397|	( )  d4: first word of bcd
    398|	(*)  a1: RTABLE pointer
    399|  calc_p:
    400|	(*)  d0: temp
    401|	( )  d1: exponent
    402|	(*)  d3: PWRTxx table index
    403|	( )  a0: pointer to working copy of bcd
    404|	(*)  a1: PWRTxx pointer
    405|	(*) fp1: power-of-ten accumulator
    406|
    407| Pwrten calculates the exponent factor in the selected rounding mode
    408| according to the following table:
    409|
    410|	Sign of Mant  Sign of Exp  Rounding Mode  PWRTEN Rounding Mode
    411|
    412|	ANY	  ANY	RN	RN
    413|
    414|	 +	   +	RP	RP
    415|	 -	   +	RP	RM
    416|	 +	   -	RP	RM
    417|	 -	   -	RP	RP
    418|
    419|	 +	   +	RM	RM
    420|	 -	   +	RM	RP
    421|	 +	   -	RM	RP
    422|	 -	   -	RM	RM
    423|
    424|	 +	   +	RZ	RM
    425|	 -	   +	RZ	RM
    426|	 +	   -	RZ	RP
    427|	 -	   -	RZ	RP
    428|
    429|
    430pwrten:
    431	movel	USER_FPCR(%a6),%d3 |get user's FPCR
    432	bfextu	%d3{#26:#2},%d2	|isolate rounding mode bits
    433	movel	(%a0),%d4		|reload 1st bcd word to d4
    434	asll	#2,%d2		|format d2 to be
    435	bfextu	%d4{#0:#2},%d0	| {FPCR[6],FPCR[5],SM,SE}
    436	addl	%d0,%d2		|in d2 as index into RTABLE
    437	leal	RTABLE,%a1	|load rtable base
    438	moveb	(%a1,%d2),%d0	|load new rounding bits from table
    439	clrl	%d3			|clear d3 to force no exc and extended
    440	bfins	%d0,%d3{#26:#2}	|stuff new rounding bits in FPCR
    441	fmovel	%d3,%FPCR		|write new FPCR
    442	asrl	#1,%d0		|write correct PTENxx table
    443	bccs	not_rp		|to a1
    444	leal	PTENRP,%a1	|it is RP
    445	bras	calc_p		|go to init section
    446not_rp:
    447	asrl	#1,%d0		|keep checking
    448	bccs	not_rm
    449	leal	PTENRM,%a1	|it is RM
    450	bras	calc_p		|go to init section
    451not_rm:
    452	leal	PTENRN,%a1	|it is RN
    453calc_p:
    454	movel	%d1,%d0		|copy exp to d0;use d0
    455	bpls	no_neg		|if exp is negative,
    456	negl	%d0		|invert it
    457	orl	#0x40000000,(%a0)	|and set SE bit
    458no_neg:
    459	clrl	%d3		|table index
    460	fmoves	FONE,%fp1	|init fp1 to 1
    461e_loop:
    462	asrl	#1,%d0		|shift next bit into carry
    463	bccs	e_next		|if zero, skip the mul
    464	fmulx	(%a1,%d3),%fp1	|mul by 10**(d3_bit_no)
    465e_next:
    466	addl	#12,%d3		|inc d3 to next rtable entry
    467	tstl	%d0		|check if d0 is zero
    468	bnes	e_loop		|not zero, continue shifting
    469|
    470|
    471|  Check the sign of the adjusted exp and make the value in fp0 the
    472|  same sign. If the exp was pos then multiply fp1*fp0;
    473|  else divide fp0/fp1.
    474|
    475| Register Usage:
    476|  norm:
    477|	( )  a0: pointer to working bcd value
    478|	(*) fp0: mantissa accumulator
    479|	( ) fp1: scaling factor - 10**(abs(exp))
    480|
    481norm:
    482	btst	#30,(%a0)	|test the sign of the exponent
    483	beqs	mul		|if clear, go to multiply
    484div:
    485	fdivx	%fp1,%fp0		|exp is negative, so divide mant by exp
    486	bras	end_dec
    487mul:
    488	fmulx	%fp1,%fp0		|exp is positive, so multiply by exp
    489|
    490|
    491| Clean up and return with result in fp0.
    492|
    493| If the final mul/div in decbin incurred an inex exception,
    494| it will be inex2, but will be reported as inex1 by get_op.
    495|
    496end_dec:
    497	fmovel	%FPSR,%d0		|get status register
    498	bclrl	#inex2_bit+8,%d0	|test for inex2 and clear it
    499	fmovel	%d0,%FPSR		|return status reg w/o inex2
    500	beqs	no_exc		|skip this if no exc
    501	orl	#inx1a_mask,USER_FPSR(%a6) |set inex1/ainex
    502no_exc:
    503	moveml	(%a7)+,%d2-%d5
    504	rts
    505	|end