cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

stwotox.S (12262B)


      1|
      2|	stwotox.sa 3.1 12/10/90
      3|
      4|	stwotox  --- 2**X
      5|	stwotoxd --- 2**X for denormalized X
      6|	stentox  --- 10**X
      7|	stentoxd --- 10**X for denormalized X
      8|
      9|	Input: Double-extended number X in location pointed to
     10|		by address register a0.
     11|
     12|	Output: The function values are returned in Fp0.
     13|
     14|	Accuracy and Monotonicity: The returned result is within 2 ulps in
     15|		64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
     16|		result is subsequently rounded to double precision. The
     17|		result is provably monotonic in double precision.
     18|
     19|	Speed: The program stwotox takes approximately 190 cycles and the
     20|		program stentox takes approximately 200 cycles.
     21|
     22|	Algorithm:
     23|
     24|	twotox
     25|	1. If |X| > 16480, go to ExpBig.
     26|
     27|	2. If |X| < 2**(-70), go to ExpSm.
     28|
     29|	3. Decompose X as X = N/64 + r where |r| <= 1/128. Furthermore
     30|		decompose N as
     31|		 N = 64(M + M') + j,  j = 0,1,2,...,63.
     32|
     33|	4. Overwrite r := r * log2. Then
     34|		2**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r).
     35|		Go to expr to compute that expression.
     36|
     37|	tentox
     38|	1. If |X| > 16480*log_10(2) (base 10 log of 2), go to ExpBig.
     39|
     40|	2. If |X| < 2**(-70), go to ExpSm.
     41|
     42|	3. Set y := X*log_2(10)*64 (base 2 log of 10). Set
     43|		N := round-to-int(y). Decompose N as
     44|		 N = 64(M + M') + j,  j = 0,1,2,...,63.
     45|
     46|	4. Define r as
     47|		r := ((X - N*L1)-N*L2) * L10
     48|		where L1, L2 are the leading and trailing parts of log_10(2)/64
     49|		and L10 is the natural log of 10. Then
     50|		10**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r).
     51|		Go to expr to compute that expression.
     52|
     53|	expr
     54|	1. Fetch 2**(j/64) from table as Fact1 and Fact2.
     55|
     56|	2. Overwrite Fact1 and Fact2 by
     57|		Fact1 := 2**(M) * Fact1
     58|		Fact2 := 2**(M) * Fact2
     59|		Thus Fact1 + Fact2 = 2**(M) * 2**(j/64).
     60|
     61|	3. Calculate P where 1 + P approximates exp(r):
     62|		P = r + r*r*(A1+r*(A2+...+r*A5)).
     63|
     64|	4. Let AdjFact := 2**(M'). Return
     65|		AdjFact * ( Fact1 + ((Fact1*P) + Fact2) ).
     66|		Exit.
     67|
     68|	ExpBig
     69|	1. Generate overflow by Huge * Huge if X > 0; otherwise, generate
     70|		underflow by Tiny * Tiny.
     71|
     72|	ExpSm
     73|	1. Return 1 + X.
     74|
     75
     76|		Copyright (C) Motorola, Inc. 1990
     77|			All Rights Reserved
     78|
     79|       For details on the license for this file, please see the
     80|       file, README, in this same directory.
     81
     82|STWOTOX	idnt	2,1 | Motorola 040 Floating Point Software Package
     83
     84	|section	8
     85
     86#include "fpsp.h"
     87
     88BOUNDS1:	.long 0x3FB98000,0x400D80C0 | ... 2^(-70),16480
     89BOUNDS2:	.long 0x3FB98000,0x400B9B07 | ... 2^(-70),16480 LOG2/LOG10
     90
     91L2TEN64:	.long 0x406A934F,0x0979A371 | ... 64LOG10/LOG2
     92L10TWO1:	.long 0x3F734413,0x509F8000 | ... LOG2/64LOG10
     93
     94L10TWO2:	.long 0xBFCD0000,0xC0219DC1,0xDA994FD2,0x00000000
     95
     96LOG10:	.long 0x40000000,0x935D8DDD,0xAAA8AC17,0x00000000
     97
     98LOG2:	.long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000
     99
    100EXPA5:	.long 0x3F56C16D,0x6F7BD0B2
    101EXPA4:	.long 0x3F811112,0x302C712C
    102EXPA3:	.long 0x3FA55555,0x55554CC1
    103EXPA2:	.long 0x3FC55555,0x55554A54
    104EXPA1:	.long 0x3FE00000,0x00000000,0x00000000,0x00000000
    105
    106HUGE:	.long 0x7FFE0000,0xFFFFFFFF,0xFFFFFFFF,0x00000000
    107TINY:	.long 0x00010000,0xFFFFFFFF,0xFFFFFFFF,0x00000000
    108
    109EXPTBL:
    110	.long  0x3FFF0000,0x80000000,0x00000000,0x3F738000
    111	.long  0x3FFF0000,0x8164D1F3,0xBC030773,0x3FBEF7CA
    112	.long  0x3FFF0000,0x82CD8698,0xAC2BA1D7,0x3FBDF8A9
    113	.long  0x3FFF0000,0x843A28C3,0xACDE4046,0x3FBCD7C9
    114	.long  0x3FFF0000,0x85AAC367,0xCC487B15,0xBFBDE8DA
    115	.long  0x3FFF0000,0x871F6196,0x9E8D1010,0x3FBDE85C
    116	.long  0x3FFF0000,0x88980E80,0x92DA8527,0x3FBEBBF1
    117	.long  0x3FFF0000,0x8A14D575,0x496EFD9A,0x3FBB80CA
    118	.long  0x3FFF0000,0x8B95C1E3,0xEA8BD6E7,0xBFBA8373
    119	.long  0x3FFF0000,0x8D1ADF5B,0x7E5BA9E6,0xBFBE9670
    120	.long  0x3FFF0000,0x8EA4398B,0x45CD53C0,0x3FBDB700
    121	.long  0x3FFF0000,0x9031DC43,0x1466B1DC,0x3FBEEEB0
    122	.long  0x3FFF0000,0x91C3D373,0xAB11C336,0x3FBBFD6D
    123	.long  0x3FFF0000,0x935A2B2F,0x13E6E92C,0xBFBDB319
    124	.long  0x3FFF0000,0x94F4EFA8,0xFEF70961,0x3FBDBA2B
    125	.long  0x3FFF0000,0x96942D37,0x20185A00,0x3FBE91D5
    126	.long  0x3FFF0000,0x9837F051,0x8DB8A96F,0x3FBE8D5A
    127	.long  0x3FFF0000,0x99E04593,0x20B7FA65,0xBFBCDE7B
    128	.long  0x3FFF0000,0x9B8D39B9,0xD54E5539,0xBFBEBAAF
    129	.long  0x3FFF0000,0x9D3ED9A7,0x2CFFB751,0xBFBD86DA
    130	.long  0x3FFF0000,0x9EF53260,0x91A111AE,0xBFBEBEDD
    131	.long  0x3FFF0000,0xA0B0510F,0xB9714FC2,0x3FBCC96E
    132	.long  0x3FFF0000,0xA2704303,0x0C496819,0xBFBEC90B
    133	.long  0x3FFF0000,0xA43515AE,0x09E6809E,0x3FBBD1DB
    134	.long  0x3FFF0000,0xA5FED6A9,0xB15138EA,0x3FBCE5EB
    135	.long  0x3FFF0000,0xA7CD93B4,0xE965356A,0xBFBEC274
    136	.long  0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x3FBEA83C
    137	.long  0x3FFF0000,0xAB7A39B5,0xA93ED337,0x3FBECB00
    138	.long  0x3FFF0000,0xAD583EEA,0x42A14AC6,0x3FBE9301
    139	.long  0x3FFF0000,0xAF3B78AD,0x690A4375,0xBFBD8367
    140	.long  0x3FFF0000,0xB123F581,0xD2AC2590,0xBFBEF05F
    141	.long  0x3FFF0000,0xB311C412,0xA9112489,0x3FBDFB3C
    142	.long  0x3FFF0000,0xB504F333,0xF9DE6484,0x3FBEB2FB
    143	.long  0x3FFF0000,0xB6FD91E3,0x28D17791,0x3FBAE2CB
    144	.long  0x3FFF0000,0xB8FBAF47,0x62FB9EE9,0x3FBCDC3C
    145	.long  0x3FFF0000,0xBAFF5AB2,0x133E45FB,0x3FBEE9AA
    146	.long  0x3FFF0000,0xBD08A39F,0x580C36BF,0xBFBEAEFD
    147	.long  0x3FFF0000,0xBF1799B6,0x7A731083,0xBFBCBF51
    148	.long  0x3FFF0000,0xC12C4CCA,0x66709456,0x3FBEF88A
    149	.long  0x3FFF0000,0xC346CCDA,0x24976407,0x3FBD83B2
    150	.long  0x3FFF0000,0xC5672A11,0x5506DADD,0x3FBDF8AB
    151	.long  0x3FFF0000,0xC78D74C8,0xABB9B15D,0xBFBDFB17
    152	.long  0x3FFF0000,0xC9B9BD86,0x6E2F27A3,0xBFBEFE3C
    153	.long  0x3FFF0000,0xCBEC14FE,0xF2727C5D,0xBFBBB6F8
    154	.long  0x3FFF0000,0xCE248C15,0x1F8480E4,0xBFBCEE53
    155	.long  0x3FFF0000,0xD06333DA,0xEF2B2595,0xBFBDA4AE
    156	.long  0x3FFF0000,0xD2A81D91,0xF12AE45A,0x3FBC9124
    157	.long  0x3FFF0000,0xD4F35AAB,0xCFEDFA1F,0x3FBEB243
    158	.long  0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x3FBDE69A
    159	.long  0x3FFF0000,0xD99D15C2,0x78AFD7B6,0xBFB8BC61
    160	.long  0x3FFF0000,0xDBFBB797,0xDAF23755,0x3FBDF610
    161	.long  0x3FFF0000,0xDE60F482,0x5E0E9124,0xBFBD8BE1
    162	.long  0x3FFF0000,0xE0CCDEEC,0x2A94E111,0x3FBACB12
    163	.long  0x3FFF0000,0xE33F8972,0xBE8A5A51,0x3FBB9BFE
    164	.long  0x3FFF0000,0xE5B906E7,0x7C8348A8,0x3FBCF2F4
    165	.long  0x3FFF0000,0xE8396A50,0x3C4BDC68,0x3FBEF22F
    166	.long  0x3FFF0000,0xEAC0C6E7,0xDD24392F,0xBFBDBF4A
    167	.long  0x3FFF0000,0xED4F301E,0xD9942B84,0x3FBEC01A
    168	.long  0x3FFF0000,0xEFE4B99B,0xDCDAF5CB,0x3FBE8CAC
    169	.long  0x3FFF0000,0xF281773C,0x59FFB13A,0xBFBCBB3F
    170	.long  0x3FFF0000,0xF5257D15,0x2486CC2C,0x3FBEF73A
    171	.long  0x3FFF0000,0xF7D0DF73,0x0AD13BB9,0xBFB8B795
    172	.long  0x3FFF0000,0xFA83B2DB,0x722A033A,0x3FBEF84B
    173	.long  0x3FFF0000,0xFD3E0C0C,0xF486C175,0xBFBEF581
    174
    175	.set	N,L_SCR1
    176
    177	.set	X,FP_SCR1
    178	.set	XDCARE,X+2
    179	.set	XFRAC,X+4
    180
    181	.set	ADJFACT,FP_SCR2
    182
    183	.set	FACT1,FP_SCR3
    184	.set	FACT1HI,FACT1+4
    185	.set	FACT1LOW,FACT1+8
    186
    187	.set	FACT2,FP_SCR4
    188	.set	FACT2HI,FACT2+4
    189	.set	FACT2LOW,FACT2+8
    190
    191	| xref	t_unfl
    192	|xref	t_ovfl
    193	|xref	t_frcinx
    194
    195	.global	stwotoxd
    196stwotoxd:
    197|--ENTRY POINT FOR 2**(X) FOR DENORMALIZED ARGUMENT
    198
    199	fmovel		%d1,%fpcr		| ...set user's rounding mode/precision
    200	fmoves		#0x3F800000,%fp0  | ...RETURN 1 + X
    201	movel		(%a0),%d0
    202	orl		#0x00800001,%d0
    203	fadds		%d0,%fp0
    204	bra		t_frcinx
    205
    206	.global	stwotox
    207stwotox:
    208|--ENTRY POINT FOR 2**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
    209	fmovemx	(%a0),%fp0-%fp0	| ...LOAD INPUT, do not set cc's
    210
    211	movel		(%a0),%d0
    212	movew		4(%a0),%d0
    213	fmovex		%fp0,X(%a6)
    214	andil		#0x7FFFFFFF,%d0
    215
    216	cmpil		#0x3FB98000,%d0		| ...|X| >= 2**(-70)?
    217	bges		TWOOK1
    218	bra		EXPBORS
    219
    220TWOOK1:
    221	cmpil		#0x400D80C0,%d0		| ...|X| > 16480?
    222	bles		TWOMAIN
    223	bra		EXPBORS
    224
    225
    226TWOMAIN:
    227|--USUAL CASE, 2^(-70) <= |X| <= 16480
    228
    229	fmovex		%fp0,%fp1
    230	fmuls		#0x42800000,%fp1  | ...64 * X
    231
    232	fmovel		%fp1,N(%a6)		| ...N = ROUND-TO-INT(64 X)
    233	movel		%d2,-(%sp)
    234	lea		EXPTBL,%a1	| ...LOAD ADDRESS OF TABLE OF 2^(J/64)
    235	fmovel		N(%a6),%fp1		| ...N --> FLOATING FMT
    236	movel		N(%a6),%d0
    237	movel		%d0,%d2
    238	andil		#0x3F,%d0		| ...D0 IS J
    239	asll		#4,%d0		| ...DISPLACEMENT FOR 2^(J/64)
    240	addal		%d0,%a1		| ...ADDRESS FOR 2^(J/64)
    241	asrl		#6,%d2		| ...d2 IS L, N = 64L + J
    242	movel		%d2,%d0
    243	asrl		#1,%d0		| ...D0 IS M
    244	subl		%d0,%d2		| ...d2 IS M', N = 64(M+M') + J
    245	addil		#0x3FFF,%d2
    246	movew		%d2,ADJFACT(%a6)	| ...ADJFACT IS 2^(M')
    247	movel		(%sp)+,%d2
    248|--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
    249|--D0 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
    250|--ADJFACT = 2^(M').
    251|--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2.
    252
    253	fmuls		#0x3C800000,%fp1  | ...(1/64)*N
    254	movel		(%a1)+,FACT1(%a6)
    255	movel		(%a1)+,FACT1HI(%a6)
    256	movel		(%a1)+,FACT1LOW(%a6)
    257	movew		(%a1)+,FACT2(%a6)
    258	clrw		FACT2+2(%a6)
    259
    260	fsubx		%fp1,%fp0		| ...X - (1/64)*INT(64 X)
    261
    262	movew		(%a1)+,FACT2HI(%a6)
    263	clrw		FACT2HI+2(%a6)
    264	clrl		FACT2LOW(%a6)
    265	addw		%d0,FACT1(%a6)
    266
    267	fmulx		LOG2,%fp0	| ...FP0 IS R
    268	addw		%d0,FACT2(%a6)
    269
    270	bra		expr
    271
    272EXPBORS:
    273|--FPCR, D0 SAVED
    274	cmpil		#0x3FFF8000,%d0
    275	bgts		EXPBIG
    276
    277EXPSM:
    278|--|X| IS SMALL, RETURN 1 + X
    279
    280	fmovel		%d1,%FPCR		|restore users exceptions
    281	fadds		#0x3F800000,%fp0  | ...RETURN 1 + X
    282
    283	bra		t_frcinx
    284
    285EXPBIG:
    286|--|X| IS LARGE, GENERATE OVERFLOW IF X > 0; ELSE GENERATE UNDERFLOW
    287|--REGISTERS SAVE SO FAR ARE FPCR AND  D0
    288	movel		X(%a6),%d0
    289	cmpil		#0,%d0
    290	blts		EXPNEG
    291
    292	bclrb		#7,(%a0)		|t_ovfl expects positive value
    293	bra		t_ovfl
    294
    295EXPNEG:
    296	bclrb		#7,(%a0)		|t_unfl expects positive value
    297	bra		t_unfl
    298
    299	.global	stentoxd
    300stentoxd:
    301|--ENTRY POINT FOR 10**(X) FOR DENORMALIZED ARGUMENT
    302
    303	fmovel		%d1,%fpcr		| ...set user's rounding mode/precision
    304	fmoves		#0x3F800000,%fp0  | ...RETURN 1 + X
    305	movel		(%a0),%d0
    306	orl		#0x00800001,%d0
    307	fadds		%d0,%fp0
    308	bra		t_frcinx
    309
    310	.global	stentox
    311stentox:
    312|--ENTRY POINT FOR 10**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
    313	fmovemx	(%a0),%fp0-%fp0	| ...LOAD INPUT, do not set cc's
    314
    315	movel		(%a0),%d0
    316	movew		4(%a0),%d0
    317	fmovex		%fp0,X(%a6)
    318	andil		#0x7FFFFFFF,%d0
    319
    320	cmpil		#0x3FB98000,%d0		| ...|X| >= 2**(-70)?
    321	bges		TENOK1
    322	bra		EXPBORS
    323
    324TENOK1:
    325	cmpil		#0x400B9B07,%d0		| ...|X| <= 16480*log2/log10 ?
    326	bles		TENMAIN
    327	bra		EXPBORS
    328
    329TENMAIN:
    330|--USUAL CASE, 2^(-70) <= |X| <= 16480 LOG 2 / LOG 10
    331
    332	fmovex		%fp0,%fp1
    333	fmuld		L2TEN64,%fp1	| ...X*64*LOG10/LOG2
    334
    335	fmovel		%fp1,N(%a6)		| ...N=INT(X*64*LOG10/LOG2)
    336	movel		%d2,-(%sp)
    337	lea		EXPTBL,%a1	| ...LOAD ADDRESS OF TABLE OF 2^(J/64)
    338	fmovel		N(%a6),%fp1		| ...N --> FLOATING FMT
    339	movel		N(%a6),%d0
    340	movel		%d0,%d2
    341	andil		#0x3F,%d0		| ...D0 IS J
    342	asll		#4,%d0		| ...DISPLACEMENT FOR 2^(J/64)
    343	addal		%d0,%a1		| ...ADDRESS FOR 2^(J/64)
    344	asrl		#6,%d2		| ...d2 IS L, N = 64L + J
    345	movel		%d2,%d0
    346	asrl		#1,%d0		| ...D0 IS M
    347	subl		%d0,%d2		| ...d2 IS M', N = 64(M+M') + J
    348	addil		#0x3FFF,%d2
    349	movew		%d2,ADJFACT(%a6)	| ...ADJFACT IS 2^(M')
    350	movel		(%sp)+,%d2
    351
    352|--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
    353|--D0 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
    354|--ADJFACT = 2^(M').
    355|--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2.
    356
    357	fmovex		%fp1,%fp2
    358
    359	fmuld		L10TWO1,%fp1	| ...N*(LOG2/64LOG10)_LEAD
    360	movel		(%a1)+,FACT1(%a6)
    361
    362	fmulx		L10TWO2,%fp2	| ...N*(LOG2/64LOG10)_TRAIL
    363
    364	movel		(%a1)+,FACT1HI(%a6)
    365	movel		(%a1)+,FACT1LOW(%a6)
    366	fsubx		%fp1,%fp0		| ...X - N L_LEAD
    367	movew		(%a1)+,FACT2(%a6)
    368
    369	fsubx		%fp2,%fp0		| ...X - N L_TRAIL
    370
    371	clrw		FACT2+2(%a6)
    372	movew		(%a1)+,FACT2HI(%a6)
    373	clrw		FACT2HI+2(%a6)
    374	clrl		FACT2LOW(%a6)
    375
    376	fmulx		LOG10,%fp0	| ...FP0 IS R
    377
    378	addw		%d0,FACT1(%a6)
    379	addw		%d0,FACT2(%a6)
    380
    381expr:
    382|--FPCR, FP2, FP3 ARE SAVED IN ORDER AS SHOWN.
    383|--ADJFACT CONTAINS 2**(M'), FACT1 + FACT2 = 2**(M) * 2**(J/64).
    384|--FP0 IS R. THE FOLLOWING CODE COMPUTES
    385|--	2**(M'+M) * 2**(J/64) * EXP(R)
    386
    387	fmovex		%fp0,%fp1
    388	fmulx		%fp1,%fp1		| ...FP1 IS S = R*R
    389
    390	fmoved		EXPA5,%fp2	| ...FP2 IS A5
    391	fmoved		EXPA4,%fp3	| ...FP3 IS A4
    392
    393	fmulx		%fp1,%fp2		| ...FP2 IS S*A5
    394	fmulx		%fp1,%fp3		| ...FP3 IS S*A4
    395
    396	faddd		EXPA3,%fp2	| ...FP2 IS A3+S*A5
    397	faddd		EXPA2,%fp3	| ...FP3 IS A2+S*A4
    398
    399	fmulx		%fp1,%fp2		| ...FP2 IS S*(A3+S*A5)
    400	fmulx		%fp1,%fp3		| ...FP3 IS S*(A2+S*A4)
    401
    402	faddd		EXPA1,%fp2	| ...FP2 IS A1+S*(A3+S*A5)
    403	fmulx		%fp0,%fp3		| ...FP3 IS R*S*(A2+S*A4)
    404
    405	fmulx		%fp1,%fp2		| ...FP2 IS S*(A1+S*(A3+S*A5))
    406	faddx		%fp3,%fp0		| ...FP0 IS R+R*S*(A2+S*A4)
    407
    408	faddx		%fp2,%fp0		| ...FP0 IS EXP(R) - 1
    409
    410
    411|--FINAL RECONSTRUCTION PROCESS
    412|--EXP(X) = 2^M*2^(J/64) + 2^M*2^(J/64)*(EXP(R)-1)  -  (1 OR 0)
    413
    414	fmulx		FACT1(%a6),%fp0
    415	faddx		FACT2(%a6),%fp0
    416	faddx		FACT1(%a6),%fp0
    417
    418	fmovel		%d1,%FPCR		|restore users exceptions
    419	clrw		ADJFACT+2(%a6)
    420	movel		#0x80000000,ADJFACT+4(%a6)
    421	clrl		ADJFACT+8(%a6)
    422	fmulx		ADJFACT(%a6),%fp0	| ...FINAL ADJUSTMENT
    423
    424	bra		t_frcinx
    425
    426	|end