cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

slogn.S (19468B)


      1|
      2|	slogn.sa 3.1 12/10/90
      3|
      4|	slogn computes the natural logarithm of an
      5|	input value. slognd does the same except the input value is a
      6|	denormalized number. slognp1 computes log(1+X), and slognp1d
      7|	computes log(1+X) for denormalized X.
      8|
      9|	Input: Double-extended value in memory location pointed to by address
     10|		register a0.
     11|
     12|	Output:	log(X) or log(1+X) returned in floating-point register Fp0.
     13|
     14|	Accuracy and Monotonicity: The returned result is within 2 ulps in
     15|		64 significant bits, i.e. within 0.5001 ulp to 53 bits if the
     16|		result is subsequently rounded to double precision. The
     17|		result is provably monotonic in double precision.
     18|
     19|	Speed: The program slogn takes approximately 190 cycles for input
     20|		argument X such that |X-1| >= 1/16, which is the usual
     21|		situation. For those arguments, slognp1 takes approximately
     22|		 210 cycles. For the less common arguments, the program will
     23|		 run no worse than 10% slower.
     24|
     25|	Algorithm:
     26|	LOGN:
     27|	Step 1. If |X-1| < 1/16, approximate log(X) by an odd polynomial in
     28|		u, where u = 2(X-1)/(X+1). Otherwise, move on to Step 2.
     29|
     30|	Step 2. X = 2**k * Y where 1 <= Y < 2. Define F to be the first seven
     31|		significant bits of Y plus 2**(-7), i.e. F = 1.xxxxxx1 in base
     32|		2 where the six "x" match those of Y. Note that |Y-F| <= 2**(-7).
     33|
     34|	Step 3. Define u = (Y-F)/F. Approximate log(1+u) by a polynomial in u,
     35|		log(1+u) = poly.
     36|
     37|	Step 4. Reconstruct log(X) = log( 2**k * Y ) = k*log(2) + log(F) + log(1+u)
     38|		by k*log(2) + (log(F) + poly). The values of log(F) are calculated
     39|		beforehand and stored in the program.
     40|
     41|	lognp1:
     42|	Step 1: If |X| < 1/16, approximate log(1+X) by an odd polynomial in
     43|		u where u = 2X/(2+X). Otherwise, move on to Step 2.
     44|
     45|	Step 2: Let 1+X = 2**k * Y, where 1 <= Y < 2. Define F as done in Step 2
     46|		of the algorithm for LOGN and compute log(1+X) as
     47|		k*log(2) + log(F) + poly where poly approximates log(1+u),
     48|		u = (Y-F)/F.
     49|
     50|	Implementation Notes:
     51|	Note 1. There are 64 different possible values for F, thus 64 log(F)'s
     52|		need to be tabulated. Moreover, the values of 1/F are also
     53|		tabulated so that the division in (Y-F)/F can be performed by a
     54|		multiplication.
     55|
     56|	Note 2. In Step 2 of lognp1, in order to preserve accuracy, the value
     57|		Y-F has to be calculated carefully when 1/2 <= X < 3/2.
     58|
     59|	Note 3. To fully exploit the pipeline, polynomials are usually separated
     60|		into two parts evaluated independently before being added up.
     61|
     62
     63|		Copyright (C) Motorola, Inc. 1990
     64|			All Rights Reserved
     65|
     66|       For details on the license for this file, please see the
     67|       file, README, in this same directory.
     68
     69|slogn	idnt	2,1 | Motorola 040 Floating Point Software Package
     70
     71	|section	8
     72
     73#include "fpsp.h"
     74
     75BOUNDS1:  .long 0x3FFEF07D,0x3FFF8841	| ...cmp2l bounds on compact X, about [exp(-1/16), exp(1/16)]
     76BOUNDS2:  .long 0x3FFE8000,0x3FFFC000	| ...cmp2l bounds on compact X, [1/2, 3/2]
     77
     78LOGOF2:	.long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000	| ...log(2), extended (operand of fmulx)
     79
     80one:	.long 0x3F800000	| ...1.0, single (operand of fadds/fsubs/fmoves)
     81zero:	.long 0x00000000	| ...0.0, single
     82infty:	.long 0x7F800000	| ...single +infinity pattern; no reference visible in this file
     83negone:	.long 0xBF800000	| ...-1.0, single
     84
     85LOGA6:	.long 0x3FC2499A,0xB5E4040B	| ...A1..A6: double coefficients used at LP1CONT1
     86LOGA5:	.long 0xBFC555B5,0x848CB7DB
     87
     88LOGA4:	.long 0x3FC99999,0x987D8730
     89LOGA3:	.long 0xBFCFFFFF,0xFF6F7E97
     90
     91LOGA2:	.long 0x3FD55555,0x555555a4
     92LOGA1:	.long 0xBFE00000,0x00000008
     93
     94LOGB5:	.long 0x3F175496,0xADD7DAD6	| ...B1..B5: double coefficients used at LP1CONT2
     95LOGB4:	.long 0x3F3C71C2,0xFE80C7E0
     96
     97LOGB3:	.long 0x3F624924,0x928BCCFF
     98LOGB2:	.long 0x3F899999,0x999995EC
     99
    100LOGB1:	.long 0x3FB55555,0x55555555
    101TWO:	.long 0x40000000,0x00000000	| ...2.0; KISNEG1 loads the first long with fmoves
    102
    103LTHOLD:	.long 0x3f990000,0x80000000,0x00000000,0x00000000	| ...2^(-102), extended; slognp1 returns X itself unless |X| exceeds it
    104
    105LOGTBL:
       |--64 pairs of 16-byte extended-precision entries, one pair per value of F.
       |--Within a pair the first entry is 1/F (multiplied in at LP1CONT1) and the
       |--second, 16 bytes further on, is log(F) (added in after "addal #16,%a0").
       |--The byte offset of a pair is 32 times the six fraction bits of F that
       |--follow its leading 1 (see the displacement computation in LOGMAIN).
    106	.long  0x3FFE0000,0xFE03F80F,0xE03F80FE,0x00000000
    107	.long  0x3FF70000,0xFF015358,0x833C47E2,0x00000000
    108	.long  0x3FFE0000,0xFA232CF2,0x52138AC0,0x00000000
    109	.long  0x3FF90000,0xBDC8D83E,0xAD88D549,0x00000000
    110	.long  0x3FFE0000,0xF6603D98,0x0F6603DA,0x00000000
    111	.long  0x3FFA0000,0x9CF43DCF,0xF5EAFD48,0x00000000
    112	.long  0x3FFE0000,0xF2B9D648,0x0F2B9D65,0x00000000
    113	.long  0x3FFA0000,0xDA16EB88,0xCB8DF614,0x00000000
    114	.long  0x3FFE0000,0xEF2EB71F,0xC4345238,0x00000000
    115	.long  0x3FFB0000,0x8B29B775,0x1BD70743,0x00000000
    116	.long  0x3FFE0000,0xEBBDB2A5,0xC1619C8C,0x00000000
    117	.long  0x3FFB0000,0xA8D839F8,0x30C1FB49,0x00000000
    118	.long  0x3FFE0000,0xE865AC7B,0x7603A197,0x00000000
    119	.long  0x3FFB0000,0xC61A2EB1,0x8CD907AD,0x00000000
    120	.long  0x3FFE0000,0xE525982A,0xF70C880E,0x00000000
    121	.long  0x3FFB0000,0xE2F2A47A,0xDE3A18AF,0x00000000
    122	.long  0x3FFE0000,0xE1FC780E,0x1FC780E2,0x00000000
    123	.long  0x3FFB0000,0xFF64898E,0xDF55D551,0x00000000
    124	.long  0x3FFE0000,0xDEE95C4C,0xA037BA57,0x00000000
    125	.long  0x3FFC0000,0x8DB956A9,0x7B3D0148,0x00000000
    126	.long  0x3FFE0000,0xDBEB61EE,0xD19C5958,0x00000000
    127	.long  0x3FFC0000,0x9B8FE100,0xF47BA1DE,0x00000000
    128	.long  0x3FFE0000,0xD901B203,0x6406C80E,0x00000000
    129	.long  0x3FFC0000,0xA9372F1D,0x0DA1BD17,0x00000000
    130	.long  0x3FFE0000,0xD62B80D6,0x2B80D62C,0x00000000
    131	.long  0x3FFC0000,0xB6B07F38,0xCE90E46B,0x00000000
    132	.long  0x3FFE0000,0xD3680D36,0x80D3680D,0x00000000
    133	.long  0x3FFC0000,0xC3FD0329,0x06488481,0x00000000
    134	.long  0x3FFE0000,0xD0B69FCB,0xD2580D0B,0x00000000
    135	.long  0x3FFC0000,0xD11DE0FF,0x15AB18CA,0x00000000
    136	.long  0x3FFE0000,0xCE168A77,0x25080CE1,0x00000000
    137	.long  0x3FFC0000,0xDE1433A1,0x6C66B150,0x00000000
    138	.long  0x3FFE0000,0xCB8727C0,0x65C393E0,0x00000000
    139	.long  0x3FFC0000,0xEAE10B5A,0x7DDC8ADD,0x00000000
    140	.long  0x3FFE0000,0xC907DA4E,0x871146AD,0x00000000
    141	.long  0x3FFC0000,0xF7856E5E,0xE2C9B291,0x00000000
    142	.long  0x3FFE0000,0xC6980C69,0x80C6980C,0x00000000
    143	.long  0x3FFD0000,0x82012CA5,0xA68206D7,0x00000000
    144	.long  0x3FFE0000,0xC4372F85,0x5D824CA6,0x00000000
    145	.long  0x3FFD0000,0x882C5FCD,0x7256A8C5,0x00000000
    146	.long  0x3FFE0000,0xC1E4BBD5,0x95F6E947,0x00000000
    147	.long  0x3FFD0000,0x8E44C60B,0x4CCFD7DE,0x00000000
    148	.long  0x3FFE0000,0xBFA02FE8,0x0BFA02FF,0x00000000
    149	.long  0x3FFD0000,0x944AD09E,0xF4351AF6,0x00000000
    150	.long  0x3FFE0000,0xBD691047,0x07661AA3,0x00000000
    151	.long  0x3FFD0000,0x9A3EECD4,0xC3EAA6B2,0x00000000
    152	.long  0x3FFE0000,0xBB3EE721,0xA54D880C,0x00000000
    153	.long  0x3FFD0000,0xA0218434,0x353F1DE8,0x00000000
    154	.long  0x3FFE0000,0xB92143FA,0x36F5E02E,0x00000000
    155	.long  0x3FFD0000,0xA5F2FCAB,0xBBC506DA,0x00000000
    156	.long  0x3FFE0000,0xB70FBB5A,0x19BE3659,0x00000000
    157	.long  0x3FFD0000,0xABB3B8BA,0x2AD362A5,0x00000000
    158	.long  0x3FFE0000,0xB509E68A,0x9B94821F,0x00000000
    159	.long  0x3FFD0000,0xB1641795,0xCE3CA97B,0x00000000
    160	.long  0x3FFE0000,0xB30F6352,0x8917C80B,0x00000000
    161	.long  0x3FFD0000,0xB7047551,0x5D0F1C61,0x00000000
    162	.long  0x3FFE0000,0xB11FD3B8,0x0B11FD3C,0x00000000
    163	.long  0x3FFD0000,0xBC952AFE,0xEA3D13E1,0x00000000
    164	.long  0x3FFE0000,0xAF3ADDC6,0x80AF3ADE,0x00000000
    165	.long  0x3FFD0000,0xC2168ED0,0xF458BA4A,0x00000000
    166	.long  0x3FFE0000,0xAD602B58,0x0AD602B6,0x00000000
    167	.long  0x3FFD0000,0xC788F439,0xB3163BF1,0x00000000
    168	.long  0x3FFE0000,0xAB8F69E2,0x8359CD11,0x00000000
    169	.long  0x3FFD0000,0xCCECAC08,0xBF04565D,0x00000000
    170	.long  0x3FFE0000,0xA9C84A47,0xA07F5638,0x00000000
    171	.long  0x3FFD0000,0xD2420487,0x2DD85160,0x00000000
    172	.long  0x3FFE0000,0xA80A80A8,0x0A80A80B,0x00000000
    173	.long  0x3FFD0000,0xD7894992,0x3BC3588A,0x00000000
    174	.long  0x3FFE0000,0xA655C439,0x2D7B73A8,0x00000000
    175	.long  0x3FFD0000,0xDCC2C4B4,0x9887DACC,0x00000000
    176	.long  0x3FFE0000,0xA4A9CF1D,0x96833751,0x00000000
    177	.long  0x3FFD0000,0xE1EEBD3E,0x6D6A6B9E,0x00000000
    178	.long  0x3FFE0000,0xA3065E3F,0xAE7CD0E0,0x00000000
    179	.long  0x3FFD0000,0xE70D785C,0x2F9F5BDC,0x00000000
    180	.long  0x3FFE0000,0xA16B312E,0xA8FC377D,0x00000000
    181	.long  0x3FFD0000,0xEC1F392C,0x5179F283,0x00000000
    182	.long  0x3FFE0000,0x9FD809FD,0x809FD80A,0x00000000
    183	.long  0x3FFD0000,0xF12440D3,0xE36130E6,0x00000000
    184	.long  0x3FFE0000,0x9E4CAD23,0xDD5F3A20,0x00000000
    185	.long  0x3FFD0000,0xF61CCE92,0x346600BB,0x00000000
    186	.long  0x3FFE0000,0x9CC8E160,0xC3FB19B9,0x00000000
    187	.long  0x3FFD0000,0xFB091FD3,0x8145630A,0x00000000
    188	.long  0x3FFE0000,0x9B4C6F9E,0xF03A3CAA,0x00000000
    189	.long  0x3FFD0000,0xFFE97042,0xBFA4C2AD,0x00000000
    190	.long  0x3FFE0000,0x99D722DA,0xBDE58F06,0x00000000
    191	.long  0x3FFE0000,0x825EFCED,0x49369330,0x00000000
    192	.long  0x3FFE0000,0x9868C809,0x868C8098,0x00000000
    193	.long  0x3FFE0000,0x84C37A7A,0xB9A905C9,0x00000000
    194	.long  0x3FFE0000,0x97012E02,0x5C04B809,0x00000000
    195	.long  0x3FFE0000,0x87224C2E,0x8E645FB7,0x00000000
    196	.long  0x3FFE0000,0x95A02568,0x095A0257,0x00000000
    197	.long  0x3FFE0000,0x897B8CAC,0x9F7DE298,0x00000000
    198	.long  0x3FFE0000,0x94458094,0x45809446,0x00000000
    199	.long  0x3FFE0000,0x8BCF55DE,0xC4CD05FE,0x00000000
    200	.long  0x3FFE0000,0x92F11384,0x0497889C,0x00000000
    201	.long  0x3FFE0000,0x8E1DC0FB,0x89E125E5,0x00000000
    202	.long  0x3FFE0000,0x91A2B3C4,0xD5E6F809,0x00000000
    203	.long  0x3FFE0000,0x9066E68C,0x955B6C9B,0x00000000
    204	.long  0x3FFE0000,0x905A3863,0x3E06C43B,0x00000000
    205	.long  0x3FFE0000,0x92AADE74,0xC7BE59E0,0x00000000
    206	.long  0x3FFE0000,0x8F1779D9,0xFDC3A219,0x00000000
    207	.long  0x3FFE0000,0x94E9BFF6,0x15845643,0x00000000
    208	.long  0x3FFE0000,0x8DDA5202,0x37694809,0x00000000
    209	.long  0x3FFE0000,0x9723A1B7,0x20134203,0x00000000
    210	.long  0x3FFE0000,0x8CA29C04,0x6514E023,0x00000000
    211	.long  0x3FFE0000,0x995899C8,0x90EB8990,0x00000000
    212	.long  0x3FFE0000,0x8B70344A,0x139BC75A,0x00000000
    213	.long  0x3FFE0000,0x9B88BDAA,0x3A3DAE2F,0x00000000
    214	.long  0x3FFE0000,0x8A42F870,0x5669DB46,0x00000000
    215	.long  0x3FFE0000,0x9DB4224F,0xFFE1157C,0x00000000
    216	.long  0x3FFE0000,0x891AC73A,0xE9819B50,0x00000000
    217	.long  0x3FFE0000,0x9FDADC26,0x8B7A12DA,0x00000000
    218	.long  0x3FFE0000,0x87F78087,0xF78087F8,0x00000000
    219	.long  0x3FFE0000,0xA1FCFF17,0xCE733BD4,0x00000000
    220	.long  0x3FFE0000,0x86D90544,0x7A34ACC6,0x00000000
    221	.long  0x3FFE0000,0xA41A9E8F,0x5446FB9F,0x00000000
    222	.long  0x3FFE0000,0x85BF3761,0x2CEE3C9B,0x00000000
    223	.long  0x3FFE0000,0xA633CD7E,0x6771CD8B,0x00000000
    224	.long  0x3FFE0000,0x84A9F9C8,0x084A9F9D,0x00000000
    225	.long  0x3FFE0000,0xA8489E60,0x0B435A5E,0x00000000
    226	.long  0x3FFE0000,0x83993052,0x3FBE3368,0x00000000
    227	.long  0x3FFE0000,0xAA59233C,0xCCA4BD49,0x00000000
    228	.long  0x3FFE0000,0x828CBFBE,0xB9A020A3,0x00000000
    229	.long  0x3FFE0000,0xAC656DAE,0x6BCC4985,0x00000000
    230	.long  0x3FFE0000,0x81848DA8,0xFAF0D277,0x00000000
    231	.long  0x3FFE0000,0xAE6D8EE3,0x60BB2468,0x00000000
    232	.long  0x3FFE0000,0x80808080,0x80808081,0x00000000
    233	.long  0x3FFE0000,0xB07197A2,0x3C46C654,0x00000000
    234
    235	.set	ADJK,L_SCR1	| ...exponent adjustment, added to K in LOGMAIN
    236
    237	.set	X,FP_SCR1	| ...working copy of the argument (12 bytes are written)
    238	.set	XDCARE,X+2	| ...word following the sign/exponent word of X
    239	.set	XFRAC,X+4	| ...upper fraction longword of X
    240
    241	.set	F,FP_SCR2	| ...F assembled as an extended value
    242	.set	FFRAC,F+4	| ...upper fraction longword of F
    243
    244	.set	KLOG2,FP_SCR3	| ...K*log(2), stored at LP1CONT1
    245
    246	.set	SAVEU,FP_SCR4	| ...U, stored at LP1CONT2
    247
       |--L_SCR1 and FP_SCR1..FP_SCR4 are not defined in this file; presumably
       |--they come from fpsp.h as a6-relative scratch offsets (confirm there).
       |--The t_* routines listed below are branch targets defined elsewhere.
    248	| xref	t_frcinx
    249	|xref	t_extdnrm
    250	|xref	t_operr
    251	|xref	t_dz
    252
    253	.global	slognd
    254slognd:
    255|--ENTRY POINT FOR LOG(X) FOR DENORMALIZED INPUT
       |--In:  a0 points at the extended-precision argument.
       |--Shifts the 64-bit fraction left until its top bit is set, stores the
       |--result at X(a6) with an all-zero first longword, records minus the
       |--shift count in ADJK, and continues at LOGBGN with a0 = address of X.
       |--d2-d7 are pushed on entry and popped again before the branch.
    256
    257	movel		#-100,ADJK(%a6)	| ...INPUT = 2^(ADJK) * FP0
    258
    259|----normalize the input value by left shifting k bits (k to be determined
    260|----below), adjusting exponent and storing -k to  ADJK
    261|----the value TWOTO100 is no longer needed.
    262|----Note that this code assumes the denormalized input is NON-ZERO.
       |----Both paths below overwrite the -100 written to ADJK above.
    263
    264     moveml	%d2-%d7,-(%a7)		| ...save some registers
    265     movel	#0x00000000,%d3		| ...D3 is exponent of smallest norm. #
    266     movel	4(%a0),%d4
    267     movel	8(%a0),%d5		| ...(D4,D5) is (Hi_X,Lo_X)
    268     clrl	%d2			| ...D2 used for holding K
    269
    270     tstl	%d4
    271     bnes	HiX_not0
    272
    273HiX_0:
       |--Hi_X is zero: Lo_X becomes the high longword (a shift by 32) and is
       |--then shifted further by its own leading-zero count.
    274     movel	%d5,%d4
    275     clrl	%d5
    276     movel	#32,%d2
    277     clrl	%d6
    278     bfffo      %d4{#0:#32},%d6
    279     lsll      %d6,%d4
    280     addl	%d6,%d2			| ...(D3,D4,D5) is normalized
    281
    282     movel	%d3,X(%a6)
    283     movel	%d4,XFRAC(%a6)
    284     movel	%d5,XFRAC+4(%a6)
    285     negl	%d2
    286     movel	%d2,ADJK(%a6)
    287     fmovex	X(%a6),%fp0
    288     moveml	(%a7)+,%d2-%d7		| ...restore registers
    289     lea	X(%a6),%a0
    290     bras	LOGBGN			| ...begin regular log(X)
    291
    292
    293HiX_not0:
       |--Hi_X is non-zero: shift the pair (D4,D5) left by k, carrying the bits
       |--shifted out of D5 into the low end of D4 through D7.
    294     clrl	%d6
    295     bfffo	%d4{#0:#32},%d6		| ...find first 1
    296     movel	%d6,%d2			| ...get k
    297     lsll	%d6,%d4
    298     movel	%d5,%d7			| ...a copy of D5
    299     lsll	%d6,%d5
    300     negl	%d6
    301     addil	#32,%d6
    302     lsrl	%d6,%d7
    303     orl	%d7,%d4			| ...(D3,D4,D5) normalized
    304
    305     movel	%d3,X(%a6)
    306     movel	%d4,XFRAC(%a6)
    307     movel	%d5,XFRAC+4(%a6)
    308     negl	%d2
    309     movel	%d2,ADJK(%a6)
    310     fmovex	X(%a6),%fp0
    311     moveml	(%a7)+,%d2-%d7		| ...restore registers
    312     lea	X(%a6),%a0
    313     bras	LOGBGN			| ...begin regular log(X)
    314
    315
    316	.global	slogn
    317slogn:
    318|--ENTRY POINT FOR LOG(X) FOR X FINITE, NON-ZERO, NOT NAN'S
       |--In:  a0 points at the extended-precision argument.
       |--ADJK is set to zero here; slognd enters at LOGBGN with its own ADJK.
    319
    320	fmovex		(%a0),%fp0	| ...LOAD INPUT
    321	movel		#0x00000000,ADJK(%a6)
    322
    323LOGBGN:
    324|--FPCR SAVED AND CLEARED, INPUT IS 2^(ADJK)*FP0, FP0 CONTAINS
    325|--A FINITE, NON-ZERO, NORMALIZED NUMBER.
       |--D0 is built as the "compact form" of X: sign/exponent word in the upper
       |--half, top 16 fraction bits in the lower half. The argument is also
       |--copied to X(a6). Negative X goes to LOGNEG, X inside BOUNDS1 goes to
       |--LOGNEAR1, everything else falls through to LOGMAIN.
    326
    327	movel	(%a0),%d0
    328	movew	4(%a0),%d0
    329
    330	movel	(%a0),X(%a6)
    331	movel	4(%a0),X+4(%a6)
    332	movel	8(%a0),X+8(%a6)
    333
    334	cmpil	#0,%d0		| ...CHECK IF X IS NEGATIVE
    335	blt	LOGNEG		| ...LOG OF NEGATIVE ARGUMENT IS INVALID
    336	cmp2l	BOUNDS1,%d0	| ...X IS POSITIVE, CHECK IF X IS NEAR 1
    337	bcc	LOGNEAR1	| ...BOUNDS IS ROUGHLY [15/16, 17/16]
    338
    339LOGMAIN:
    340|--THIS SHOULD BE THE USUAL CASE, X NOT VERY CLOSE TO 1
       |--Entered by fall-through from LOGBGN and by branch from LP1REAL.
       |--Expects D0 = compact form of X, X(a6) = X, ADJK(a6) = exponent adjust.
    341
    342|--X = 2^(K) * Y, 1 <= Y < 2. THUS, Y = 1.XXXXXXXX....XX IN BINARY.
    343|--WE DEFINE F = 1.XXXXXX1, I.E. FIRST 7 BITS OF Y AND ATTACH A 1.
    344|--THE IDEA IS THAT LOG(X) = K*LOG2 + LOG(Y)
    345|--			 = K*LOG2 + LOG(F) + LOG(1 + (Y-F)/F).
    346|--NOTE THAT U = (Y-F)/F IS VERY SMALL AND THUS APPROXIMATING
    347|--LOG(1+U) CAN BE VERY EFFICIENT.
    348|--ALSO NOTE THAT THE VALUE 1/F IS STORED IN A TABLE SO THAT NO
    349|--DIVISION IS NEEDED TO CALCULATE (Y-F)/F.
    350
    351|--GET K, Y, F, AND ADDRESS OF 1/F.
    352	asrl	#8,%d0
    353	asrl	#8,%d0		| ...SHIFTED 16 BITS, BIASED EXPO. OF X
    354	subil	#0x3FFF,%d0	| ...THIS IS K
    355	addl	ADJK(%a6),%d0	| ...ADJUST K, ORIGINAL INPUT MAY BE  DENORM.
    356	lea	LOGTBL,%a0	| ...BASE ADDRESS OF 1/F AND LOG(F)
    357	fmovel	%d0,%fp1		| ...CONVERT K TO FLOATING-POINT FORMAT
    358
    359|--WHILE THE CONVERSION IS GOING ON, WE GET F AND ADDRESS OF 1/F
    360	movel	#0x3FFF0000,X(%a6)	| ...X IS NOW Y, I.E. 2^(-K)*X
    361	movel	XFRAC(%a6),FFRAC(%a6)
    362	andil	#0xFE000000,FFRAC(%a6) | ...FIRST 7 BITS OF Y
    363	oril	#0x01000000,FFRAC(%a6) | ...GET F: ATTACH A 1 AT THE EIGHTH BIT
    364	movel	FFRAC(%a6),%d0	| ...READY TO GET ADDRESS OF 1/F
    365	andil	#0x7E000000,%d0
    366	asrl	#8,%d0
    367	asrl	#8,%d0
    368	asrl	#4,%d0		| ...SHIFTED 20, D0 IS THE DISPLACEMENT
       |--The six masked bits end up in bits 5-10, so D0 is a multiple of 32,
       |--the size of one (1/F, log(F)) pair in LOGTBL.
    369	addal	%d0,%a0		| ...A0 IS THE ADDRESS FOR 1/F
    370
    371	fmovex	X(%a6),%fp0
    372	movel	#0x3fff0000,F(%a6)
    373	clrl	F+8(%a6)
    374	fsubx	F(%a6),%fp0		| ...Y-F
    375	fmovemx %fp2-%fp2/%fp3,-(%sp)	| ...SAVE FP2 WHILE FP0 IS NOT READY
    376|--SUMMARY: FP0 IS Y-F, A0 IS ADDRESS OF 1/F, FP1 IS K
    377|--REGISTERS SAVED: FPCR, FP1, FP2
       |--(the fmovemx above pushes FP3 as well as FP2)
    378
    379LP1CONT1:
    380|--A RE-ENTRY POINT FOR LOGNP1
       |--Expects FP0 = Y-F, FP1 = K, A0 = address of 1/F, and FP2/FP3 already
       |--pushed on the stack; they are popped again before the final add.
    381	fmulx	(%a0),%fp0	| ...FP0 IS U = (Y-F)/F
    382	fmulx	LOGOF2,%fp1	| ...GET K*LOG2 WHILE FP0 IS NOT READY
    383	fmovex	%fp0,%fp2
    384	fmulx	%fp2,%fp2		| ...FP2 IS V=U*U
    385	fmovex	%fp1,KLOG2(%a6)	| ...PUT K*LOG2 IN MEMORY, FREE FP1
    386
    387|--LOG(1+U) IS APPROXIMATED BY
    388|--U + V*(A1+U*(A2+U*(A3+U*(A4+U*(A5+U*A6))))) WHICH IS
    389|--[U + V*(A1+V*(A3+V*A5))]  +  [U*V*(A2+V*(A4+V*A6))]
    390
    391	fmovex	%fp2,%fp3
    392	fmovex	%fp2,%fp1
    393
    394	fmuld	LOGA6,%fp1	| ...V*A6
    395	fmuld	LOGA5,%fp2	| ...V*A5
    396
    397	faddd	LOGA4,%fp1	| ...A4+V*A6
    398	faddd	LOGA3,%fp2	| ...A3+V*A5
    399
    400	fmulx	%fp3,%fp1		| ...V*(A4+V*A6)
    401	fmulx	%fp3,%fp2		| ...V*(A3+V*A5)
    402
    403	faddd	LOGA2,%fp1	| ...A2+V*(A4+V*A6)
    404	faddd	LOGA1,%fp2	| ...A1+V*(A3+V*A5)
    405
    406	fmulx	%fp3,%fp1		| ...V*(A2+V*(A4+V*A6))
    407	addal	#16,%a0		| ...ADDRESS OF LOG(F)
    408	fmulx	%fp3,%fp2		| ...V*(A1+V*(A3+V*A5)), FP3 RELEASED
    409
    410	fmulx	%fp0,%fp1		| ...U*V*(A2+V*(A4+V*A6))
    411	faddx	%fp2,%fp0		| ...U+V*(A1+V*(A3+V*A5)), FP2 RELEASED
    412
    413	faddx	(%a0),%fp1	| ...LOG(F)+U*V*(A2+V*(A4+V*A6))
    414	fmovemx  (%sp)+,%fp2-%fp2/%fp3	| ...RESTORE FP2
    415	faddx	%fp1,%fp0		| ...FP0 IS LOG(F) + LOG(1+U)
    416
       |--D1 is written to FPCR just before the last operation; presumably it
       |--holds the caller's rounding/precision control (confirm with caller).
    417	fmovel	%d1,%fpcr
    418	faddx	KLOG2(%a6),%fp0	| ...FINAL ADD
    419	bra	t_frcinx
    420
    421
    422LOGNEAR1:
    423|--REGISTERS SAVED: FPCR, FP1. FP0 CONTAINS THE INPUT.
       |--Reached from LOGBGN when the compact form of X lies inside BOUNDS1.
       |--Builds FP1 = 2(X-1) and FP0 = X+1, then falls into LP1CONT2.
    424	fmovex	%fp0,%fp1
    425	fsubs	one,%fp1		| ...FP1 IS X-1
    426	fadds	one,%fp0		| ...FP0 IS X+1
    427	faddx	%fp1,%fp1		| ...FP1 IS 2(X-1)
    428|--LOG(X) = LOG(1+U/2)-LOG(1-U/2) WHICH IS AN ODD POLYNOMIAL
    429|--IN U, U = 2(X-1)/(X+1) = FP1/FP0
    430
    431LP1CONT2:
    432|--THIS IS A RE-ENTRY POINT FOR LOGNP1
       |--Expects the numerator of U in FP1 and the denominator in FP0.
       |--FP2/FP3 are pushed here and popped before the final add; U is kept
       |--in SAVEU(a6) while FP1 is reused. Leaves through t_frcinx.
    433	fdivx	%fp0,%fp1		| ...FP1 IS U
    434	fmovemx %fp2-%fp2/%fp3,-(%sp)	 | ...SAVE FP2
    435|--REGISTERS SAVED ARE NOW FPCR,FP1,FP2,FP3
    436|--LET V=U*U, W=V*V, CALCULATE
    437|--U + U*V*(B1 + V*(B2 + V*(B3 + V*(B4 + V*B5)))) BY
    438|--U + U*V*(  [B1 + W*(B3 + W*B5)]  +  [V*(B2 + W*B4)]  )
    439	fmovex	%fp1,%fp0
    440	fmulx	%fp0,%fp0	| ...FP0 IS V
    441	fmovex	%fp1,SAVEU(%a6) | ...STORE U IN MEMORY, FREE FP1
    442	fmovex	%fp0,%fp1
    443	fmulx	%fp1,%fp1	| ...FP1 IS W
    444
    445	fmoved	LOGB5,%fp3
    446	fmoved	LOGB4,%fp2
    447
    448	fmulx	%fp1,%fp3	| ...W*B5
    449	fmulx	%fp1,%fp2	| ...W*B4
    450
    451	faddd	LOGB3,%fp3 | ...B3+W*B5
    452	faddd	LOGB2,%fp2 | ...B2+W*B4
    453
    454	fmulx	%fp3,%fp1	| ...W*(B3+W*B5), FP3 RELEASED
    455
    456	fmulx	%fp0,%fp2	| ...V*(B2+W*B4)
    457
    458	faddd	LOGB1,%fp1 | ...B1+W*(B3+W*B5)
    459	fmulx	SAVEU(%a6),%fp0 | ...FP0 IS U*V
    460
    461	faddx	%fp2,%fp1	| ...B1+W*(B3+W*B5) + V*(B2+W*B4), FP2 RELEASED
    462	fmovemx (%sp)+,%fp2-%fp2/%fp3 | ...FP2 RESTORED
    463
    464	fmulx	%fp1,%fp0	| ...U*V*( [B1+W*(B3+W*B5)] + [V*(B2+W*B4)] )
    465
    466	fmovel	%d1,%fpcr
    467	faddx	SAVEU(%a6),%fp0
    468	bra	t_frcinx	| ...exit; the unreachable rts that followed was dropped
    470
    471LOGNEG:
    472|--REGISTERS SAVED FPCR. LOG(-VE) IS INVALID
       |--Reached from LOGBGN when the compact form of X is negative; hands
       |--control to the external t_operr routine.
    473	bra	t_operr
    474
    475	.global	slognp1d
    476slognp1d:
    477|--ENTRY POINT FOR LOG(1+Z) FOR DENORMALIZED INPUT
    478| Simply return the denorm
       | No computation is done here; everything is delegated to the external
       | t_extdnrm routine (its behaviour is not visible in this file).
    479
    480	bra	t_extdnrm
    481
    482	.global	slognp1
    483slognp1:
    484|--ENTRY POINT FOR LOG(1+X) FOR X FINITE, NON-ZERO, NOT NAN'S
       |--In:  a0 points at the extended-precision argument.
       |--If |X| is greater than LTHOLD, continue at LP1REAL. Otherwise the
       |--argument itself is reloaded and returned through t_frcinx.
    485
    486	fmovex	(%a0),%fp0	| ...LOAD INPUT
    487	fabsx	%fp0		|test magnitude
    488	fcmpx	LTHOLD,%fp0	|compare with min threshold
    489	fbgt	LP1REAL		|if greater, continue
    490	fmovel	#0,%fpsr		|clr N flag from compare
    491	fmovel	%d1,%fpcr
    492	fmovex	(%a0),%fp0	|return signed argument
    493	bra	t_frcinx
    494
    495LP1REAL:
       |--Forms X = 1+Z (Z kept in FP1), stores it at X(a6) and builds its
       |--compact form in D0. X <= 0 goes to LP1NEG0; X outside BOUNDS2 is
       |--handled by LOGMAIN; the remaining cases fall through to LP1NEAR1.
    496	fmovex	(%a0),%fp0	| ...LOAD INPUT
    497	movel	#0x00000000,ADJK(%a6)
    498	fmovex	%fp0,%fp1	| ...FP1 IS INPUT Z
    499	fadds	one,%fp0	| ...X := ROUND(1+Z)
    500	fmovex	%fp0,X(%a6)
    501	movew	XFRAC(%a6),XDCARE(%a6)
    502	movel	X(%a6),%d0
       |--The movew above copies the top fraction word next to the exponent
       |--word, so the longword just loaded into D0 is X in compact form.
    503	cmpil	#0,%d0
    504	ble	LP1NEG0	| ...LOG OF ZERO OR -VE
    505	cmp2l	BOUNDS2,%d0
    506	bcs	LOGMAIN	| ...BOUNDS2 IS [1/2,3/2]
    507|--IF 1+Z > 3/2 OR 1+Z < 1/2, THEN X, WHICH IS ROUNDING 1+Z,
    508|--CONTAINS AT LEAST 63 BITS OF INFORMATION OF Z. IN THAT CASE,
    509|--SIMPLY INVOKE LOG(X) FOR LOG(1+Z).
    510
    511LP1NEAR1:
    512|--NEXT SEE IF EXP(-1/16) < X < EXP(1/16)
       |--X outside BOUNDS1 takes the table-driven path at LP1CARE.
    513	cmp2l	BOUNDS1,%d0
    514	bcss	LP1CARE
    515
    516LP1ONE16:
    517|--EXP(-1/16) < X < EXP(1/16). LOG(1+Z) = LOG(1+U/2) - LOG(1-U/2)
    518|--WHERE U = 2Z/(2+Z) = 2Z/(1+X).
    519	faddx	%fp1,%fp1	| ...FP1 IS 2Z
    520	fadds	one,%fp0	| ...FP0 IS 1+X
    521|--U = FP1/FP0
    522	bra	LP1CONT2
    523
    524LP1CARE:
    525|--HERE WE USE THE USUAL TABLE DRIVEN APPROACH. CARE HAS TO BE
    526|--TAKEN BECAUSE 1+Z CAN HAVE 67 BITS OF INFORMATION AND WE MUST
    527|--PRESERVE ALL THE INFORMATION. BECAUSE 1+Z IS IN [1/2,3/2],
    528|--THERE ARE ONLY TWO CASES.
    529|--CASE 1: 1+Z < 1, THEN K = -1 AND Y-F = (2-F) + 2Z
    530|--CASE 2: 1+Z > 1, THEN K = 0  AND Y-F = (1-F) + Z
    531|--ON RETURNING TO LP1CONT1, WE MUST HAVE K IN FP1, ADDRESS OF
    532|--(1/F) IN A0, Y-F IN FP0, AND FP2 SAVED.
       |--On entry FP1 still holds Z and D0 the compact form of X = 1+Z.
    533
    534	movel	XFRAC(%a6),FFRAC(%a6)
    535	andil	#0xFE000000,FFRAC(%a6)
    536	oril	#0x01000000,FFRAC(%a6)	| ...F OBTAINED
    537	cmpil	#0x3FFF8000,%d0	| ...SEE IF 1+Z > 1
    538	bges	KISZERO
    539
    540KISNEG1:
       |--Case 1 above: K = -1, Y-F is formed as (2-F) + 2Z.
    541	fmoves	TWO,%fp0
    542	movel	#0x3fff0000,F(%a6)
    543	clrl	F+8(%a6)
    544	fsubx	F(%a6),%fp0	| ...2-F
    545	movel	FFRAC(%a6),%d0
    546	andil	#0x7E000000,%d0
    547	asrl	#8,%d0
    548	asrl	#8,%d0
    549	asrl	#4,%d0		| ...D0 CONTAINS DISPLACEMENT FOR 1/F
    550	faddx	%fp1,%fp1		| ...GET 2Z
    551	fmovemx %fp2-%fp2/%fp3,-(%sp)	| ...SAVE FP2
    552	faddx	%fp1,%fp0		| ...FP0 IS Y-F = (2-F)+2Z
    553	lea	LOGTBL,%a0	| ...A0 IS ADDRESS OF 1/F
    554	addal	%d0,%a0
    555	fmoves	negone,%fp1	| ...FP1 IS K = -1
    556	bra	LP1CONT1
    557
    558KISZERO:
       |--Case 2 above: K = 0, Y-F is formed as (1-F) + Z.
    559	fmoves	one,%fp0
    560	movel	#0x3fff0000,F(%a6)
    561	clrl	F+8(%a6)
    562	fsubx	F(%a6),%fp0		| ...1-F
    563	movel	FFRAC(%a6),%d0
    564	andil	#0x7E000000,%d0
    565	asrl	#8,%d0
    566	asrl	#8,%d0
    567	asrl	#4,%d0
    568	faddx	%fp1,%fp0		| ...FP0 IS Y-F
    569	fmovemx %fp2-%fp2/%fp3,-(%sp)	| ...FP2 SAVED
    570	lea	LOGTBL,%a0
    571	addal	%d0,%a0		| ...A0 IS ADDRESS OF 1/F
    572	fmoves	zero,%fp1	| ...FP1 IS K = 0
    573	bra	LP1CONT1
    574
    575LP1NEG0:
    576|--FPCR SAVED. D0 IS X IN COMPACT FORM.
       |--Reached from LP1REAL when D0 <= 0. A negative D0 goes to LP1NEG;
       |--a zero D0 falls through to LP1ZERO.
    577	cmpil	#0,%d0
    578	blts	LP1NEG
    579LP1ZERO:
       |--1+Z is zero: load -1.0 into FP0, set FPCR from D1, go to t_dz.
    580	fmoves	negone,%fp0
    581
    582	fmovel	%d1,%fpcr
    583	bra t_dz
    584
    585LP1NEG:
       |--1+Z is negative: load 0.0 into FP0, set FPCR from D1, go to t_operr.
    586	fmoves	zero,%fp0
    587
    588	fmovel	%d1,%fpcr
    589	bra	t_operr
    590
    591	|end