stwotox.S (12262B)
1| 2| stwotox.sa 3.1 12/10/90 3| 4| stwotox --- 2**X 5| stwotoxd --- 2**X for denormalized X 6| stentox --- 10**X 7| stentoxd --- 10**X for denormalized X 8| 9| Input: Double-extended number X in location pointed to 10| by address register a0. 11| 12| Output: The function values are returned in Fp0. 13| 14| Accuracy and Monotonicity: The returned result is within 2 ulps in 15| 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the 16| result is subsequently rounded to double precision. The 17| result is provably monotonic in double precision. 18| 19| Speed: The program stwotox takes approximately 190 cycles and the 20| program stentox takes approximately 200 cycles. 21| 22| Algorithm: 23| 24| twotox 25| 1. If |X| > 16480, go to ExpBig. 26| 27| 2. If |X| < 2**(-70), go to ExpSm. 28| 29| 3. Decompose X as X = N/64 + r where |r| <= 1/128. Furthermore 30| decompose N as 31| N = 64(M + M') + j, j = 0,1,2,...,63. 32| 33| 4. Overwrite r := r * log2. Then 34| 2**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r). 35| Go to expr to compute that expression. 36| 37| tentox 38| 1. If |X| > 16480*log_10(2) (base 10 log of 2), go to ExpBig. 39| 40| 2. If |X| < 2**(-70), go to ExpSm. 41| 42| 3. Set y := X*log_2(10)*64 (base 2 log of 10). Set 43| N := round-to-int(y). Decompose N as 44| N = 64(M + M') + j, j = 0,1,2,...,63. 45| 46| 4. Define r as 47| r := ((X - N*L1)-N*L2) * L10 48| where L1, L2 are the leading and trailing parts of log_10(2)/64 49| and L10 is the natural log of 10. Then 50| 10**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r). 51| Go to expr to compute that expression. 52| 53| expr 54| 1. Fetch 2**(j/64) from table as Fact1 and Fact2. 55| 56| 2. Overwrite Fact1 and Fact2 by 57| Fact1 := 2**(M) * Fact1 58| Fact2 := 2**(M) * Fact2 59| Thus Fact1 + Fact2 = 2**(M) * 2**(j/64). 60| 61| 3. Calculate P where 1 + P approximates exp(r): 62| P = r + r*r*(A1+r*(A2+...+r*A5)). 63| 64| 4. Let AdjFact := 2**(M'). Return 65| AdjFact * ( Fact1 + ((Fact1*P) + Fact2) ). 66| Exit. 67| 68| ExpBig 69| 1. Generate overflow by Huge * Huge if X > 0; otherwise, generate 70| underflow by Tiny * Tiny. 71| 72| ExpSm 73| 1. Return 1 + X. 74| 75 76| Copyright (C) Motorola, Inc. 1990 77| All Rights Reserved 78| 79| For details on the license for this file, please see the 80| file, README, in this same directory. 81 82|STWOTOX idnt 2,1 | Motorola 040 Floating Point Software Package 83 84 |section 8 85 86#include "fpsp.h" 87 88BOUNDS1: .long 0x3FB98000,0x400D80C0 | ... 2^(-70),16480 89BOUNDS2: .long 0x3FB98000,0x400B9B07 | ... 2^(-70),16480 LOG2/LOG10 90 91L2TEN64: .long 0x406A934F,0x0979A371 | ... 64LOG10/LOG2 92L10TWO1: .long 0x3F734413,0x509F8000 | ... LOG2/64LOG10 93 94L10TWO2: .long 0xBFCD0000,0xC0219DC1,0xDA994FD2,0x00000000 95 96LOG10: .long 0x40000000,0x935D8DDD,0xAAA8AC17,0x00000000 97 98LOG2: .long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000 99 100EXPA5: .long 0x3F56C16D,0x6F7BD0B2 101EXPA4: .long 0x3F811112,0x302C712C 102EXPA3: .long 0x3FA55555,0x55554CC1 103EXPA2: .long 0x3FC55555,0x55554A54 104EXPA1: .long 0x3FE00000,0x00000000,0x00000000,0x00000000 105 106HUGE: .long 0x7FFE0000,0xFFFFFFFF,0xFFFFFFFF,0x00000000 107TINY: .long 0x00010000,0xFFFFFFFF,0xFFFFFFFF,0x00000000 108 109EXPTBL: 110 .long 0x3FFF0000,0x80000000,0x00000000,0x3F738000 111 .long 0x3FFF0000,0x8164D1F3,0xBC030773,0x3FBEF7CA 112 .long 0x3FFF0000,0x82CD8698,0xAC2BA1D7,0x3FBDF8A9 113 .long 0x3FFF0000,0x843A28C3,0xACDE4046,0x3FBCD7C9 114 .long 0x3FFF0000,0x85AAC367,0xCC487B15,0xBFBDE8DA 115 .long 0x3FFF0000,0x871F6196,0x9E8D1010,0x3FBDE85C 116 .long 0x3FFF0000,0x88980E80,0x92DA8527,0x3FBEBBF1 117 .long 0x3FFF0000,0x8A14D575,0x496EFD9A,0x3FBB80CA 118 .long 0x3FFF0000,0x8B95C1E3,0xEA8BD6E7,0xBFBA8373 119 .long 0x3FFF0000,0x8D1ADF5B,0x7E5BA9E6,0xBFBE9670 120 .long 0x3FFF0000,0x8EA4398B,0x45CD53C0,0x3FBDB700 121 .long 0x3FFF0000,0x9031DC43,0x1466B1DC,0x3FBEEEB0 122 .long 0x3FFF0000,0x91C3D373,0xAB11C336,0x3FBBFD6D 123 .long 0x3FFF0000,0x935A2B2F,0x13E6E92C,0xBFBDB319 124 .long 0x3FFF0000,0x94F4EFA8,0xFEF70961,0x3FBDBA2B 125 .long 0x3FFF0000,0x96942D37,0x20185A00,0x3FBE91D5 126 .long 0x3FFF0000,0x9837F051,0x8DB8A96F,0x3FBE8D5A 127 .long 0x3FFF0000,0x99E04593,0x20B7FA65,0xBFBCDE7B 128 .long 0x3FFF0000,0x9B8D39B9,0xD54E5539,0xBFBEBAAF 129 .long 0x3FFF0000,0x9D3ED9A7,0x2CFFB751,0xBFBD86DA 130 .long 0x3FFF0000,0x9EF53260,0x91A111AE,0xBFBEBEDD 131 .long 0x3FFF0000,0xA0B0510F,0xB9714FC2,0x3FBCC96E 132 .long 0x3FFF0000,0xA2704303,0x0C496819,0xBFBEC90B 133 .long 0x3FFF0000,0xA43515AE,0x09E6809E,0x3FBBD1DB 134 .long 0x3FFF0000,0xA5FED6A9,0xB15138EA,0x3FBCE5EB 135 .long 0x3FFF0000,0xA7CD93B4,0xE965356A,0xBFBEC274 136 .long 0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x3FBEA83C 137 .long 0x3FFF0000,0xAB7A39B5,0xA93ED337,0x3FBECB00 138 .long 0x3FFF0000,0xAD583EEA,0x42A14AC6,0x3FBE9301 139 .long 0x3FFF0000,0xAF3B78AD,0x690A4375,0xBFBD8367 140 .long 0x3FFF0000,0xB123F581,0xD2AC2590,0xBFBEF05F 141 .long 0x3FFF0000,0xB311C412,0xA9112489,0x3FBDFB3C 142 .long 0x3FFF0000,0xB504F333,0xF9DE6484,0x3FBEB2FB 143 .long 0x3FFF0000,0xB6FD91E3,0x28D17791,0x3FBAE2CB 144 .long 0x3FFF0000,0xB8FBAF47,0x62FB9EE9,0x3FBCDC3C 145 .long 0x3FFF0000,0xBAFF5AB2,0x133E45FB,0x3FBEE9AA 146 .long 0x3FFF0000,0xBD08A39F,0x580C36BF,0xBFBEAEFD 147 .long 0x3FFF0000,0xBF1799B6,0x7A731083,0xBFBCBF51 148 .long 0x3FFF0000,0xC12C4CCA,0x66709456,0x3FBEF88A 149 .long 0x3FFF0000,0xC346CCDA,0x24976407,0x3FBD83B2 150 .long 0x3FFF0000,0xC5672A11,0x5506DADD,0x3FBDF8AB 151 .long 0x3FFF0000,0xC78D74C8,0xABB9B15D,0xBFBDFB17 152 .long 0x3FFF0000,0xC9B9BD86,0x6E2F27A3,0xBFBEFE3C 153 .long 0x3FFF0000,0xCBEC14FE,0xF2727C5D,0xBFBBB6F8 154 .long 0x3FFF0000,0xCE248C15,0x1F8480E4,0xBFBCEE53 155 .long 0x3FFF0000,0xD06333DA,0xEF2B2595,0xBFBDA4AE 156 .long 0x3FFF0000,0xD2A81D91,0xF12AE45A,0x3FBC9124 157 .long 0x3FFF0000,0xD4F35AAB,0xCFEDFA1F,0x3FBEB243 158 .long 0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x3FBDE69A 159 .long 0x3FFF0000,0xD99D15C2,0x78AFD7B6,0xBFB8BC61 160 .long 0x3FFF0000,0xDBFBB797,0xDAF23755,0x3FBDF610 161 .long 0x3FFF0000,0xDE60F482,0x5E0E9124,0xBFBD8BE1 162 .long 0x3FFF0000,0xE0CCDEEC,0x2A94E111,0x3FBACB12 163 .long 0x3FFF0000,0xE33F8972,0xBE8A5A51,0x3FBB9BFE 164 .long 0x3FFF0000,0xE5B906E7,0x7C8348A8,0x3FBCF2F4 165 .long 0x3FFF0000,0xE8396A50,0x3C4BDC68,0x3FBEF22F 166 .long 0x3FFF0000,0xEAC0C6E7,0xDD24392F,0xBFBDBF4A 167 .long 0x3FFF0000,0xED4F301E,0xD9942B84,0x3FBEC01A 168 .long 0x3FFF0000,0xEFE4B99B,0xDCDAF5CB,0x3FBE8CAC 169 .long 0x3FFF0000,0xF281773C,0x59FFB13A,0xBFBCBB3F 170 .long 0x3FFF0000,0xF5257D15,0x2486CC2C,0x3FBEF73A 171 .long 0x3FFF0000,0xF7D0DF73,0x0AD13BB9,0xBFB8B795 172 .long 0x3FFF0000,0xFA83B2DB,0x722A033A,0x3FBEF84B 173 .long 0x3FFF0000,0xFD3E0C0C,0xF486C175,0xBFBEF581 174 175 .set N,L_SCR1 176 177 .set X,FP_SCR1 178 .set XDCARE,X+2 179 .set XFRAC,X+4 180 181 .set ADJFACT,FP_SCR2 182 183 .set FACT1,FP_SCR3 184 .set FACT1HI,FACT1+4 185 .set FACT1LOW,FACT1+8 186 187 .set FACT2,FP_SCR4 188 .set FACT2HI,FACT2+4 189 .set FACT2LOW,FACT2+8 190 191 | xref t_unfl 192 |xref t_ovfl 193 |xref t_frcinx 194 195 .global stwotoxd 196stwotoxd: 197|--ENTRY POINT FOR 2**(X) FOR DENORMALIZED ARGUMENT 198 199 fmovel %d1,%fpcr | ...set user's rounding mode/precision 200 fmoves #0x3F800000,%fp0 | ...RETURN 1 + X 201 movel (%a0),%d0 202 orl #0x00800001,%d0 203 fadds %d0,%fp0 204 bra t_frcinx 205 206 .global stwotox 207stwotox: 208|--ENTRY POINT FOR 2**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S 209 fmovemx (%a0),%fp0-%fp0 | ...LOAD INPUT, do not set cc's 210 211 movel (%a0),%d0 212 movew 4(%a0),%d0 213 fmovex %fp0,X(%a6) 214 andil #0x7FFFFFFF,%d0 215 216 cmpil #0x3FB98000,%d0 | ...|X| >= 2**(-70)? 217 bges TWOOK1 218 bra EXPBORS 219 220TWOOK1: 221 cmpil #0x400D80C0,%d0 | ...|X| > 16480? 222 bles TWOMAIN 223 bra EXPBORS 224 225 226TWOMAIN: 227|--USUAL CASE, 2^(-70) <= |X| <= 16480 228 229 fmovex %fp0,%fp1 230 fmuls #0x42800000,%fp1 | ...64 * X 231 232 fmovel %fp1,N(%a6) | ...N = ROUND-TO-INT(64 X) 233 movel %d2,-(%sp) 234 lea EXPTBL,%a1 | ...LOAD ADDRESS OF TABLE OF 2^(J/64) 235 fmovel N(%a6),%fp1 | ...N --> FLOATING FMT 236 movel N(%a6),%d0 237 movel %d0,%d2 238 andil #0x3F,%d0 | ...D0 IS J 239 asll #4,%d0 | ...DISPLACEMENT FOR 2^(J/64) 240 addal %d0,%a1 | ...ADDRESS FOR 2^(J/64) 241 asrl #6,%d2 | ...d2 IS L, N = 64L + J 242 movel %d2,%d0 243 asrl #1,%d0 | ...D0 IS M 244 subl %d0,%d2 | ...d2 IS M', N = 64(M+M') + J 245 addil #0x3FFF,%d2 246 movew %d2,ADJFACT(%a6) | ...ADJFACT IS 2^(M') 247 movel (%sp)+,%d2 248|--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64), 249|--D0 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN. 250|--ADJFACT = 2^(M'). 251|--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2. 252 253 fmuls #0x3C800000,%fp1 | ...(1/64)*N 254 movel (%a1)+,FACT1(%a6) 255 movel (%a1)+,FACT1HI(%a6) 256 movel (%a1)+,FACT1LOW(%a6) 257 movew (%a1)+,FACT2(%a6) 258 clrw FACT2+2(%a6) 259 260 fsubx %fp1,%fp0 | ...X - (1/64)*INT(64 X) 261 262 movew (%a1)+,FACT2HI(%a6) 263 clrw FACT2HI+2(%a6) 264 clrl FACT2LOW(%a6) 265 addw %d0,FACT1(%a6) 266 267 fmulx LOG2,%fp0 | ...FP0 IS R 268 addw %d0,FACT2(%a6) 269 270 bra expr 271 272EXPBORS: 273|--FPCR, D0 SAVED 274 cmpil #0x3FFF8000,%d0 275 bgts EXPBIG 276 277EXPSM: 278|--|X| IS SMALL, RETURN 1 + X 279 280 fmovel %d1,%FPCR |restore users exceptions 281 fadds #0x3F800000,%fp0 | ...RETURN 1 + X 282 283 bra t_frcinx 284 285EXPBIG: 286|--|X| IS LARGE, GENERATE OVERFLOW IF X > 0; ELSE GENERATE UNDERFLOW 287|--REGISTERS SAVE SO FAR ARE FPCR AND D0 288 movel X(%a6),%d0 289 cmpil #0,%d0 290 blts EXPNEG 291 292 bclrb #7,(%a0) |t_ovfl expects positive value 293 bra t_ovfl 294 295EXPNEG: 296 bclrb #7,(%a0) |t_unfl expects positive value 297 bra t_unfl 298 299 .global stentoxd 300stentoxd: 301|--ENTRY POINT FOR 10**(X) FOR DENORMALIZED ARGUMENT 302 303 fmovel %d1,%fpcr | ...set user's rounding mode/precision 304 fmoves #0x3F800000,%fp0 | ...RETURN 1 + X 305 movel (%a0),%d0 306 orl #0x00800001,%d0 307 fadds %d0,%fp0 308 bra t_frcinx 309 310 .global stentox 311stentox: 312|--ENTRY POINT FOR 10**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S 313 fmovemx (%a0),%fp0-%fp0 | ...LOAD INPUT, do not set cc's 314 315 movel (%a0),%d0 316 movew 4(%a0),%d0 317 fmovex %fp0,X(%a6) 318 andil #0x7FFFFFFF,%d0 319 320 cmpil #0x3FB98000,%d0 | ...|X| >= 2**(-70)? 321 bges TENOK1 322 bra EXPBORS 323 324TENOK1: 325 cmpil #0x400B9B07,%d0 | ...|X| <= 16480*log2/log10 ? 326 bles TENMAIN 327 bra EXPBORS 328 329TENMAIN: 330|--USUAL CASE, 2^(-70) <= |X| <= 16480 LOG 2 / LOG 10 331 332 fmovex %fp0,%fp1 333 fmuld L2TEN64,%fp1 | ...X*64*LOG10/LOG2 334 335 fmovel %fp1,N(%a6) | ...N=INT(X*64*LOG10/LOG2) 336 movel %d2,-(%sp) 337 lea EXPTBL,%a1 | ...LOAD ADDRESS OF TABLE OF 2^(J/64) 338 fmovel N(%a6),%fp1 | ...N --> FLOATING FMT 339 movel N(%a6),%d0 340 movel %d0,%d2 341 andil #0x3F,%d0 | ...D0 IS J 342 asll #4,%d0 | ...DISPLACEMENT FOR 2^(J/64) 343 addal %d0,%a1 | ...ADDRESS FOR 2^(J/64) 344 asrl #6,%d2 | ...d2 IS L, N = 64L + J 345 movel %d2,%d0 346 asrl #1,%d0 | ...D0 IS M 347 subl %d0,%d2 | ...d2 IS M', N = 64(M+M') + J 348 addil #0x3FFF,%d2 349 movew %d2,ADJFACT(%a6) | ...ADJFACT IS 2^(M') 350 movel (%sp)+,%d2 351 352|--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64), 353|--D0 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN. 354|--ADJFACT = 2^(M'). 355|--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2. 356 357 fmovex %fp1,%fp2 358 359 fmuld L10TWO1,%fp1 | ...N*(LOG2/64LOG10)_LEAD 360 movel (%a1)+,FACT1(%a6) 361 362 fmulx L10TWO2,%fp2 | ...N*(LOG2/64LOG10)_TRAIL 363 364 movel (%a1)+,FACT1HI(%a6) 365 movel (%a1)+,FACT1LOW(%a6) 366 fsubx %fp1,%fp0 | ...X - N L_LEAD 367 movew (%a1)+,FACT2(%a6) 368 369 fsubx %fp2,%fp0 | ...X - N L_TRAIL 370 371 clrw FACT2+2(%a6) 372 movew (%a1)+,FACT2HI(%a6) 373 clrw FACT2HI+2(%a6) 374 clrl FACT2LOW(%a6) 375 376 fmulx LOG10,%fp0 | ...FP0 IS R 377 378 addw %d0,FACT1(%a6) 379 addw %d0,FACT2(%a6) 380 381expr: 382|--FPCR, FP2, FP3 ARE SAVED IN ORDER AS SHOWN. 383|--ADJFACT CONTAINS 2**(M'), FACT1 + FACT2 = 2**(M) * 2**(J/64). 384|--FP0 IS R. THE FOLLOWING CODE COMPUTES 385|-- 2**(M'+M) * 2**(J/64) * EXP(R) 386 387 fmovex %fp0,%fp1 388 fmulx %fp1,%fp1 | ...FP1 IS S = R*R 389 390 fmoved EXPA5,%fp2 | ...FP2 IS A5 391 fmoved EXPA4,%fp3 | ...FP3 IS A4 392 393 fmulx %fp1,%fp2 | ...FP2 IS S*A5 394 fmulx %fp1,%fp3 | ...FP3 IS S*A4 395 396 faddd EXPA3,%fp2 | ...FP2 IS A3+S*A5 397 faddd EXPA2,%fp3 | ...FP3 IS A2+S*A4 398 399 fmulx %fp1,%fp2 | ...FP2 IS S*(A3+S*A5) 400 fmulx %fp1,%fp3 | ...FP3 IS S*(A2+S*A4) 401 402 faddd EXPA1,%fp2 | ...FP2 IS A1+S*(A3+S*A5) 403 fmulx %fp0,%fp3 | ...FP3 IS R*S*(A2+S*A4) 404 405 fmulx %fp1,%fp2 | ...FP2 IS S*(A1+S*(A3+S*A5)) 406 faddx %fp3,%fp0 | ...FP0 IS R+R*S*(A2+S*A4) 407 408 faddx %fp2,%fp0 | ...FP0 IS EXP(R) - 1 409 410 411|--FINAL RECONSTRUCTION PROCESS 412|--EXP(X) = 2^M*2^(J/64) + 2^M*2^(J/64)*(EXP(R)-1) - (1 OR 0) 413 414 fmulx FACT1(%a6),%fp0 415 faddx FACT2(%a6),%fp0 416 faddx FACT1(%a6),%fp0 417 418 fmovel %d1,%FPCR |restore users exceptions 419 clrw ADJFACT+2(%a6) 420 movel #0x80000000,ADJFACT+4(%a6) 421 clrl ADJFACT+8(%a6) 422 fmulx ADJFACT(%a6),%fp0 | ...FINAL ADJUSTMENT 423 424 bra t_frcinx 425 426 |end