satan.S (15961B)
1| 2| satan.sa 3.3 12/19/90 3| 4| The entry point satan computes the arctangent of an 5| input value. satand does the same except the input value is a 6| denormalized number. 7| 8| Input: Double-extended value in memory location pointed to by address 9| register a0. 10| 11| Output: Arctan(X) returned in floating-point register Fp0. 12| 13| Accuracy and Monotonicity: The returned result is within 2 ulps in 14| 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the 15| result is subsequently rounded to double precision. The 16| result is provably monotonic in double precision. 17| 18| Speed: The program satan takes approximately 160 cycles for input 19| argument X such that 1/16 < |X| < 16. For the other arguments, 20| the program will run no worse than 10% slower. 21| 22| Algorithm: 23| Step 1. If |X| >= 16 or |X| < 1/16, go to Step 5. 24| 25| Step 2. Let X = sgn * 2**k * 1.xxxxxxxx...x. Note that k = -4, -3,..., or 3. 26| Define F = sgn * 2**k * 1.xxxx1, i.e. the first 5 significant bits 27| of X with a bit-1 attached at the 6-th bit position. Define u 28| to be u = (X-F) / (1 + X*F). 29| 30| Step 3. Approximate arctan(u) by a polynomial poly. 31| 32| Step 4. Return arctan(F) + poly, arctan(F) is fetched from a table of values 33| calculated beforehand. Exit. 34| 35| Step 5. If |X| >= 16, go to Step 7. 36| 37| Step 6. Approximate arctan(X) by an odd polynomial in X. Exit. 38| 39| Step 7. Define X' = -1/X. Approximate arctan(X') by an odd polynomial in X'. 40| Arctan(X) = sign(X)*Pi/2 + arctan(X'). Exit. 41| 42 43| Copyright (C) Motorola, Inc. 1990 44| All Rights Reserved 45| 46| For details on the license for this file, please see the 47| file, README, in this same directory. 48 49|satan idnt 2,1 | Motorola 040 Floating Point Software Package 50 51 |section 8 52 53#include "fpsp.h" 54 55BOUNDS1: .long 0x3FFB8000,0x4002FFFF 56 57ONE: .long 0x3F800000 58 59 .long 0x00000000 60 61ATANA3: .long 0xBFF6687E,0x314987D8 62ATANA2: .long 0x4002AC69,0x34A26DB3 63 64ATANA1: .long 0xBFC2476F,0x4E1DA28E 65ATANB6: .long 0x3FB34444,0x7F876989 66 67ATANB5: .long 0xBFB744EE,0x7FAF45DB 68ATANB4: .long 0x3FBC71C6,0x46940220 69 70ATANB3: .long 0xBFC24924,0x921872F9 71ATANB2: .long 0x3FC99999,0x99998FA9 72 73ATANB1: .long 0xBFD55555,0x55555555 74ATANC5: .long 0xBFB70BF3,0x98539E6A 75 76ATANC4: .long 0x3FBC7187,0x962D1D7D 77ATANC3: .long 0xBFC24924,0x827107B8 78 79ATANC2: .long 0x3FC99999,0x9996263E 80ATANC1: .long 0xBFD55555,0x55555536 81 82PPIBY2: .long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000 83NPIBY2: .long 0xBFFF0000,0xC90FDAA2,0x2168C235,0x00000000 84PTINY: .long 0x00010000,0x80000000,0x00000000,0x00000000 85NTINY: .long 0x80010000,0x80000000,0x00000000,0x00000000 86 87ATANTBL: 88 .long 0x3FFB0000,0x83D152C5,0x060B7A51,0x00000000 89 .long 0x3FFB0000,0x8BC85445,0x65498B8B,0x00000000 90 .long 0x3FFB0000,0x93BE4060,0x17626B0D,0x00000000 91 .long 0x3FFB0000,0x9BB3078D,0x35AEC202,0x00000000 92 .long 0x3FFB0000,0xA3A69A52,0x5DDCE7DE,0x00000000 93 .long 0x3FFB0000,0xAB98E943,0x62765619,0x00000000 94 .long 0x3FFB0000,0xB389E502,0xF9C59862,0x00000000 95 .long 0x3FFB0000,0xBB797E43,0x6B09E6FB,0x00000000 96 .long 0x3FFB0000,0xC367A5C7,0x39E5F446,0x00000000 97 .long 0x3FFB0000,0xCB544C61,0xCFF7D5C6,0x00000000 98 .long 0x3FFB0000,0xD33F62F8,0x2488533E,0x00000000 99 .long 0x3FFB0000,0xDB28DA81,0x62404C77,0x00000000 100 .long 0x3FFB0000,0xE310A407,0x8AD34F18,0x00000000 101 .long 0x3FFB0000,0xEAF6B0A8,0x188EE1EB,0x00000000 102 .long 0x3FFB0000,0xF2DAF194,0x9DBE79D5,0x00000000 103 .long 0x3FFB0000,0xFABD5813,0x61D47E3E,0x00000000 104 .long 0x3FFC0000,0x8346AC21,0x0959ECC4,0x00000000 105 .long 0x3FFC0000,0x8B232A08,0x304282D8,0x00000000 106 .long 0x3FFC0000,0x92FB70B8,0xD29AE2F9,0x00000000 107 .long 0x3FFC0000,0x9ACF476F,0x5CCD1CB4,0x00000000 108 .long 0x3FFC0000,0xA29E7630,0x4954F23F,0x00000000 109 .long 0x3FFC0000,0xAA68C5D0,0x8AB85230,0x00000000 110 .long 0x3FFC0000,0xB22DFFFD,0x9D539F83,0x00000000 111 .long 0x3FFC0000,0xB9EDEF45,0x3E900EA5,0x00000000 112 .long 0x3FFC0000,0xC1A85F1C,0xC75E3EA5,0x00000000 113 .long 0x3FFC0000,0xC95D1BE8,0x28138DE6,0x00000000 114 .long 0x3FFC0000,0xD10BF300,0x840D2DE4,0x00000000 115 .long 0x3FFC0000,0xD8B4B2BA,0x6BC05E7A,0x00000000 116 .long 0x3FFC0000,0xE0572A6B,0xB42335F6,0x00000000 117 .long 0x3FFC0000,0xE7F32A70,0xEA9CAA8F,0x00000000 118 .long 0x3FFC0000,0xEF888432,0x64ECEFAA,0x00000000 119 .long 0x3FFC0000,0xF7170A28,0xECC06666,0x00000000 120 .long 0x3FFD0000,0x812FD288,0x332DAD32,0x00000000 121 .long 0x3FFD0000,0x88A8D1B1,0x218E4D64,0x00000000 122 .long 0x3FFD0000,0x9012AB3F,0x23E4AEE8,0x00000000 123 .long 0x3FFD0000,0x976CC3D4,0x11E7F1B9,0x00000000 124 .long 0x3FFD0000,0x9EB68949,0x3889A227,0x00000000 125 .long 0x3FFD0000,0xA5EF72C3,0x4487361B,0x00000000 126 .long 0x3FFD0000,0xAD1700BA,0xF07A7227,0x00000000 127 .long 0x3FFD0000,0xB42CBCFA,0xFD37EFB7,0x00000000 128 .long 0x3FFD0000,0xBB303A94,0x0BA80F89,0x00000000 129 .long 0x3FFD0000,0xC22115C6,0xFCAEBBAF,0x00000000 130 .long 0x3FFD0000,0xC8FEF3E6,0x86331221,0x00000000 131 .long 0x3FFD0000,0xCFC98330,0xB4000C70,0x00000000 132 .long 0x3FFD0000,0xD6807AA1,0x102C5BF9,0x00000000 133 .long 0x3FFD0000,0xDD2399BC,0x31252AA3,0x00000000 134 .long 0x3FFD0000,0xE3B2A855,0x6B8FC517,0x00000000 135 .long 0x3FFD0000,0xEA2D764F,0x64315989,0x00000000 136 .long 0x3FFD0000,0xF3BF5BF8,0xBAD1A21D,0x00000000 137 .long 0x3FFE0000,0x801CE39E,0x0D205C9A,0x00000000 138 .long 0x3FFE0000,0x8630A2DA,0xDA1ED066,0x00000000 139 .long 0x3FFE0000,0x8C1AD445,0xF3E09B8C,0x00000000 140 .long 0x3FFE0000,0x91DB8F16,0x64F350E2,0x00000000 141 .long 0x3FFE0000,0x97731420,0x365E538C,0x00000000 142 .long 0x3FFE0000,0x9CE1C8E6,0xA0B8CDBA,0x00000000 143 .long 0x3FFE0000,0xA22832DB,0xCADAAE09,0x00000000 144 .long 0x3FFE0000,0xA746F2DD,0xB7602294,0x00000000 145 .long 0x3FFE0000,0xAC3EC0FB,0x997DD6A2,0x00000000 146 .long 0x3FFE0000,0xB110688A,0xEBDC6F6A,0x00000000 147 .long 0x3FFE0000,0xB5BCC490,0x59ECC4B0,0x00000000 148 .long 0x3FFE0000,0xBA44BC7D,0xD470782F,0x00000000 149 .long 0x3FFE0000,0xBEA94144,0xFD049AAC,0x00000000 150 .long 0x3FFE0000,0xC2EB4ABB,0x661628B6,0x00000000 151 .long 0x3FFE0000,0xC70BD54C,0xE602EE14,0x00000000 152 .long 0x3FFE0000,0xCD000549,0xADEC7159,0x00000000 153 .long 0x3FFE0000,0xD48457D2,0xD8EA4EA3,0x00000000 154 .long 0x3FFE0000,0xDB948DA7,0x12DECE3B,0x00000000 155 .long 0x3FFE0000,0xE23855F9,0x69E8096A,0x00000000 156 .long 0x3FFE0000,0xE8771129,0xC4353259,0x00000000 157 .long 0x3FFE0000,0xEE57C16E,0x0D379C0D,0x00000000 158 .long 0x3FFE0000,0xF3E10211,0xA87C3779,0x00000000 159 .long 0x3FFE0000,0xF919039D,0x758B8D41,0x00000000 160 .long 0x3FFE0000,0xFE058B8F,0x64935FB3,0x00000000 161 .long 0x3FFF0000,0x8155FB49,0x7B685D04,0x00000000 162 .long 0x3FFF0000,0x83889E35,0x49D108E1,0x00000000 163 .long 0x3FFF0000,0x859CFA76,0x511D724B,0x00000000 164 .long 0x3FFF0000,0x87952ECF,0xFF8131E7,0x00000000 165 .long 0x3FFF0000,0x89732FD1,0x9557641B,0x00000000 166 .long 0x3FFF0000,0x8B38CAD1,0x01932A35,0x00000000 167 .long 0x3FFF0000,0x8CE7A8D8,0x301EE6B5,0x00000000 168 .long 0x3FFF0000,0x8F46A39E,0x2EAE5281,0x00000000 169 .long 0x3FFF0000,0x922DA7D7,0x91888487,0x00000000 170 .long 0x3FFF0000,0x94D19FCB,0xDEDF5241,0x00000000 171 .long 0x3FFF0000,0x973AB944,0x19D2A08B,0x00000000 172 .long 0x3FFF0000,0x996FF00E,0x08E10B96,0x00000000 173 .long 0x3FFF0000,0x9B773F95,0x12321DA7,0x00000000 174 .long 0x3FFF0000,0x9D55CC32,0x0F935624,0x00000000 175 .long 0x3FFF0000,0x9F100575,0x006CC571,0x00000000 176 .long 0x3FFF0000,0xA0A9C290,0xD97CC06C,0x00000000 177 .long 0x3FFF0000,0xA22659EB,0xEBC0630A,0x00000000 178 .long 0x3FFF0000,0xA388B4AF,0xF6EF0EC9,0x00000000 179 .long 0x3FFF0000,0xA4D35F10,0x61D292C4,0x00000000 180 .long 0x3FFF0000,0xA60895DC,0xFBE3187E,0x00000000 181 .long 0x3FFF0000,0xA72A51DC,0x7367BEAC,0x00000000 182 .long 0x3FFF0000,0xA83A5153,0x0956168F,0x00000000 183 .long 0x3FFF0000,0xA93A2007,0x7539546E,0x00000000 184 .long 0x3FFF0000,0xAA9E7245,0x023B2605,0x00000000 185 .long 0x3FFF0000,0xAC4C84BA,0x6FE4D58F,0x00000000 186 .long 0x3FFF0000,0xADCE4A4A,0x606B9712,0x00000000 187 .long 0x3FFF0000,0xAF2A2DCD,0x8D263C9C,0x00000000 188 .long 0x3FFF0000,0xB0656F81,0xF22265C7,0x00000000 189 .long 0x3FFF0000,0xB1846515,0x0F71496A,0x00000000 190 .long 0x3FFF0000,0xB28AAA15,0x6F9ADA35,0x00000000 191 .long 0x3FFF0000,0xB37B44FF,0x3766B895,0x00000000 192 .long 0x3FFF0000,0xB458C3DC,0xE9630433,0x00000000 193 .long 0x3FFF0000,0xB525529D,0x562246BD,0x00000000 194 .long 0x3FFF0000,0xB5E2CCA9,0x5F9D88CC,0x00000000 195 .long 0x3FFF0000,0xB692CADA,0x7ACA1ADA,0x00000000 196 .long 0x3FFF0000,0xB736AEA7,0xA6925838,0x00000000 197 .long 0x3FFF0000,0xB7CFAB28,0x7E9F7B36,0x00000000 198 .long 0x3FFF0000,0xB85ECC66,0xCB219835,0x00000000 199 .long 0x3FFF0000,0xB8E4FD5A,0x20A593DA,0x00000000 200 .long 0x3FFF0000,0xB99F41F6,0x4AFF9BB5,0x00000000 201 .long 0x3FFF0000,0xBA7F1E17,0x842BBE7B,0x00000000 202 .long 0x3FFF0000,0xBB471285,0x7637E17D,0x00000000 203 .long 0x3FFF0000,0xBBFABE8A,0x4788DF6F,0x00000000 204 .long 0x3FFF0000,0xBC9D0FAD,0x2B689D79,0x00000000 205 .long 0x3FFF0000,0xBD306A39,0x471ECD86,0x00000000 206 .long 0x3FFF0000,0xBDB6C731,0x856AF18A,0x00000000 207 .long 0x3FFF0000,0xBE31CAC5,0x02E80D70,0x00000000 208 .long 0x3FFF0000,0xBEA2D55C,0xE33194E2,0x00000000 209 .long 0x3FFF0000,0xBF0B10B7,0xC03128F0,0x00000000 210 .long 0x3FFF0000,0xBF6B7A18,0xDACB778D,0x00000000 211 .long 0x3FFF0000,0xBFC4EA46,0x63FA18F6,0x00000000 212 .long 0x3FFF0000,0xC0181BDE,0x8B89A454,0x00000000 213 .long 0x3FFF0000,0xC065B066,0xCFBF6439,0x00000000 214 .long 0x3FFF0000,0xC0AE345F,0x56340AE6,0x00000000 215 .long 0x3FFF0000,0xC0F22291,0x9CB9E6A7,0x00000000 216 217 .set X,FP_SCR1 218 .set XDCARE,X+2 219 .set XFRAC,X+4 220 .set XFRACLO,X+8 221 222 .set ATANF,FP_SCR2 223 .set ATANFHI,ATANF+4 224 .set ATANFLO,ATANF+8 225 226 227 | xref t_frcinx 228 |xref t_extdnrm 229 230 .global satand 231satand: 232|--ENTRY POINT FOR ATAN(X) FOR DENORMALIZED ARGUMENT 233 234 bra t_extdnrm 235 236 .global satan 237satan: 238|--ENTRY POINT FOR ATAN(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S 239 240 fmovex (%a0),%fp0 | ...LOAD INPUT 241 242 movel (%a0),%d0 243 movew 4(%a0),%d0 244 fmovex %fp0,X(%a6) 245 andil #0x7FFFFFFF,%d0 246 247 cmpil #0x3FFB8000,%d0 | ...|X| >= 1/16? 248 bges ATANOK1 249 bra ATANSM 250 251ATANOK1: 252 cmpil #0x4002FFFF,%d0 | ...|X| < 16 ? 253 bles ATANMAIN 254 bra ATANBIG 255 256 257|--THE MOST LIKELY CASE, |X| IN [1/16, 16). WE USE TABLE TECHNIQUE 258|--THE IDEA IS ATAN(X) = ATAN(F) + ATAN( [X-F] / [1+XF] ). 259|--SO IF F IS CHOSEN TO BE CLOSE TO X AND ATAN(F) IS STORED IN 260|--A TABLE, ALL WE NEED IS TO APPROXIMATE ATAN(U) WHERE 261|--U = (X-F)/(1+XF) IS SMALL (REMEMBER F IS CLOSE TO X). IT IS 262|--TRUE THAT A DIVIDE IS NOW NEEDED, BUT THE APPROXIMATION FOR 263|--ATAN(U) IS A VERY SHORT POLYNOMIAL AND THE INDEXING TO 264|--FETCH F AND SAVING OF REGISTERS CAN BE ALL HIDED UNDER THE 265|--DIVIDE. IN THE END THIS METHOD IS MUCH FASTER THAN A TRADITIONAL 266|--ONE. NOTE ALSO THAT THE TRADITIONAL SCHEME THAT APPROXIMATE 267|--ATAN(X) DIRECTLY WILL NEED TO USE A RATIONAL APPROXIMATION 268|--(DIVISION NEEDED) ANYWAY BECAUSE A POLYNOMIAL APPROXIMATION 269|--WILL INVOLVE A VERY LONG POLYNOMIAL. 270 271|--NOW WE SEE X AS +-2^K * 1.BBBBBBB....B <- 1. + 63 BITS 272|--WE CHOSE F TO BE +-2^K * 1.BBBB1 273|--THAT IS IT MATCHES THE EXPONENT AND FIRST 5 BITS OF X, THE 274|--SIXTH BITS IS SET TO BE 1. SINCE K = -4, -3, ..., 3, THERE 275|--ARE ONLY 8 TIMES 16 = 2^7 = 128 |F|'S. SINCE ATAN(-|F|) IS 276|-- -ATAN(|F|), WE NEED TO STORE ONLY ATAN(|F|). 277 278ATANMAIN: 279 280 movew #0x0000,XDCARE(%a6) | ...CLEAN UP X JUST IN CASE 281 andil #0xF8000000,XFRAC(%a6) | ...FIRST 5 BITS 282 oril #0x04000000,XFRAC(%a6) | ...SET 6-TH BIT TO 1 283 movel #0x00000000,XFRACLO(%a6) | ...LOCATION OF X IS NOW F 284 285 fmovex %fp0,%fp1 | ...FP1 IS X 286 fmulx X(%a6),%fp1 | ...FP1 IS X*F, NOTE THAT X*F > 0 287 fsubx X(%a6),%fp0 | ...FP0 IS X-F 288 fadds #0x3F800000,%fp1 | ...FP1 IS 1 + X*F 289 fdivx %fp1,%fp0 | ...FP0 IS U = (X-F)/(1+X*F) 290 291|--WHILE THE DIVISION IS TAKING ITS TIME, WE FETCH ATAN(|F|) 292|--CREATE ATAN(F) AND STORE IT IN ATANF, AND 293|--SAVE REGISTERS FP2. 294 295 movel %d2,-(%a7) | ...SAVE d2 TEMPORARILY 296 movel %d0,%d2 | ...THE EXPO AND 16 BITS OF X 297 andil #0x00007800,%d0 | ...4 VARYING BITS OF F'S FRACTION 298 andil #0x7FFF0000,%d2 | ...EXPONENT OF F 299 subil #0x3FFB0000,%d2 | ...K+4 300 asrl #1,%d2 301 addl %d2,%d0 | ...THE 7 BITS IDENTIFYING F 302 asrl #7,%d0 | ...INDEX INTO TBL OF ATAN(|F|) 303 lea ATANTBL,%a1 304 addal %d0,%a1 | ...ADDRESS OF ATAN(|F|) 305 movel (%a1)+,ATANF(%a6) 306 movel (%a1)+,ATANFHI(%a6) 307 movel (%a1)+,ATANFLO(%a6) | ...ATANF IS NOW ATAN(|F|) 308 movel X(%a6),%d0 | ...LOAD SIGN AND EXPO. AGAIN 309 andil #0x80000000,%d0 | ...SIGN(F) 310 orl %d0,ATANF(%a6) | ...ATANF IS NOW SIGN(F)*ATAN(|F|) 311 movel (%a7)+,%d2 | ...RESTORE d2 312 313|--THAT'S ALL I HAVE TO DO FOR NOW, 314|--BUT ALAS, THE DIVIDE IS STILL CRANKING! 315 316|--U IN FP0, WE ARE NOW READY TO COMPUTE ATAN(U) AS 317|--U + A1*U*V*(A2 + V*(A3 + V)), V = U*U 318|--THE POLYNOMIAL MAY LOOK STRANGE, BUT IS NEVERTHELESS CORRECT. 319|--THE NATURAL FORM IS U + U*V*(A1 + V*(A2 + V*A3)) 320|--WHAT WE HAVE HERE IS MERELY A1 = A3, A2 = A1/A3, A3 = A2/A3. 321|--THE REASON FOR THIS REARRANGEMENT IS TO MAKE THE INDEPENDENT 322|--PARTS A1*U*V AND (A2 + ... STUFF) MORE LOAD-BALANCED 323 324 325 fmovex %fp0,%fp1 326 fmulx %fp1,%fp1 327 fmoved ATANA3,%fp2 328 faddx %fp1,%fp2 | ...A3+V 329 fmulx %fp1,%fp2 | ...V*(A3+V) 330 fmulx %fp0,%fp1 | ...U*V 331 faddd ATANA2,%fp2 | ...A2+V*(A3+V) 332 fmuld ATANA1,%fp1 | ...A1*U*V 333 fmulx %fp2,%fp1 | ...A1*U*V*(A2+V*(A3+V)) 334 335 faddx %fp1,%fp0 | ...ATAN(U), FP1 RELEASED 336 fmovel %d1,%FPCR |restore users exceptions 337 faddx ATANF(%a6),%fp0 | ...ATAN(X) 338 bra t_frcinx 339 340ATANBORS: 341|--|X| IS IN d0 IN COMPACT FORM. FP1, d0 SAVED. 342|--FP0 IS X AND |X| <= 1/16 OR |X| >= 16. 343 cmpil #0x3FFF8000,%d0 344 bgt ATANBIG | ...I.E. |X| >= 16 345 346ATANSM: 347|--|X| <= 1/16 348|--IF |X| < 2^(-40), RETURN X AS ANSWER. OTHERWISE, APPROXIMATE 349|--ATAN(X) BY X + X*Y*(B1+Y*(B2+Y*(B3+Y*(B4+Y*(B5+Y*B6))))) 350|--WHICH IS X + X*Y*( [B1+Z*(B3+Z*B5)] + [Y*(B2+Z*(B4+Z*B6)] ) 351|--WHERE Y = X*X, AND Z = Y*Y. 352 353 cmpil #0x3FD78000,%d0 354 blt ATANTINY 355|--COMPUTE POLYNOMIAL 356 fmulx %fp0,%fp0 | ...FP0 IS Y = X*X 357 358 359 movew #0x0000,XDCARE(%a6) 360 361 fmovex %fp0,%fp1 362 fmulx %fp1,%fp1 | ...FP1 IS Z = Y*Y 363 364 fmoved ATANB6,%fp2 365 fmoved ATANB5,%fp3 366 367 fmulx %fp1,%fp2 | ...Z*B6 368 fmulx %fp1,%fp3 | ...Z*B5 369 370 faddd ATANB4,%fp2 | ...B4+Z*B6 371 faddd ATANB3,%fp3 | ...B3+Z*B5 372 373 fmulx %fp1,%fp2 | ...Z*(B4+Z*B6) 374 fmulx %fp3,%fp1 | ...Z*(B3+Z*B5) 375 376 faddd ATANB2,%fp2 | ...B2+Z*(B4+Z*B6) 377 faddd ATANB1,%fp1 | ...B1+Z*(B3+Z*B5) 378 379 fmulx %fp0,%fp2 | ...Y*(B2+Z*(B4+Z*B6)) 380 fmulx X(%a6),%fp0 | ...X*Y 381 382 faddx %fp2,%fp1 | ...[B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))] 383 384 385 fmulx %fp1,%fp0 | ...X*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]) 386 387 fmovel %d1,%FPCR |restore users exceptions 388 faddx X(%a6),%fp0 389 390 bra t_frcinx 391 392ATANTINY: 393|--|X| < 2^(-40), ATAN(X) = X 394 movew #0x0000,XDCARE(%a6) 395 396 fmovel %d1,%FPCR |restore users exceptions 397 fmovex X(%a6),%fp0 |last inst - possible exception set 398 399 bra t_frcinx 400 401ATANBIG: 402|--IF |X| > 2^(100), RETURN SIGN(X)*(PI/2 - TINY). OTHERWISE, 403|--RETURN SIGN(X)*PI/2 + ATAN(-1/X). 404 cmpil #0x40638000,%d0 405 bgt ATANHUGE 406 407|--APPROXIMATE ATAN(-1/X) BY 408|--X'+X'*Y*(C1+Y*(C2+Y*(C3+Y*(C4+Y*C5)))), X' = -1/X, Y = X'*X' 409|--THIS CAN BE RE-WRITTEN AS 410|--X'+X'*Y*( [C1+Z*(C3+Z*C5)] + [Y*(C2+Z*C4)] ), Z = Y*Y. 411 412 fmoves #0xBF800000,%fp1 | ...LOAD -1 413 fdivx %fp0,%fp1 | ...FP1 IS -1/X 414 415 416|--DIVIDE IS STILL CRANKING 417 418 fmovex %fp1,%fp0 | ...FP0 IS X' 419 fmulx %fp0,%fp0 | ...FP0 IS Y = X'*X' 420 fmovex %fp1,X(%a6) | ...X IS REALLY X' 421 422 fmovex %fp0,%fp1 423 fmulx %fp1,%fp1 | ...FP1 IS Z = Y*Y 424 425 fmoved ATANC5,%fp3 426 fmoved ATANC4,%fp2 427 428 fmulx %fp1,%fp3 | ...Z*C5 429 fmulx %fp1,%fp2 | ...Z*B4 430 431 faddd ATANC3,%fp3 | ...C3+Z*C5 432 faddd ATANC2,%fp2 | ...C2+Z*C4 433 434 fmulx %fp3,%fp1 | ...Z*(C3+Z*C5), FP3 RELEASED 435 fmulx %fp0,%fp2 | ...Y*(C2+Z*C4) 436 437 faddd ATANC1,%fp1 | ...C1+Z*(C3+Z*C5) 438 fmulx X(%a6),%fp0 | ...X'*Y 439 440 faddx %fp2,%fp1 | ...[Y*(C2+Z*C4)]+[C1+Z*(C3+Z*C5)] 441 442 443 fmulx %fp1,%fp0 | ...X'*Y*([B1+Z*(B3+Z*B5)] 444| ... +[Y*(B2+Z*(B4+Z*B6))]) 445 faddx X(%a6),%fp0 446 447 fmovel %d1,%FPCR |restore users exceptions 448 449 btstb #7,(%a0) 450 beqs pos_big 451 452neg_big: 453 faddx NPIBY2,%fp0 454 bra t_frcinx 455 456pos_big: 457 faddx PPIBY2,%fp0 458 bra t_frcinx 459 460ATANHUGE: 461|--RETURN SIGN(X)*(PIBY2 - TINY) = SIGN(X)*PIBY2 - SIGN(X)*TINY 462 btstb #7,(%a0) 463 beqs pos_huge 464 465neg_huge: 466 fmovex NPIBY2,%fp0 467 fmovel %d1,%fpcr 468 fsubx NTINY,%fp0 469 bra t_frcinx 470 471pos_huge: 472 fmovex PPIBY2,%fp0 473 fmovel %d1,%fpcr 474 fsubx PTINY,%fp0 475 bra t_frcinx 476 477 |end