e_sqrt.c (15284B)
1/* @(#)e_sqrt.c 5.1 93/09/24 */ 2/* 3 * ==================================================== 4 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. 5 * 6 * Developed at SunPro, a Sun Microsystems, Inc. business. 7 * Permission to use, copy, modify, and distribute this 8 * software is freely granted, provided that this notice 9 * is preserved. 10 * ==================================================== 11 */ 12 13#if defined(LIBM_SCCS) && !defined(lint) 14static const char rcsid[] = 15 "$NetBSD: e_sqrt.c,v 1.8 1995/05/10 20:46:17 jtc Exp $"; 16#endif 17 18/* __ieee754_sqrt(x) 19 * Return correctly rounded sqrt. 20 * ------------------------------------------ 21 * | Use the hardware sqrt if you have one | 22 * ------------------------------------------ 23 * Method: 24 * Bit by bit method using integer arithmetic. (Slow, but portable) 25 * 1. Normalization 26 * Scale x to y in [1,4) with even powers of 2: 27 * find an integer k such that 1 <= (y=x*2^(2k)) < 4, then 28 * sqrt(x) = 2^k * sqrt(y) 29 * 2. Bit by bit computation 30 * Let q = sqrt(y) truncated to i bit after binary point (q = 1), 31 * i 0 32 * i+1 2 33 * s = 2*q , and y = 2 * ( y - q ). (1) 34 * i i i i 35 * 36 * To compute q from q , one checks whether 37 * i+1 i 38 * 39 * -(i+1) 2 40 * (q + 2 ) <= y. (2) 41 * i 42 * -(i+1) 43 * If (2) is false, then q = q ; otherwise q = q + 2 . 44 * i+1 i i+1 i 45 * 46 * With some algebric manipulation, it is not difficult to see 47 * that (2) is equivalent to 48 * -(i+1) 49 * s + 2 <= y (3) 50 * i i 51 * 52 * The advantage of (3) is that s and y can be computed by 53 * i i 54 * the following recurrence formula: 55 * if (3) is false 56 * 57 * s = s , y = y ; (4) 58 * i+1 i i+1 i 59 * 60 * otherwise, 61 * -i -(i+1) 62 * s = s + 2 , y = y - s - 2 (5) 63 * i+1 i i+1 i i 64 * 65 * One may easily use induction to prove (4) and (5). 66 * Note. Since the left hand side of (3) contain only i+2 bits, 67 * it does not necessary to do a full (53-bit) comparison 68 * in (3). 69 * 3. Final rounding 70 * After generating the 53 bits result, we compute one more bit. 71 * Together with the remainder, we can decide whether the 72 * result is exact, bigger than 1/2ulp, or less than 1/2ulp 73 * (it will never equal to 1/2ulp). 74 * The rounding mode can be detected by checking whether 75 * huge + tiny is equal to huge, and whether huge - tiny is 76 * equal to huge for some floating point number "huge" and "tiny". 77 * 78 * Special cases: 79 * sqrt(+-0) = +-0 ... exact 80 * sqrt(inf) = inf 81 * sqrt(-ve) = NaN ... with invalid signal 82 * sqrt(NaN) = NaN ... with invalid signal for signaling NaN 83 * 84 * Other methods : see the appended file at the end of the program below. 85 *--------------- 86 */ 87 88#include "math_libm.h" 89#include "math_private.h" 90 91#ifdef __STDC__ 92static const double one = 1.0, tiny = 1.0e-300; 93#else 94static double one = 1.0, tiny = 1.0e-300; 95#endif 96 97#ifdef __STDC__ 98double attribute_hidden 99__ieee754_sqrt(double x) 100#else 101double attribute_hidden 102__ieee754_sqrt(x) 103 double x; 104#endif 105{ 106 double z; 107 int32_t sign = (int) 0x80000000; 108 int32_t ix0, s0, q, m, t, i; 109 u_int32_t r, t1, s1, ix1, q1; 110 111 EXTRACT_WORDS(ix0, ix1, x); 112 113 /* take care of Inf and NaN */ 114 if ((ix0 & 0x7ff00000) == 0x7ff00000) { 115 return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf 116 sqrt(-inf)=sNaN */ 117 } 118 /* take care of zero */ 119 if (ix0 <= 0) { 120 if (((ix0 & (~sign)) | ix1) == 0) 121 return x; /* sqrt(+-0) = +-0 */ 122 else if (ix0 < 0) 123 return (x - x) / (x - x); /* sqrt(-ve) = sNaN */ 124 } 125 /* normalize x */ 126 m = (ix0 >> 20); 127 if (m == 0) { /* subnormal x */ 128 while (ix0 == 0) { 129 m -= 21; 130 ix0 |= (ix1 >> 11); 131 ix1 <<= 21; 132 } 133 for (i = 0; (ix0 & 0x00100000) == 0; i++) 134 ix0 <<= 1; 135 m -= i - 1; 136 ix0 |= (ix1 >> (32 - i)); 137 ix1 <<= i; 138 } 139 m -= 1023; /* unbias exponent */ 140 ix0 = (ix0 & 0x000fffff) | 0x00100000; 141 if (m & 1) { /* odd m, double x to make it even */ 142 ix0 += ix0 + ((ix1 & sign) >> 31); 143 ix1 += ix1; 144 } 145 m >>= 1; /* m = [m/2] */ 146 147 /* generate sqrt(x) bit by bit */ 148 ix0 += ix0 + ((ix1 & sign) >> 31); 149 ix1 += ix1; 150 q = q1 = s0 = s1 = 0; /* [q,q1] = sqrt(x) */ 151 r = 0x00200000; /* r = moving bit from right to left */ 152 153 while (r != 0) { 154 t = s0 + r; 155 if (t <= ix0) { 156 s0 = t + r; 157 ix0 -= t; 158 q += r; 159 } 160 ix0 += ix0 + ((ix1 & sign) >> 31); 161 ix1 += ix1; 162 r >>= 1; 163 } 164 165 r = sign; 166 while (r != 0) { 167 t1 = s1 + r; 168 t = s0; 169 if ((t < ix0) || ((t == ix0) && (t1 <= ix1))) { 170 s1 = t1 + r; 171 if (((t1 & sign) == sign) && (s1 & sign) == 0) 172 s0 += 1; 173 ix0 -= t; 174 if (ix1 < t1) 175 ix0 -= 1; 176 ix1 -= t1; 177 q1 += r; 178 } 179 ix0 += ix0 + ((ix1 & sign) >> 31); 180 ix1 += ix1; 181 r >>= 1; 182 } 183 184 /* use floating add to find out rounding direction */ 185 if ((ix0 | ix1) != 0) { 186 z = one - tiny; /* trigger inexact flag */ 187 if (z >= one) { 188 z = one + tiny; 189 if (q1 == (u_int32_t) 0xffffffff) { 190 q1 = 0; 191 q += 1; 192 } else if (z > one) { 193 if (q1 == (u_int32_t) 0xfffffffe) 194 q += 1; 195 q1 += 2; 196 } else 197 q1 += (q1 & 1); 198 } 199 } 200 ix0 = (q >> 1) + 0x3fe00000; 201 ix1 = q1 >> 1; 202 if ((q & 1) == 1) 203 ix1 |= sign; 204 ix0 += (m << 20); 205 INSERT_WORDS(z, ix0, ix1); 206 return z; 207} 208 209/* 210Other methods (use floating-point arithmetic) 211------------- 212(This is a copy of a drafted paper by Prof W. Kahan 213and K.C. Ng, written in May, 1986) 214 215 Two algorithms are given here to implement sqrt(x) 216 (IEEE double precision arithmetic) in software. 217 Both supply sqrt(x) correctly rounded. The first algorithm (in 218 Section A) uses newton iterations and involves four divisions. 219 The second one uses reciproot iterations to avoid division, but 220 requires more multiplications. Both algorithms need the ability 221 to chop results of arithmetic operations instead of round them, 222 and the INEXACT flag to indicate when an arithmetic operation 223 is executed exactly with no roundoff error, all part of the 224 standard (IEEE 754-1985). The ability to perform shift, add, 225 subtract and logical AND operations upon 32-bit words is needed 226 too, though not part of the standard. 227 228A. sqrt(x) by Newton Iteration 229 230 (1) Initial approximation 231 232 Let x0 and x1 be the leading and the trailing 32-bit words of 233 a floating point number x (in IEEE double format) respectively 234 235 1 11 52 ...widths 236 ------------------------------------------------------ 237 x: |s| e | f | 238 ------------------------------------------------------ 239 msb lsb msb lsb ...order 240 241 242 ------------------------ ------------------------ 243 x0: |s| e | f1 | x1: | f2 | 244 ------------------------ ------------------------ 245 246 By performing shifts and subtracts on x0 and x1 (both regarded 247 as integers), we obtain an 8-bit approximation of sqrt(x) as 248 follows. 249 250 k := (x0>>1) + 0x1ff80000; 251 y0 := k - T1[31&(k>>15)]. ... y ~ sqrt(x) to 8 bits 252 Here k is a 32-bit integer and T1[] is an integer array containing 253 correction terms. Now magically the floating value of y (y's 254 leading 32-bit word is y0, the value of its trailing word is 0) 255 approximates sqrt(x) to almost 8-bit. 256 257 Value of T1: 258 static int T1[32]= { 259 0, 1024, 3062, 5746, 9193, 13348, 18162, 23592, 260 29598, 36145, 43202, 50740, 58733, 67158, 75992, 85215, 261 83599, 71378, 60428, 50647, 41945, 34246, 27478, 21581, 262 16499, 12183, 8588, 5674, 3403, 1742, 661, 130,}; 263 264 (2) Iterative refinement 265 266 Apply Heron's rule three times to y, we have y approximates 267 sqrt(x) to within 1 ulp (Unit in the Last Place): 268 269 y := (y+x/y)/2 ... almost 17 sig. bits 270 y := (y+x/y)/2 ... almost 35 sig. bits 271 y := y-(y-x/y)/2 ... within 1 ulp 272 273 274 Remark 1. 275 Another way to improve y to within 1 ulp is: 276 277 y := (y+x/y) ... almost 17 sig. bits to 2*sqrt(x) 278 y := y - 0x00100006 ... almost 18 sig. bits to sqrt(x) 279 280 2 281 (x-y )*y 282 y := y + 2* ---------- ...within 1 ulp 283 2 284 3y + x 285 286 287 This formula has one division fewer than the one above; however, 288 it requires more multiplications and additions. Also x must be 289 scaled in advance to avoid spurious overflow in evaluating the 290 expression 3y*y+x. Hence it is not recommended uless division 291 is slow. If division is very slow, then one should use the 292 reciproot algorithm given in section B. 293 294 (3) Final adjustment 295 296 By twiddling y's last bit it is possible to force y to be 297 correctly rounded according to the prevailing rounding mode 298 as follows. Let r and i be copies of the rounding mode and 299 inexact flag before entering the square root program. Also we 300 use the expression y+-ulp for the next representable floating 301 numbers (up and down) of y. Note that y+-ulp = either fixed 302 point y+-1, or multiply y by nextafter(1,+-inf) in chopped 303 mode. 304 305 I := FALSE; ... reset INEXACT flag I 306 R := RZ; ... set rounding mode to round-toward-zero 307 z := x/y; ... chopped quotient, possibly inexact 308 If(not I) then { ... if the quotient is exact 309 if(z=y) { 310 I := i; ... restore inexact flag 311 R := r; ... restore rounded mode 312 return sqrt(x):=y. 313 } else { 314 z := z - ulp; ... special rounding 315 } 316 } 317 i := TRUE; ... sqrt(x) is inexact 318 If (r=RN) then z=z+ulp ... rounded-to-nearest 319 If (r=RP) then { ... round-toward-+inf 320 y = y+ulp; z=z+ulp; 321 } 322 y := y+z; ... chopped sum 323 y0:=y0-0x00100000; ... y := y/2 is correctly rounded. 324 I := i; ... restore inexact flag 325 R := r; ... restore rounded mode 326 return sqrt(x):=y. 327 328 (4) Special cases 329 330 Square root of +inf, +-0, or NaN is itself; 331 Square root of a negative number is NaN with invalid signal. 332 333 334B. sqrt(x) by Reciproot Iteration 335 336 (1) Initial approximation 337 338 Let x0 and x1 be the leading and the trailing 32-bit words of 339 a floating point number x (in IEEE double format) respectively 340 (see section A). By performing shifs and subtracts on x0 and y0, 341 we obtain a 7.8-bit approximation of 1/sqrt(x) as follows. 342 343 k := 0x5fe80000 - (x0>>1); 344 y0:= k - T2[63&(k>>14)]. ... y ~ 1/sqrt(x) to 7.8 bits 345 346 Here k is a 32-bit integer and T2[] is an integer array 347 containing correction terms. Now magically the floating 348 value of y (y's leading 32-bit word is y0, the value of 349 its trailing word y1 is set to zero) approximates 1/sqrt(x) 350 to almost 7.8-bit. 351 352 Value of T2: 353 static int T2[64]= { 354 0x1500, 0x2ef8, 0x4d67, 0x6b02, 0x87be, 0xa395, 0xbe7a, 0xd866, 355 0xf14a, 0x1091b,0x11fcd,0x13552,0x14999,0x15c98,0x16e34,0x17e5f, 356 0x18d03,0x19a01,0x1a545,0x1ae8a,0x1b5c4,0x1bb01,0x1bfde,0x1c28d, 357 0x1c2de,0x1c0db,0x1ba73,0x1b11c,0x1a4b5,0x1953d,0x18266,0x16be0, 358 0x1683e,0x179d8,0x18a4d,0x19992,0x1a789,0x1b445,0x1bf61,0x1c989, 359 0x1d16d,0x1d77b,0x1dddf,0x1e2ad,0x1e5bf,0x1e6e8,0x1e654,0x1e3cd, 360 0x1df2a,0x1d635,0x1cb16,0x1be2c,0x1ae4e,0x19bde,0x1868e,0x16e2e, 361 0x1527f,0x1334a,0x11051,0xe951, 0xbe01, 0x8e0d, 0x5924, 0x1edd,}; 362 363 (2) Iterative refinement 364 365 Apply Reciproot iteration three times to y and multiply the 366 result by x to get an approximation z that matches sqrt(x) 367 to about 1 ulp. To be exact, we will have 368 -1ulp < sqrt(x)-z<1.0625ulp. 369 370 ... set rounding mode to Round-to-nearest 371 y := y*(1.5-0.5*x*y*y) ... almost 15 sig. bits to 1/sqrt(x) 372 y := y*((1.5-2^-30)+0.5*x*y*y)... about 29 sig. bits to 1/sqrt(x) 373 ... special arrangement for better accuracy 374 z := x*y ... 29 bits to sqrt(x), with z*y<1 375 z := z + 0.5*z*(1-z*y) ... about 1 ulp to sqrt(x) 376 377 Remark 2. The constant 1.5-2^-30 is chosen to bias the error so that 378 (a) the term z*y in the final iteration is always less than 1; 379 (b) the error in the final result is biased upward so that 380 -1 ulp < sqrt(x) - z < 1.0625 ulp 381 instead of |sqrt(x)-z|<1.03125ulp. 382 383 (3) Final adjustment 384 385 By twiddling y's last bit it is possible to force y to be 386 correctly rounded according to the prevailing rounding mode 387 as follows. Let r and i be copies of the rounding mode and 388 inexact flag before entering the square root program. Also we 389 use the expression y+-ulp for the next representable floating 390 numbers (up and down) of y. Note that y+-ulp = either fixed 391 point y+-1, or multiply y by nextafter(1,+-inf) in chopped 392 mode. 393 394 R := RZ; ... set rounding mode to round-toward-zero 395 switch(r) { 396 case RN: ... round-to-nearest 397 if(x<= z*(z-ulp)...chopped) z = z - ulp; else 398 if(x<= z*(z+ulp)...chopped) z = z; else z = z+ulp; 399 break; 400 case RZ:case RM: ... round-to-zero or round-to--inf 401 R:=RP; ... reset rounding mod to round-to-+inf 402 if(x<z*z ... rounded up) z = z - ulp; else 403 if(x>=(z+ulp)*(z+ulp) ...rounded up) z = z+ulp; 404 break; 405 case RP: ... round-to-+inf 406 if(x>(z+ulp)*(z+ulp)...chopped) z = z+2*ulp; else 407 if(x>z*z ...chopped) z = z+ulp; 408 break; 409 } 410 411 Remark 3. The above comparisons can be done in fixed point. For 412 example, to compare x and w=z*z chopped, it suffices to compare 413 x1 and w1 (the trailing parts of x and w), regarding them as 414 two's complement integers. 415 416 ...Is z an exact square root? 417 To determine whether z is an exact square root of x, let z1 be the 418 trailing part of z, and also let x0 and x1 be the leading and 419 trailing parts of x. 420 421 If ((z1&0x03ffffff)!=0) ... not exact if trailing 26 bits of z!=0 422 I := 1; ... Raise Inexact flag: z is not exact 423 else { 424 j := 1 - [(x0>>20)&1] ... j = logb(x) mod 2 425 k := z1 >> 26; ... get z's 25-th and 26-th 426 fraction bits 427 I := i or (k&j) or ((k&(j+j+1))!=(x1&3)); 428 } 429 R:= r ... restore rounded mode 430 return sqrt(x):=z. 431 432 If multiplication is cheaper then the foregoing red tape, the 433 Inexact flag can be evaluated by 434 435 I := i; 436 I := (z*z!=x) or I. 437 438 Note that z*z can overwrite I; this value must be sensed if it is 439 True. 440 441 Remark 4. If z*z = x exactly, then bit 25 to bit 0 of z1 must be 442 zero. 443 444 -------------------- 445 z1: | f2 | 446 -------------------- 447 bit 31 bit 0 448 449 Further more, bit 27 and 26 of z1, bit 0 and 1 of x1, and the odd 450 or even of logb(x) have the following relations: 451 452 ------------------------------------------------- 453 bit 27,26 of z1 bit 1,0 of x1 logb(x) 454 ------------------------------------------------- 455 00 00 odd and even 456 01 01 even 457 10 10 odd 458 10 00 even 459 11 01 even 460 ------------------------------------------------- 461 462 (4) Special cases (see (4) of Section A). 463 464 */