e_pow.c - cscg22-gearboy - CSCG 2022 Challenge 'Gearboy'

	cscg22-gearboy CSCG 2022 Challenge 'Gearboy'
	git clone https://git.sinitax.com/sinitax/cscg22-gearboy
	Log \| Files \| Refs \| sfeed.txt
e_pow.c (13440B)
      1/* @(#)e_pow.c 5.1 93/09/24 */
      2/*
      3 * ====================================================
      4 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
      5 *
      6 * Developed at SunPro, a Sun Microsystems, Inc. business.
      7 * Permission to use, copy, modify, and distribute this
      8 * software is freely granted, provided that this notice
      9 * is preserved.
     10 * ====================================================
     11 */
     12
     13#if defined(LIBM_SCCS) && !defined(lint)
     14static char rcsid[] = "$NetBSD: e_pow.c,v 1.9 1995/05/12 04:57:32 jtc Exp $";
     15#endif
     16
     17/* __ieee754_pow(x,y) return x**y
     18 *
     19 *		      n
     20 * Method:  Let x =  2   * (1+f)
     21 *	1. Compute and return log2(x) in two pieces:
     22 *		log2(x) = w1 + w2,
     23 *	   where w1 has 53-24 = 29 bit trailing zeros.
     24 *	2. Perform y*log2(x) = n+y' by simulating muti-precision
     25 *	   arithmetic, where |y'|<=0.5.
     26 *	3. Return x**y = 2**n*exp(y'*log2)
     27 *
     28 * Special cases:
     29 *	1.  (anything) ** 0  is 1
     30 *	2.  (anything) ** 1  is itself
     31 *	3.  (anything) ** NAN is NAN
     32 *	4.  NAN ** (anything except 0) is NAN
     33 *	5.  +-(|x| > 1) **  +INF is +INF
     34 *	6.  +-(|x| > 1) **  -INF is +0
     35 *	7.  +-(|x| < 1) **  +INF is +0
     36 *	8.  +-(|x| < 1) **  -INF is +INF
     37 *	9.  +-1         ** +-INF is NAN
     38 *	10. +0 ** (+anything except 0, NAN)               is +0
     39 *	11. -0 ** (+anything except 0, NAN, odd integer)  is +0
     40 *	12. +0 ** (-anything except 0, NAN)               is +INF
     41 *	13. -0 ** (-anything except 0, NAN, odd integer)  is +INF
     42 *	14. -0 ** (odd integer) = -( +0 ** (odd integer) )
     43 *	15. +INF ** (+anything except 0,NAN) is +INF
     44 *	16. +INF ** (-anything except 0,NAN) is +0
     45 *	17. -INF ** (anything)  = -0 ** (-anything)
     46 *	18. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer)
     47 *	19. (-anything except 0 and inf) ** (non-integer) is NAN
     48 *
     49 * Accuracy:
     50 *	pow(x,y) returns x**y nearly rounded. In particular
     51 *			pow(integer,integer)
     52 *	always returns the correct integer provided it is
     53 *	representable.
     54 *
     55 * Constants :
     56 * The hexadecimal values are the intended ones for the following
     57 * constants. The decimal values may be used, provided that the
     58 * compiler will convert from decimal to binary accurately enough
     59 * to produce the hexadecimal values shown.
     60 */
     61
     62#include "math_libm.h"
     63#include "math_private.h"
     64
     65libm_hidden_proto(scalbn)
     66    libm_hidden_proto(fabs)
     67#ifdef __STDC__
     68     static const double
     69#else
     70     static double
     71#endif
     72       bp[] = { 1.0, 1.5, }, dp_h[] = {
     73     0.0, 5.84962487220764160156e-01,}, /* 0x3FE2B803, 0x40000000 */
     74
     75         dp_l[] = {
     76     0.0, 1.35003920212974897128e-08,}, /* 0x3E4CFDEB, 0x43CFD006 */
     77
     78         zero = 0.0, one = 1.0, two = 2.0, two53 = 9007199254740992.0,  /* 0x43400000, 0x00000000 */
     79         huge_val = 1.0e300, tiny = 1.0e-300,
     80         /* poly coefs for (3/2)*(log(x)-2s-2/3*s**3 */
     81         L1 = 5.99999999999994648725e-01,       /* 0x3FE33333, 0x33333303 */
     82         L2 = 4.28571428578550184252e-01,       /* 0x3FDB6DB6, 0xDB6FABFF */
     83         L3 = 3.33333329818377432918e-01,       /* 0x3FD55555, 0x518F264D */
     84         L4 = 2.72728123808534006489e-01,       /* 0x3FD17460, 0xA91D4101 */
     85         L5 = 2.30660745775561754067e-01,       /* 0x3FCD864A, 0x93C9DB65 */
     86         L6 = 2.06975017800338417784e-01,       /* 0x3FCA7E28, 0x4A454EEF */
     87         P1 = 1.66666666666666019037e-01,       /* 0x3FC55555, 0x5555553E */
     88         P2 = -2.77777777770155933842e-03,      /* 0xBF66C16C, 0x16BEBD93 */
     89         P3 = 6.61375632143793436117e-05,       /* 0x3F11566A, 0xAF25DE2C */
     90         P4 = -1.65339022054652515390e-06,      /* 0xBEBBBD41, 0xC5D26BF1 */
     91         P5 = 4.13813679705723846039e-08,       /* 0x3E663769, 0x72BEA4D0 */
     92         lg2 = 6.93147180559945286227e-01,      /* 0x3FE62E42, 0xFEFA39EF */
     93         lg2_h = 6.93147182464599609375e-01,    /* 0x3FE62E43, 0x00000000 */
     94         lg2_l = -1.90465429995776804525e-09,   /* 0xBE205C61, 0x0CA86C39 */
     95         ovt = 8.0085662595372944372e-0017,     /* -(1024-log2(ovfl+.5ulp)) */
     96         cp = 9.61796693925975554329e-01,       /* 0x3FEEC709, 0xDC3A03FD =2/(3ln2) */
     97         cp_h = 9.61796700954437255859e-01,     /* 0x3FEEC709, 0xE0000000 =(float)cp */
     98         cp_l = -7.02846165095275826516e-09,    /* 0xBE3E2FE0, 0x145B01F5 =tail of cp_h */
     99         ivln2 = 1.44269504088896338700e+00,    /* 0x3FF71547, 0x652B82FE =1/ln2 */
    100         ivln2_h = 1.44269502162933349609e+00,  /* 0x3FF71547, 0x60000000 =24b 1/ln2 */
    101         ivln2_l = 1.92596299112661746887e-08;  /* 0x3E54AE0B, 0xF85DDF44 =1/ln2 tail */
    102
    103#ifdef __STDC__
    104     double attribute_hidden __ieee754_pow(double x, double y)
    105#else
    106     double attribute_hidden __ieee754_pow(x, y)
    107     double x, y;
    108#endif
    109     {
    110         double z, ax, z_h, z_l, p_h, p_l;
    111         double y1, t1, t2, r, s, t, u, v, w;
    112         int32_t i, j, k, yisint, n;
    113         int32_t hx, hy, ix, iy;
    114         u_int32_t lx, ly;
    115
    116         EXTRACT_WORDS(hx, lx, x);
    117         EXTRACT_WORDS(hy, ly, y);
    118         ix = hx & 0x7fffffff;
    119         iy = hy & 0x7fffffff;
    120
    121         /* y==zero: x**0 = 1 */
    122         if ((iy | ly) == 0)
    123             return one;
    124
    125         /* +-NaN return x+y */
    126         if (ix > 0x7ff00000 || ((ix == 0x7ff00000) && (lx != 0)) ||
    127             iy > 0x7ff00000 || ((iy == 0x7ff00000) && (ly != 0)))
    128             return x + y;
    129
    130         /* determine if y is an odd int when x < 0
    131          * yisint = 0       ... y is not an integer
    132          * yisint = 1       ... y is an odd int
    133          * yisint = 2       ... y is an even int
    134          */
    135         yisint = 0;
    136         if (hx < 0) {
    137             if (iy >= 0x43400000)
    138                 yisint = 2;    /* even integer y */
    139             else if (iy >= 0x3ff00000) {
    140                 k = (iy >> 20) - 0x3ff;        /* exponent */
    141                 if (k > 20) {
    142                     j = ly >> (52 - k);
    143                     if ((j << (52 - k)) == ly)
    144                         yisint = 2 - (j & 1);
    145                 } else if (ly == 0) {
    146                     j = iy >> (20 - k);
    147                     if ((j << (20 - k)) == iy)
    148                         yisint = 2 - (j & 1);
    149                 }
    150             }
    151         }
    152
    153         /* special value of y */
    154         if (ly == 0) {
    155             if (iy == 0x7ff00000) {    /* y is +-inf */
    156                 if (((ix - 0x3ff00000) | lx) == 0)
    157                     return y - y;      /* inf**+-1 is NaN */
    158                 else if (ix >= 0x3ff00000)     /* (|x|>1)**+-inf = inf,0 */
    159                     return (hy >= 0) ? y : zero;
    160                 else           /* (|x|<1)**-,+inf = inf,0 */
    161                     return (hy < 0) ? -y : zero;
    162             }
    163             if (iy == 0x3ff00000) {    /* y is  +-1 */
    164                 if (hy < 0)
    165                     return one / x;
    166                 else
    167                     return x;
    168             }
    169             if (hy == 0x40000000)
    170                 return x * x;  /* y is  2 */
    171             if (hy == 0x3fe00000) {    /* y is  0.5 */
    172                 if (hx >= 0)   /* x >= +0 */
    173                     return __ieee754_sqrt(x);
    174             }
    175         }
    176
    177         ax = fabs(x);
    178         /* special value of x */
    179         if (lx == 0) {
    180             if (ix == 0x7ff00000 || ix == 0 || ix == 0x3ff00000) {
    181                 z = ax;        /* x is +-0,+-inf,+-1 */
    182                 if (hy < 0)
    183                     z = one / z;       /* z = (1/|x|) */
    184                 if (hx < 0) {
    185                     if (((ix - 0x3ff00000) | yisint) == 0) {
    186                         z = (z - z) / (z - z); /* (-1)**non-int is NaN */
    187                     } else if (yisint == 1)
    188                         z = -z;        /* (x<0)**odd = -(|x|**odd) */
    189                 }
    190                 return z;
    191             }
    192         }
    193
    194         /* (x<0)**(non-int) is NaN */
    195         if (((((u_int32_t) hx >> 31) - 1) | yisint) == 0)
    196             return (x - x) / (x - x);
    197
    198         /* |y| is huge */
    199         if (iy > 0x41e00000) { /* if |y| > 2**31 */
    200             if (iy > 0x43f00000) {     /* if |y| > 2**64, must o/uflow */
    201                 if (ix <= 0x3fefffff)
    202                     return (hy < 0) ? huge_val * huge_val : tiny * tiny;
    203                 if (ix >= 0x3ff00000)
    204                     return (hy > 0) ? huge_val * huge_val : tiny * tiny;
    205             }
    206             /* over/underflow if x is not close to one */
    207             if (ix < 0x3fefffff)
    208                 return (hy < 0) ? huge_val * huge_val : tiny * tiny;
    209             if (ix > 0x3ff00000)
    210                 return (hy > 0) ? huge_val * huge_val : tiny * tiny;
    211             /* now |1-x| is tiny <= 2**-20, suffice to compute
    212                log(x) by x-x^2/2+x^3/3-x^4/4 */
    213             t = x - 1;         /* t has 20 trailing zeros */
    214             w = (t * t) * (0.5 - t * (0.3333333333333333333333 - t * 0.25));
    215             u = ivln2_h * t;   /* ivln2_h has 21 sig. bits */
    216             v = t * ivln2_l - w * ivln2;
    217             t1 = u + v;
    218             SET_LOW_WORD(t1, 0);
    219             t2 = v - (t1 - u);
    220         } else {
    221             double s2, s_h, s_l, t_h, t_l;
    222             n = 0;
    223             /* take care subnormal number */
    224             if (ix < 0x00100000) {
    225                 ax *= two53;
    226                 n -= 53;
    227                 GET_HIGH_WORD(ix, ax);
    228             }
    229             n += ((ix) >> 20) - 0x3ff;
    230             j = ix & 0x000fffff;
    231             /* determine interval */
    232             ix = j | 0x3ff00000;       /* normalize ix */
    233             if (j <= 0x3988E)
    234                 k = 0;         /* |x|<sqrt(3/2) */
    235             else if (j < 0xBB67A)
    236                 k = 1;         /* |x|<sqrt(3)   */
    237             else {
    238                 k = 0;
    239                 n += 1;
    240                 ix -= 0x00100000;
    241             }
    242             SET_HIGH_WORD(ax, ix);
    243
    244             /* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
    245             u = ax - bp[k];    /* bp[0]=1.0, bp[1]=1.5 */
    246             v = one / (ax + bp[k]);
    247             s = u * v;
    248             s_h = s;
    249             SET_LOW_WORD(s_h, 0);
    250             /* t_h=ax+bp[k] High */
    251             t_h = zero;
    252             SET_HIGH_WORD(t_h,
    253                           ((ix >> 1) | 0x20000000) + 0x00080000 + (k << 18));
    254             t_l = ax - (t_h - bp[k]);
    255             s_l = v * ((u - s_h * t_h) - s_h * t_l);
    256             /* compute log(ax) */
    257             s2 = s * s;
    258             r = s2 * s2 * (L1 +
    259                            s2 * (L2 +
    260                                  s2 * (L3 +
    261                                        s2 * (L4 + s2 * (L5 + s2 * L6)))));
    262             r += s_l * (s_h + s);
    263             s2 = s_h * s_h;
    264             t_h = 3.0 + s2 + r;
    265             SET_LOW_WORD(t_h, 0);
    266             t_l = r - ((t_h - 3.0) - s2);
    267             /* u+v = s*(1+...) */
    268             u = s_h * t_h;
    269             v = s_l * t_h + t_l * s;
    270             /* 2/(3log2)*(s+...) */
    271             p_h = u + v;
    272             SET_LOW_WORD(p_h, 0);
    273             p_l = v - (p_h - u);
    274             z_h = cp_h * p_h;  /* cp_h+cp_l = 2/(3*log2) */
    275             z_l = cp_l * p_h + p_l * cp + dp_l[k];
    276             /* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */
    277             t = (double) n;
    278             t1 = (((z_h + z_l) + dp_h[k]) + t);
    279             SET_LOW_WORD(t1, 0);
    280             t2 = z_l - (((t1 - t) - dp_h[k]) - z_h);
    281         }
    282
    283         s = one;               /* s (sign of result -ve**odd) = -1 else = 1 */
    284         if (((((u_int32_t) hx >> 31) - 1) | (yisint - 1)) == 0)
    285             s = -one;          /* (-ve)**(odd int) */
    286
    287         /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */
    288         y1 = y;
    289         SET_LOW_WORD(y1, 0);
    290         p_l = (y - y1) * t1 + y * t2;
    291         p_h = y1 * t1;
    292         z = p_l + p_h;
    293         EXTRACT_WORDS(j, i, z);
    294         if (j >= 0x40900000) { /* z >= 1024 */
    295             if (((j - 0x40900000) | i) != 0)   /* if z > 1024 */
    296                 return s * huge_val * huge_val;        /* overflow */
    297             else {
    298                 if (p_l + ovt > z - p_h)
    299                     return s * huge_val * huge_val;    /* overflow */
    300             }
    301         } else if ((j & 0x7fffffff) >= 0x4090cc00) {   /* z <= -1075 */
    302             if (((j - 0xc090cc00) | i) != 0)   /* z < -1075 */
    303                 return s * tiny * tiny;        /* underflow */
    304             else {
    305                 if (p_l <= z - p_h)
    306                     return s * tiny * tiny;    /* underflow */
    307             }
    308         }
    309         /*
    310          * compute 2**(p_h+p_l)
    311          */
    312         i = j & 0x7fffffff;
    313         k = (i >> 20) - 0x3ff;
    314         n = 0;
    315         if (i > 0x3fe00000) {  /* if |z| > 0.5, set n = [z+0.5] */
    316             n = j + (0x00100000 >> (k + 1));
    317             k = ((n & 0x7fffffff) >> 20) - 0x3ff;      /* new k for n */
    318             t = zero;
    319             SET_HIGH_WORD(t, n & ~(0x000fffff >> k));
    320             n = ((n & 0x000fffff) | 0x00100000) >> (20 - k);
    321             if (j < 0)
    322                 n = -n;
    323             p_h -= t;
    324         }
    325         t = p_l + p_h;
    326         SET_LOW_WORD(t, 0);
    327         u = t * lg2_h;
    328         v = (p_l - (t - p_h)) * lg2 + t * lg2_l;
    329         z = u + v;
    330         w = v - (z - u);
    331         t = z * z;
    332         t1 = z - t * (P1 + t * (P2 + t * (P3 + t * (P4 + t * P5))));
    333         r = (z * t1) / (t1 - two) - (w + z * w);
    334         z = one - (r - z);
    335         GET_HIGH_WORD(j, z);
    336         j += (n << 20);
    337         if ((j >> 20) <= 0)
    338             z = scalbn(z, n);  /* subnormal output */
    339         else
    340             SET_HIGH_WORD(z, j);
    341         return s * z;
    342     }