cachepc-qemu

Fork of AMDESE/qemu with changes for cachepc side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-qemu

softfloat.c (106516B)


      1/*
      2 * Ported from a work by Andreas Grabher for Previous, NeXT Computer Emulator,
      3 * derived from NetBSD M68040 FPSP functions,
      4 * derived from release 2a of the SoftFloat IEC/IEEE Floating-point Arithmetic
      5 * Package. Those parts of the code (and some later contributions) are
      6 * provided under that license, as detailed below.
      7 * It has subsequently been modified by contributors to the QEMU Project,
      8 * so some portions are provided under:
      9 *  the SoftFloat-2a license
     10 *  the BSD license
     11 *  GPL-v2-or-later
     12 *
     13 * Any future contributions to this file will be taken to be licensed under
     14 * the Softfloat-2a license unless specifically indicated otherwise.
     15 */
     16
     17/*
     18 * Portions of this work are licensed under the terms of the GNU GPL,
     19 * version 2 or later. See the COPYING file in the top-level directory.
     20 */
     21
     22#include "qemu/osdep.h"
     23#include "softfloat.h"
     24#include "fpu/softfloat-macros.h"
     25#include "softfloat_fpsp_tables.h"
     26
     27#define pi_exp      0x4000
     28#define piby2_exp   0x3FFF
     29#define pi_sig      UINT64_C(0xc90fdaa22168c235)
     30
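        /*
         * Propagate the NaN in operand `a' for the one-argument functions below:
         * raise the invalid exception for a signaling NaN and quiet it, and
         * return the default NaN instead when default_nan_mode is set.
         */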
     31static floatx80 propagateFloatx80NaNOneArg(floatx80 a, float_status *status)
     32{
     33    if (floatx80_is_signaling_nan(a, status)) {
     34        float_raise(float_flag_invalid, status);
     35        a = floatx80_silence_nan(a, status);
     36    }
     37
     38    if (status->default_nan_mode) {
     39        return floatx80_default_nan(status);
     40    }
     41
     42    return a;
     43}
     44
     45/*
     46 * Returns the mantissa of the extended double-precision floating-point
     47 * value `a'.
     48 */
     49
     50floatx80 floatx80_getman(floatx80 a, float_status *status)
     51{
     52    bool aSign;
     53    int32_t aExp;
     54    uint64_t aSig;
     55
     56    aSig = extractFloatx80Frac(a);
     57    aExp = extractFloatx80Exp(a);
     58    aSign = extractFloatx80Sign(a);
     59
     60    if (aExp == 0x7FFF) {
     61        if ((uint64_t) (aSig << 1)) {
      62            return propagateFloatx80NaNOneArg(a, status);
      63        }
      64        float_raise(float_flag_invalid, status);
     65        return floatx80_default_nan(status);
     66    }
     67
     68    if (aExp == 0) {
     69        if (aSig == 0) {
     70            return packFloatx80(aSign, 0, 0);
     71        }
     72        normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
     73    }
     74
     75    return roundAndPackFloatx80(status->floatx80_rounding_precision, aSign,
     76                                0x3FFF, aSig, 0, status);
     77}
     78
     79/*
     80 * Returns the exponent of the extended double-precision floating-point
     81 * value `a' as an extended double-precision value.
     82 */
     83
     84floatx80 floatx80_getexp(floatx80 a, float_status *status)
     85{
     86    bool aSign;
     87    int32_t aExp;
     88    uint64_t aSig;
     89
     90    aSig = extractFloatx80Frac(a);
     91    aExp = extractFloatx80Exp(a);
     92    aSign = extractFloatx80Sign(a);
     93
     94    if (aExp == 0x7FFF) {
     95        if ((uint64_t) (aSig << 1)) {
      96            return propagateFloatx80NaNOneArg(a, status);
      97        }
      98        float_raise(float_flag_invalid, status);
     99        return floatx80_default_nan(status);
    100    }
    101
    102    if (aExp == 0) {
    103        if (aSig == 0) {
    104            return packFloatx80(aSign, 0, 0);
    105        }
    106        normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
    107    }
    108
    109    return int32_to_floatx80(aExp - 0x3FFF, status);
    110}
    111
    112/*
     113 * Scales the extended double-precision floating-point value in operand `a'
     114 * by the value `b'. The function truncates the value in the second operand
     115 * `b' to an integral value and adds that value to the exponent of operand
     116 * `a'. The operation is performed according to the IEC/IEEE Standard for
     117 * Binary Floating-Point Arithmetic.
    118 */
    119
    120floatx80 floatx80_scale(floatx80 a, floatx80 b, float_status *status)
    121{
    122    bool aSign, bSign;
    123    int32_t aExp, bExp, shiftCount;
    124    uint64_t aSig, bSig;
    125
    126    aSig = extractFloatx80Frac(a);
    127    aExp = extractFloatx80Exp(a);
    128    aSign = extractFloatx80Sign(a);
    129    bSig = extractFloatx80Frac(b);
    130    bExp = extractFloatx80Exp(b);
    131    bSign = extractFloatx80Sign(b);
    132
    133    if (bExp == 0x7FFF) {
    134        if ((uint64_t) (bSig << 1) ||
    135            ((aExp == 0x7FFF) && (uint64_t) (aSig << 1))) {
    136            return propagateFloatx80NaN(a, b, status);
    137        }
     138        float_raise(float_flag_invalid, status);
    139        return floatx80_default_nan(status);
    140    }
    141    if (aExp == 0x7FFF) {
    142        if ((uint64_t) (aSig << 1)) {
    143            return propagateFloatx80NaN(a, b, status);
    144        }
    145        return packFloatx80(aSign, floatx80_infinity.high,
    146                            floatx80_infinity.low);
    147    }
    148    if (aExp == 0) {
    149        if (aSig == 0) {
    150            return packFloatx80(aSign, 0, 0);
    151        }
    152        if (bExp < 0x3FFF) {
    153            return a;
    154        }
    155        normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
    156    }
    157
    158    if (bExp < 0x3FFF) {
    159        return a;
    160    }
    161
     162    if (0x400F < bExp) { /* |b| >= 2^17: scale out of range, force overflow/underflow */
     163        aExp = bSign ? -0x6001 : 0xE000;
    164        return roundAndPackFloatx80(status->floatx80_rounding_precision,
    165                                    aSign, aExp, aSig, 0, status);
    166    }
    167
     168    shiftCount = 0x403E - bExp; /* 0x403E = bias (0x3FFF) + 63 */
     169    bSig >>= shiftCount; /* bSig is now trunc(|b|) */
    170    aExp = bSign ? (aExp - bSig) : (aExp + bSig);
    171
    172    return roundAndPackFloatx80(status->floatx80_rounding_precision,
    173                                aSign, aExp, aSig, 0, status);
    174}
    175
    176floatx80 floatx80_move(floatx80 a, float_status *status)
    177{
    178    bool aSign;
    179    int32_t aExp;
    180    uint64_t aSig;
    181
    182    aSig = extractFloatx80Frac(a);
    183    aExp = extractFloatx80Exp(a);
    184    aSign = extractFloatx80Sign(a);
    185
    186    if (aExp == 0x7FFF) {
    187        if ((uint64_t)(aSig << 1)) {
    188            return propagateFloatx80NaNOneArg(a, status);
    189        }
    190        return a;
    191    }
    192    if (aExp == 0) {
    193        if (aSig == 0) {
    194            return a;
    195        }
     196        return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
     197                                             aSign, aExp, aSig, 0, status);
    198    }
    199    return roundAndPackFloatx80(status->floatx80_rounding_precision, aSign,
    200                                aExp, aSig, 0, status);
    201}
    202
    203/*
    204 * Algorithms for transcendental functions supported by MC68881 and MC68882
    205 * mathematical coprocessors. The functions are derived from FPSP library.
    206 */
    207
    208#define one_exp     0x3FFF
    209#define one_sig     UINT64_C(0x8000000000000000)
    210
    211/*
    212 * Function for compactifying extended double-precision floating point values.
    213 */
    214
    215static int32_t floatx80_make_compact(int32_t aExp, uint64_t aSig)
    216{
    217    return (aExp << 16) | (aSig >> 48);
    218}
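        /*
         * Example: 1.0 has aExp = 0x3FFF and aSig = 0x8000000000000000, so its
         * compact form is (0x3FFF << 16) | 0x8000 = 0x3FFF8000.  The range
         * checks below compare such compact values against constants, e.g.
         * 0x3FFE8000 for 1/2 and 0x3FFFC000 for 3/2.
         */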
    219
    220/*
    221 * Log base e of x plus 1
    222 */
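        /*
         * The input Z is first turned into X = 1 + Z.  When X is not close to 1,
         * X is written as 2^k * Y with Y in [1, 2); F is Y with its fraction
         * truncated to about 7 bits, and log_tbl supplies 1/F and log(F), so
         * log(1 + Z) = k*log(2) + log(F) + log(1 + U) with U = (Y - F)/F, where
         * log(1 + U) is evaluated by a short polynomial in U.  When Z is small,
         * an odd polynomial in U = 2Z/(2 + Z) is used directly instead.
         */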
    223
    224floatx80 floatx80_lognp1(floatx80 a, float_status *status)
    225{
    226    bool aSign;
    227    int32_t aExp;
    228    uint64_t aSig, fSig;
    229
    230    FloatRoundMode user_rnd_mode;
    231    FloatX80RoundPrec user_rnd_prec;
    232
    233    int32_t compact, j, k;
    234    floatx80 fp0, fp1, fp2, fp3, f, logof2, klog2, saveu;
    235
    236    aSig = extractFloatx80Frac(a);
    237    aExp = extractFloatx80Exp(a);
    238    aSign = extractFloatx80Sign(a);
    239
    240    if (aExp == 0x7FFF) {
    241        if ((uint64_t) (aSig << 1)) {
     242            return propagateFloatx80NaNOneArg(a, status);
    243        }
    244        if (aSign) {
    245            float_raise(float_flag_invalid, status);
    246            return floatx80_default_nan(status);
    247        }
    248        return packFloatx80(0, floatx80_infinity.high, floatx80_infinity.low);
    249    }
    250
    251    if (aExp == 0 && aSig == 0) {
    252        return packFloatx80(aSign, 0, 0);
    253    }
    254
    255    if (aSign && aExp >= one_exp) {
    256        if (aExp == one_exp && aSig == one_sig) {
    257            float_raise(float_flag_divbyzero, status);
    258            return packFloatx80(aSign, floatx80_infinity.high,
    259                                floatx80_infinity.low);
    260        }
    261        float_raise(float_flag_invalid, status);
    262        return floatx80_default_nan(status);
    263    }
    264
    265    if (aExp < 0x3f99 || (aExp == 0x3f99 && aSig == one_sig)) {
    266        /* <= min threshold */
    267        float_raise(float_flag_inexact, status);
    268        return floatx80_move(a, status);
    269    }
    270
    271    user_rnd_mode = status->float_rounding_mode;
    272    user_rnd_prec = status->floatx80_rounding_precision;
    273    status->float_rounding_mode = float_round_nearest_even;
    274    status->floatx80_rounding_precision = floatx80_precision_x;
    275
    276    compact = floatx80_make_compact(aExp, aSig);
    277
    278    fp0 = a; /* Z */
    279    fp1 = a;
    280
    281    fp0 = floatx80_add(fp0, float32_to_floatx80(make_float32(0x3F800000),
    282                       status), status); /* X = (1+Z) */
    283
    284    aExp = extractFloatx80Exp(fp0);
    285    aSig = extractFloatx80Frac(fp0);
    286
    287    compact = floatx80_make_compact(aExp, aSig);
    288
    289    if (compact < 0x3FFE8000 || compact > 0x3FFFC000) {
    290        /* |X| < 1/2 or |X| > 3/2 */
    291        k = aExp - 0x3FFF;
    292        fp1 = int32_to_floatx80(k, status);
    293
    294        fSig = (aSig & UINT64_C(0xFE00000000000000)) | UINT64_C(0x0100000000000000);
    295        j = (fSig >> 56) & 0x7E; /* DISPLACEMENT FOR 1/F */
    296
    297        f = packFloatx80(0, 0x3FFF, fSig); /* F */
    298        fp0 = packFloatx80(0, 0x3FFF, aSig); /* Y */
    299
    300        fp0 = floatx80_sub(fp0, f, status); /* Y-F */
    301
    302    lp1cont1:
    303        /* LP1CONT1 */
    304        fp0 = floatx80_mul(fp0, log_tbl[j], status); /* FP0 IS U = (Y-F)/F */
    305        logof2 = packFloatx80(0, 0x3FFE, UINT64_C(0xB17217F7D1CF79AC));
    306        klog2 = floatx80_mul(fp1, logof2, status); /* FP1 IS K*LOG2 */
    307        fp2 = floatx80_mul(fp0, fp0, status); /* FP2 IS V=U*U */
    308
    309        fp3 = fp2;
    310        fp1 = fp2;
    311
    312        fp1 = floatx80_mul(fp1, float64_to_floatx80(
    313                           make_float64(0x3FC2499AB5E4040B), status),
    314                           status); /* V*A6 */
    315        fp2 = floatx80_mul(fp2, float64_to_floatx80(
    316                           make_float64(0xBFC555B5848CB7DB), status),
    317                           status); /* V*A5 */
    318        fp1 = floatx80_add(fp1, float64_to_floatx80(
    319                           make_float64(0x3FC99999987D8730), status),
    320                           status); /* A4+V*A6 */
    321        fp2 = floatx80_add(fp2, float64_to_floatx80(
    322                           make_float64(0xBFCFFFFFFF6F7E97), status),
    323                           status); /* A3+V*A5 */
    324        fp1 = floatx80_mul(fp1, fp3, status); /* V*(A4+V*A6) */
    325        fp2 = floatx80_mul(fp2, fp3, status); /* V*(A3+V*A5) */
    326        fp1 = floatx80_add(fp1, float64_to_floatx80(
    327                           make_float64(0x3FD55555555555A4), status),
    328                           status); /* A2+V*(A4+V*A6) */
    329        fp2 = floatx80_add(fp2, float64_to_floatx80(
    330                           make_float64(0xBFE0000000000008), status),
    331                           status); /* A1+V*(A3+V*A5) */
    332        fp1 = floatx80_mul(fp1, fp3, status); /* V*(A2+V*(A4+V*A6)) */
    333        fp2 = floatx80_mul(fp2, fp3, status); /* V*(A1+V*(A3+V*A5)) */
    334        fp1 = floatx80_mul(fp1, fp0, status); /* U*V*(A2+V*(A4+V*A6)) */
    335        fp0 = floatx80_add(fp0, fp2, status); /* U+V*(A1+V*(A3+V*A5)) */
    336
    337        fp1 = floatx80_add(fp1, log_tbl[j + 1],
    338                           status); /* LOG(F)+U*V*(A2+V*(A4+V*A6)) */
    339        fp0 = floatx80_add(fp0, fp1, status); /* FP0 IS LOG(F) + LOG(1+U) */
    340
    341        status->float_rounding_mode = user_rnd_mode;
    342        status->floatx80_rounding_precision = user_rnd_prec;
    343
    344        a = floatx80_add(fp0, klog2, status);
    345
    346        float_raise(float_flag_inexact, status);
    347
    348        return a;
    349    } else if (compact < 0x3FFEF07D || compact > 0x3FFF8841) {
     350        /* X = 1+Z outside [15/16, 17/16], i.e. |Z| > 1/16 */
    351        /* LP1CARE */
    352        fSig = (aSig & UINT64_C(0xFE00000000000000)) | UINT64_C(0x0100000000000000);
    353        f = packFloatx80(0, 0x3FFF, fSig); /* F */
    354        j = (fSig >> 56) & 0x7E; /* DISPLACEMENT FOR 1/F */
    355
    356        if (compact >= 0x3FFF8000) { /* 1+Z >= 1 */
    357            /* KISZERO */
    358            fp0 = floatx80_sub(float32_to_floatx80(make_float32(0x3F800000),
    359                               status), f, status); /* 1-F */
    360            fp0 = floatx80_add(fp0, fp1, status); /* FP0 IS Y-F = (1-F)+Z */
    361            fp1 = packFloatx80(0, 0, 0); /* K = 0 */
    362        } else {
    363            /* KISNEG */
    364            fp0 = floatx80_sub(float32_to_floatx80(make_float32(0x40000000),
    365                               status), f, status); /* 2-F */
    366            fp1 = floatx80_add(fp1, fp1, status); /* 2Z */
    367            fp0 = floatx80_add(fp0, fp1, status); /* FP0 IS Y-F = (2-F)+2Z */
    368            fp1 = packFloatx80(1, one_exp, one_sig); /* K = -1 */
    369        }
    370        goto lp1cont1;
    371    } else {
    372        /* LP1ONE16 */
    373        fp1 = floatx80_add(fp1, fp1, status); /* FP1 IS 2Z */
    374        fp0 = floatx80_add(fp0, float32_to_floatx80(make_float32(0x3F800000),
    375                           status), status); /* FP0 IS 1+X */
    376
    377        /* LP1CONT2 */
    378        fp1 = floatx80_div(fp1, fp0, status); /* U */
    379        saveu = fp1;
    380        fp0 = floatx80_mul(fp1, fp1, status); /* FP0 IS V = U*U */
    381        fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS W = V*V */
    382
    383        fp3 = float64_to_floatx80(make_float64(0x3F175496ADD7DAD6),
    384                                  status); /* B5 */
    385        fp2 = float64_to_floatx80(make_float64(0x3F3C71C2FE80C7E0),
    386                                  status); /* B4 */
    387        fp3 = floatx80_mul(fp3, fp1, status); /* W*B5 */
    388        fp2 = floatx80_mul(fp2, fp1, status); /* W*B4 */
    389        fp3 = floatx80_add(fp3, float64_to_floatx80(
    390                           make_float64(0x3F624924928BCCFF), status),
    391                           status); /* B3+W*B5 */
    392        fp2 = floatx80_add(fp2, float64_to_floatx80(
    393                           make_float64(0x3F899999999995EC), status),
    394                           status); /* B2+W*B4 */
    395        fp1 = floatx80_mul(fp1, fp3, status); /* W*(B3+W*B5) */
    396        fp2 = floatx80_mul(fp2, fp0, status); /* V*(B2+W*B4) */
    397        fp1 = floatx80_add(fp1, float64_to_floatx80(
    398                           make_float64(0x3FB5555555555555), status),
    399                           status); /* B1+W*(B3+W*B5) */
    400
    401        fp0 = floatx80_mul(fp0, saveu, status); /* FP0 IS U*V */
    402        fp1 = floatx80_add(fp1, fp2,
    403                           status); /* B1+W*(B3+W*B5) + V*(B2+W*B4) */
    404        fp0 = floatx80_mul(fp0, fp1,
    405                           status); /* U*V*([B1+W*(B3+W*B5)] + [V*(B2+W*B4)]) */
    406
    407        status->float_rounding_mode = user_rnd_mode;
    408        status->floatx80_rounding_precision = user_rnd_prec;
    409
    410        a = floatx80_add(fp0, saveu, status);
    411
    412        /*if (!floatx80_is_zero(a)) { */
    413            float_raise(float_flag_inexact, status);
    414        /*} */
    415
    416        return a;
    417    }
    418}
    419
    420/*
    421 * Log base e
    422 */
    423
    424floatx80 floatx80_logn(floatx80 a, float_status *status)
    425{
    426    bool aSign;
    427    int32_t aExp;
    428    uint64_t aSig, fSig;
    429
    430    FloatRoundMode user_rnd_mode;
    431    FloatX80RoundPrec user_rnd_prec;
    432
    433    int32_t compact, j, k, adjk;
    434    floatx80 fp0, fp1, fp2, fp3, f, logof2, klog2, saveu;
    435
    436    aSig = extractFloatx80Frac(a);
    437    aExp = extractFloatx80Exp(a);
    438    aSign = extractFloatx80Sign(a);
    439
    440    if (aExp == 0x7FFF) {
    441        if ((uint64_t) (aSig << 1)) {
     442            return propagateFloatx80NaNOneArg(a, status);
    443        }
    444        if (aSign == 0) {
    445            return packFloatx80(0, floatx80_infinity.high,
    446                                floatx80_infinity.low);
    447        }
    448    }
    449
    450    adjk = 0;
    451
    452    if (aExp == 0) {
    453        if (aSig == 0) { /* zero */
    454            float_raise(float_flag_divbyzero, status);
    455            return packFloatx80(1, floatx80_infinity.high,
    456                                floatx80_infinity.low);
    457        }
    458        if ((aSig & one_sig) == 0) { /* denormal */
    459            normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
    460            adjk = -100;
    461            aExp += 100;
    462            a = packFloatx80(aSign, aExp, aSig);
    463        }
    464    }
    465
    466    if (aSign) {
    467        float_raise(float_flag_invalid, status);
    468        return floatx80_default_nan(status);
    469    }
    470
    471    user_rnd_mode = status->float_rounding_mode;
    472    user_rnd_prec = status->floatx80_rounding_precision;
    473    status->float_rounding_mode = float_round_nearest_even;
    474    status->floatx80_rounding_precision = floatx80_precision_x;
    475
    476    compact = floatx80_make_compact(aExp, aSig);
    477
    478    if (compact < 0x3FFEF07D || compact > 0x3FFF8841) {
    479        /* |X| < 15/16 or |X| > 17/16 */
    480        k = aExp - 0x3FFF;
    481        k += adjk;
    482        fp1 = int32_to_floatx80(k, status);
    483
    484        fSig = (aSig & UINT64_C(0xFE00000000000000)) | UINT64_C(0x0100000000000000);
    485        j = (fSig >> 56) & 0x7E; /* DISPLACEMENT FOR 1/F */
    486
    487        f = packFloatx80(0, 0x3FFF, fSig); /* F */
    488        fp0 = packFloatx80(0, 0x3FFF, aSig); /* Y */
    489
    490        fp0 = floatx80_sub(fp0, f, status); /* Y-F */
    491
    492        /* LP1CONT1 */
    493        fp0 = floatx80_mul(fp0, log_tbl[j], status); /* FP0 IS U = (Y-F)/F */
    494        logof2 = packFloatx80(0, 0x3FFE, UINT64_C(0xB17217F7D1CF79AC));
    495        klog2 = floatx80_mul(fp1, logof2, status); /* FP1 IS K*LOG2 */
    496        fp2 = floatx80_mul(fp0, fp0, status); /* FP2 IS V=U*U */
    497
    498        fp3 = fp2;
    499        fp1 = fp2;
    500
    501        fp1 = floatx80_mul(fp1, float64_to_floatx80(
    502                           make_float64(0x3FC2499AB5E4040B), status),
    503                           status); /* V*A6 */
    504        fp2 = floatx80_mul(fp2, float64_to_floatx80(
    505                           make_float64(0xBFC555B5848CB7DB), status),
    506                           status); /* V*A5 */
    507        fp1 = floatx80_add(fp1, float64_to_floatx80(
    508                           make_float64(0x3FC99999987D8730), status),
    509                           status); /* A4+V*A6 */
    510        fp2 = floatx80_add(fp2, float64_to_floatx80(
    511                           make_float64(0xBFCFFFFFFF6F7E97), status),
    512                           status); /* A3+V*A5 */
    513        fp1 = floatx80_mul(fp1, fp3, status); /* V*(A4+V*A6) */
    514        fp2 = floatx80_mul(fp2, fp3, status); /* V*(A3+V*A5) */
    515        fp1 = floatx80_add(fp1, float64_to_floatx80(
    516                           make_float64(0x3FD55555555555A4), status),
    517                           status); /* A2+V*(A4+V*A6) */
    518        fp2 = floatx80_add(fp2, float64_to_floatx80(
    519                           make_float64(0xBFE0000000000008), status),
    520                           status); /* A1+V*(A3+V*A5) */
    521        fp1 = floatx80_mul(fp1, fp3, status); /* V*(A2+V*(A4+V*A6)) */
    522        fp2 = floatx80_mul(fp2, fp3, status); /* V*(A1+V*(A3+V*A5)) */
    523        fp1 = floatx80_mul(fp1, fp0, status); /* U*V*(A2+V*(A4+V*A6)) */
    524        fp0 = floatx80_add(fp0, fp2, status); /* U+V*(A1+V*(A3+V*A5)) */
    525
    526        fp1 = floatx80_add(fp1, log_tbl[j + 1],
    527                           status); /* LOG(F)+U*V*(A2+V*(A4+V*A6)) */
    528        fp0 = floatx80_add(fp0, fp1, status); /* FP0 IS LOG(F) + LOG(1+U) */
    529
    530        status->float_rounding_mode = user_rnd_mode;
    531        status->floatx80_rounding_precision = user_rnd_prec;
    532
    533        a = floatx80_add(fp0, klog2, status);
    534
    535        float_raise(float_flag_inexact, status);
    536
    537        return a;
    538    } else { /* |X-1| >= 1/16 */
    539        fp0 = a;
    540        fp1 = a;
    541        fp1 = floatx80_sub(fp1, float32_to_floatx80(make_float32(0x3F800000),
    542                           status), status); /* FP1 IS X-1 */
    543        fp0 = floatx80_add(fp0, float32_to_floatx80(make_float32(0x3F800000),
    544                           status), status); /* FP0 IS X+1 */
    545        fp1 = floatx80_add(fp1, fp1, status); /* FP1 IS 2(X-1) */
    546
    547        /* LP1CONT2 */
    548        fp1 = floatx80_div(fp1, fp0, status); /* U */
    549        saveu = fp1;
    550        fp0 = floatx80_mul(fp1, fp1, status); /* FP0 IS V = U*U */
    551        fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS W = V*V */
    552
    553        fp3 = float64_to_floatx80(make_float64(0x3F175496ADD7DAD6),
    554                                  status); /* B5 */
    555        fp2 = float64_to_floatx80(make_float64(0x3F3C71C2FE80C7E0),
    556                                  status); /* B4 */
    557        fp3 = floatx80_mul(fp3, fp1, status); /* W*B5 */
    558        fp2 = floatx80_mul(fp2, fp1, status); /* W*B4 */
    559        fp3 = floatx80_add(fp3, float64_to_floatx80(
    560                           make_float64(0x3F624924928BCCFF), status),
    561                           status); /* B3+W*B5 */
    562        fp2 = floatx80_add(fp2, float64_to_floatx80(
    563                           make_float64(0x3F899999999995EC), status),
    564                           status); /* B2+W*B4 */
    565        fp1 = floatx80_mul(fp1, fp3, status); /* W*(B3+W*B5) */
    566        fp2 = floatx80_mul(fp2, fp0, status); /* V*(B2+W*B4) */
    567        fp1 = floatx80_add(fp1, float64_to_floatx80(
    568                           make_float64(0x3FB5555555555555), status),
    569                           status); /* B1+W*(B3+W*B5) */
    570
    571        fp0 = floatx80_mul(fp0, saveu, status); /* FP0 IS U*V */
    572        fp1 = floatx80_add(fp1, fp2, status); /* B1+W*(B3+W*B5) + V*(B2+W*B4) */
    573        fp0 = floatx80_mul(fp0, fp1,
    574                           status); /* U*V*([B1+W*(B3+W*B5)] + [V*(B2+W*B4)]) */
    575
    576        status->float_rounding_mode = user_rnd_mode;
    577        status->floatx80_rounding_precision = user_rnd_prec;
    578
    579        a = floatx80_add(fp0, saveu, status);
    580
    581        /*if (!floatx80_is_zero(a)) { */
    582            float_raise(float_flag_inexact, status);
    583        /*} */
    584
    585        return a;
    586    }
    587}
    588
    589/*
    590 * Log base 10
    591 */
    592
    593floatx80 floatx80_log10(floatx80 a, float_status *status)
    594{
    595    bool aSign;
    596    int32_t aExp;
    597    uint64_t aSig;
    598
    599    FloatRoundMode user_rnd_mode;
    600    FloatX80RoundPrec user_rnd_prec;
    601
    602    floatx80 fp0, fp1;
    603
    604    aSig = extractFloatx80Frac(a);
    605    aExp = extractFloatx80Exp(a);
    606    aSign = extractFloatx80Sign(a);
    607
    608    if (aExp == 0x7FFF) {
    609        if ((uint64_t) (aSig << 1)) {
     610            return propagateFloatx80NaNOneArg(a, status);
    611        }
    612        if (aSign == 0) {
    613            return packFloatx80(0, floatx80_infinity.high,
    614                                floatx80_infinity.low);
    615        }
    616    }
    617
    618    if (aExp == 0 && aSig == 0) {
    619        float_raise(float_flag_divbyzero, status);
    620        return packFloatx80(1, floatx80_infinity.high,
    621                            floatx80_infinity.low);
    622    }
    623
    624    if (aSign) {
    625        float_raise(float_flag_invalid, status);
    626        return floatx80_default_nan(status);
    627    }
    628
    629    user_rnd_mode = status->float_rounding_mode;
    630    user_rnd_prec = status->floatx80_rounding_precision;
    631    status->float_rounding_mode = float_round_nearest_even;
    632    status->floatx80_rounding_precision = floatx80_precision_x;
    633
    634    fp0 = floatx80_logn(a, status);
    635    fp1 = packFloatx80(0, 0x3FFD, UINT64_C(0xDE5BD8A937287195)); /* INV_L10 */
    636
    637    status->float_rounding_mode = user_rnd_mode;
    638    status->floatx80_rounding_precision = user_rnd_prec;
    639
    640    a = floatx80_mul(fp0, fp1, status); /* LOGN(X)*INV_L10 */
    641
    642    float_raise(float_flag_inexact, status);
    643
    644    return a;
    645}
    646
    647/*
    648 * Log base 2
    649 */
    650
    651floatx80 floatx80_log2(floatx80 a, float_status *status)
    652{
    653    bool aSign;
    654    int32_t aExp;
    655    uint64_t aSig;
    656
    657    FloatRoundMode user_rnd_mode;
    658    FloatX80RoundPrec user_rnd_prec;
    659
    660    floatx80 fp0, fp1;
    661
    662    aSig = extractFloatx80Frac(a);
    663    aExp = extractFloatx80Exp(a);
    664    aSign = extractFloatx80Sign(a);
    665
    666    if (aExp == 0x7FFF) {
    667        if ((uint64_t) (aSig << 1)) {
     668            return propagateFloatx80NaNOneArg(a, status);
    669        }
    670        if (aSign == 0) {
    671            return packFloatx80(0, floatx80_infinity.high,
    672                                floatx80_infinity.low);
    673        }
    674    }
    675
    676    if (aExp == 0) {
    677        if (aSig == 0) {
    678            float_raise(float_flag_divbyzero, status);
    679            return packFloatx80(1, floatx80_infinity.high,
    680                                floatx80_infinity.low);
    681        }
    682        normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
    683    }
    684
    685    if (aSign) {
    686        float_raise(float_flag_invalid, status);
    687        return floatx80_default_nan(status);
    688    }
    689
    690    user_rnd_mode = status->float_rounding_mode;
    691    user_rnd_prec = status->floatx80_rounding_precision;
    692    status->float_rounding_mode = float_round_nearest_even;
    693    status->floatx80_rounding_precision = floatx80_precision_x;
    694
    695    if (aSig == one_sig) { /* X is 2^k */
    696        status->float_rounding_mode = user_rnd_mode;
    697        status->floatx80_rounding_precision = user_rnd_prec;
    698
    699        a = int32_to_floatx80(aExp - 0x3FFF, status);
    700    } else {
    701        fp0 = floatx80_logn(a, status);
    702        fp1 = packFloatx80(0, 0x3FFF, UINT64_C(0xB8AA3B295C17F0BC)); /* INV_L2 */
    703
    704        status->float_rounding_mode = user_rnd_mode;
    705        status->floatx80_rounding_precision = user_rnd_prec;
    706
    707        a = floatx80_mul(fp0, fp1, status); /* LOGN(X)*INV_L2 */
    708    }
    709
    710    float_raise(float_flag_inexact, status);
    711
    712    return a;
    713}
    714
    715/*
    716 * e to x
    717 */
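        /*
         * exp(X) is computed by writing X = N*log(2)/64 + R with
         * N = int(64*X/log(2)), so exp(X) = 2^(M) * 2^(J/64) * exp(R) where
         * N = 64*M + J and |R| <= log(2)/128.  2^(J/64) comes from
         * exp_tbl/exp_tbl2, exp(R) - 1 from a short polynomial in R, and the
         * scaling by 2^(M) (split in two for arguments near the overflow
         * threshold) is applied exactly through the exponent field.
         */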
    718
    719floatx80 floatx80_etox(floatx80 a, float_status *status)
    720{
    721    bool aSign;
    722    int32_t aExp;
    723    uint64_t aSig;
    724
    725    FloatRoundMode user_rnd_mode;
    726    FloatX80RoundPrec user_rnd_prec;
    727
    728    int32_t compact, n, j, k, m, m1;
    729    floatx80 fp0, fp1, fp2, fp3, l2, scale, adjscale;
    730    bool adjflag;
    731
    732    aSig = extractFloatx80Frac(a);
    733    aExp = extractFloatx80Exp(a);
    734    aSign = extractFloatx80Sign(a);
    735
    736    if (aExp == 0x7FFF) {
    737        if ((uint64_t) (aSig << 1)) {
    738            return propagateFloatx80NaNOneArg(a, status);
    739        }
    740        if (aSign) {
    741            return packFloatx80(0, 0, 0);
    742        }
    743        return packFloatx80(0, floatx80_infinity.high,
    744                            floatx80_infinity.low);
    745    }
    746
    747    if (aExp == 0 && aSig == 0) {
    748        return packFloatx80(0, one_exp, one_sig);
    749    }
    750
    751    user_rnd_mode = status->float_rounding_mode;
    752    user_rnd_prec = status->floatx80_rounding_precision;
    753    status->float_rounding_mode = float_round_nearest_even;
    754    status->floatx80_rounding_precision = floatx80_precision_x;
    755
    756    adjflag = 0;
    757
    758    if (aExp >= 0x3FBE) { /* |X| >= 2^(-65) */
    759        compact = floatx80_make_compact(aExp, aSig);
    760
    761        if (compact < 0x400CB167) { /* |X| < 16380 log2 */
    762            fp0 = a;
    763            fp1 = a;
    764            fp0 = floatx80_mul(fp0, float32_to_floatx80(
    765                               make_float32(0x42B8AA3B), status),
    766                               status); /* 64/log2 * X */
    767            adjflag = 0;
    768            n = floatx80_to_int32(fp0, status); /* int(64/log2*X) */
    769            fp0 = int32_to_floatx80(n, status);
    770
    771            j = n & 0x3F; /* J = N mod 64 */
    772            m = n / 64; /* NOTE: this is really arithmetic right shift by 6 */
    773            if (n < 0 && j) {
    774                /*
    775                 * arithmetic right shift is division and
    776                 * round towards minus infinity
    777                 */
    778                m--;
    779            }
    780            m += 0x3FFF; /* biased exponent of 2^(M) */
    781
    782        expcont1:
    783            fp2 = fp0; /* N */
    784            fp0 = floatx80_mul(fp0, float32_to_floatx80(
    785                               make_float32(0xBC317218), status),
    786                               status); /* N * L1, L1 = lead(-log2/64) */
    787            l2 = packFloatx80(0, 0x3FDC, UINT64_C(0x82E308654361C4C6));
    788            fp2 = floatx80_mul(fp2, l2, status); /* N * L2, L1+L2 = -log2/64 */
    789            fp0 = floatx80_add(fp0, fp1, status); /* X + N*L1 */
    790            fp0 = floatx80_add(fp0, fp2, status); /* R */
    791
    792            fp1 = floatx80_mul(fp0, fp0, status); /* S = R*R */
    793            fp2 = float32_to_floatx80(make_float32(0x3AB60B70),
    794                                      status); /* A5 */
    795            fp2 = floatx80_mul(fp2, fp1, status); /* fp2 is S*A5 */
    796            fp3 = floatx80_mul(float32_to_floatx80(make_float32(0x3C088895),
    797                               status), fp1,
    798                               status); /* fp3 is S*A4 */
    799            fp2 = floatx80_add(fp2, float64_to_floatx80(make_float64(
    800                               0x3FA5555555554431), status),
    801                               status); /* fp2 is A3+S*A5 */
    802            fp3 = floatx80_add(fp3, float64_to_floatx80(make_float64(
    803                               0x3FC5555555554018), status),
    804                               status); /* fp3 is A2+S*A4 */
    805            fp2 = floatx80_mul(fp2, fp1, status); /* fp2 is S*(A3+S*A5) */
    806            fp3 = floatx80_mul(fp3, fp1, status); /* fp3 is S*(A2+S*A4) */
    807            fp2 = floatx80_add(fp2, float32_to_floatx80(
    808                               make_float32(0x3F000000), status),
    809                               status); /* fp2 is A1+S*(A3+S*A5) */
    810            fp3 = floatx80_mul(fp3, fp0, status); /* fp3 IS R*S*(A2+S*A4) */
    811            fp2 = floatx80_mul(fp2, fp1,
    812                               status); /* fp2 IS S*(A1+S*(A3+S*A5)) */
    813            fp0 = floatx80_add(fp0, fp3, status); /* fp0 IS R+R*S*(A2+S*A4) */
    814            fp0 = floatx80_add(fp0, fp2, status); /* fp0 IS EXP(R) - 1 */
    815
    816            fp1 = exp_tbl[j];
    817            fp0 = floatx80_mul(fp0, fp1, status); /* 2^(J/64)*(Exp(R)-1) */
    818            fp0 = floatx80_add(fp0, float32_to_floatx80(exp_tbl2[j], status),
    819                               status); /* accurate 2^(J/64) */
    820            fp0 = floatx80_add(fp0, fp1,
    821                               status); /* 2^(J/64) + 2^(J/64)*(Exp(R)-1) */
    822
    823            scale = packFloatx80(0, m, one_sig);
    824            if (adjflag) {
    825                adjscale = packFloatx80(0, m1, one_sig);
    826                fp0 = floatx80_mul(fp0, adjscale, status);
    827            }
    828
    829            status->float_rounding_mode = user_rnd_mode;
    830            status->floatx80_rounding_precision = user_rnd_prec;
    831
    832            a = floatx80_mul(fp0, scale, status);
    833
    834            float_raise(float_flag_inexact, status);
    835
    836            return a;
    837        } else { /* |X| >= 16380 log2 */
    838            if (compact > 0x400CB27C) { /* |X| >= 16480 log2 */
    839                status->float_rounding_mode = user_rnd_mode;
    840                status->floatx80_rounding_precision = user_rnd_prec;
    841                if (aSign) {
    842                    a = roundAndPackFloatx80(
    843                                           status->floatx80_rounding_precision,
    844                                           0, -0x1000, aSig, 0, status);
    845                } else {
    846                    a = roundAndPackFloatx80(
    847                                           status->floatx80_rounding_precision,
    848                                           0, 0x8000, aSig, 0, status);
    849                }
    850                float_raise(float_flag_inexact, status);
    851
    852                return a;
    853            } else {
    854                fp0 = a;
    855                fp1 = a;
    856                fp0 = floatx80_mul(fp0, float32_to_floatx80(
    857                                   make_float32(0x42B8AA3B), status),
    858                                   status); /* 64/log2 * X */
    859                adjflag = 1;
    860                n = floatx80_to_int32(fp0, status); /* int(64/log2*X) */
    861                fp0 = int32_to_floatx80(n, status);
    862
    863                j = n & 0x3F; /* J = N mod 64 */
    864                /* NOTE: this is really arithmetic right shift by 6 */
    865                k = n / 64;
    866                if (n < 0 && j) {
    867                    /* arithmetic right shift is division and
    868                     * round towards minus infinity
    869                     */
    870                    k--;
    871                }
    872                /* NOTE: this is really arithmetic right shift by 1 */
    873                m1 = k / 2;
    874                if (k < 0 && (k & 1)) {
    875                    /* arithmetic right shift is division and
    876                     * round towards minus infinity
    877                     */
    878                    m1--;
    879                }
    880                m = k - m1;
    881                m1 += 0x3FFF; /* biased exponent of 2^(M1) */
    882                m += 0x3FFF; /* biased exponent of 2^(M) */
    883
    884                goto expcont1;
    885            }
    886        }
    887    } else { /* |X| < 2^(-65) */
    888        status->float_rounding_mode = user_rnd_mode;
    889        status->floatx80_rounding_precision = user_rnd_prec;
    890
    891        a = floatx80_add(a, float32_to_floatx80(make_float32(0x3F800000),
    892                         status), status); /* 1 + X */
    893
    894        float_raise(float_flag_inexact, status);
    895
    896        return a;
    897    }
    898}
    899
    900/*
    901 * 2 to x
    902 */
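        /*
         * 2^X is computed as 2^(M') * 2^(J/64) * exp(R) with N = int(64*X),
         * J = N mod 64 and R = (X - N/64) * log(2); 2^(J/64) comes from
         * exp2_tbl/exp2_tbl2 and the remaining power of two is folded into the
         * exponents of the table values and of ADJFACT.
         */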
    903
    904floatx80 floatx80_twotox(floatx80 a, float_status *status)
    905{
    906    bool aSign;
    907    int32_t aExp;
    908    uint64_t aSig;
    909
    910    FloatRoundMode user_rnd_mode;
    911    FloatX80RoundPrec user_rnd_prec;
    912
    913    int32_t compact, n, j, l, m, m1;
    914    floatx80 fp0, fp1, fp2, fp3, adjfact, fact1, fact2;
    915
    916    aSig = extractFloatx80Frac(a);
    917    aExp = extractFloatx80Exp(a);
    918    aSign = extractFloatx80Sign(a);
    919
    920    if (aExp == 0x7FFF) {
    921        if ((uint64_t) (aSig << 1)) {
    922            return propagateFloatx80NaNOneArg(a, status);
    923        }
    924        if (aSign) {
    925            return packFloatx80(0, 0, 0);
    926        }
    927        return packFloatx80(0, floatx80_infinity.high,
    928                            floatx80_infinity.low);
    929    }
    930
    931    if (aExp == 0 && aSig == 0) {
    932        return packFloatx80(0, one_exp, one_sig);
    933    }
    934
    935    user_rnd_mode = status->float_rounding_mode;
    936    user_rnd_prec = status->floatx80_rounding_precision;
    937    status->float_rounding_mode = float_round_nearest_even;
    938    status->floatx80_rounding_precision = floatx80_precision_x;
    939
    940    fp0 = a;
    941
    942    compact = floatx80_make_compact(aExp, aSig);
    943
    944    if (compact < 0x3FB98000 || compact > 0x400D80C0) {
    945        /* |X| > 16480 or |X| < 2^(-70) */
    946        if (compact > 0x3FFF8000) { /* |X| > 16480 */
    947            status->float_rounding_mode = user_rnd_mode;
    948            status->floatx80_rounding_precision = user_rnd_prec;
    949
    950            if (aSign) {
    951                return roundAndPackFloatx80(status->floatx80_rounding_precision,
    952                                            0, -0x1000, aSig, 0, status);
    953            } else {
    954                return roundAndPackFloatx80(status->floatx80_rounding_precision,
    955                                            0, 0x8000, aSig, 0, status);
    956            }
    957        } else { /* |X| < 2^(-70) */
    958            status->float_rounding_mode = user_rnd_mode;
    959            status->floatx80_rounding_precision = user_rnd_prec;
    960
    961            a = floatx80_add(fp0, float32_to_floatx80(
    962                             make_float32(0x3F800000), status),
    963                             status); /* 1 + X */
    964
    965            float_raise(float_flag_inexact, status);
    966
    967            return a;
    968        }
    969    } else { /* 2^(-70) <= |X| <= 16480 */
    970        fp1 = fp0; /* X */
    971        fp1 = floatx80_mul(fp1, float32_to_floatx80(
    972                           make_float32(0x42800000), status),
    973                           status); /* X * 64 */
    974        n = floatx80_to_int32(fp1, status);
    975        fp1 = int32_to_floatx80(n, status);
    976        j = n & 0x3F;
    977        l = n / 64; /* NOTE: this is really arithmetic right shift by 6 */
    978        if (n < 0 && j) {
    979            /*
    980             * arithmetic right shift is division and
    981             * round towards minus infinity
    982             */
    983            l--;
    984        }
    985        m = l / 2; /* NOTE: this is really arithmetic right shift by 1 */
    986        if (l < 0 && (l & 1)) {
    987            /*
    988             * arithmetic right shift is division and
    989             * round towards minus infinity
    990             */
    991            m--;
    992        }
    993        m1 = l - m;
    994        m1 += 0x3FFF; /* ADJFACT IS 2^(M') */
    995
    996        adjfact = packFloatx80(0, m1, one_sig);
    997        fact1 = exp2_tbl[j];
    998        fact1.high += m;
    999        fact2.high = exp2_tbl2[j] >> 16;
   1000        fact2.high += m;
   1001        fact2.low = (uint64_t)(exp2_tbl2[j] & 0xFFFF);
   1002        fact2.low <<= 48;
   1003
   1004        fp1 = floatx80_mul(fp1, float32_to_floatx80(
   1005                           make_float32(0x3C800000), status),
   1006                           status); /* (1/64)*N */
   1007        fp0 = floatx80_sub(fp0, fp1, status); /* X - (1/64)*INT(64 X) */
   1008        fp2 = packFloatx80(0, 0x3FFE, UINT64_C(0xB17217F7D1CF79AC)); /* LOG2 */
   1009        fp0 = floatx80_mul(fp0, fp2, status); /* R */
   1010
   1011        /* EXPR */
   1012        fp1 = floatx80_mul(fp0, fp0, status); /* S = R*R */
   1013        fp2 = float64_to_floatx80(make_float64(0x3F56C16D6F7BD0B2),
   1014                                  status); /* A5 */
   1015        fp3 = float64_to_floatx80(make_float64(0x3F811112302C712C),
   1016                                  status); /* A4 */
   1017        fp2 = floatx80_mul(fp2, fp1, status); /* S*A5 */
   1018        fp3 = floatx80_mul(fp3, fp1, status); /* S*A4 */
   1019        fp2 = floatx80_add(fp2, float64_to_floatx80(
   1020                           make_float64(0x3FA5555555554CC1), status),
   1021                           status); /* A3+S*A5 */
   1022        fp3 = floatx80_add(fp3, float64_to_floatx80(
   1023                           make_float64(0x3FC5555555554A54), status),
   1024                           status); /* A2+S*A4 */
   1025        fp2 = floatx80_mul(fp2, fp1, status); /* S*(A3+S*A5) */
   1026        fp3 = floatx80_mul(fp3, fp1, status); /* S*(A2+S*A4) */
   1027        fp2 = floatx80_add(fp2, float64_to_floatx80(
   1028                           make_float64(0x3FE0000000000000), status),
   1029                           status); /* A1+S*(A3+S*A5) */
   1030        fp3 = floatx80_mul(fp3, fp0, status); /* R*S*(A2+S*A4) */
   1031
   1032        fp2 = floatx80_mul(fp2, fp1, status); /* S*(A1+S*(A3+S*A5)) */
   1033        fp0 = floatx80_add(fp0, fp3, status); /* R+R*S*(A2+S*A4) */
   1034        fp0 = floatx80_add(fp0, fp2, status); /* EXP(R) - 1 */
   1035
   1036        fp0 = floatx80_mul(fp0, fact1, status);
   1037        fp0 = floatx80_add(fp0, fact2, status);
   1038        fp0 = floatx80_add(fp0, fact1, status);
   1039
   1040        status->float_rounding_mode = user_rnd_mode;
   1041        status->floatx80_rounding_precision = user_rnd_prec;
   1042
   1043        a = floatx80_mul(fp0, adjfact, status);
   1044
   1045        float_raise(float_flag_inexact, status);
   1046
   1047        return a;
   1048    }
   1049}
   1050
   1051/*
   1052 * 10 to x
   1053 */
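        /*
         * 10^X follows the same scheme as floatx80_twotox: N = int(X*64*log2(10)),
         * R = (X - N*log10(2)/64) * log(10), and 10^X = 2^(N/64) * exp(R), with
         * 2^(J/64) taken from exp2_tbl/exp2_tbl2.
         */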
   1054
   1055floatx80 floatx80_tentox(floatx80 a, float_status *status)
   1056{
   1057    bool aSign;
   1058    int32_t aExp;
   1059    uint64_t aSig;
   1060
   1061    FloatRoundMode user_rnd_mode;
   1062    FloatX80RoundPrec user_rnd_prec;
   1063
   1064    int32_t compact, n, j, l, m, m1;
   1065    floatx80 fp0, fp1, fp2, fp3, adjfact, fact1, fact2;
   1066
   1067    aSig = extractFloatx80Frac(a);
   1068    aExp = extractFloatx80Exp(a);
   1069    aSign = extractFloatx80Sign(a);
   1070
   1071    if (aExp == 0x7FFF) {
   1072        if ((uint64_t) (aSig << 1)) {
   1073            return propagateFloatx80NaNOneArg(a, status);
   1074        }
   1075        if (aSign) {
   1076            return packFloatx80(0, 0, 0);
   1077        }
   1078        return packFloatx80(0, floatx80_infinity.high,
   1079                            floatx80_infinity.low);
   1080    }
   1081
   1082    if (aExp == 0 && aSig == 0) {
   1083        return packFloatx80(0, one_exp, one_sig);
   1084    }
   1085
   1086    user_rnd_mode = status->float_rounding_mode;
   1087    user_rnd_prec = status->floatx80_rounding_precision;
   1088    status->float_rounding_mode = float_round_nearest_even;
   1089    status->floatx80_rounding_precision = floatx80_precision_x;
   1090
   1091    fp0 = a;
   1092
   1093    compact = floatx80_make_compact(aExp, aSig);
   1094
   1095    if (compact < 0x3FB98000 || compact > 0x400B9B07) {
   1096        /* |X| > 16480 LOG2/LOG10 or |X| < 2^(-70) */
   1097        if (compact > 0x3FFF8000) { /* |X| > 16480 */
   1098            status->float_rounding_mode = user_rnd_mode;
   1099            status->floatx80_rounding_precision = user_rnd_prec;
   1100
   1101            if (aSign) {
   1102                return roundAndPackFloatx80(status->floatx80_rounding_precision,
   1103                                            0, -0x1000, aSig, 0, status);
   1104            } else {
   1105                return roundAndPackFloatx80(status->floatx80_rounding_precision,
   1106                                            0, 0x8000, aSig, 0, status);
   1107            }
   1108        } else { /* |X| < 2^(-70) */
   1109            status->float_rounding_mode = user_rnd_mode;
   1110            status->floatx80_rounding_precision = user_rnd_prec;
   1111
   1112            a = floatx80_add(fp0, float32_to_floatx80(
   1113                             make_float32(0x3F800000), status),
   1114                             status); /* 1 + X */
   1115
   1116            float_raise(float_flag_inexact, status);
   1117
   1118            return a;
   1119        }
   1120    } else { /* 2^(-70) <= |X| <= 16480 LOG 2 / LOG 10 */
   1121        fp1 = fp0; /* X */
   1122        fp1 = floatx80_mul(fp1, float64_to_floatx80(
   1123                           make_float64(0x406A934F0979A371),
   1124                           status), status); /* X*64*LOG10/LOG2 */
   1125        n = floatx80_to_int32(fp1, status); /* N=INT(X*64*LOG10/LOG2) */
   1126        fp1 = int32_to_floatx80(n, status);
   1127
   1128        j = n & 0x3F;
   1129        l = n / 64; /* NOTE: this is really arithmetic right shift by 6 */
   1130        if (n < 0 && j) {
   1131            /*
   1132             * arithmetic right shift is division and
   1133             * round towards minus infinity
   1134             */
   1135            l--;
   1136        }
   1137        m = l / 2; /* NOTE: this is really arithmetic right shift by 1 */
   1138        if (l < 0 && (l & 1)) {
   1139            /*
   1140             * arithmetic right shift is division and
   1141             * round towards minus infinity
   1142             */
   1143            m--;
   1144        }
   1145        m1 = l - m;
   1146        m1 += 0x3FFF; /* ADJFACT IS 2^(M') */
   1147
   1148        adjfact = packFloatx80(0, m1, one_sig);
   1149        fact1 = exp2_tbl[j];
   1150        fact1.high += m;
   1151        fact2.high = exp2_tbl2[j] >> 16;
   1152        fact2.high += m;
   1153        fact2.low = (uint64_t)(exp2_tbl2[j] & 0xFFFF);
   1154        fact2.low <<= 48;
   1155
   1156        fp2 = fp1; /* N */
   1157        fp1 = floatx80_mul(fp1, float64_to_floatx80(
   1158                           make_float64(0x3F734413509F8000), status),
   1159                           status); /* N*(LOG2/64LOG10)_LEAD */
   1160        fp3 = packFloatx80(1, 0x3FCD, UINT64_C(0xC0219DC1DA994FD2));
   1161        fp2 = floatx80_mul(fp2, fp3, status); /* N*(LOG2/64LOG10)_TRAIL */
   1162        fp0 = floatx80_sub(fp0, fp1, status); /* X - N L_LEAD */
   1163        fp0 = floatx80_sub(fp0, fp2, status); /* X - N L_TRAIL */
   1164        fp2 = packFloatx80(0, 0x4000, UINT64_C(0x935D8DDDAAA8AC17)); /* LOG10 */
   1165        fp0 = floatx80_mul(fp0, fp2, status); /* R */
   1166
   1167        /* EXPR */
   1168        fp1 = floatx80_mul(fp0, fp0, status); /* S = R*R */
   1169        fp2 = float64_to_floatx80(make_float64(0x3F56C16D6F7BD0B2),
   1170                                  status); /* A5 */
   1171        fp3 = float64_to_floatx80(make_float64(0x3F811112302C712C),
   1172                                  status); /* A4 */
   1173        fp2 = floatx80_mul(fp2, fp1, status); /* S*A5 */
   1174        fp3 = floatx80_mul(fp3, fp1, status); /* S*A4 */
   1175        fp2 = floatx80_add(fp2, float64_to_floatx80(
   1176                           make_float64(0x3FA5555555554CC1), status),
   1177                           status); /* A3+S*A5 */
   1178        fp3 = floatx80_add(fp3, float64_to_floatx80(
   1179                           make_float64(0x3FC5555555554A54), status),
   1180                           status); /* A2+S*A4 */
   1181        fp2 = floatx80_mul(fp2, fp1, status); /* S*(A3+S*A5) */
   1182        fp3 = floatx80_mul(fp3, fp1, status); /* S*(A2+S*A4) */
   1183        fp2 = floatx80_add(fp2, float64_to_floatx80(
   1184                           make_float64(0x3FE0000000000000), status),
   1185                           status); /* A1+S*(A3+S*A5) */
   1186        fp3 = floatx80_mul(fp3, fp0, status); /* R*S*(A2+S*A4) */
   1187
   1188        fp2 = floatx80_mul(fp2, fp1, status); /* S*(A1+S*(A3+S*A5)) */
   1189        fp0 = floatx80_add(fp0, fp3, status); /* R+R*S*(A2+S*A4) */
   1190        fp0 = floatx80_add(fp0, fp2, status); /* EXP(R) - 1 */
   1191
   1192        fp0 = floatx80_mul(fp0, fact1, status);
   1193        fp0 = floatx80_add(fp0, fact2, status);
   1194        fp0 = floatx80_add(fp0, fact1, status);
   1195
   1196        status->float_rounding_mode = user_rnd_mode;
   1197        status->floatx80_rounding_precision = user_rnd_prec;
   1198
   1199        a = floatx80_mul(fp0, adjfact, status);
   1200
   1201        float_raise(float_flag_inexact, status);
   1202
   1203        return a;
   1204    }
   1205}
   1206
   1207/*
   1208 * Tangent
   1209 */
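        /*
         * The argument is reduced to X = N*(pi/2) + R using the pi/2 tables
         * (or the REDUCEX loop for very large |X|); tan(X) is then tan(R) for
         * even N and -1/tan(R) for odd N, each evaluated from short polynomials
         * in S = R*R.
         */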
   1210
   1211floatx80 floatx80_tan(floatx80 a, float_status *status)
   1212{
   1213    bool aSign, xSign;
   1214    int32_t aExp, xExp;
   1215    uint64_t aSig, xSig;
   1216
   1217    FloatRoundMode user_rnd_mode;
   1218    FloatX80RoundPrec user_rnd_prec;
   1219
   1220    int32_t compact, l, n, j;
   1221    floatx80 fp0, fp1, fp2, fp3, fp4, fp5, invtwopi, twopi1, twopi2;
   1222    float32 twoto63;
   1223    bool endflag;
   1224
   1225    aSig = extractFloatx80Frac(a);
   1226    aExp = extractFloatx80Exp(a);
   1227    aSign = extractFloatx80Sign(a);
   1228
   1229    if (aExp == 0x7FFF) {
   1230        if ((uint64_t) (aSig << 1)) {
   1231            return propagateFloatx80NaNOneArg(a, status);
   1232        }
   1233        float_raise(float_flag_invalid, status);
   1234        return floatx80_default_nan(status);
   1235    }
   1236
   1237    if (aExp == 0 && aSig == 0) {
   1238        return packFloatx80(aSign, 0, 0);
   1239    }
   1240
   1241    user_rnd_mode = status->float_rounding_mode;
   1242    user_rnd_prec = status->floatx80_rounding_precision;
   1243    status->float_rounding_mode = float_round_nearest_even;
   1244    status->floatx80_rounding_precision = floatx80_precision_x;
   1245
   1246    compact = floatx80_make_compact(aExp, aSig);
   1247
   1248    fp0 = a;
   1249
   1250    if (compact < 0x3FD78000 || compact > 0x4004BC7E) {
    1251        /* |X| < 2^(-40) or |X| > 15 PI */
   1252        if (compact > 0x3FFF8000) { /* |X| >= 15 PI */
   1253            /* REDUCEX */
   1254            fp1 = packFloatx80(0, 0, 0);
   1255            if (compact == 0x7FFEFFFF) {
   1256                twopi1 = packFloatx80(aSign ^ 1, 0x7FFE,
   1257                                      UINT64_C(0xC90FDAA200000000));
   1258                twopi2 = packFloatx80(aSign ^ 1, 0x7FDC,
   1259                                      UINT64_C(0x85A308D300000000));
   1260                fp0 = floatx80_add(fp0, twopi1, status);
   1261                fp1 = fp0;
   1262                fp0 = floatx80_add(fp0, twopi2, status);
   1263                fp1 = floatx80_sub(fp1, fp0, status);
   1264                fp1 = floatx80_add(fp1, twopi2, status);
   1265            }
   1266        loop:
   1267            xSign = extractFloatx80Sign(fp0);
   1268            xExp = extractFloatx80Exp(fp0);
   1269            xExp -= 0x3FFF;
   1270            if (xExp <= 28) {
   1271                l = 0;
   1272                endflag = true;
   1273            } else {
   1274                l = xExp - 27;
   1275                endflag = false;
   1276            }
   1277            invtwopi = packFloatx80(0, 0x3FFE - l,
   1278                                    UINT64_C(0xA2F9836E4E44152A)); /* INVTWOPI */
   1279            twopi1 = packFloatx80(0, 0x3FFF + l, UINT64_C(0xC90FDAA200000000));
   1280            twopi2 = packFloatx80(0, 0x3FDD + l, UINT64_C(0x85A308D300000000));
   1281
   1282            /* SIGN(INARG)*2^63 IN SGL */
   1283            twoto63 = packFloat32(xSign, 0xBE, 0);
   1284
   1285            fp2 = floatx80_mul(fp0, invtwopi, status);
   1286            fp2 = floatx80_add(fp2, float32_to_floatx80(twoto63, status),
   1287                               status); /* THE FRACT PART OF FP2 IS ROUNDED */
   1288            fp2 = floatx80_sub(fp2, float32_to_floatx80(twoto63, status),
   1289                               status); /* FP2 is N */
   1290            fp4 = floatx80_mul(twopi1, fp2, status); /* W = N*P1 */
   1291            fp5 = floatx80_mul(twopi2, fp2, status); /* w = N*P2 */
   1292            fp3 = floatx80_add(fp4, fp5, status); /* FP3 is P */
   1293            fp4 = floatx80_sub(fp4, fp3, status); /* W-P */
   1294            fp0 = floatx80_sub(fp0, fp3, status); /* FP0 is A := R - P */
   1295            fp4 = floatx80_add(fp4, fp5, status); /* FP4 is p = (W-P)+w */
   1296            fp3 = fp0; /* FP3 is A */
   1297            fp1 = floatx80_sub(fp1, fp4, status); /* FP1 is a := r - p */
   1298            fp0 = floatx80_add(fp0, fp1, status); /* FP0 is R := A+a */
   1299
   1300            if (endflag) {
   1301                n = floatx80_to_int32(fp2, status);
   1302                goto tancont;
   1303            }
   1304            fp3 = floatx80_sub(fp3, fp0, status); /* A-R */
   1305            fp1 = floatx80_add(fp1, fp3, status); /* FP1 is r := (A-R)+a */
   1306            goto loop;
   1307        } else {
   1308            status->float_rounding_mode = user_rnd_mode;
   1309            status->floatx80_rounding_precision = user_rnd_prec;
   1310
   1311            a = floatx80_move(a, status);
   1312
   1313            float_raise(float_flag_inexact, status);
   1314
   1315            return a;
   1316        }
   1317    } else {
   1318        fp1 = floatx80_mul(fp0, float64_to_floatx80(
   1319                           make_float64(0x3FE45F306DC9C883), status),
   1320                           status); /* X*2/PI */
   1321
   1322        n = floatx80_to_int32(fp1, status);
   1323        j = 32 + n;
   1324
   1325        fp0 = floatx80_sub(fp0, pi_tbl[j], status); /* X-Y1 */
   1326        fp0 = floatx80_sub(fp0, float32_to_floatx80(pi_tbl2[j], status),
   1327                           status); /* FP0 IS R = (X-Y1)-Y2 */
   1328
   1329    tancont:
   1330        if (n & 1) {
   1331            /* NODD */
   1332            fp1 = fp0; /* R */
   1333            fp0 = floatx80_mul(fp0, fp0, status); /* S = R*R */
   1334            fp3 = float64_to_floatx80(make_float64(0x3EA0B759F50F8688),
   1335                                      status); /* Q4 */
   1336            fp2 = float64_to_floatx80(make_float64(0xBEF2BAA5A8924F04),
   1337                                      status); /* P3 */
   1338            fp3 = floatx80_mul(fp3, fp0, status); /* SQ4 */
   1339            fp2 = floatx80_mul(fp2, fp0, status); /* SP3 */
   1340            fp3 = floatx80_add(fp3, float64_to_floatx80(
   1341                               make_float64(0xBF346F59B39BA65F), status),
   1342                               status); /* Q3+SQ4 */
   1343            fp4 = packFloatx80(0, 0x3FF6, UINT64_C(0xE073D3FC199C4A00));
   1344            fp2 = floatx80_add(fp2, fp4, status); /* P2+SP3 */
   1345            fp3 = floatx80_mul(fp3, fp0, status); /* S(Q3+SQ4) */
   1346            fp2 = floatx80_mul(fp2, fp0, status); /* S(P2+SP3) */
   1347            fp4 = packFloatx80(0, 0x3FF9, UINT64_C(0xD23CD68415D95FA1));
   1348            fp3 = floatx80_add(fp3, fp4, status); /* Q2+S(Q3+SQ4) */
   1349            fp4 = packFloatx80(1, 0x3FFC, UINT64_C(0x8895A6C5FB423BCA));
   1350            fp2 = floatx80_add(fp2, fp4, status); /* P1+S(P2+SP3) */
   1351            fp3 = floatx80_mul(fp3, fp0, status); /* S(Q2+S(Q3+SQ4)) */
   1352            fp2 = floatx80_mul(fp2, fp0, status); /* S(P1+S(P2+SP3)) */
   1353            fp4 = packFloatx80(1, 0x3FFD, UINT64_C(0xEEF57E0DA84BC8CE));
   1354            fp3 = floatx80_add(fp3, fp4, status); /* Q1+S(Q2+S(Q3+SQ4)) */
   1355            fp2 = floatx80_mul(fp2, fp1, status); /* RS(P1+S(P2+SP3)) */
   1356            fp0 = floatx80_mul(fp0, fp3, status); /* S(Q1+S(Q2+S(Q3+SQ4))) */
   1357            fp1 = floatx80_add(fp1, fp2, status); /* R+RS(P1+S(P2+SP3)) */
   1358            fp0 = floatx80_add(fp0, float32_to_floatx80(
   1359                               make_float32(0x3F800000), status),
   1360                               status); /* 1+S(Q1+S(Q2+S(Q3+SQ4))) */
   1361
   1362            xSign = extractFloatx80Sign(fp1);
   1363            xExp = extractFloatx80Exp(fp1);
   1364            xSig = extractFloatx80Frac(fp1);
   1365            xSign ^= 1;
   1366            fp1 = packFloatx80(xSign, xExp, xSig);
   1367
   1368            status->float_rounding_mode = user_rnd_mode;
   1369            status->floatx80_rounding_precision = user_rnd_prec;
   1370
   1371            a = floatx80_div(fp0, fp1, status);
   1372
   1373            float_raise(float_flag_inexact, status);
   1374
   1375            return a;
   1376        } else {
   1377            fp1 = floatx80_mul(fp0, fp0, status); /* S = R*R */
   1378            fp3 = float64_to_floatx80(make_float64(0x3EA0B759F50F8688),
   1379                                      status); /* Q4 */
   1380            fp2 = float64_to_floatx80(make_float64(0xBEF2BAA5A8924F04),
   1381                                      status); /* P3 */
   1382            fp3 = floatx80_mul(fp3, fp1, status); /* SQ4 */
   1383            fp2 = floatx80_mul(fp2, fp1, status); /* SP3 */
   1384            fp3 = floatx80_add(fp3, float64_to_floatx80(
   1385                               make_float64(0xBF346F59B39BA65F), status),
   1386                               status); /* Q3+SQ4 */
   1387            fp4 = packFloatx80(0, 0x3FF6, UINT64_C(0xE073D3FC199C4A00));
   1388            fp2 = floatx80_add(fp2, fp4, status); /* P2+SP3 */
   1389            fp3 = floatx80_mul(fp3, fp1, status); /* S(Q3+SQ4) */
   1390            fp2 = floatx80_mul(fp2, fp1, status); /* S(P2+SP3) */
   1391            fp4 = packFloatx80(0, 0x3FF9, UINT64_C(0xD23CD68415D95FA1));
   1392            fp3 = floatx80_add(fp3, fp4, status); /* Q2+S(Q3+SQ4) */
   1393            fp4 = packFloatx80(1, 0x3FFC, UINT64_C(0x8895A6C5FB423BCA));
   1394            fp2 = floatx80_add(fp2, fp4, status); /* P1+S(P2+SP3) */
   1395            fp3 = floatx80_mul(fp3, fp1, status); /* S(Q2+S(Q3+SQ4)) */
   1396            fp2 = floatx80_mul(fp2, fp1, status); /* S(P1+S(P2+SP3)) */
   1397            fp4 = packFloatx80(1, 0x3FFD, UINT64_C(0xEEF57E0DA84BC8CE));
   1398            fp3 = floatx80_add(fp3, fp4, status); /* Q1+S(Q2+S(Q3+SQ4)) */
   1399            fp2 = floatx80_mul(fp2, fp0, status); /* RS(P1+S(P2+SP3)) */
   1400            fp1 = floatx80_mul(fp1, fp3, status); /* S(Q1+S(Q2+S(Q3+SQ4))) */
   1401            fp0 = floatx80_add(fp0, fp2, status); /* R+RS(P1+S(P2+SP3)) */
   1402            fp1 = floatx80_add(fp1, float32_to_floatx80(
   1403                               make_float32(0x3F800000), status),
   1404                               status); /* 1+S(Q1+S(Q2+S(Q3+SQ4))) */
   1405
   1406            status->float_rounding_mode = user_rnd_mode;
   1407            status->floatx80_rounding_precision = user_rnd_prec;
   1408
   1409            a = floatx80_div(fp0, fp1, status);
   1410
   1411            float_raise(float_flag_inexact, status);
   1412
   1413            return a;
   1414        }
   1415    }
   1416}
   1417
   1418/*
   1419 * Sine
   1420 */
   1421
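/*
 * Outline of the FPSP-derived algorithm: the argument is reduced to
 * R = X - N*(pi/2), |R| <= pi/4, using the pi_tbl tables for moderate
 * X or the REDUCEX loop for |X| >= 15 pi.  Odd N selects the cosine
 * kernel (COSPOLY, coefficients B1..B8 in S = R*R), even N the sine
 * kernel (SINPOLY, A1..A7), and bit 1 of N fixes the sign.  Arguments
 * below 2^(-40) are returned unchanged (with inexact raised).
 */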
   1422floatx80 floatx80_sin(floatx80 a, float_status *status)
   1423{
   1424    bool aSign, xSign;
   1425    int32_t aExp, xExp;
   1426    uint64_t aSig, xSig;
   1427
   1428    FloatRoundMode user_rnd_mode;
   1429    FloatX80RoundPrec user_rnd_prec;
   1430
   1431    int32_t compact, l, n, j;
   1432    floatx80 fp0, fp1, fp2, fp3, fp4, fp5, x, invtwopi, twopi1, twopi2;
   1433    float32 posneg1, twoto63;
   1434    bool endflag;
   1435
   1436    aSig = extractFloatx80Frac(a);
   1437    aExp = extractFloatx80Exp(a);
   1438    aSign = extractFloatx80Sign(a);
   1439
   1440    if (aExp == 0x7FFF) {
   1441        if ((uint64_t) (aSig << 1)) {
   1442            return propagateFloatx80NaNOneArg(a, status);
   1443        }
   1444        float_raise(float_flag_invalid, status);
   1445        return floatx80_default_nan(status);
   1446    }
   1447
   1448    if (aExp == 0 && aSig == 0) {
   1449        return packFloatx80(aSign, 0, 0);
   1450    }
   1451
   1452    user_rnd_mode = status->float_rounding_mode;
   1453    user_rnd_prec = status->floatx80_rounding_precision;
   1454    status->float_rounding_mode = float_round_nearest_even;
   1455    status->floatx80_rounding_precision = floatx80_precision_x;
   1456
   1457    compact = floatx80_make_compact(aExp, aSig);
   1458
   1459    fp0 = a;
   1460
   1461    if (compact < 0x3FD78000 || compact > 0x4004BC7E) {
    1462        /* |X| < 2^(-40) or |X| > 15 PI */
   1463        if (compact > 0x3FFF8000) { /* |X| >= 15 PI */
   1464            /* REDUCEX */
   1465            fp1 = packFloatx80(0, 0, 0);
   1466            if (compact == 0x7FFEFFFF) {
   1467                twopi1 = packFloatx80(aSign ^ 1, 0x7FFE,
   1468                                      UINT64_C(0xC90FDAA200000000));
   1469                twopi2 = packFloatx80(aSign ^ 1, 0x7FDC,
   1470                                      UINT64_C(0x85A308D300000000));
   1471                fp0 = floatx80_add(fp0, twopi1, status);
   1472                fp1 = fp0;
   1473                fp0 = floatx80_add(fp0, twopi2, status);
   1474                fp1 = floatx80_sub(fp1, fp0, status);
   1475                fp1 = floatx80_add(fp1, twopi2, status);
   1476            }
   1477        loop:
   1478            xSign = extractFloatx80Sign(fp0);
   1479            xExp = extractFloatx80Exp(fp0);
   1480            xExp -= 0x3FFF;
   1481            if (xExp <= 28) {
   1482                l = 0;
   1483                endflag = true;
   1484            } else {
   1485                l = xExp - 27;
   1486                endflag = false;
   1487            }
   1488            invtwopi = packFloatx80(0, 0x3FFE - l,
   1489                                    UINT64_C(0xA2F9836E4E44152A)); /* INVTWOPI */
   1490            twopi1 = packFloatx80(0, 0x3FFF + l, UINT64_C(0xC90FDAA200000000));
   1491            twopi2 = packFloatx80(0, 0x3FDD + l, UINT64_C(0x85A308D300000000));
   1492
   1493            /* SIGN(INARG)*2^63 IN SGL */
   1494            twoto63 = packFloat32(xSign, 0xBE, 0);
   1495
   1496            fp2 = floatx80_mul(fp0, invtwopi, status);
   1497            fp2 = floatx80_add(fp2, float32_to_floatx80(twoto63, status),
   1498                               status); /* THE FRACT PART OF FP2 IS ROUNDED */
   1499            fp2 = floatx80_sub(fp2, float32_to_floatx80(twoto63, status),
   1500                               status); /* FP2 is N */
   1501            fp4 = floatx80_mul(twopi1, fp2, status); /* W = N*P1 */
   1502            fp5 = floatx80_mul(twopi2, fp2, status); /* w = N*P2 */
   1503            fp3 = floatx80_add(fp4, fp5, status); /* FP3 is P */
   1504            fp4 = floatx80_sub(fp4, fp3, status); /* W-P */
   1505            fp0 = floatx80_sub(fp0, fp3, status); /* FP0 is A := R - P */
   1506            fp4 = floatx80_add(fp4, fp5, status); /* FP4 is p = (W-P)+w */
   1507            fp3 = fp0; /* FP3 is A */
   1508            fp1 = floatx80_sub(fp1, fp4, status); /* FP1 is a := r - p */
   1509            fp0 = floatx80_add(fp0, fp1, status); /* FP0 is R := A+a */
   1510
   1511            if (endflag) {
   1512                n = floatx80_to_int32(fp2, status);
   1513                goto sincont;
   1514            }
   1515            fp3 = floatx80_sub(fp3, fp0, status); /* A-R */
   1516            fp1 = floatx80_add(fp1, fp3, status); /* FP1 is r := (A-R)+a */
   1517            goto loop;
   1518        } else {
   1519            /* SINSM */
   1520            fp0 = float32_to_floatx80(make_float32(0x3F800000),
   1521                                      status); /* 1 */
   1522
   1523            status->float_rounding_mode = user_rnd_mode;
   1524            status->floatx80_rounding_precision = user_rnd_prec;
   1525
   1526            /* SINTINY */
   1527            a = floatx80_move(a, status);
   1528            float_raise(float_flag_inexact, status);
   1529
   1530            return a;
   1531        }
   1532    } else {
   1533        fp1 = floatx80_mul(fp0, float64_to_floatx80(
   1534                           make_float64(0x3FE45F306DC9C883), status),
   1535                           status); /* X*2/PI */
   1536
   1537        n = floatx80_to_int32(fp1, status);
   1538        j = 32 + n;
   1539
   1540        fp0 = floatx80_sub(fp0, pi_tbl[j], status); /* X-Y1 */
   1541        fp0 = floatx80_sub(fp0, float32_to_floatx80(pi_tbl2[j], status),
   1542                           status); /* FP0 IS R = (X-Y1)-Y2 */
   1543
   1544    sincont:
   1545        if (n & 1) {
   1546            /* COSPOLY */
   1547            fp0 = floatx80_mul(fp0, fp0, status); /* FP0 IS S */
   1548            fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS T */
   1549            fp2 = float64_to_floatx80(make_float64(0x3D2AC4D0D6011EE3),
   1550                                      status); /* B8 */
   1551            fp3 = float64_to_floatx80(make_float64(0xBDA9396F9F45AC19),
   1552                                      status); /* B7 */
   1553
   1554            xSign = extractFloatx80Sign(fp0); /* X IS S */
   1555            xExp = extractFloatx80Exp(fp0);
   1556            xSig = extractFloatx80Frac(fp0);
   1557
   1558            if ((n >> 1) & 1) {
   1559                xSign ^= 1;
   1560                posneg1 = make_float32(0xBF800000); /* -1 */
   1561            } else {
   1562                xSign ^= 0;
   1563                posneg1 = make_float32(0x3F800000); /* 1 */
   1564            } /* X IS NOW R'= SGN*R */
   1565
   1566            fp2 = floatx80_mul(fp2, fp1, status); /* TB8 */
   1567            fp3 = floatx80_mul(fp3, fp1, status); /* TB7 */
   1568            fp2 = floatx80_add(fp2, float64_to_floatx80(
   1569                               make_float64(0x3E21EED90612C972), status),
   1570                               status); /* B6+TB8 */
   1571            fp3 = floatx80_add(fp3, float64_to_floatx80(
   1572                               make_float64(0xBE927E4FB79D9FCF), status),
   1573                               status); /* B5+TB7 */
   1574            fp2 = floatx80_mul(fp2, fp1, status); /* T(B6+TB8) */
   1575            fp3 = floatx80_mul(fp3, fp1, status); /* T(B5+TB7) */
   1576            fp2 = floatx80_add(fp2, float64_to_floatx80(
   1577                               make_float64(0x3EFA01A01A01D423), status),
   1578                               status); /* B4+T(B6+TB8) */
   1579            fp4 = packFloatx80(1, 0x3FF5, UINT64_C(0xB60B60B60B61D438));
   1580            fp3 = floatx80_add(fp3, fp4, status); /* B3+T(B5+TB7) */
   1581            fp2 = floatx80_mul(fp2, fp1, status); /* T(B4+T(B6+TB8)) */
   1582            fp1 = floatx80_mul(fp1, fp3, status); /* T(B3+T(B5+TB7)) */
   1583            fp4 = packFloatx80(0, 0x3FFA, UINT64_C(0xAAAAAAAAAAAAAB5E));
   1584            fp2 = floatx80_add(fp2, fp4, status); /* B2+T(B4+T(B6+TB8)) */
   1585            fp1 = floatx80_add(fp1, float32_to_floatx80(
   1586                               make_float32(0xBF000000), status),
   1587                               status); /* B1+T(B3+T(B5+TB7)) */
   1588            fp0 = floatx80_mul(fp0, fp2, status); /* S(B2+T(B4+T(B6+TB8))) */
   1589            fp0 = floatx80_add(fp0, fp1, status); /* [B1+T(B3+T(B5+TB7))]+
   1590                                                   * [S(B2+T(B4+T(B6+TB8)))]
   1591                                                   */
   1592
   1593            x = packFloatx80(xSign, xExp, xSig);
   1594            fp0 = floatx80_mul(fp0, x, status);
   1595
   1596            status->float_rounding_mode = user_rnd_mode;
   1597            status->floatx80_rounding_precision = user_rnd_prec;
   1598
   1599            a = floatx80_add(fp0, float32_to_floatx80(posneg1, status), status);
   1600
   1601            float_raise(float_flag_inexact, status);
   1602
   1603            return a;
   1604        } else {
   1605            /* SINPOLY */
   1606            xSign = extractFloatx80Sign(fp0); /* X IS R */
   1607            xExp = extractFloatx80Exp(fp0);
   1608            xSig = extractFloatx80Frac(fp0);
   1609
   1610            xSign ^= (n >> 1) & 1; /* X IS NOW R'= SGN*R */
   1611
   1612            fp0 = floatx80_mul(fp0, fp0, status); /* FP0 IS S */
   1613            fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS T */
   1614            fp3 = float64_to_floatx80(make_float64(0xBD6AAA77CCC994F5),
   1615                                      status); /* A7 */
   1616            fp2 = float64_to_floatx80(make_float64(0x3DE612097AAE8DA1),
   1617                                      status); /* A6 */
   1618            fp3 = floatx80_mul(fp3, fp1, status); /* T*A7 */
   1619            fp2 = floatx80_mul(fp2, fp1, status); /* T*A6 */
   1620            fp3 = floatx80_add(fp3, float64_to_floatx80(
   1621                               make_float64(0xBE5AE6452A118AE4), status),
   1622                               status); /* A5+T*A7 */
   1623            fp2 = floatx80_add(fp2, float64_to_floatx80(
   1624                               make_float64(0x3EC71DE3A5341531), status),
   1625                               status); /* A4+T*A6 */
   1626            fp3 = floatx80_mul(fp3, fp1, status); /* T(A5+TA7) */
   1627            fp2 = floatx80_mul(fp2, fp1, status); /* T(A4+TA6) */
   1628            fp3 = floatx80_add(fp3, float64_to_floatx80(
   1629                               make_float64(0xBF2A01A01A018B59), status),
   1630                               status); /* A3+T(A5+TA7) */
   1631            fp4 = packFloatx80(0, 0x3FF8, UINT64_C(0x88888888888859AF));
   1632            fp2 = floatx80_add(fp2, fp4, status); /* A2+T(A4+TA6) */
   1633            fp1 = floatx80_mul(fp1, fp3, status); /* T(A3+T(A5+TA7)) */
   1634            fp2 = floatx80_mul(fp2, fp0, status); /* S(A2+T(A4+TA6)) */
   1635            fp4 = packFloatx80(1, 0x3FFC, UINT64_C(0xAAAAAAAAAAAAAA99));
   1636            fp1 = floatx80_add(fp1, fp4, status); /* A1+T(A3+T(A5+TA7)) */
   1637            fp1 = floatx80_add(fp1, fp2,
   1638                               status); /* [A1+T(A3+T(A5+TA7))]+
   1639                                         * [S(A2+T(A4+TA6))]
   1640                                         */
   1641
   1642            x = packFloatx80(xSign, xExp, xSig);
   1643            fp0 = floatx80_mul(fp0, x, status); /* R'*S */
   1644            fp0 = floatx80_mul(fp0, fp1, status); /* SIN(R')-R' */
   1645
   1646            status->float_rounding_mode = user_rnd_mode;
   1647            status->floatx80_rounding_precision = user_rnd_prec;
   1648
   1649            a = floatx80_add(fp0, x, status);
   1650
   1651            float_raise(float_flag_inexact, status);
   1652
   1653            return a;
   1654        }
   1655    }
   1656}
   1657
   1658/*
   1659 * Cosine
   1660 */
   1661
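/*
 * Same reduction and polynomial kernels as floatx80_sin above; the
 * quadrant count is simply taken as N + 1, i.e. cos(X) is evaluated
 * as sin(X + pi/2).  cos(0) returns +1.0 exactly, and arguments below
 * 2^(-40) return 1 minus a 2^(-126) nudge (with inexact raised).
 */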
   1662floatx80 floatx80_cos(floatx80 a, float_status *status)
   1663{
   1664    bool aSign, xSign;
   1665    int32_t aExp, xExp;
   1666    uint64_t aSig, xSig;
   1667
   1668    FloatRoundMode user_rnd_mode;
   1669    FloatX80RoundPrec user_rnd_prec;
   1670
   1671    int32_t compact, l, n, j;
   1672    floatx80 fp0, fp1, fp2, fp3, fp4, fp5, x, invtwopi, twopi1, twopi2;
   1673    float32 posneg1, twoto63;
   1674    bool endflag;
   1675
   1676    aSig = extractFloatx80Frac(a);
   1677    aExp = extractFloatx80Exp(a);
   1678    aSign = extractFloatx80Sign(a);
   1679
   1680    if (aExp == 0x7FFF) {
   1681        if ((uint64_t) (aSig << 1)) {
   1682            return propagateFloatx80NaNOneArg(a, status);
   1683        }
   1684        float_raise(float_flag_invalid, status);
   1685        return floatx80_default_nan(status);
   1686    }
   1687
   1688    if (aExp == 0 && aSig == 0) {
   1689        return packFloatx80(0, one_exp, one_sig);
   1690    }
   1691
   1692    user_rnd_mode = status->float_rounding_mode;
   1693    user_rnd_prec = status->floatx80_rounding_precision;
   1694    status->float_rounding_mode = float_round_nearest_even;
   1695    status->floatx80_rounding_precision = floatx80_precision_x;
   1696
   1697    compact = floatx80_make_compact(aExp, aSig);
   1698
   1699    fp0 = a;
   1700
   1701    if (compact < 0x3FD78000 || compact > 0x4004BC7E) {
    1702        /* |X| < 2^(-40) or |X| > 15 PI */
   1703        if (compact > 0x3FFF8000) { /* |X| >= 15 PI */
   1704            /* REDUCEX */
   1705            fp1 = packFloatx80(0, 0, 0);
   1706            if (compact == 0x7FFEFFFF) {
   1707                twopi1 = packFloatx80(aSign ^ 1, 0x7FFE,
   1708                                      UINT64_C(0xC90FDAA200000000));
   1709                twopi2 = packFloatx80(aSign ^ 1, 0x7FDC,
   1710                                      UINT64_C(0x85A308D300000000));
   1711                fp0 = floatx80_add(fp0, twopi1, status);
   1712                fp1 = fp0;
   1713                fp0 = floatx80_add(fp0, twopi2, status);
   1714                fp1 = floatx80_sub(fp1, fp0, status);
   1715                fp1 = floatx80_add(fp1, twopi2, status);
   1716            }
   1717        loop:
   1718            xSign = extractFloatx80Sign(fp0);
   1719            xExp = extractFloatx80Exp(fp0);
   1720            xExp -= 0x3FFF;
   1721            if (xExp <= 28) {
   1722                l = 0;
   1723                endflag = true;
   1724            } else {
   1725                l = xExp - 27;
   1726                endflag = false;
   1727            }
   1728            invtwopi = packFloatx80(0, 0x3FFE - l,
   1729                                    UINT64_C(0xA2F9836E4E44152A)); /* INVTWOPI */
   1730            twopi1 = packFloatx80(0, 0x3FFF + l, UINT64_C(0xC90FDAA200000000));
   1731            twopi2 = packFloatx80(0, 0x3FDD + l, UINT64_C(0x85A308D300000000));
   1732
   1733            /* SIGN(INARG)*2^63 IN SGL */
   1734            twoto63 = packFloat32(xSign, 0xBE, 0);
   1735
   1736            fp2 = floatx80_mul(fp0, invtwopi, status);
   1737            fp2 = floatx80_add(fp2, float32_to_floatx80(twoto63, status),
   1738                               status); /* THE FRACT PART OF FP2 IS ROUNDED */
   1739            fp2 = floatx80_sub(fp2, float32_to_floatx80(twoto63, status),
   1740                               status); /* FP2 is N */
   1741            fp4 = floatx80_mul(twopi1, fp2, status); /* W = N*P1 */
   1742            fp5 = floatx80_mul(twopi2, fp2, status); /* w = N*P2 */
   1743            fp3 = floatx80_add(fp4, fp5, status); /* FP3 is P */
   1744            fp4 = floatx80_sub(fp4, fp3, status); /* W-P */
   1745            fp0 = floatx80_sub(fp0, fp3, status); /* FP0 is A := R - P */
   1746            fp4 = floatx80_add(fp4, fp5, status); /* FP4 is p = (W-P)+w */
   1747            fp3 = fp0; /* FP3 is A */
   1748            fp1 = floatx80_sub(fp1, fp4, status); /* FP1 is a := r - p */
   1749            fp0 = floatx80_add(fp0, fp1, status); /* FP0 is R := A+a */
   1750
   1751            if (endflag) {
   1752                n = floatx80_to_int32(fp2, status);
   1753                goto sincont;
   1754            }
   1755            fp3 = floatx80_sub(fp3, fp0, status); /* A-R */
   1756            fp1 = floatx80_add(fp1, fp3, status); /* FP1 is r := (A-R)+a */
   1757            goto loop;
   1758        } else {
    1759            /* COSSM */
   1760            fp0 = float32_to_floatx80(make_float32(0x3F800000), status); /* 1 */
   1761
   1762            status->float_rounding_mode = user_rnd_mode;
   1763            status->floatx80_rounding_precision = user_rnd_prec;
   1764
   1765            /* COSTINY */
   1766            a = floatx80_sub(fp0, float32_to_floatx80(
   1767                             make_float32(0x00800000), status),
   1768                             status);
   1769            float_raise(float_flag_inexact, status);
   1770
   1771            return a;
   1772        }
   1773    } else {
   1774        fp1 = floatx80_mul(fp0, float64_to_floatx80(
   1775                           make_float64(0x3FE45F306DC9C883), status),
   1776                           status); /* X*2/PI */
   1777
   1778        n = floatx80_to_int32(fp1, status);
   1779        j = 32 + n;
   1780
   1781        fp0 = floatx80_sub(fp0, pi_tbl[j], status); /* X-Y1 */
   1782        fp0 = floatx80_sub(fp0, float32_to_floatx80(pi_tbl2[j], status),
   1783                           status); /* FP0 IS R = (X-Y1)-Y2 */
   1784
   1785    sincont:
   1786        if ((n + 1) & 1) {
   1787            /* COSPOLY */
   1788            fp0 = floatx80_mul(fp0, fp0, status); /* FP0 IS S */
   1789            fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS T */
   1790            fp2 = float64_to_floatx80(make_float64(0x3D2AC4D0D6011EE3),
   1791                                      status); /* B8 */
   1792            fp3 = float64_to_floatx80(make_float64(0xBDA9396F9F45AC19),
   1793                                      status); /* B7 */
   1794
   1795            xSign = extractFloatx80Sign(fp0); /* X IS S */
   1796            xExp = extractFloatx80Exp(fp0);
   1797            xSig = extractFloatx80Frac(fp0);
   1798
   1799            if (((n + 1) >> 1) & 1) {
   1800                xSign ^= 1;
   1801                posneg1 = make_float32(0xBF800000); /* -1 */
   1802            } else {
   1803                xSign ^= 0;
   1804                posneg1 = make_float32(0x3F800000); /* 1 */
   1805            } /* X IS NOW R'= SGN*R */
   1806
   1807            fp2 = floatx80_mul(fp2, fp1, status); /* TB8 */
   1808            fp3 = floatx80_mul(fp3, fp1, status); /* TB7 */
   1809            fp2 = floatx80_add(fp2, float64_to_floatx80(
   1810                               make_float64(0x3E21EED90612C972), status),
   1811                               status); /* B6+TB8 */
   1812            fp3 = floatx80_add(fp3, float64_to_floatx80(
   1813                               make_float64(0xBE927E4FB79D9FCF), status),
   1814                               status); /* B5+TB7 */
   1815            fp2 = floatx80_mul(fp2, fp1, status); /* T(B6+TB8) */
   1816            fp3 = floatx80_mul(fp3, fp1, status); /* T(B5+TB7) */
   1817            fp2 = floatx80_add(fp2, float64_to_floatx80(
   1818                               make_float64(0x3EFA01A01A01D423), status),
   1819                               status); /* B4+T(B6+TB8) */
   1820            fp4 = packFloatx80(1, 0x3FF5, UINT64_C(0xB60B60B60B61D438));
   1821            fp3 = floatx80_add(fp3, fp4, status); /* B3+T(B5+TB7) */
   1822            fp2 = floatx80_mul(fp2, fp1, status); /* T(B4+T(B6+TB8)) */
   1823            fp1 = floatx80_mul(fp1, fp3, status); /* T(B3+T(B5+TB7)) */
   1824            fp4 = packFloatx80(0, 0x3FFA, UINT64_C(0xAAAAAAAAAAAAAB5E));
   1825            fp2 = floatx80_add(fp2, fp4, status); /* B2+T(B4+T(B6+TB8)) */
   1826            fp1 = floatx80_add(fp1, float32_to_floatx80(
   1827                               make_float32(0xBF000000), status),
   1828                               status); /* B1+T(B3+T(B5+TB7)) */
   1829            fp0 = floatx80_mul(fp0, fp2, status); /* S(B2+T(B4+T(B6+TB8))) */
   1830            fp0 = floatx80_add(fp0, fp1, status);
   1831                              /* [B1+T(B3+T(B5+TB7))]+[S(B2+T(B4+T(B6+TB8)))] */
   1832
   1833            x = packFloatx80(xSign, xExp, xSig);
   1834            fp0 = floatx80_mul(fp0, x, status);
   1835
   1836            status->float_rounding_mode = user_rnd_mode;
   1837            status->floatx80_rounding_precision = user_rnd_prec;
   1838
   1839            a = floatx80_add(fp0, float32_to_floatx80(posneg1, status), status);
   1840
   1841            float_raise(float_flag_inexact, status);
   1842
   1843            return a;
   1844        } else {
   1845            /* SINPOLY */
   1846            xSign = extractFloatx80Sign(fp0); /* X IS R */
   1847            xExp = extractFloatx80Exp(fp0);
   1848            xSig = extractFloatx80Frac(fp0);
   1849
   1850            xSign ^= ((n + 1) >> 1) & 1; /* X IS NOW R'= SGN*R */
   1851
   1852            fp0 = floatx80_mul(fp0, fp0, status); /* FP0 IS S */
   1853            fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS T */
   1854            fp3 = float64_to_floatx80(make_float64(0xBD6AAA77CCC994F5),
   1855                                      status); /* A7 */
   1856            fp2 = float64_to_floatx80(make_float64(0x3DE612097AAE8DA1),
   1857                                      status); /* A6 */
   1858            fp3 = floatx80_mul(fp3, fp1, status); /* T*A7 */
   1859            fp2 = floatx80_mul(fp2, fp1, status); /* T*A6 */
   1860            fp3 = floatx80_add(fp3, float64_to_floatx80(
   1861                               make_float64(0xBE5AE6452A118AE4), status),
   1862                               status); /* A5+T*A7 */
   1863            fp2 = floatx80_add(fp2, float64_to_floatx80(
   1864                               make_float64(0x3EC71DE3A5341531), status),
   1865                               status); /* A4+T*A6 */
   1866            fp3 = floatx80_mul(fp3, fp1, status); /* T(A5+TA7) */
   1867            fp2 = floatx80_mul(fp2, fp1, status); /* T(A4+TA6) */
   1868            fp3 = floatx80_add(fp3, float64_to_floatx80(
   1869                               make_float64(0xBF2A01A01A018B59), status),
   1870                               status); /* A3+T(A5+TA7) */
   1871            fp4 = packFloatx80(0, 0x3FF8, UINT64_C(0x88888888888859AF));
   1872            fp2 = floatx80_add(fp2, fp4, status); /* A2+T(A4+TA6) */
   1873            fp1 = floatx80_mul(fp1, fp3, status); /* T(A3+T(A5+TA7)) */
   1874            fp2 = floatx80_mul(fp2, fp0, status); /* S(A2+T(A4+TA6)) */
   1875            fp4 = packFloatx80(1, 0x3FFC, UINT64_C(0xAAAAAAAAAAAAAA99));
   1876            fp1 = floatx80_add(fp1, fp4, status); /* A1+T(A3+T(A5+TA7)) */
   1877            fp1 = floatx80_add(fp1, fp2, status);
   1878                                    /* [A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))] */
   1879
   1880            x = packFloatx80(xSign, xExp, xSig);
   1881            fp0 = floatx80_mul(fp0, x, status); /* R'*S */
   1882            fp0 = floatx80_mul(fp0, fp1, status); /* SIN(R')-R' */
   1883
   1884            status->float_rounding_mode = user_rnd_mode;
   1885            status->floatx80_rounding_precision = user_rnd_prec;
   1886
   1887            a = floatx80_add(fp0, x, status);
   1888
   1889            float_raise(float_flag_inexact, status);
   1890
   1891            return a;
   1892        }
   1893    }
   1894}
   1895
   1896/*
   1897 * Arc tangent
   1898 */
   1899
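/*
 * Range split of the FPSP-derived algorithm:
 *   |X| < 2^(-40):         return X (inexact)
 *   2^(-40) <= |X| < 1/16: atan(X) ~ X + X*Y*P(Y), Y = X*X
 *   1/16 <= |X| < 16:      F = X quantised to a few fraction bits;
 *                          atan(X) = atan(F) + atan(U), where
 *                          U = (X-F)/(1+X*F), atan(F) from atan_tbl
 *                          and atan(U) from a short polynomial
 *   16 <= |X| <= 2^100:    atan(X) = sign(X)*pi/2 + atan(-1/X)
 *   |X| > 2^100:           sign(X)*pi/2 nudged towards zero by
 *                          2^(-16382) so the result is inexact
 */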
   1900floatx80 floatx80_atan(floatx80 a, float_status *status)
   1901{
   1902    bool aSign;
   1903    int32_t aExp;
   1904    uint64_t aSig;
   1905
   1906    FloatRoundMode user_rnd_mode;
   1907    FloatX80RoundPrec user_rnd_prec;
   1908
   1909    int32_t compact, tbl_index;
   1910    floatx80 fp0, fp1, fp2, fp3, xsave;
   1911
   1912    aSig = extractFloatx80Frac(a);
   1913    aExp = extractFloatx80Exp(a);
   1914    aSign = extractFloatx80Sign(a);
   1915
   1916    if (aExp == 0x7FFF) {
   1917        if ((uint64_t) (aSig << 1)) {
   1918            return propagateFloatx80NaNOneArg(a, status);
   1919        }
   1920        a = packFloatx80(aSign, piby2_exp, pi_sig);
   1921        float_raise(float_flag_inexact, status);
   1922        return floatx80_move(a, status);
   1923    }
   1924
   1925    if (aExp == 0 && aSig == 0) {
   1926        return packFloatx80(aSign, 0, 0);
   1927    }
   1928
   1929    compact = floatx80_make_compact(aExp, aSig);
   1930
   1931    user_rnd_mode = status->float_rounding_mode;
   1932    user_rnd_prec = status->floatx80_rounding_precision;
   1933    status->float_rounding_mode = float_round_nearest_even;
   1934    status->floatx80_rounding_precision = floatx80_precision_x;
   1935
   1936    if (compact < 0x3FFB8000 || compact > 0x4002FFFF) {
   1937        /* |X| >= 16 or |X| < 1/16 */
   1938        if (compact > 0x3FFF8000) { /* |X| >= 16 */
   1939            if (compact > 0x40638000) { /* |X| > 2^(100) */
   1940                fp0 = packFloatx80(aSign, piby2_exp, pi_sig);
   1941                fp1 = packFloatx80(aSign, 0x0001, one_sig);
   1942
   1943                status->float_rounding_mode = user_rnd_mode;
   1944                status->floatx80_rounding_precision = user_rnd_prec;
   1945
   1946                a = floatx80_sub(fp0, fp1, status);
   1947
   1948                float_raise(float_flag_inexact, status);
   1949
   1950                return a;
   1951            } else {
   1952                fp0 = a;
   1953                fp1 = packFloatx80(1, one_exp, one_sig); /* -1 */
   1954                fp1 = floatx80_div(fp1, fp0, status); /* X' = -1/X */
   1955                xsave = fp1;
   1956                fp0 = floatx80_mul(fp1, fp1, status); /* Y = X'*X' */
   1957                fp1 = floatx80_mul(fp0, fp0, status); /* Z = Y*Y */
   1958                fp3 = float64_to_floatx80(make_float64(0xBFB70BF398539E6A),
   1959                                          status); /* C5 */
   1960                fp2 = float64_to_floatx80(make_float64(0x3FBC7187962D1D7D),
   1961                                          status); /* C4 */
   1962                fp3 = floatx80_mul(fp3, fp1, status); /* Z*C5 */
   1963                fp2 = floatx80_mul(fp2, fp1, status); /* Z*C4 */
   1964                fp3 = floatx80_add(fp3, float64_to_floatx80(
   1965                                   make_float64(0xBFC24924827107B8), status),
   1966                                   status); /* C3+Z*C5 */
   1967                fp2 = floatx80_add(fp2, float64_to_floatx80(
   1968                                   make_float64(0x3FC999999996263E), status),
   1969                                   status); /* C2+Z*C4 */
   1970                fp1 = floatx80_mul(fp1, fp3, status); /* Z*(C3+Z*C5) */
   1971                fp2 = floatx80_mul(fp2, fp0, status); /* Y*(C2+Z*C4) */
   1972                fp1 = floatx80_add(fp1, float64_to_floatx80(
   1973                                   make_float64(0xBFD5555555555536), status),
   1974                                   status); /* C1+Z*(C3+Z*C5) */
   1975                fp0 = floatx80_mul(fp0, xsave, status); /* X'*Y */
   1976                /* [Y*(C2+Z*C4)]+[C1+Z*(C3+Z*C5)] */
   1977                fp1 = floatx80_add(fp1, fp2, status);
    1978                /* X'*Y*([C1+Z*(C3+Z*C5)]+[Y*(C2+Z*C4)]) */
   1979                fp0 = floatx80_mul(fp0, fp1, status);
   1980                fp0 = floatx80_add(fp0, xsave, status);
   1981                fp1 = packFloatx80(aSign, piby2_exp, pi_sig);
   1982
   1983                status->float_rounding_mode = user_rnd_mode;
   1984                status->floatx80_rounding_precision = user_rnd_prec;
   1985
   1986                a = floatx80_add(fp0, fp1, status);
   1987
   1988                float_raise(float_flag_inexact, status);
   1989
   1990                return a;
   1991            }
   1992        } else { /* |X| < 1/16 */
   1993            if (compact < 0x3FD78000) { /* |X| < 2^(-40) */
   1994                status->float_rounding_mode = user_rnd_mode;
   1995                status->floatx80_rounding_precision = user_rnd_prec;
   1996
   1997                a = floatx80_move(a, status);
   1998
   1999                float_raise(float_flag_inexact, status);
   2000
   2001                return a;
   2002            } else {
   2003                fp0 = a;
   2004                xsave = a;
   2005                fp0 = floatx80_mul(fp0, fp0, status); /* Y = X*X */
   2006                fp1 = floatx80_mul(fp0, fp0, status); /* Z = Y*Y */
   2007                fp2 = float64_to_floatx80(make_float64(0x3FB344447F876989),
   2008                                          status); /* B6 */
   2009                fp3 = float64_to_floatx80(make_float64(0xBFB744EE7FAF45DB),
   2010                                          status); /* B5 */
   2011                fp2 = floatx80_mul(fp2, fp1, status); /* Z*B6 */
   2012                fp3 = floatx80_mul(fp3, fp1, status); /* Z*B5 */
   2013                fp2 = floatx80_add(fp2, float64_to_floatx80(
   2014                                   make_float64(0x3FBC71C646940220), status),
   2015                                   status); /* B4+Z*B6 */
   2016                fp3 = floatx80_add(fp3, float64_to_floatx80(
   2017                                   make_float64(0xBFC24924921872F9),
   2018                                   status), status); /* B3+Z*B5 */
   2019                fp2 = floatx80_mul(fp2, fp1, status); /* Z*(B4+Z*B6) */
   2020                fp1 = floatx80_mul(fp1, fp3, status); /* Z*(B3+Z*B5) */
   2021                fp2 = floatx80_add(fp2, float64_to_floatx80(
   2022                                   make_float64(0x3FC9999999998FA9), status),
   2023                                   status); /* B2+Z*(B4+Z*B6) */
   2024                fp1 = floatx80_add(fp1, float64_to_floatx80(
   2025                                   make_float64(0xBFD5555555555555), status),
   2026                                   status); /* B1+Z*(B3+Z*B5) */
   2027                fp2 = floatx80_mul(fp2, fp0, status); /* Y*(B2+Z*(B4+Z*B6)) */
   2028                fp0 = floatx80_mul(fp0, xsave, status); /* X*Y */
   2029                /* [B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))] */
   2030                fp1 = floatx80_add(fp1, fp2, status);
   2031                /* X*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]) */
   2032                fp0 = floatx80_mul(fp0, fp1, status);
   2033
   2034                status->float_rounding_mode = user_rnd_mode;
   2035                status->floatx80_rounding_precision = user_rnd_prec;
   2036
   2037                a = floatx80_add(fp0, xsave, status);
   2038
   2039                float_raise(float_flag_inexact, status);
   2040
   2041                return a;
   2042            }
   2043        }
   2044    } else {
   2045        aSig &= UINT64_C(0xF800000000000000);
   2046        aSig |= UINT64_C(0x0400000000000000);
   2047        xsave = packFloatx80(aSign, aExp, aSig); /* F */
   2048        fp0 = a;
   2049        fp1 = a; /* X */
   2050        fp2 = packFloatx80(0, one_exp, one_sig); /* 1 */
   2051        fp1 = floatx80_mul(fp1, xsave, status); /* X*F */
   2052        fp0 = floatx80_sub(fp0, xsave, status); /* X-F */
   2053        fp1 = floatx80_add(fp1, fp2, status); /* 1 + X*F */
   2054        fp0 = floatx80_div(fp0, fp1, status); /* U = (X-F)/(1+X*F) */
   2055
   2056        tbl_index = compact;
   2057
   2058        tbl_index &= 0x7FFF0000;
   2059        tbl_index -= 0x3FFB0000;
   2060        tbl_index >>= 1;
   2061        tbl_index += compact & 0x00007800;
   2062        tbl_index >>= 11;
   2063
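        /*
         * The arithmetic above packs ((aExp - 0x3FFB) << 4) together
         * with the four fraction bits of X just below the leading bit,
         * i.e. 16 entries per binade over 1/16 <= |X| < 16, 128 in all.
         * atan_tbl is indexed by this quantised F; the sign of X is
         * OR'd back into the table value below.
         */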
   2064        fp3 = atan_tbl[tbl_index];
   2065
   2066        fp3.high |= aSign ? 0x8000 : 0; /* ATAN(F) */
   2067
   2068        fp1 = floatx80_mul(fp0, fp0, status); /* V = U*U */
   2069        fp2 = float64_to_floatx80(make_float64(0xBFF6687E314987D8),
   2070                                  status); /* A3 */
   2071        fp2 = floatx80_add(fp2, fp1, status); /* A3+V */
   2072        fp2 = floatx80_mul(fp2, fp1, status); /* V*(A3+V) */
   2073        fp1 = floatx80_mul(fp1, fp0, status); /* U*V */
   2074        fp2 = floatx80_add(fp2, float64_to_floatx80(
   2075                           make_float64(0x4002AC6934A26DB3), status),
   2076                           status); /* A2+V*(A3+V) */
   2077        fp1 = floatx80_mul(fp1, float64_to_floatx80(
   2078                           make_float64(0xBFC2476F4E1DA28E), status),
    2079                           status); /* A1*U*V */
   2080        fp1 = floatx80_mul(fp1, fp2, status); /* A1*U*V*(A2+V*(A3+V)) */
   2081        fp0 = floatx80_add(fp0, fp1, status); /* ATAN(U) */
   2082
   2083        status->float_rounding_mode = user_rnd_mode;
   2084        status->floatx80_rounding_precision = user_rnd_prec;
   2085
   2086        a = floatx80_add(fp0, fp3, status); /* ATAN(X) */
   2087
   2088        float_raise(float_flag_inexact, status);
   2089
   2090        return a;
   2091    }
   2092}
   2093
   2094/*
   2095 * Arc sine
   2096 */
   2097
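/*
 * Computed through the identity asin(X) = atan(X / sqrt((1-X)*(1+X))),
 * reusing floatx80_atan; X = +/-1 returns +/-pi/2 (inexact), |X| > 1
 * is an invalid operation, and +/-0 returns itself.
 */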
   2098floatx80 floatx80_asin(floatx80 a, float_status *status)
   2099{
   2100    bool aSign;
   2101    int32_t aExp;
   2102    uint64_t aSig;
   2103
   2104    FloatRoundMode user_rnd_mode;
   2105    FloatX80RoundPrec user_rnd_prec;
   2106
   2107    int32_t compact;
   2108    floatx80 fp0, fp1, fp2, one;
   2109
   2110    aSig = extractFloatx80Frac(a);
   2111    aExp = extractFloatx80Exp(a);
   2112    aSign = extractFloatx80Sign(a);
   2113
   2114    if (aExp == 0x7FFF && (uint64_t) (aSig << 1)) {
   2115        return propagateFloatx80NaNOneArg(a, status);
   2116    }
   2117
   2118    if (aExp == 0 && aSig == 0) {
   2119        return packFloatx80(aSign, 0, 0);
   2120    }
   2121
   2122    compact = floatx80_make_compact(aExp, aSig);
   2123
   2124    if (compact >= 0x3FFF8000) { /* |X| >= 1 */
   2125        if (aExp == one_exp && aSig == one_sig) { /* |X| == 1 */
   2126            float_raise(float_flag_inexact, status);
   2127            a = packFloatx80(aSign, piby2_exp, pi_sig);
   2128            return floatx80_move(a, status);
   2129        } else { /* |X| > 1 */
   2130            float_raise(float_flag_invalid, status);
   2131            return floatx80_default_nan(status);
   2132        }
   2133
   2134    } /* |X| < 1 */
   2135
   2136    user_rnd_mode = status->float_rounding_mode;
   2137    user_rnd_prec = status->floatx80_rounding_precision;
   2138    status->float_rounding_mode = float_round_nearest_even;
   2139    status->floatx80_rounding_precision = floatx80_precision_x;
   2140
   2141    one = packFloatx80(0, one_exp, one_sig);
   2142    fp0 = a;
   2143
   2144    fp1 = floatx80_sub(one, fp0, status);   /* 1 - X */
   2145    fp2 = floatx80_add(one, fp0, status);   /* 1 + X */
   2146    fp1 = floatx80_mul(fp2, fp1, status);   /* (1+X)*(1-X) */
   2147    fp1 = floatx80_sqrt(fp1, status);       /* SQRT((1+X)*(1-X)) */
   2148    fp0 = floatx80_div(fp0, fp1, status);   /* X/SQRT((1+X)*(1-X)) */
   2149
   2150    status->float_rounding_mode = user_rnd_mode;
   2151    status->floatx80_rounding_precision = user_rnd_prec;
   2152
   2153    a = floatx80_atan(fp0, status);         /* ATAN(X/SQRT((1+X)*(1-X))) */
   2154
   2155    float_raise(float_flag_inexact, status);
   2156
   2157    return a;
   2158}
   2159
   2160/*
   2161 * Arc cosine
   2162 */
   2163
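/*
 * Computed through the identity acos(X) = 2 * atan(sqrt((1-X)/(1+X))),
 * reusing floatx80_atan; acos(+1) = +0, acos(-1) = pi, acos(0) = pi/2,
 * and |X| > 1 is an invalid operation.
 */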
   2164floatx80 floatx80_acos(floatx80 a, float_status *status)
   2165{
   2166    bool aSign;
   2167    int32_t aExp;
   2168    uint64_t aSig;
   2169
   2170    FloatRoundMode user_rnd_mode;
   2171    FloatX80RoundPrec user_rnd_prec;
   2172
   2173    int32_t compact;
   2174    floatx80 fp0, fp1, one;
   2175
   2176    aSig = extractFloatx80Frac(a);
   2177    aExp = extractFloatx80Exp(a);
   2178    aSign = extractFloatx80Sign(a);
   2179
   2180    if (aExp == 0x7FFF && (uint64_t) (aSig << 1)) {
   2181        return propagateFloatx80NaNOneArg(a, status);
   2182    }
   2183    if (aExp == 0 && aSig == 0) {
   2184        float_raise(float_flag_inexact, status);
   2185        return roundAndPackFloatx80(status->floatx80_rounding_precision, 0,
   2186                                    piby2_exp, pi_sig, 0, status);
   2187    }
   2188
   2189    compact = floatx80_make_compact(aExp, aSig);
   2190
   2191    if (compact >= 0x3FFF8000) { /* |X| >= 1 */
   2192        if (aExp == one_exp && aSig == one_sig) { /* |X| == 1 */
   2193            if (aSign) { /* X == -1 */
   2194                a = packFloatx80(0, pi_exp, pi_sig);
   2195                float_raise(float_flag_inexact, status);
   2196                return floatx80_move(a, status);
   2197            } else { /* X == +1 */
   2198                return packFloatx80(0, 0, 0);
   2199            }
   2200        } else { /* |X| > 1 */
   2201            float_raise(float_flag_invalid, status);
   2202            return floatx80_default_nan(status);
   2203        }
   2204    } /* |X| < 1 */
   2205
   2206    user_rnd_mode = status->float_rounding_mode;
   2207    user_rnd_prec = status->floatx80_rounding_precision;
   2208    status->float_rounding_mode = float_round_nearest_even;
   2209    status->floatx80_rounding_precision = floatx80_precision_x;
   2210
   2211    one = packFloatx80(0, one_exp, one_sig);
   2212    fp0 = a;
   2213
   2214    fp1 = floatx80_add(one, fp0, status);   /* 1 + X */
   2215    fp0 = floatx80_sub(one, fp0, status);   /* 1 - X */
   2216    fp0 = floatx80_div(fp0, fp1, status);   /* (1-X)/(1+X) */
   2217    fp0 = floatx80_sqrt(fp0, status);       /* SQRT((1-X)/(1+X)) */
   2218    fp0 = floatx80_atan(fp0, status);       /* ATAN(SQRT((1-X)/(1+X))) */
   2219
   2220    status->float_rounding_mode = user_rnd_mode;
   2221    status->floatx80_rounding_precision = user_rnd_prec;
   2222
   2223    a = floatx80_add(fp0, fp0, status);     /* 2 * ATAN(SQRT((1-X)/(1+X))) */
   2224
   2225    float_raise(float_flag_inexact, status);
   2226
   2227    return a;
   2228}
   2229
   2230/*
   2231 * Hyperbolic arc tangent
   2232 */
   2233
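/*
 * Computed as atanh(X) = sign(X) * (1/2) * log1p(2|X| / (1 - |X|)),
 * reusing floatx80_lognp1; |X| = 1 raises divide-by-zero and returns
 * an infinity of the same sign, |X| > 1 is an invalid operation, and
 * +/-0 returns itself.
 */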
   2234floatx80 floatx80_atanh(floatx80 a, float_status *status)
   2235{
   2236    bool aSign;
   2237    int32_t aExp;
   2238    uint64_t aSig;
   2239
   2240    FloatRoundMode user_rnd_mode;
   2241    FloatX80RoundPrec user_rnd_prec;
   2242
   2243    int32_t compact;
   2244    floatx80 fp0, fp1, fp2, one;
   2245
   2246    aSig = extractFloatx80Frac(a);
   2247    aExp = extractFloatx80Exp(a);
   2248    aSign = extractFloatx80Sign(a);
   2249
   2250    if (aExp == 0x7FFF && (uint64_t) (aSig << 1)) {
   2251        return propagateFloatx80NaNOneArg(a, status);
   2252    }
   2253
   2254    if (aExp == 0 && aSig == 0) {
   2255        return packFloatx80(aSign, 0, 0);
   2256    }
   2257
   2258    compact = floatx80_make_compact(aExp, aSig);
   2259
   2260    if (compact >= 0x3FFF8000) { /* |X| >= 1 */
   2261        if (aExp == one_exp && aSig == one_sig) { /* |X| == 1 */
   2262            float_raise(float_flag_divbyzero, status);
   2263            return packFloatx80(aSign, floatx80_infinity.high,
   2264                                floatx80_infinity.low);
   2265        } else { /* |X| > 1 */
   2266            float_raise(float_flag_invalid, status);
   2267            return floatx80_default_nan(status);
   2268        }
   2269    } /* |X| < 1 */
   2270
   2271    user_rnd_mode = status->float_rounding_mode;
   2272    user_rnd_prec = status->floatx80_rounding_precision;
   2273    status->float_rounding_mode = float_round_nearest_even;
   2274    status->floatx80_rounding_precision = floatx80_precision_x;
   2275
   2276    one = packFloatx80(0, one_exp, one_sig);
   2277    fp2 = packFloatx80(aSign, 0x3FFE, one_sig); /* SIGN(X) * (1/2) */
   2278    fp0 = packFloatx80(0, aExp, aSig); /* Y = |X| */
   2279    fp1 = packFloatx80(1, aExp, aSig); /* -Y */
   2280    fp0 = floatx80_add(fp0, fp0, status); /* 2Y */
   2281    fp1 = floatx80_add(fp1, one, status); /* 1-Y */
   2282    fp0 = floatx80_div(fp0, fp1, status); /* Z = 2Y/(1-Y) */
   2283    fp0 = floatx80_lognp1(fp0, status); /* LOG1P(Z) */
   2284
   2285    status->float_rounding_mode = user_rnd_mode;
   2286    status->floatx80_rounding_precision = user_rnd_prec;
   2287
   2288    a = floatx80_mul(fp0, fp2,
   2289                     status); /* ATANH(X) = SIGN(X) * (1/2) * LOG1P(Z) */
   2290
   2291    float_raise(float_flag_inexact, status);
   2292
   2293    return a;
   2294}
   2295
   2296/*
   2297 * e to x minus 1
   2298 */
   2299
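/*
 * For 1/4 <= |X| <= 70 log2 the FPSP-style reduction is used: with
 * N = round(64*X/log2), J = N mod 64 and M = N div 64, the reduced
 * argument R = X - N*(log2/64) gives
 *
 *   e^X - 1 = 2^M * ( 2^(J/64) * (e^R - 1) + (2^(J/64) - 2^(-M)) )
 *
 * where 2^(J/64) is exp_tbl[J] plus the exp_tbl2[J] correction and
 * e^R - 1 comes from a short polynomial; the order of the final
 * additions depends on M to limit cancellation.  |X| > 70 log2
 * saturates to -1 + 2^(-126) for negative X or falls through to
 * floatx80_etox for positive X; 2^(-65) <= |X| < 1/4 uses a direct
 * polynomial (B1..B12); smaller arguments return X plus -2^(-16382)
 * (scaled for the very smallest inputs) so that inexact and directed
 * rounding behave as expected.
 */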
   2300floatx80 floatx80_etoxm1(floatx80 a, float_status *status)
   2301{
   2302    bool aSign;
   2303    int32_t aExp;
   2304    uint64_t aSig;
   2305
   2306    FloatRoundMode user_rnd_mode;
   2307    FloatX80RoundPrec user_rnd_prec;
   2308
   2309    int32_t compact, n, j, m, m1;
   2310    floatx80 fp0, fp1, fp2, fp3, l2, sc, onebysc;
   2311
   2312    aSig = extractFloatx80Frac(a);
   2313    aExp = extractFloatx80Exp(a);
   2314    aSign = extractFloatx80Sign(a);
   2315
   2316    if (aExp == 0x7FFF) {
   2317        if ((uint64_t) (aSig << 1)) {
   2318            return propagateFloatx80NaNOneArg(a, status);
   2319        }
   2320        if (aSign) {
   2321            return packFloatx80(aSign, one_exp, one_sig);
   2322        }
   2323        return packFloatx80(0, floatx80_infinity.high,
   2324                            floatx80_infinity.low);
   2325    }
   2326
   2327    if (aExp == 0 && aSig == 0) {
   2328        return packFloatx80(aSign, 0, 0);
   2329    }
   2330
   2331    user_rnd_mode = status->float_rounding_mode;
   2332    user_rnd_prec = status->floatx80_rounding_precision;
   2333    status->float_rounding_mode = float_round_nearest_even;
   2334    status->floatx80_rounding_precision = floatx80_precision_x;
   2335
   2336    if (aExp >= 0x3FFD) { /* |X| >= 1/4 */
   2337        compact = floatx80_make_compact(aExp, aSig);
   2338
   2339        if (compact <= 0x4004C215) { /* |X| <= 70 log2 */
   2340            fp0 = a;
   2341            fp1 = a;
   2342            fp0 = floatx80_mul(fp0, float32_to_floatx80(
   2343                               make_float32(0x42B8AA3B), status),
   2344                               status); /* 64/log2 * X */
   2345            n = floatx80_to_int32(fp0, status); /* int(64/log2*X) */
   2346            fp0 = int32_to_floatx80(n, status);
   2347
   2348            j = n & 0x3F; /* J = N mod 64 */
    2349            m = n / 64; /* NOTE: meant to be an arithmetic right shift by 6 */
    2350            if (n < 0 && j) {
    2351                /*
    2352                 * an arithmetic right shift rounds towards minus infinity,
    2353                 * while C division truncates towards zero, hence the fixup
    2354                 */
   2355                m--;
   2356            }
   2357            m1 = -m;
   2358            /*m += 0x3FFF; // biased exponent of 2^(M) */
   2359            /*m1 += 0x3FFF; // biased exponent of -2^(-M) */
   2360
   2361            fp2 = fp0; /* N */
   2362            fp0 = floatx80_mul(fp0, float32_to_floatx80(
   2363                               make_float32(0xBC317218), status),
   2364                               status); /* N * L1, L1 = lead(-log2/64) */
   2365            l2 = packFloatx80(0, 0x3FDC, UINT64_C(0x82E308654361C4C6));
   2366            fp2 = floatx80_mul(fp2, l2, status); /* N * L2, L1+L2 = -log2/64 */
   2367            fp0 = floatx80_add(fp0, fp1, status); /* X + N*L1 */
   2368            fp0 = floatx80_add(fp0, fp2, status); /* R */
   2369
   2370            fp1 = floatx80_mul(fp0, fp0, status); /* S = R*R */
   2371            fp2 = float32_to_floatx80(make_float32(0x3950097B),
   2372                                      status); /* A6 */
   2373            fp2 = floatx80_mul(fp2, fp1, status); /* fp2 is S*A6 */
   2374            fp3 = floatx80_mul(float32_to_floatx80(make_float32(0x3AB60B6A),
   2375                               status), fp1, status); /* fp3 is S*A5 */
   2376            fp2 = floatx80_add(fp2, float64_to_floatx80(
   2377                               make_float64(0x3F81111111174385), status),
   2378                               status); /* fp2 IS A4+S*A6 */
   2379            fp3 = floatx80_add(fp3, float64_to_floatx80(
   2380                               make_float64(0x3FA5555555554F5A), status),
   2381                               status); /* fp3 is A3+S*A5 */
   2382            fp2 = floatx80_mul(fp2, fp1, status); /* fp2 IS S*(A4+S*A6) */
   2383            fp3 = floatx80_mul(fp3, fp1, status); /* fp3 IS S*(A3+S*A5) */
   2384            fp2 = floatx80_add(fp2, float64_to_floatx80(
   2385                               make_float64(0x3FC5555555555555), status),
   2386                               status); /* fp2 IS A2+S*(A4+S*A6) */
   2387            fp3 = floatx80_add(fp3, float32_to_floatx80(
   2388                               make_float32(0x3F000000), status),
   2389                               status); /* fp3 IS A1+S*(A3+S*A5) */
   2390            fp2 = floatx80_mul(fp2, fp1,
   2391                               status); /* fp2 IS S*(A2+S*(A4+S*A6)) */
   2392            fp1 = floatx80_mul(fp1, fp3,
   2393                               status); /* fp1 IS S*(A1+S*(A3+S*A5)) */
   2394            fp2 = floatx80_mul(fp2, fp0,
   2395                               status); /* fp2 IS R*S*(A2+S*(A4+S*A6)) */
   2396            fp0 = floatx80_add(fp0, fp1,
   2397                               status); /* fp0 IS R+S*(A1+S*(A3+S*A5)) */
   2398            fp0 = floatx80_add(fp0, fp2, status); /* fp0 IS EXP(R) - 1 */
   2399
   2400            fp0 = floatx80_mul(fp0, exp_tbl[j],
   2401                               status); /* 2^(J/64)*(Exp(R)-1) */
   2402
   2403            if (m >= 64) {
   2404                fp1 = float32_to_floatx80(exp_tbl2[j], status);
   2405                onebysc = packFloatx80(1, m1 + 0x3FFF, one_sig); /* -2^(-M) */
   2406                fp1 = floatx80_add(fp1, onebysc, status);
   2407                fp0 = floatx80_add(fp0, fp1, status);
   2408                fp0 = floatx80_add(fp0, exp_tbl[j], status);
   2409            } else if (m < -3) {
   2410                fp0 = floatx80_add(fp0, float32_to_floatx80(exp_tbl2[j],
   2411                                   status), status);
   2412                fp0 = floatx80_add(fp0, exp_tbl[j], status);
   2413                onebysc = packFloatx80(1, m1 + 0x3FFF, one_sig); /* -2^(-M) */
   2414                fp0 = floatx80_add(fp0, onebysc, status);
   2415            } else { /* -3 <= m <= 63 */
   2416                fp1 = exp_tbl[j];
   2417                fp0 = floatx80_add(fp0, float32_to_floatx80(exp_tbl2[j],
   2418                                   status), status);
   2419                onebysc = packFloatx80(1, m1 + 0x3FFF, one_sig); /* -2^(-M) */
   2420                fp1 = floatx80_add(fp1, onebysc, status);
   2421                fp0 = floatx80_add(fp0, fp1, status);
   2422            }
   2423
   2424            sc = packFloatx80(0, m + 0x3FFF, one_sig);
   2425
   2426            status->float_rounding_mode = user_rnd_mode;
   2427            status->floatx80_rounding_precision = user_rnd_prec;
   2428
   2429            a = floatx80_mul(fp0, sc, status);
   2430
   2431            float_raise(float_flag_inexact, status);
   2432
   2433            return a;
   2434        } else { /* |X| > 70 log2 */
   2435            if (aSign) {
   2436                fp0 = float32_to_floatx80(make_float32(0xBF800000),
   2437                      status); /* -1 */
   2438
   2439                status->float_rounding_mode = user_rnd_mode;
   2440                status->floatx80_rounding_precision = user_rnd_prec;
   2441
   2442                a = floatx80_add(fp0, float32_to_floatx80(
   2443                                 make_float32(0x00800000), status),
   2444                                 status); /* -1 + 2^(-126) */
   2445
   2446                float_raise(float_flag_inexact, status);
   2447
   2448                return a;
   2449            } else {
   2450                status->float_rounding_mode = user_rnd_mode;
   2451                status->floatx80_rounding_precision = user_rnd_prec;
   2452
   2453                return floatx80_etox(a, status);
   2454            }
   2455        }
   2456    } else { /* |X| < 1/4 */
   2457        if (aExp >= 0x3FBE) {
   2458            fp0 = a;
   2459            fp0 = floatx80_mul(fp0, fp0, status); /* S = X*X */
   2460            fp1 = float32_to_floatx80(make_float32(0x2F30CAA8),
   2461                                      status); /* B12 */
   2462            fp1 = floatx80_mul(fp1, fp0, status); /* S * B12 */
   2463            fp2 = float32_to_floatx80(make_float32(0x310F8290),
   2464                                      status); /* B11 */
   2465            fp1 = floatx80_add(fp1, float32_to_floatx80(
   2466                               make_float32(0x32D73220), status),
   2467                               status); /* B10 */
   2468            fp2 = floatx80_mul(fp2, fp0, status);
   2469            fp1 = floatx80_mul(fp1, fp0, status);
   2470            fp2 = floatx80_add(fp2, float32_to_floatx80(
   2471                               make_float32(0x3493F281), status),
   2472                               status); /* B9 */
   2473            fp1 = floatx80_add(fp1, float64_to_floatx80(
   2474                               make_float64(0x3EC71DE3A5774682), status),
   2475                               status); /* B8 */
   2476            fp2 = floatx80_mul(fp2, fp0, status);
   2477            fp1 = floatx80_mul(fp1, fp0, status);
   2478            fp2 = floatx80_add(fp2, float64_to_floatx80(
   2479                               make_float64(0x3EFA01A019D7CB68), status),
   2480                               status); /* B7 */
   2481            fp1 = floatx80_add(fp1, float64_to_floatx80(
   2482                               make_float64(0x3F2A01A01A019DF3), status),
   2483                               status); /* B6 */
   2484            fp2 = floatx80_mul(fp2, fp0, status);
   2485            fp1 = floatx80_mul(fp1, fp0, status);
   2486            fp2 = floatx80_add(fp2, float64_to_floatx80(
   2487                               make_float64(0x3F56C16C16C170E2), status),
   2488                               status); /* B5 */
   2489            fp1 = floatx80_add(fp1, float64_to_floatx80(
   2490                               make_float64(0x3F81111111111111), status),
   2491                               status); /* B4 */
   2492            fp2 = floatx80_mul(fp2, fp0, status);
   2493            fp1 = floatx80_mul(fp1, fp0, status);
   2494            fp2 = floatx80_add(fp2, float64_to_floatx80(
   2495                               make_float64(0x3FA5555555555555), status),
   2496                               status); /* B3 */
   2497            fp3 = packFloatx80(0, 0x3FFC, UINT64_C(0xAAAAAAAAAAAAAAAB));
   2498            fp1 = floatx80_add(fp1, fp3, status); /* B2 */
   2499            fp2 = floatx80_mul(fp2, fp0, status);
   2500            fp1 = floatx80_mul(fp1, fp0, status);
   2501
   2502            fp2 = floatx80_mul(fp2, fp0, status);
   2503            fp1 = floatx80_mul(fp1, a, status);
   2504
   2505            fp0 = floatx80_mul(fp0, float32_to_floatx80(
   2506                               make_float32(0x3F000000), status),
   2507                               status); /* S*B1 */
   2508            fp1 = floatx80_add(fp1, fp2, status); /* Q */
   2509            fp0 = floatx80_add(fp0, fp1, status); /* S*B1+Q */
   2510
   2511            status->float_rounding_mode = user_rnd_mode;
   2512            status->floatx80_rounding_precision = user_rnd_prec;
   2513
   2514            a = floatx80_add(fp0, a, status);
   2515
   2516            float_raise(float_flag_inexact, status);
   2517
   2518            return a;
   2519        } else { /* |X| < 2^(-65) */
   2520            sc = packFloatx80(1, 1, one_sig); /* SC = -2^(-16382) */
   2521            fp0 = a;
   2522
   2523            if (aExp < 0x0033) { /* |X| < 2^(-16382) */
   2524                fp0 = floatx80_mul(fp0, float64_to_floatx80(
   2525                                   make_float64(0x48B0000000000000), status), /* 2^140 */
   2526                                   status);
   2527                fp0 = floatx80_add(fp0, sc, status);
   2528
   2529                status->float_rounding_mode = user_rnd_mode;
   2530                status->floatx80_rounding_precision = user_rnd_prec;
   2531
   2532                a = floatx80_mul(fp0, float64_to_floatx80(
   2533                                 make_float64(0x3730000000000000), status), /* 2^(-140) */
   2534                                 status);
   2535            } else {
   2536                status->float_rounding_mode = user_rnd_mode;
   2537                status->floatx80_rounding_precision = user_rnd_prec;
   2538
   2539                a = floatx80_add(fp0, sc, status);
   2540            }
   2541
   2542            float_raise(float_flag_inexact, status);
   2543
   2544            return a;
   2545        }
   2546    }
   2547}
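
/*
 * A minimal double-precision sketch of the polynomial path above
 * (|X| < 1/4): the Taylor series of exp(X)-1 is summed up to degree 13,
 * with the coefficients B2..B12 split into two independently evaluated
 * polynomials in S = X*X (one feeding the odd powers of X, the other the
 * even powers) and recombined as X + S*B1 + Q.  Plain 1/n! coefficients
 * stand in for the packed constants; this illustrates the evaluation
 * scheme, not the exact rounding of the floatx80 routine above.
 */
static double etoxm1_poly_sketch(double x)
{
    /* Bn ~ 1/(n+1)!, matching B1 = 1/2, B2 = 1/6, ... above */
    static const double B[13] = {
        0.0 /* unused */, 1.0 / 2, 1.0 / 6, 1.0 / 24, 1.0 / 120,
        1.0 / 720, 1.0 / 5040, 1.0 / 40320, 1.0 / 362880, 1.0 / 3628800,
        1.0 / 39916800, 1.0 / 479001600, 1.0 / 6227020800.0
    };
    double s = x * x;
    /* one accumulator walks B12, B10, ..., B2 (odd powers of X) */
    double p1 = ((((B[12] * s + B[10]) * s + B[8]) * s + B[6]) * s
                 + B[4]) * s + B[2];
    /* the other walks B11, B9, ..., B3 (even powers of X) */
    double p2 = (((B[11] * s + B[9]) * s + B[7]) * s + B[5]) * s + B[3];
    double q = p1 * s * x + p2 * s * s;   /* Q */

    return x + B[1] * s + q;              /* X + S*B1 + Q ~ exp(X) - 1 */
}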
   2548
   2549/*
   2550 * Hyperbolic tangent
   2551 */
   2552
   2553floatx80 floatx80_tanh(floatx80 a, float_status *status)
   2554{
   2555    bool aSign, vSign;
   2556    int32_t aExp, vExp;
   2557    uint64_t aSig, vSig;
   2558
   2559    FloatRoundMode user_rnd_mode;
   2560    FloatX80RoundPrec user_rnd_prec;
   2561
   2562    int32_t compact;
   2563    floatx80 fp0, fp1;
   2564    uint32_t sign;
   2565
   2566    aSig = extractFloatx80Frac(a);
   2567    aExp = extractFloatx80Exp(a);
   2568    aSign = extractFloatx80Sign(a);
   2569
   2570    if (aExp == 0x7FFF) {
   2571        if ((uint64_t) (aSig << 1)) {
   2572            return propagateFloatx80NaNOneArg(a, status);
   2573        }
   2574        return packFloatx80(aSign, one_exp, one_sig);
   2575    }
   2576
   2577    if (aExp == 0 && aSig == 0) {
   2578        return packFloatx80(aSign, 0, 0);
   2579    }
   2580
   2581    user_rnd_mode = status->float_rounding_mode;
   2582    user_rnd_prec = status->floatx80_rounding_precision;
   2583    status->float_rounding_mode = float_round_nearest_even;
   2584    status->floatx80_rounding_precision = floatx80_precision_x;
   2585
   2586    compact = floatx80_make_compact(aExp, aSig);
   2587
   2588    if (compact < 0x3FD78000 || compact > 0x3FFFDDCE) {
   2589        /* TANHBORS */
   2590        if (compact < 0x3FFF8000) {
   2591            /* TANHSM */
   2592            status->float_rounding_mode = user_rnd_mode;
   2593            status->floatx80_rounding_precision = user_rnd_prec;
   2594
   2595            a = floatx80_move(a, status);
   2596
   2597            float_raise(float_flag_inexact, status);
   2598
   2599            return a;
   2600        } else {
   2601            if (compact > 0x40048AA1) {
   2602                /* TANHHUGE */
   2603                sign = 0x3F800000; /* +1.0 IN SINGLE PRECISION */
   2604                sign |= aSign ? 0x80000000 : 0x00000000;
   2605                fp0 = float32_to_floatx80(make_float32(sign), status);
   2606                sign &= 0x80000000;
   2607                sign ^= 0x80800000; /* -SIGN(X)*EPS */
   2608
   2609                status->float_rounding_mode = user_rnd_mode;
   2610                status->floatx80_rounding_precision = user_rnd_prec;
   2611
   2612                a = floatx80_add(fp0, float32_to_floatx80(make_float32(sign),
   2613                                 status), status);
   2614
   2615                float_raise(float_flag_inexact, status);
   2616
   2617                return a;
   2618            } else {
   2619                fp0 = packFloatx80(0, aExp + 1, aSig); /* Y = 2|X| */
   2620                fp0 = floatx80_etox(fp0, status); /* FP0 IS EXP(Y) */
   2621                fp0 = floatx80_add(fp0, float32_to_floatx80(
   2622                                   make_float32(0x3F800000),
   2623                                   status), status); /* EXP(Y)+1 */
   2624                sign = aSign ? 0x80000000 : 0x00000000;
   2625                fp1 = floatx80_div(float32_to_floatx80(make_float32(
   2626                                   sign ^ 0xC0000000), status), fp0,
   2627                                   status); /* -SIGN(X)*2 / [EXP(Y)+1] */
   2628                fp0 = float32_to_floatx80(make_float32(sign | 0x3F800000),
   2629                                          status); /* SIGN */
   2630
   2631                status->float_rounding_mode = user_rnd_mode;
   2632                status->floatx80_rounding_precision = user_rnd_prec;
   2633
   2634                a = floatx80_add(fp1, fp0, status);
   2635
   2636                float_raise(float_flag_inexact, status);
   2637
   2638                return a;
   2639            }
   2640        }
   2641    } else { /* 2**(-40) < |X| < (5/2)LOG2 */
   2642        fp0 = packFloatx80(0, aExp + 1, aSig); /* Y = 2|X| */
   2643        fp0 = floatx80_etoxm1(fp0, status); /* FP0 IS Z = EXPM1(Y) */
   2644        fp1 = floatx80_add(fp0, float32_to_floatx80(make_float32(0x40000000),
   2645                           status),
   2646                           status); /* Z+2 */
   2647
   2648        vSign = extractFloatx80Sign(fp1);
   2649        vExp = extractFloatx80Exp(fp1);
   2650        vSig = extractFloatx80Frac(fp1);
   2651
   2652        fp1 = packFloatx80(vSign ^ aSign, vExp, vSig);
   2653
   2654        status->float_rounding_mode = user_rnd_mode;
   2655        status->floatx80_rounding_precision = user_rnd_prec;
   2656
   2657        a = floatx80_div(fp0, fp1, status);
   2658
   2659        float_raise(float_flag_inexact, status);
   2660
   2661        return a;
   2662    }
   2663}
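
/*
 * A minimal double-precision sketch of the identity used by the main
 * path above: tanh(X) = z / (z + 2) with z = expm1(2|X|) and the sign of
 * X applied to the quotient.  expm1() and fabs() from <math.h> stand in
 * for the floatx80 helpers; this illustrates the formula, not a
 * replacement for floatx80_tanh().
 */
#include <math.h>

static double tanh_sketch(double x)
{
    double z = expm1(2.0 * fabs(x));   /* z = exp(2|X|) - 1 */
    double t = z / (z + 2.0);          /* tanh(|X|) */

    return x < 0.0 ? -t : t;           /* restore the sign of X */
}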
   2664
   2665/*
   2666 * Hyperbolic sine
   2667 */
   2668
   2669floatx80 floatx80_sinh(floatx80 a, float_status *status)
   2670{
   2671    bool aSign;
   2672    int32_t aExp;
   2673    uint64_t aSig;
   2674
   2675    FloatRoundMode user_rnd_mode;
   2676    FloatX80RoundPrec user_rnd_prec;
   2677
   2678    int32_t compact;
   2679    floatx80 fp0, fp1, fp2;
   2680    float32 fact;
   2681
   2682    aSig = extractFloatx80Frac(a);
   2683    aExp = extractFloatx80Exp(a);
   2684    aSign = extractFloatx80Sign(a);
   2685
   2686    if (aExp == 0x7FFF) {
   2687        if ((uint64_t) (aSig << 1)) {
   2688            return propagateFloatx80NaNOneArg(a, status);
   2689        }
   2690        return packFloatx80(aSign, floatx80_infinity.high,
   2691                            floatx80_infinity.low);
   2692    }
   2693
   2694    if (aExp == 0 && aSig == 0) {
   2695        return packFloatx80(aSign, 0, 0);
   2696    }
   2697
   2698    user_rnd_mode = status->float_rounding_mode;
   2699    user_rnd_prec = status->floatx80_rounding_precision;
   2700    status->float_rounding_mode = float_round_nearest_even;
   2701    status->floatx80_rounding_precision = floatx80_precision_x;
   2702
   2703    compact = floatx80_make_compact(aExp, aSig);
   2704
   2705    if (compact > 0x400CB167) {
   2706        /* SINHBIG */
   2707        if (compact > 0x400CB2B3) {
   2708            status->float_rounding_mode = user_rnd_mode;
   2709            status->floatx80_rounding_precision = user_rnd_prec;
   2710
   2711            return roundAndPackFloatx80(status->floatx80_rounding_precision,
   2712                                        aSign, 0x8000, aSig, 0, status);
   2713        } else {
   2714            fp0 = floatx80_abs(a); /* Y = |X| */
   2715            fp0 = floatx80_sub(fp0, float64_to_floatx80(
   2716                               make_float64(0x40C62D38D3D64634), status),
   2717                               status); /* (|X|-16381LOG2_LEAD) */
   2718            fp0 = floatx80_sub(fp0, float64_to_floatx80(
   2719                               make_float64(0x3D6F90AEB1E75CC7), status),
   2720                               status); /* |X| - 16381 LOG2, ACCURATE */
   2721            fp0 = floatx80_etox(fp0, status);
   2722            fp2 = packFloatx80(aSign, 0x7FFB, one_sig); /* SIGN(X) * 2^16380 */
   2723
   2724            status->float_rounding_mode = user_rnd_mode;
   2725            status->floatx80_rounding_precision = user_rnd_prec;
   2726
   2727            a = floatx80_mul(fp0, fp2, status);
   2728
   2729            float_raise(float_flag_inexact, status);
   2730
   2731            return a;
   2732        }
   2733    } else { /* |X| < 16380 LOG2 */
   2734        fp0 = floatx80_abs(a); /* Y = |X| */
   2735        fp0 = floatx80_etoxm1(fp0, status); /* FP0 IS Z = EXPM1(Y) */
   2736        fp1 = floatx80_add(fp0, float32_to_floatx80(make_float32(0x3F800000),
   2737                           status), status); /* 1+Z */
   2738        fp2 = fp0;
   2739        fp0 = floatx80_div(fp0, fp1, status); /* Z/(1+Z) */
   2740        fp0 = floatx80_add(fp0, fp2, status);
   2741
   2742        fact = packFloat32(aSign, 0x7E, 0); /* SIGN(X) * 1/2 */
   2743
   2744        status->float_rounding_mode = user_rnd_mode;
   2745        status->floatx80_rounding_precision = user_rnd_prec;
   2746
   2747        a = floatx80_mul(fp0, float32_to_floatx80(fact, status), status);
   2748
   2749        float_raise(float_flag_inexact, status);
   2750
   2751        return a;
   2752    }
   2753}
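
/*
 * A minimal double-precision sketch of the main path above:
 * sinh(X) = SIGN(X) * (z + z/(1+z)) / 2 with z = expm1(|X|), since
 * z + z/(1+z) = exp(|X|) - exp(-|X|).  expm1() and fabs() from <math.h>
 * stand in for the floatx80 helpers; the huge-argument rescaling by
 * 2^16380 is not reproduced here.
 */
#include <math.h>

static double sinh_sketch(double x)
{
    double z = expm1(fabs(x));              /* z = exp(|X|) - 1 */
    double s = 0.5 * (z + z / (1.0 + z));   /* (exp(|X|) - exp(-|X|)) / 2 */

    return x < 0.0 ? -s : s;                /* restore the sign of X */
}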
   2754
   2755/*
   2756 * Hyperbolic cosine
   2757 */
   2758
   2759floatx80 floatx80_cosh(floatx80 a, float_status *status)
   2760{
   2761    int32_t aExp;
   2762    uint64_t aSig;
   2763
   2764    FloatRoundMode user_rnd_mode;
   2765    FloatX80RoundPrec user_rnd_prec;
   2766
   2767    int32_t compact;
   2768    floatx80 fp0, fp1;
   2769
   2770    aSig = extractFloatx80Frac(a);
   2771    aExp = extractFloatx80Exp(a);
   2772
   2773    if (aExp == 0x7FFF) {
   2774        if ((uint64_t) (aSig << 1)) {
   2775            return propagateFloatx80NaNOneArg(a, status);
   2776        }
   2777        return packFloatx80(0, floatx80_infinity.high,
   2778                            floatx80_infinity.low);
   2779    }
   2780
   2781    if (aExp == 0 && aSig == 0) {
   2782        return packFloatx80(0, one_exp, one_sig);
   2783    }
   2784
   2785    user_rnd_mode = status->float_rounding_mode;
   2786    user_rnd_prec = status->floatx80_rounding_precision;
   2787    status->float_rounding_mode = float_round_nearest_even;
   2788    status->floatx80_rounding_precision = floatx80_precision_x;
   2789
   2790    compact = floatx80_make_compact(aExp, aSig);
   2791
   2792    if (compact > 0x400CB167) {
   2793        if (compact > 0x400CB2B3) {
   2794            status->float_rounding_mode = user_rnd_mode;
   2795            status->floatx80_rounding_precision = user_rnd_prec;
   2796            return roundAndPackFloatx80(status->floatx80_rounding_precision, 0,
   2797                                        0x8000, one_sig, 0, status);
   2798        } else {
   2799            fp0 = packFloatx80(0, aExp, aSig);
   2800            fp0 = floatx80_sub(fp0, float64_to_floatx80(
   2801                               make_float64(0x40C62D38D3D64634), status),
   2802                               status);
   2803            fp0 = floatx80_sub(fp0, float64_to_floatx80(
   2804                               make_float64(0x3D6F90AEB1E75CC7), status),
   2805                               status);
   2806            fp0 = floatx80_etox(fp0, status);
   2807            fp1 = packFloatx80(0, 0x7FFB, one_sig); /* 2^16380 */
   2808
   2809            status->float_rounding_mode = user_rnd_mode;
   2810            status->floatx80_rounding_precision = user_rnd_prec;
   2811
   2812            a = floatx80_mul(fp0, fp1, status);
   2813
   2814            float_raise(float_flag_inexact, status);
   2815
   2816            return a;
   2817        }
   2818    }
   2819
   2820    fp0 = packFloatx80(0, aExp, aSig); /* |X| */
   2821    fp0 = floatx80_etox(fp0, status); /* EXP(|X|) */
   2822    fp0 = floatx80_mul(fp0, float32_to_floatx80(make_float32(0x3F000000),
   2823                       status), status); /* (1/2)*EXP(|X|) */
   2824    fp1 = float32_to_floatx80(make_float32(0x3E800000), status); /* 1/4 */
   2825    fp1 = floatx80_div(fp1, fp0, status); /* 1/(2*EXP(|X|)) */
   2826
   2827    status->float_rounding_mode = user_rnd_mode;
   2828    status->floatx80_rounding_precision = user_rnd_prec;
   2829
   2830    a = floatx80_add(fp0, fp1, status);
   2831
   2832    float_raise(float_flag_inexact, status);
   2833
   2834    return a;
   2835}
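
/*
 * A minimal double-precision sketch of the final path above:
 * cosh(X) = exp(|X|)/2 + 1/(2*exp(|X|)), evaluated the same way the code
 * does it (dividing 1/4 by exp(|X|)/2 yields exp(-|X|)/2).  exp() and
 * fabs() from <math.h> stand in for the floatx80 helpers; the
 * huge-argument rescaling by 2^16380 is not reproduced here.
 */
#include <math.h>

static double cosh_sketch(double x)
{
    double half_exp = 0.5 * exp(fabs(x));   /* (1/2) * exp(|X|) */

    return half_exp + 0.25 / half_exp;      /* + (1/2) * exp(-|X|) */
}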