fma_emu.c - cachepc-qemu - Fork of AMDESE/qemu with changes for cachepc side-channel attack

	cachepc-qemu Fork of AMDESE/qemu with changes for cachepc side-channel attack
	git clone https://git.sinitax.com/sinitax/cachepc-qemu
	Log \| Files \| Refs \| Submodules \| LICENSE \| sfeed.txt
fma_emu.c (20140B)
      1/*
      2 *  Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
      3 *
      4 *  This program is free software; you can redistribute it and/or modify
      5 *  it under the terms of the GNU General Public License as published by
      6 *  the Free Software Foundation; either version 2 of the License, or
      7 *  (at your option) any later version.
      8 *
      9 *  This program is distributed in the hope that it will be useful,
     10 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
     11 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     12 *  GNU General Public License for more details.
     13 *
     14 *  You should have received a copy of the GNU General Public License
     15 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
     16 */
     17
     18#include "qemu/osdep.h"
     19#include "qemu/int128.h"
     20#include "fpu/softfloat.h"
     21#include "macros.h"
     22#include "fma_emu.h"
     23
     24#define DF_INF_EXP     0x7ff
     25#define DF_BIAS        1023
     26#define DF_MANTBITS    52
     27#define DF_NAN         0xffffffffffffffffULL
     28#define DF_INF         0x7ff0000000000000ULL
     29#define DF_MINUS_INF   0xfff0000000000000ULL
     30#define DF_MAXF        0x7fefffffffffffffULL
     31#define DF_MINUS_MAXF  0xffefffffffffffffULL
     32
     33#define SF_INF_EXP     0xff
     34#define SF_BIAS        127
     35#define SF_MANTBITS    23
     36#define SF_INF         0x7f800000
     37#define SF_MINUS_INF   0xff800000
     38#define SF_MAXF        0x7f7fffff
     39#define SF_MINUS_MAXF  0xff7fffff
     40
     41#define HF_INF_EXP 0x1f
     42#define HF_BIAS 15
     43
     44#define WAY_BIG_EXP 4096
     45
     46typedef union {
     47    double f;
     48    uint64_t i;
     49    struct {
     50        uint64_t mant:52;
     51        uint64_t exp:11;
     52        uint64_t sign:1;
     53    };
     54} Double;
     55
     56typedef union {
     57    float f;
     58    uint32_t i;
     59    struct {
     60        uint32_t mant:23;
     61        uint32_t exp:8;
     62        uint32_t sign:1;
     63    };
     64} Float;
     65
     66static uint64_t float64_getmant(float64 f64)
     67{
     68    Double a = { .i = f64 };
     69    if (float64_is_normal(f64)) {
     70        return a.mant | 1ULL << 52;
     71    }
     72    if (float64_is_zero(f64)) {
     73        return 0;
     74    }
     75    if (float64_is_denormal(f64)) {
     76        return a.mant;
     77    }
     78    return ~0ULL;
     79}
     80
     81int32_t float64_getexp(float64 f64)
     82{
     83    Double a = { .i = f64 };
     84    if (float64_is_normal(f64)) {
     85        return a.exp;
     86    }
     87    if (float64_is_denormal(f64)) {
     88        return a.exp + 1;
     89    }
     90    return -1;
     91}
     92
     93static uint64_t float32_getmant(float32 f32)
     94{
     95    Float a = { .i = f32 };
     96    if (float32_is_normal(f32)) {
     97        return a.mant | 1ULL << 23;
     98    }
     99    if (float32_is_zero(f32)) {
    100        return 0;
    101    }
    102    if (float32_is_denormal(f32)) {
    103        return a.mant;
    104    }
    105    return ~0ULL;
    106}
    107
    108int32_t float32_getexp(float32 f32)
    109{
    110    Float a = { .i = f32 };
    111    if (float32_is_normal(f32)) {
    112        return a.exp;
    113    }
    114    if (float32_is_denormal(f32)) {
    115        return a.exp + 1;
    116    }
    117    return -1;
    118}
    119
    120static uint32_t int128_getw0(Int128 x)
    121{
    122    return int128_getlo(x);
    123}
    124
    125static uint32_t int128_getw1(Int128 x)
    126{
    127    return int128_getlo(x) >> 32;
    128}
    129
    130static Int128 int128_mul_6464(uint64_t ai, uint64_t bi)
    131{
    132    Int128 a, b;
    133    uint64_t pp0, pp1a, pp1b, pp1s, pp2;
    134
    135    a = int128_make64(ai);
    136    b = int128_make64(bi);
    137    pp0 = (uint64_t)int128_getw0(a) * (uint64_t)int128_getw0(b);
    138    pp1a = (uint64_t)int128_getw1(a) * (uint64_t)int128_getw0(b);
    139    pp1b = (uint64_t)int128_getw1(b) * (uint64_t)int128_getw0(a);
    140    pp2 = (uint64_t)int128_getw1(a) * (uint64_t)int128_getw1(b);
    141
    142    pp1s = pp1a + pp1b;
    143    if ((pp1s < pp1a) || (pp1s < pp1b)) {
    144        pp2 += (1ULL << 32);
    145    }
    146    uint64_t ret_low = pp0 + (pp1s << 32);
    147    if ((ret_low < pp0) || (ret_low < (pp1s << 32))) {
    148        pp2 += 1;
    149    }
    150
    151    return int128_make128(ret_low, pp2 + (pp1s >> 32));
    152}
    153
    154static Int128 int128_sub_borrow(Int128 a, Int128 b, int borrow)
    155{
    156    Int128 ret = int128_sub(a, b);
    157    if (borrow != 0) {
    158        ret = int128_sub(ret, int128_one());
    159    }
    160    return ret;
    161}
    162
    163typedef struct {
    164    Int128 mant;
    165    int32_t exp;
    166    uint8_t sign;
    167    uint8_t guard;
    168    uint8_t round;
    169    uint8_t sticky;
    170} Accum;
    171
    172static void accum_init(Accum *p)
    173{
    174    p->mant = int128_zero();
    175    p->exp = 0;
    176    p->sign = 0;
    177    p->guard = 0;
    178    p->round = 0;
    179    p->sticky = 0;
    180}
    181
    182static Accum accum_norm_left(Accum a)
    183{
    184    a.exp--;
    185    a.mant = int128_lshift(a.mant, 1);
    186    a.mant = int128_or(a.mant, int128_make64(a.guard));
    187    a.guard = a.round;
    188    a.round = a.sticky;
    189    return a;
    190}
    191
    192/* This function is marked inline for performance reasons */
    193static inline Accum accum_norm_right(Accum a, int amt)
    194{
    195    if (amt > 130) {
    196        a.sticky |=
    197            a.round | a.guard | int128_nz(a.mant);
    198        a.guard = a.round = 0;
    199        a.mant = int128_zero();
    200        a.exp += amt;
    201        return a;
    202
    203    }
    204    while (amt >= 64) {
    205        a.sticky |= a.round | a.guard | (int128_getlo(a.mant) != 0);
    206        a.guard = (int128_getlo(a.mant) >> 63) & 1;
    207        a.round = (int128_getlo(a.mant) >> 62) & 1;
    208        a.mant = int128_make64(int128_gethi(a.mant));
    209        a.exp += 64;
    210        amt -= 64;
    211    }
    212    while (amt > 0) {
    213        a.exp++;
    214        a.sticky |= a.round;
    215        a.round = a.guard;
    216        a.guard = int128_getlo(a.mant) & 1;
    217        a.mant = int128_rshift(a.mant, 1);
    218        amt--;
    219    }
    220    return a;
    221}
    222
    223/*
    224 * On the add/sub, we need to be able to shift out lots of bits, but need a
    225 * sticky bit for what was shifted out, I think.
    226 */
    227static Accum accum_add(Accum a, Accum b);
    228
    229static Accum accum_sub(Accum a, Accum b, int negate)
    230{
    231    Accum ret;
    232    accum_init(&ret);
    233    int borrow;
    234
    235    if (a.sign != b.sign) {
    236        b.sign = !b.sign;
    237        return accum_add(a, b);
    238    }
    239    if (b.exp > a.exp) {
    240        /* small - big == - (big - small) */
    241        return accum_sub(b, a, !negate);
    242    }
    243    if ((b.exp == a.exp) && (int128_gt(b.mant, a.mant))) {
    244        /* small - big == - (big - small) */
    245        return accum_sub(b, a, !negate);
    246    }
    247
    248    while (a.exp > b.exp) {
    249        /* Try to normalize exponents: shrink a exponent and grow mantissa */
    250        if (int128_gethi(a.mant) & (1ULL << 62)) {
    251            /* Can't grow a any more */
    252            break;
    253        } else {
    254            a = accum_norm_left(a);
    255        }
    256    }
    257
    258    while (a.exp > b.exp) {
    259        /* Try to normalize exponents: grow b exponent and shrink mantissa */
    260        /* Keep around shifted out bits... we might need those later */
    261        b = accum_norm_right(b, a.exp - b.exp);
    262    }
    263
    264    if ((int128_gt(b.mant, a.mant))) {
    265        return accum_sub(b, a, !negate);
    266    }
    267
    268    /* OK, now things should be normalized! */
    269    ret.sign = a.sign;
    270    ret.exp = a.exp;
    271    assert(!int128_gt(b.mant, a.mant));
    272    borrow = (b.round << 2) | (b.guard << 1) | b.sticky;
    273    ret.mant = int128_sub_borrow(a.mant, b.mant, (borrow != 0));
    274    borrow = 0 - borrow;
    275    ret.guard = (borrow >> 2) & 1;
    276    ret.round = (borrow >> 1) & 1;
    277    ret.sticky = (borrow >> 0) & 1;
    278    if (negate) {
    279        ret.sign = !ret.sign;
    280    }
    281    return ret;
    282}
    283
    284static Accum accum_add(Accum a, Accum b)
    285{
    286    Accum ret;
    287    accum_init(&ret);
    288    if (a.sign != b.sign) {
    289        b.sign = !b.sign;
    290        return accum_sub(a, b, 0);
    291    }
    292    if (b.exp > a.exp) {
    293        /* small + big ==  (big + small) */
    294        return accum_add(b, a);
    295    }
    296    if ((b.exp == a.exp) && int128_gt(b.mant, a.mant)) {
    297        /* small + big ==  (big + small) */
    298        return accum_add(b, a);
    299    }
    300
    301    while (a.exp > b.exp) {
    302        /* Try to normalize exponents: shrink a exponent and grow mantissa */
    303        if (int128_gethi(a.mant) & (1ULL << 62)) {
    304            /* Can't grow a any more */
    305            break;
    306        } else {
    307            a = accum_norm_left(a);
    308        }
    309    }
    310
    311    while (a.exp > b.exp) {
    312        /* Try to normalize exponents: grow b exponent and shrink mantissa */
    313        /* Keep around shifted out bits... we might need those later */
    314        b = accum_norm_right(b, a.exp - b.exp);
    315    }
    316
    317    /* OK, now things should be normalized! */
    318    if (int128_gt(b.mant, a.mant)) {
    319        return accum_add(b, a);
    320    };
    321    ret.sign = a.sign;
    322    ret.exp = a.exp;
    323    assert(!int128_gt(b.mant, a.mant));
    324    ret.mant = int128_add(a.mant, b.mant);
    325    ret.guard = b.guard;
    326    ret.round = b.round;
    327    ret.sticky = b.sticky;
    328    return ret;
    329}
    330
    331/* Return an infinity with requested sign */
    332static float64 infinite_float64(uint8_t sign)
    333{
    334    if (sign) {
    335        return make_float64(DF_MINUS_INF);
    336    } else {
    337        return make_float64(DF_INF);
    338    }
    339}
    340
    341/* Return a maximum finite value with requested sign */
    342static float64 maxfinite_float64(uint8_t sign)
    343{
    344    if (sign) {
    345        return make_float64(DF_MINUS_MAXF);
    346    } else {
    347        return make_float64(DF_MAXF);
    348    }
    349}
    350
    351/* Return a zero value with requested sign */
    352static float64 zero_float64(uint8_t sign)
    353{
    354    if (sign) {
    355        return make_float64(0x8000000000000000);
    356    } else {
    357        return float64_zero;
    358    }
    359}
    360
    361/* Return an infinity with the requested sign */
    362float32 infinite_float32(uint8_t sign)
    363{
    364    if (sign) {
    365        return make_float32(SF_MINUS_INF);
    366    } else {
    367        return make_float32(SF_INF);
    368    }
    369}
    370
    371/* Return a maximum finite value with the requested sign */
    372static float32 maxfinite_float32(uint8_t sign)
    373{
    374    if (sign) {
    375        return make_float32(SF_MINUS_MAXF);
    376    } else {
    377        return make_float32(SF_MAXF);
    378    }
    379}
    380
    381/* Return a zero value with requested sign */
    382static float32 zero_float32(uint8_t sign)
    383{
    384    if (sign) {
    385        return make_float32(0x80000000);
    386    } else {
    387        return float32_zero;
    388    }
    389}
    390
    391#define GEN_XF_ROUND(SUFFIX, MANTBITS, INF_EXP, INTERNAL_TYPE) \
    392static SUFFIX accum_round_##SUFFIX(Accum a, float_status * fp_status) \
    393{ \
    394    if ((int128_gethi(a.mant) == 0) && (int128_getlo(a.mant) == 0) \
    395        && ((a.guard | a.round | a.sticky) == 0)) { \
    396        /* result zero */ \
    397        switch (fp_status->float_rounding_mode) { \
    398        case float_round_down: \
    399            return zero_##SUFFIX(1); \
    400        default: \
    401            return zero_##SUFFIX(0); \
    402        } \
    403    } \
    404    /* Normalize right */ \
    405    /* We want MANTBITS bits of mantissa plus the leading one. */ \
    406    /* That means that we want MANTBITS+1 bits, or 0x000000000000FF_FFFF */ \
    407    /* So we need to normalize right while the high word is non-zero and \
    408    * while the low word is nonzero when masked with 0xffe0_0000_0000_0000 */ \
    409    while ((int128_gethi(a.mant) != 0) || \
    410           ((int128_getlo(a.mant) >> (MANTBITS + 1)) != 0)) { \
    411        a = accum_norm_right(a, 1); \
    412    } \
    413    /* \
    414     * OK, now normalize left \
    415     * We want to normalize left until we have a leading one in bit 24 \
    416     * Theoretically, we only need to shift a maximum of one to the left if we \
    417     * shifted out lots of bits from B, or if we had no shift / 1 shift sticky \
    418     * shoudl be 0  \
    419     */ \
    420    while ((int128_getlo(a.mant) & (1ULL << MANTBITS)) == 0) { \
    421        a = accum_norm_left(a); \
    422    } \
    423    /* \
    424     * OK, now we might need to denormalize because of potential underflow. \
    425     * We need to do this before rounding, and rounding might make us normal \
    426     * again \
    427     */ \
    428    while (a.exp <= 0) { \
    429        a = accum_norm_right(a, 1 - a.exp); \
    430        /* \
    431         * Do we have underflow? \
    432         * That's when we get an inexact answer because we ran out of bits \
    433         * in a denormal. \
    434         */ \
    435        if (a.guard || a.round || a.sticky) { \
    436            float_raise(float_flag_underflow, fp_status); \
    437        } \
    438    } \
    439    /* OK, we're relatively canonical... now we need to round */ \
    440    if (a.guard || a.round || a.sticky) { \
    441        float_raise(float_flag_inexact, fp_status); \
    442        switch (fp_status->float_rounding_mode) { \
    443        case float_round_to_zero: \
    444            /* Chop and we're done */ \
    445            break; \
    446        case float_round_up: \
    447            if (a.sign == 0) { \
    448                a.mant = int128_add(a.mant, int128_one()); \
    449            } \
    450            break; \
    451        case float_round_down: \
    452            if (a.sign != 0) { \
    453                a.mant = int128_add(a.mant, int128_one()); \
    454            } \
    455            break; \
    456        default: \
    457            if (a.round || a.sticky) { \
    458                /* round up if guard is 1, down if guard is zero */ \
    459                a.mant = int128_add(a.mant, int128_make64(a.guard)); \
    460            } else if (a.guard) { \
    461                /* exactly .5, round up if odd */ \
    462                a.mant = int128_add(a.mant, int128_and(a.mant, int128_one())); \
    463            } \
    464            break; \
    465        } \
    466    } \
    467    /* \
    468     * OK, now we might have carried all the way up. \
    469     * So we might need to shr once \
    470     * at least we know that the lsb should be zero if we rounded and \
    471     * got a carry out... \
    472     */ \
    473    if ((int128_getlo(a.mant) >> (MANTBITS + 1)) != 0) { \
    474        a = accum_norm_right(a, 1); \
    475    } \
    476    /* Overflow? */ \
    477    if (a.exp >= INF_EXP) { \
    478        /* Yep, inf result */ \
    479        float_raise(float_flag_overflow, fp_status); \
    480        float_raise(float_flag_inexact, fp_status); \
    481        switch (fp_status->float_rounding_mode) { \
    482        case float_round_to_zero: \
    483            return maxfinite_##SUFFIX(a.sign); \
    484        case float_round_up: \
    485            if (a.sign == 0) { \
    486                return infinite_##SUFFIX(a.sign); \
    487            } else { \
    488                return maxfinite_##SUFFIX(a.sign); \
    489            } \
    490        case float_round_down: \
    491            if (a.sign != 0) { \
    492                return infinite_##SUFFIX(a.sign); \
    493            } else { \
    494                return maxfinite_##SUFFIX(a.sign); \
    495            } \
    496        default: \
    497            return infinite_##SUFFIX(a.sign); \
    498        } \
    499    } \
    500    /* Underflow? */ \
    501    if (int128_getlo(a.mant) & (1ULL << MANTBITS)) { \
    502        /* Leading one means: No, we're normal. So, we should be done... */ \
    503        INTERNAL_TYPE ret; \
    504        ret.i = 0; \
    505        ret.sign = a.sign; \
    506        ret.exp = a.exp; \
    507        ret.mant = int128_getlo(a.mant); \
    508        return ret.i; \
    509    } \
    510    assert(a.exp == 1); \
    511    INTERNAL_TYPE ret; \
    512    ret.i = 0; \
    513    ret.sign = a.sign; \
    514    ret.exp = 0; \
    515    ret.mant = int128_getlo(a.mant); \
    516    return ret.i; \
    517}
    518
    519GEN_XF_ROUND(float64, DF_MANTBITS, DF_INF_EXP, Double)
    520GEN_XF_ROUND(float32, SF_MANTBITS, SF_INF_EXP, Float)
    521
    522static bool is_inf_prod(float64 a, float64 b)
    523{
    524    return ((float64_is_infinity(a) && float64_is_infinity(b)) ||
    525            (float64_is_infinity(a) && is_finite(b) && (!float64_is_zero(b))) ||
    526            (float64_is_infinity(b) && is_finite(a) && (!float64_is_zero(a))));
    527}
    528
    529static float64 special_fma(float64 a, float64 b, float64 c,
    530                           float_status *fp_status)
    531{
    532    float64 ret = make_float64(0);
    533
    534    /*
    535     * If A multiplied by B is an exact infinity and C is also an infinity
    536     * but with the opposite sign, FMA returns NaN and raises invalid.
    537     */
    538    uint8_t a_sign = float64_is_neg(a);
    539    uint8_t b_sign = float64_is_neg(b);
    540    uint8_t c_sign = float64_is_neg(c);
    541    if (is_inf_prod(a, b) && float64_is_infinity(c)) {
    542        if ((a_sign ^ b_sign) != c_sign) {
    543            ret = make_float64(DF_NAN);
    544            float_raise(float_flag_invalid, fp_status);
    545            return ret;
    546        }
    547    }
    548    if ((float64_is_infinity(a) && float64_is_zero(b)) ||
    549        (float64_is_zero(a) && float64_is_infinity(b))) {
    550        ret = make_float64(DF_NAN);
    551        float_raise(float_flag_invalid, fp_status);
    552        return ret;
    553    }
    554    /*
    555     * If none of the above checks are true and C is a NaN,
    556     * a NaN shall be returned
    557     * If A or B are NaN, a NAN shall be returned.
    558     */
    559    if (float64_is_any_nan(a) ||
    560        float64_is_any_nan(b) ||
    561        float64_is_any_nan(c)) {
    562        if (float64_is_any_nan(a) && (fGETBIT(51, a) == 0)) {
    563            float_raise(float_flag_invalid, fp_status);
    564        }
    565        if (float64_is_any_nan(b) && (fGETBIT(51, b) == 0)) {
    566            float_raise(float_flag_invalid, fp_status);
    567        }
    568        if (float64_is_any_nan(c) && (fGETBIT(51, c) == 0)) {
    569            float_raise(float_flag_invalid, fp_status);
    570        }
    571        ret = make_float64(DF_NAN);
    572        return ret;
    573    }
    574    /*
    575     * We have checked for adding opposite-signed infinities.
    576     * Other infinities return infinity with the correct sign
    577     */
    578    if (float64_is_infinity(c)) {
    579        ret = infinite_float64(c_sign);
    580        return ret;
    581    }
    582    if (float64_is_infinity(a) || float64_is_infinity(b)) {
    583        ret = infinite_float64(a_sign ^ b_sign);
    584        return ret;
    585    }
    586    g_assert_not_reached();
    587}
    588
    589static float32 special_fmaf(float32 a, float32 b, float32 c,
    590                            float_status *fp_status)
    591{
    592    float64 aa, bb, cc;
    593    aa = float32_to_float64(a, fp_status);
    594    bb = float32_to_float64(b, fp_status);
    595    cc = float32_to_float64(c, fp_status);
    596    return float64_to_float32(special_fma(aa, bb, cc, fp_status), fp_status);
    597}
    598
    599float32 internal_fmafx(float32 a, float32 b, float32 c, int scale,
    600                       float_status *fp_status)
    601{
    602    Accum prod;
    603    Accum acc;
    604    Accum result;
    605    accum_init(&prod);
    606    accum_init(&acc);
    607    accum_init(&result);
    608
    609    uint8_t a_sign = float32_is_neg(a);
    610    uint8_t b_sign = float32_is_neg(b);
    611    uint8_t c_sign = float32_is_neg(c);
    612    if (float32_is_infinity(a) ||
    613        float32_is_infinity(b) ||
    614        float32_is_infinity(c)) {
    615        return special_fmaf(a, b, c, fp_status);
    616    }
    617    if (float32_is_any_nan(a) ||
    618        float32_is_any_nan(b) ||
    619        float32_is_any_nan(c)) {
    620        return special_fmaf(a, b, c, fp_status);
    621    }
    622    if ((scale == 0) && (float32_is_zero(a) || float32_is_zero(b))) {
    623        float32 tmp = float32_mul(a, b, fp_status);
    624        tmp = float32_add(tmp, c, fp_status);
    625        return tmp;
    626    }
    627
    628    /* (a * 2**b) * (c * 2**d) == a*c * 2**(b+d) */
    629    prod.mant = int128_mul_6464(float32_getmant(a), float32_getmant(b));
    630
    631    /*
    632     * Note: extracting the mantissa into an int is multiplying by
    633     * 2**23, so adjust here
    634     */
    635    prod.exp = float32_getexp(a) + float32_getexp(b) - SF_BIAS - 23;
    636    prod.sign = a_sign ^ b_sign;
    637    if (float32_is_zero(a) || float32_is_zero(b)) {
    638        prod.exp = -2 * WAY_BIG_EXP;
    639    }
    640    if ((scale > 0) && float32_is_denormal(c)) {
    641        acc.mant = int128_mul_6464(0, 0);
    642        acc.exp = -WAY_BIG_EXP;
    643        acc.sign = c_sign;
    644        acc.sticky = 1;
    645        result = accum_add(prod, acc);
    646    } else if (!float32_is_zero(c)) {
    647        acc.mant = int128_mul_6464(float32_getmant(c), 1);
    648        acc.exp = float32_getexp(c);
    649        acc.sign = c_sign;
    650        result = accum_add(prod, acc);
    651    } else {
    652        result = prod;
    653    }
    654    result.exp += scale;
    655    return accum_round_float32(result, fp_status);
    656}
    657
    658float32 internal_mpyf(float32 a, float32 b, float_status *fp_status)
    659{
    660    if (float32_is_zero(a) || float32_is_zero(b)) {
    661        return float32_mul(a, b, fp_status);
    662    }
    663    return internal_fmafx(a, b, float32_zero, 0, fp_status);
    664}
    665
    666float64 internal_mpyhh(float64 a, float64 b,
    667                      unsigned long long int accumulated,
    668                      float_status *fp_status)
    669{
    670    Accum x;
    671    unsigned long long int prod;
    672    unsigned int sticky;
    673    uint8_t a_sign, b_sign;
    674
    675    sticky = accumulated & 1;
    676    accumulated >>= 1;
    677    accum_init(&x);
    678    if (float64_is_zero(a) ||
    679        float64_is_any_nan(a) ||
    680        float64_is_infinity(a)) {
    681        return float64_mul(a, b, fp_status);
    682    }
    683    if (float64_is_zero(b) ||
    684        float64_is_any_nan(b) ||
    685        float64_is_infinity(b)) {
    686        return float64_mul(a, b, fp_status);
    687    }
    688    x.mant = int128_mul_6464(accumulated, 1);
    689    x.sticky = sticky;
    690    prod = fGETUWORD(1, float64_getmant(a)) * fGETUWORD(1, float64_getmant(b));
    691    x.mant = int128_add(x.mant, int128_mul_6464(prod, 0x100000000ULL));
    692    x.exp = float64_getexp(a) + float64_getexp(b) - DF_BIAS - 20;
    693    if (!float64_is_normal(a) || !float64_is_normal(b)) {
    694        /* crush to inexact zero */
    695        x.sticky = 1;
    696        x.exp = -4096;
    697    }
    698    a_sign = float64_is_neg(a);
    699    b_sign = float64_is_neg(b);
    700    x.sign = a_sign ^ b_sign;
    701    return accum_round_float64(x, fp_status);
    702}