softfloat.c - cachepc-qemu - Fork of AMDESE/qemu with changes for cachepc side-channel attack

	cachepc-qemu Fork of AMDESE/qemu with changes for cachepc side-channel attack
	git clone https://git.sinitax.com/sinitax/cachepc-qemu
	Log \| Files \| Refs \| Submodules \| LICENSE \| sfeed.txt
softfloat.c (140339B)
      1/*
      2 * QEMU float support
      3 *
      4 * The code in this source file is derived from release 2a of the SoftFloat
      5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
      6 * some later contributions) are provided under that license, as detailed below.
      7 * It has subsequently been modified by contributors to the QEMU Project,
      8 * so some portions are provided under:
      9 *  the SoftFloat-2a license
     10 *  the BSD license
     11 *  GPL-v2-or-later
     12 *
     13 * Any future contributions to this file after December 1st 2014 will be
     14 * taken to be licensed under the Softfloat-2a license unless specifically
     15 * indicated otherwise.
     16 */
     17
     18/*
     19===============================================================================
     20This C source file is part of the SoftFloat IEC/IEEE Floating-point
     21Arithmetic Package, Release 2a.
     22
     23Written by John R. Hauser.  This work was made possible in part by the
     24International Computer Science Institute, located at Suite 600, 1947 Center
     25Street, Berkeley, California 94704.  Funding was partially provided by the
     26National Science Foundation under grant MIP-9311980.  The original version
     27of this code was written as part of a project to build a fixed-point vector
     28processor in collaboration with the University of California at Berkeley,
     29overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
     30is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
     31arithmetic/SoftFloat.html'.
     32
     33THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
     34has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
     35TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
     36PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
     37AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
     38
     39Derivative works are acceptable, even for commercial purposes, so long as
     40(1) they include prominent notice that the work is derivative, and (2) they
     41include prominent notice akin to these four paragraphs for those parts of
     42this code that are retained.
     43
     44===============================================================================
     45*/
     46
     47/* BSD licensing:
     48 * Copyright (c) 2006, Fabrice Bellard
     49 * All rights reserved.
     50 *
     51 * Redistribution and use in source and binary forms, with or without
     52 * modification, are permitted provided that the following conditions are met:
     53 *
     54 * 1. Redistributions of source code must retain the above copyright notice,
     55 * this list of conditions and the following disclaimer.
     56 *
     57 * 2. Redistributions in binary form must reproduce the above copyright notice,
     58 * this list of conditions and the following disclaimer in the documentation
     59 * and/or other materials provided with the distribution.
     60 *
     61 * 3. Neither the name of the copyright holder nor the names of its contributors
     62 * may be used to endorse or promote products derived from this software without
     63 * specific prior written permission.
     64 *
     65 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     66 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     68 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
     69 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     70 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     71 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     72 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     73 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     74 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
     75 * THE POSSIBILITY OF SUCH DAMAGE.
     76 */
     77
     78/* Portions of this work are licensed under the terms of the GNU GPL,
     79 * version 2 or later. See the COPYING file in the top-level directory.
     80 */
     81
     82/* softfloat (and in particular the code in softfloat-specialize.h) is
     83 * target-dependent and needs the TARGET_* macros.
     84 */
     85#include "qemu/osdep.h"
     86#include <math.h>
     87#include "qemu/bitops.h"
     88#include "fpu/softfloat.h"
     89
     90/* We only need stdlib for abort() */
     91
     92/*----------------------------------------------------------------------------
     93| Primitive arithmetic functions, including multi-word arithmetic, and
     94| division and square root approximations.  (Can be specialized to target if
     95| desired.)
     96*----------------------------------------------------------------------------*/
     97#include "fpu/softfloat-macros.h"
     98
     99/*
    100 * Hardfloat
    101 *
    102 * Fast emulation of guest FP instructions is challenging for two reasons.
    103 * First, FP instruction semantics are similar but not identical, particularly
    104 * when handling NaNs. Second, emulating at reasonable speed the guest FP
    105 * exception flags is not trivial: reading the host's flags register with a
    106 * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
    107 * and trapping on every FP exception is not fast nor pleasant to work with.
    108 *
    109 * We address these challenges by leveraging the host FPU for a subset of the
    110 * operations. To do this we expand on the idea presented in this paper:
    111 *
    112 * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
    113 * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
    114 *
    115 * The idea is thus to leverage the host FPU to (1) compute FP operations
    116 * and (2) identify whether FP exceptions occurred while avoiding
    117 * expensive exception flag register accesses.
    118 *
    119 * An important optimization shown in the paper is that given that exception
    120 * flags are rarely cleared by the guest, we can avoid recomputing some flags.
    121 * This is particularly useful for the inexact flag, which is very frequently
    122 * raised in floating-point workloads.
    123 *
    124 * We optimize the code further by deferring to soft-fp whenever FP exception
    125 * detection might get hairy. Two examples: (1) when at least one operand is
    126 * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
    127 * and the result is < the minimum normal.
    128 */
    129#define GEN_INPUT_FLUSH__NOCHECK(name, soft_t)                          \
    130    static inline void name(soft_t *a, float_status *s)                 \
    131    {                                                                   \
    132        if (unlikely(soft_t ## _is_denormal(*a))) {                     \
    133            *a = soft_t ## _set_sign(soft_t ## _zero,                   \
    134                                     soft_t ## _is_neg(*a));            \
    135            float_raise(float_flag_input_denormal, s);                  \
    136        }                                                               \
    137    }
    138
    139GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
    140GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
    141#undef GEN_INPUT_FLUSH__NOCHECK
    142
    143#define GEN_INPUT_FLUSH1(name, soft_t)                  \
    144    static inline void name(soft_t *a, float_status *s) \
    145    {                                                   \
    146        if (likely(!s->flush_inputs_to_zero)) {         \
    147            return;                                     \
    148        }                                               \
    149        soft_t ## _input_flush__nocheck(a, s);          \
    150    }
    151
    152GEN_INPUT_FLUSH1(float32_input_flush1, float32)
    153GEN_INPUT_FLUSH1(float64_input_flush1, float64)
    154#undef GEN_INPUT_FLUSH1
    155
    156#define GEN_INPUT_FLUSH2(name, soft_t)                                  \
    157    static inline void name(soft_t *a, soft_t *b, float_status *s)      \
    158    {                                                                   \
    159        if (likely(!s->flush_inputs_to_zero)) {                         \
    160            return;                                                     \
    161        }                                                               \
    162        soft_t ## _input_flush__nocheck(a, s);                          \
    163        soft_t ## _input_flush__nocheck(b, s);                          \
    164    }
    165
    166GEN_INPUT_FLUSH2(float32_input_flush2, float32)
    167GEN_INPUT_FLUSH2(float64_input_flush2, float64)
    168#undef GEN_INPUT_FLUSH2
    169
    170#define GEN_INPUT_FLUSH3(name, soft_t)                                  \
    171    static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
    172    {                                                                   \
    173        if (likely(!s->flush_inputs_to_zero)) {                         \
    174            return;                                                     \
    175        }                                                               \
    176        soft_t ## _input_flush__nocheck(a, s);                          \
    177        soft_t ## _input_flush__nocheck(b, s);                          \
    178        soft_t ## _input_flush__nocheck(c, s);                          \
    179    }
    180
    181GEN_INPUT_FLUSH3(float32_input_flush3, float32)
    182GEN_INPUT_FLUSH3(float64_input_flush3, float64)
    183#undef GEN_INPUT_FLUSH3
    184
    185/*
    186 * Choose whether to use fpclassify or float32/64_* primitives in the generated
    187 * hardfloat functions. Each combination of number of inputs and float size
    188 * gets its own value.
    189 */
    190#if defined(__x86_64__)
    191# define QEMU_HARDFLOAT_1F32_USE_FP 0
    192# define QEMU_HARDFLOAT_1F64_USE_FP 1
    193# define QEMU_HARDFLOAT_2F32_USE_FP 0
    194# define QEMU_HARDFLOAT_2F64_USE_FP 1
    195# define QEMU_HARDFLOAT_3F32_USE_FP 0
    196# define QEMU_HARDFLOAT_3F64_USE_FP 1
    197#else
    198# define QEMU_HARDFLOAT_1F32_USE_FP 0
    199# define QEMU_HARDFLOAT_1F64_USE_FP 0
    200# define QEMU_HARDFLOAT_2F32_USE_FP 0
    201# define QEMU_HARDFLOAT_2F64_USE_FP 0
    202# define QEMU_HARDFLOAT_3F32_USE_FP 0
    203# define QEMU_HARDFLOAT_3F64_USE_FP 0
    204#endif
    205
    206/*
    207 * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
    208 * float{32,64}_is_infinity when !USE_FP.
    209 * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
    210 * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
    211 */
    212#if defined(__x86_64__) || defined(__aarch64__)
    213# define QEMU_HARDFLOAT_USE_ISINF   1
    214#else
    215# define QEMU_HARDFLOAT_USE_ISINF   0
    216#endif
    217
    218/*
    219 * Some targets clear the FP flags before most FP operations. This prevents
    220 * the use of hardfloat, since hardfloat relies on the inexact flag being
    221 * already set.
    222 */
    223#if defined(TARGET_PPC) || defined(__FAST_MATH__)
    224# if defined(__FAST_MATH__)
    225#  warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
    226    IEEE implementation
    227# endif
    228# define QEMU_NO_HARDFLOAT 1
    229# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
    230#else
    231# define QEMU_NO_HARDFLOAT 0
    232# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
    233#endif
    234
    235static inline bool can_use_fpu(const float_status *s)
    236{
    237    if (QEMU_NO_HARDFLOAT) {
    238        return false;
    239    }
    240    return likely(s->float_exception_flags & float_flag_inexact &&
    241                  s->float_rounding_mode == float_round_nearest_even);
    242}
    243
    244/*
    245 * Hardfloat generation functions. Each operation can have two flavors:
    246 * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
    247 * most condition checks, or native ones (e.g. fpclassify).
    248 *
    249 * The flavor is chosen by the callers. Instead of using macros, we rely on the
    250 * compiler to propagate constants and inline everything into the callers.
    251 *
    252 * We only generate functions for operations with two inputs, since only
    253 * these are common enough to justify consolidating them into common code.
    254 */
    255
    256typedef union {
    257    float32 s;
    258    float h;
    259} union_float32;
    260
    261typedef union {
    262    float64 s;
    263    double h;
    264} union_float64;
    265
    266typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
    267typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);
    268
    269typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
    270typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
    271typedef float   (*hard_f32_op2_fn)(float a, float b);
    272typedef double  (*hard_f64_op2_fn)(double a, double b);
    273
    274/* 2-input is-zero-or-normal */
    275static inline bool f32_is_zon2(union_float32 a, union_float32 b)
    276{
    277    if (QEMU_HARDFLOAT_2F32_USE_FP) {
    278        /*
    279         * Not using a temp variable for consecutive fpclassify calls ends up
    280         * generating faster code.
    281         */
    282        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
    283               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
    284    }
    285    return float32_is_zero_or_normal(a.s) &&
    286           float32_is_zero_or_normal(b.s);
    287}
    288
    289static inline bool f64_is_zon2(union_float64 a, union_float64 b)
    290{
    291    if (QEMU_HARDFLOAT_2F64_USE_FP) {
    292        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
    293               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
    294    }
    295    return float64_is_zero_or_normal(a.s) &&
    296           float64_is_zero_or_normal(b.s);
    297}
    298
    299/* 3-input is-zero-or-normal */
    300static inline
    301bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
    302{
    303    if (QEMU_HARDFLOAT_3F32_USE_FP) {
    304        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
    305               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
    306               (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
    307    }
    308    return float32_is_zero_or_normal(a.s) &&
    309           float32_is_zero_or_normal(b.s) &&
    310           float32_is_zero_or_normal(c.s);
    311}
    312
    313static inline
    314bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
    315{
    316    if (QEMU_HARDFLOAT_3F64_USE_FP) {
    317        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
    318               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
    319               (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
    320    }
    321    return float64_is_zero_or_normal(a.s) &&
    322           float64_is_zero_or_normal(b.s) &&
    323           float64_is_zero_or_normal(c.s);
    324}
    325
    326static inline bool f32_is_inf(union_float32 a)
    327{
    328    if (QEMU_HARDFLOAT_USE_ISINF) {
    329        return isinf(a.h);
    330    }
    331    return float32_is_infinity(a.s);
    332}
    333
    334static inline bool f64_is_inf(union_float64 a)
    335{
    336    if (QEMU_HARDFLOAT_USE_ISINF) {
    337        return isinf(a.h);
    338    }
    339    return float64_is_infinity(a.s);
    340}
    341
    342static inline float32
    343float32_gen2(float32 xa, float32 xb, float_status *s,
    344             hard_f32_op2_fn hard, soft_f32_op2_fn soft,
    345             f32_check_fn pre, f32_check_fn post)
    346{
    347    union_float32 ua, ub, ur;
    348
    349    ua.s = xa;
    350    ub.s = xb;
    351
    352    if (unlikely(!can_use_fpu(s))) {
    353        goto soft;
    354    }
    355
    356    float32_input_flush2(&ua.s, &ub.s, s);
    357    if (unlikely(!pre(ua, ub))) {
    358        goto soft;
    359    }
    360
    361    ur.h = hard(ua.h, ub.h);
    362    if (unlikely(f32_is_inf(ur))) {
    363        float_raise(float_flag_overflow, s);
    364    } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
    365        goto soft;
    366    }
    367    return ur.s;
    368
    369 soft:
    370    return soft(ua.s, ub.s, s);
    371}
    372
    373static inline float64
    374float64_gen2(float64 xa, float64 xb, float_status *s,
    375             hard_f64_op2_fn hard, soft_f64_op2_fn soft,
    376             f64_check_fn pre, f64_check_fn post)
    377{
    378    union_float64 ua, ub, ur;
    379
    380    ua.s = xa;
    381    ub.s = xb;
    382
    383    if (unlikely(!can_use_fpu(s))) {
    384        goto soft;
    385    }
    386
    387    float64_input_flush2(&ua.s, &ub.s, s);
    388    if (unlikely(!pre(ua, ub))) {
    389        goto soft;
    390    }
    391
    392    ur.h = hard(ua.h, ub.h);
    393    if (unlikely(f64_is_inf(ur))) {
    394        float_raise(float_flag_overflow, s);
    395    } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
    396        goto soft;
    397    }
    398    return ur.s;
    399
    400 soft:
    401    return soft(ua.s, ub.s, s);
    402}
    403
    404/*
    405 * Classify a floating point number. Everything above float_class_qnan
    406 * is a NaN so cls >= float_class_qnan is any NaN.
    407 */
    408
    409typedef enum __attribute__ ((__packed__)) {
    410    float_class_unclassified,
    411    float_class_zero,
    412    float_class_normal,
    413    float_class_inf,
    414    float_class_qnan,  /* all NaNs from here */
    415    float_class_snan,
    416} FloatClass;
    417
    418#define float_cmask(bit)  (1u << (bit))
    419
    420enum {
    421    float_cmask_zero    = float_cmask(float_class_zero),
    422    float_cmask_normal  = float_cmask(float_class_normal),
    423    float_cmask_inf     = float_cmask(float_class_inf),
    424    float_cmask_qnan    = float_cmask(float_class_qnan),
    425    float_cmask_snan    = float_cmask(float_class_snan),
    426
    427    float_cmask_infzero = float_cmask_zero | float_cmask_inf,
    428    float_cmask_anynan  = float_cmask_qnan | float_cmask_snan,
    429};
    430
    431/* Flags for parts_minmax. */
    432enum {
    433    /* Set for minimum; clear for maximum. */
    434    minmax_ismin = 1,
    435    /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */
    436    minmax_isnum = 2,
    437    /* Set for the IEEE 754-2008 minNumMag() and minNumMag() operations. */
    438    minmax_ismag = 4,
    439};
    440
    441/* Simple helpers for checking if, or what kind of, NaN we have */
    442static inline __attribute__((unused)) bool is_nan(FloatClass c)
    443{
    444    return unlikely(c >= float_class_qnan);
    445}
    446
    447static inline __attribute__((unused)) bool is_snan(FloatClass c)
    448{
    449    return c == float_class_snan;
    450}
    451
    452static inline __attribute__((unused)) bool is_qnan(FloatClass c)
    453{
    454    return c == float_class_qnan;
    455}
    456
    457/*
    458 * Structure holding all of the decomposed parts of a float.
    459 * The exponent is unbiased and the fraction is normalized.
    460 *
    461 * The fraction words are stored in big-endian word ordering,
    462 * so that truncation from a larger format to a smaller format
    463 * can be done simply by ignoring subsequent elements.
    464 */
    465
    466typedef struct {
    467    FloatClass cls;
    468    bool sign;
    469    int32_t exp;
    470    union {
    471        /* Routines that know the structure may reference the singular name. */
    472        uint64_t frac;
    473        /*
    474         * Routines expanded with multiple structures reference "hi" and "lo"
    475         * depending on the operation.  In FloatParts64, "hi" and "lo" are
    476         * both the same word and aliased here.
    477         */
    478        uint64_t frac_hi;
    479        uint64_t frac_lo;
    480    };
    481} FloatParts64;
    482
    483typedef struct {
    484    FloatClass cls;
    485    bool sign;
    486    int32_t exp;
    487    uint64_t frac_hi;
    488    uint64_t frac_lo;
    489} FloatParts128;
    490
    491typedef struct {
    492    FloatClass cls;
    493    bool sign;
    494    int32_t exp;
    495    uint64_t frac_hi;
    496    uint64_t frac_hm;  /* high-middle */
    497    uint64_t frac_lm;  /* low-middle */
    498    uint64_t frac_lo;
    499} FloatParts256;
    500
    501/* These apply to the most significant word of each FloatPartsN. */
    502#define DECOMPOSED_BINARY_POINT    63
    503#define DECOMPOSED_IMPLICIT_BIT    (1ull << DECOMPOSED_BINARY_POINT)
    504
    505/* Structure holding all of the relevant parameters for a format.
    506 *   exp_size: the size of the exponent field
    507 *   exp_bias: the offset applied to the exponent field
    508 *   exp_max: the maximum normalised exponent
    509 *   frac_size: the size of the fraction field
    510 *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
    511 * The following are computed based the size of fraction
    512 *   round_mask: bits below lsb which must be rounded
    513 * The following optional modifiers are available:
    514 *   arm_althp: handle ARM Alternative Half Precision
    515 */
    516typedef struct {
    517    int exp_size;
    518    int exp_bias;
    519    int exp_max;
    520    int frac_size;
    521    int frac_shift;
    522    bool arm_althp;
    523    uint64_t round_mask;
    524} FloatFmt;
    525
    526/* Expand fields based on the size of exponent and fraction */
    527#define FLOAT_PARAMS_(E)                                \
    528    .exp_size       = E,                                \
    529    .exp_bias       = ((1 << E) - 1) >> 1,              \
    530    .exp_max        = (1 << E) - 1
    531
    532#define FLOAT_PARAMS(E, F)                              \
    533    FLOAT_PARAMS_(E),                                   \
    534    .frac_size      = F,                                \
    535    .frac_shift     = (-F - 1) & 63,                    \
    536    .round_mask     = (1ull << ((-F - 1) & 63)) - 1
    537
    538static const FloatFmt float16_params = {
    539    FLOAT_PARAMS(5, 10)
    540};
    541
    542static const FloatFmt float16_params_ahp = {
    543    FLOAT_PARAMS(5, 10),
    544    .arm_althp = true
    545};
    546
    547static const FloatFmt bfloat16_params = {
    548    FLOAT_PARAMS(8, 7)
    549};
    550
    551static const FloatFmt float32_params = {
    552    FLOAT_PARAMS(8, 23)
    553};
    554
    555static const FloatFmt float64_params = {
    556    FLOAT_PARAMS(11, 52)
    557};
    558
    559static const FloatFmt float128_params = {
    560    FLOAT_PARAMS(15, 112)
    561};
    562
    563#define FLOATX80_PARAMS(R)              \
    564    FLOAT_PARAMS_(15),                  \
    565    .frac_size = R == 64 ? 63 : R,      \
    566    .frac_shift = 0,                    \
    567    .round_mask = R == 64 ? -1 : (1ull << ((-R - 1) & 63)) - 1
    568
    569static const FloatFmt floatx80_params[3] = {
    570    [floatx80_precision_s] = { FLOATX80_PARAMS(23) },
    571    [floatx80_precision_d] = { FLOATX80_PARAMS(52) },
    572    [floatx80_precision_x] = { FLOATX80_PARAMS(64) },
    573};
    574
    575/* Unpack a float to parts, but do not canonicalize.  */
    576static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
    577{
    578    const int f_size = fmt->frac_size;
    579    const int e_size = fmt->exp_size;
    580
    581    *r = (FloatParts64) {
    582        .cls = float_class_unclassified,
    583        .sign = extract64(raw, f_size + e_size, 1),
    584        .exp = extract64(raw, f_size, e_size),
    585        .frac = extract64(raw, 0, f_size)
    586    };
    587}
    588
    589static inline void float16_unpack_raw(FloatParts64 *p, float16 f)
    590{
    591    unpack_raw64(p, &float16_params, f);
    592}
    593
    594static inline void bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
    595{
    596    unpack_raw64(p, &bfloat16_params, f);
    597}
    598
    599static inline void float32_unpack_raw(FloatParts64 *p, float32 f)
    600{
    601    unpack_raw64(p, &float32_params, f);
    602}
    603
    604static inline void float64_unpack_raw(FloatParts64 *p, float64 f)
    605{
    606    unpack_raw64(p, &float64_params, f);
    607}
    608
    609static void floatx80_unpack_raw(FloatParts128 *p, floatx80 f)
    610{
    611    *p = (FloatParts128) {
    612        .cls = float_class_unclassified,
    613        .sign = extract32(f.high, 15, 1),
    614        .exp = extract32(f.high, 0, 15),
    615        .frac_hi = f.low
    616    };
    617}
    618
    619static void float128_unpack_raw(FloatParts128 *p, float128 f)
    620{
    621    const int f_size = float128_params.frac_size - 64;
    622    const int e_size = float128_params.exp_size;
    623
    624    *p = (FloatParts128) {
    625        .cls = float_class_unclassified,
    626        .sign = extract64(f.high, f_size + e_size, 1),
    627        .exp = extract64(f.high, f_size, e_size),
    628        .frac_hi = extract64(f.high, 0, f_size),
    629        .frac_lo = f.low,
    630    };
    631}
    632
    633/* Pack a float from parts, but do not canonicalize.  */
    634static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
    635{
    636    const int f_size = fmt->frac_size;
    637    const int e_size = fmt->exp_size;
    638    uint64_t ret;
    639
    640    ret = (uint64_t)p->sign << (f_size + e_size);
    641    ret = deposit64(ret, f_size, e_size, p->exp);
    642    ret = deposit64(ret, 0, f_size, p->frac);
    643    return ret;
    644}
    645
    646static inline float16 float16_pack_raw(const FloatParts64 *p)
    647{
    648    return make_float16(pack_raw64(p, &float16_params));
    649}
    650
    651static inline bfloat16 bfloat16_pack_raw(const FloatParts64 *p)
    652{
    653    return pack_raw64(p, &bfloat16_params);
    654}
    655
    656static inline float32 float32_pack_raw(const FloatParts64 *p)
    657{
    658    return make_float32(pack_raw64(p, &float32_params));
    659}
    660
    661static inline float64 float64_pack_raw(const FloatParts64 *p)
    662{
    663    return make_float64(pack_raw64(p, &float64_params));
    664}
    665
    666static float128 float128_pack_raw(const FloatParts128 *p)
    667{
    668    const int f_size = float128_params.frac_size - 64;
    669    const int e_size = float128_params.exp_size;
    670    uint64_t hi;
    671
    672    hi = (uint64_t)p->sign << (f_size + e_size);
    673    hi = deposit64(hi, f_size, e_size, p->exp);
    674    hi = deposit64(hi, 0, f_size, p->frac_hi);
    675    return make_float128(hi, p->frac_lo);
    676}
    677
    678/*----------------------------------------------------------------------------
    679| Functions and definitions to determine:  (1) whether tininess for underflow
    680| is detected before or after rounding by default, (2) what (if anything)
    681| happens when exceptions are raised, (3) how signaling NaNs are distinguished
    682| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
    683| are propagated from function inputs to output.  These details are target-
    684| specific.
    685*----------------------------------------------------------------------------*/
    686#include "softfloat-specialize.c.inc"
    687
    688#define PARTS_GENERIC_64_128(NAME, P) \
    689    _Generic((P), FloatParts64 *: parts64_##NAME, \
    690                  FloatParts128 *: parts128_##NAME)
    691
    692#define PARTS_GENERIC_64_128_256(NAME, P) \
    693    _Generic((P), FloatParts64 *: parts64_##NAME, \
    694                  FloatParts128 *: parts128_##NAME, \
    695                  FloatParts256 *: parts256_##NAME)
    696
    697#define parts_default_nan(P, S)    PARTS_GENERIC_64_128(default_nan, P)(P, S)
    698#define parts_silence_nan(P, S)    PARTS_GENERIC_64_128(silence_nan, P)(P, S)
    699
    700static void parts64_return_nan(FloatParts64 *a, float_status *s);
    701static void parts128_return_nan(FloatParts128 *a, float_status *s);
    702
    703#define parts_return_nan(P, S)     PARTS_GENERIC_64_128(return_nan, P)(P, S)
    704
    705static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
    706                                      float_status *s);
    707static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
    708                                        float_status *s);
    709
    710#define parts_pick_nan(A, B, S)    PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)
    711
    712static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
    713                                             FloatParts64 *c, float_status *s,
    714                                             int ab_mask, int abc_mask);
    715static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
    716                                               FloatParts128 *b,
    717                                               FloatParts128 *c,
    718                                               float_status *s,
    719                                               int ab_mask, int abc_mask);
    720
    721#define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
    722    PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)
    723
    724static void parts64_canonicalize(FloatParts64 *p, float_status *status,
    725                                 const FloatFmt *fmt);
    726static void parts128_canonicalize(FloatParts128 *p, float_status *status,
    727                                  const FloatFmt *fmt);
    728
    729#define parts_canonicalize(A, S, F) \
    730    PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)
    731
    732static void parts64_uncanon_normal(FloatParts64 *p, float_status *status,
    733                                   const FloatFmt *fmt);
    734static void parts128_uncanon_normal(FloatParts128 *p, float_status *status,
    735                                    const FloatFmt *fmt);
    736
    737#define parts_uncanon_normal(A, S, F) \
    738    PARTS_GENERIC_64_128(uncanon_normal, A)(A, S, F)
    739
    740static void parts64_uncanon(FloatParts64 *p, float_status *status,
    741                            const FloatFmt *fmt);
    742static void parts128_uncanon(FloatParts128 *p, float_status *status,
    743                             const FloatFmt *fmt);
    744
    745#define parts_uncanon(A, S, F) \
    746    PARTS_GENERIC_64_128(uncanon, A)(A, S, F)
    747
    748static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
    749static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
    750static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);
    751
    752#define parts_add_normal(A, B) \
    753    PARTS_GENERIC_64_128_256(add_normal, A)(A, B)
    754
    755static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
    756static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
    757static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);
    758
    759#define parts_sub_normal(A, B) \
    760    PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)
    761
    762static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
    763                                    float_status *s, bool subtract);
    764static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
    765                                      float_status *s, bool subtract);
    766
    767#define parts_addsub(A, B, S, Z) \
    768    PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)
    769
    770static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
    771                                 float_status *s);
    772static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
    773                                   float_status *s);
    774
    775#define parts_mul(A, B, S) \
    776    PARTS_GENERIC_64_128(mul, A)(A, B, S)
    777
    778static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,
    779                                    FloatParts64 *c, int flags,
    780                                    float_status *s);
    781static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,
    782                                      FloatParts128 *c, int flags,
    783                                      float_status *s);
    784
    785#define parts_muladd(A, B, C, Z, S) \
    786    PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)
    787
    788static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
    789                                 float_status *s);
    790static FloatParts128 *parts128_div(FloatParts128 *a, FloatParts128 *b,
    791                                   float_status *s);
    792
    793#define parts_div(A, B, S) \
    794    PARTS_GENERIC_64_128(div, A)(A, B, S)
    795
    796static FloatParts64 *parts64_modrem(FloatParts64 *a, FloatParts64 *b,
    797                                    uint64_t *mod_quot, float_status *s);
    798static FloatParts128 *parts128_modrem(FloatParts128 *a, FloatParts128 *b,
    799                                      uint64_t *mod_quot, float_status *s);
    800
    801#define parts_modrem(A, B, Q, S) \
    802    PARTS_GENERIC_64_128(modrem, A)(A, B, Q, S)
    803
    804static void parts64_sqrt(FloatParts64 *a, float_status *s, const FloatFmt *f);
    805static void parts128_sqrt(FloatParts128 *a, float_status *s, const FloatFmt *f);
    806
    807#define parts_sqrt(A, S, F) \
    808    PARTS_GENERIC_64_128(sqrt, A)(A, S, F)
    809
    810static bool parts64_round_to_int_normal(FloatParts64 *a, FloatRoundMode rm,
    811                                        int scale, int frac_size);
    812static bool parts128_round_to_int_normal(FloatParts128 *a, FloatRoundMode r,
    813                                         int scale, int frac_size);
    814
    815#define parts_round_to_int_normal(A, R, C, F) \
    816    PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)
    817
    818static void parts64_round_to_int(FloatParts64 *a, FloatRoundMode rm,
    819                                 int scale, float_status *s,
    820                                 const FloatFmt *fmt);
    821static void parts128_round_to_int(FloatParts128 *a, FloatRoundMode r,
    822                                  int scale, float_status *s,
    823                                  const FloatFmt *fmt);
    824
    825#define parts_round_to_int(A, R, C, S, F) \
    826    PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)
    827
    828static int64_t parts64_float_to_sint(FloatParts64 *p, FloatRoundMode rmode,
    829                                     int scale, int64_t min, int64_t max,
    830                                     float_status *s);
    831static int64_t parts128_float_to_sint(FloatParts128 *p, FloatRoundMode rmode,
    832                                     int scale, int64_t min, int64_t max,
    833                                     float_status *s);
    834
    835#define parts_float_to_sint(P, R, Z, MN, MX, S) \
    836    PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)
    837
    838static uint64_t parts64_float_to_uint(FloatParts64 *p, FloatRoundMode rmode,
    839                                      int scale, uint64_t max,
    840                                      float_status *s);
    841static uint64_t parts128_float_to_uint(FloatParts128 *p, FloatRoundMode rmode,
    842                                       int scale, uint64_t max,
    843                                       float_status *s);
    844
    845#define parts_float_to_uint(P, R, Z, M, S) \
    846    PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)
    847
    848static void parts64_sint_to_float(FloatParts64 *p, int64_t a,
    849                                  int scale, float_status *s);
    850static void parts128_sint_to_float(FloatParts128 *p, int64_t a,
    851                                   int scale, float_status *s);
    852
    853#define parts_sint_to_float(P, I, Z, S) \
    854    PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)
    855
    856static void parts64_uint_to_float(FloatParts64 *p, uint64_t a,
    857                                  int scale, float_status *s);
    858static void parts128_uint_to_float(FloatParts128 *p, uint64_t a,
    859                                   int scale, float_status *s);
    860
    861#define parts_uint_to_float(P, I, Z, S) \
    862    PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S)
    863
    864static FloatParts64 *parts64_minmax(FloatParts64 *a, FloatParts64 *b,
    865                                    float_status *s, int flags);
    866static FloatParts128 *parts128_minmax(FloatParts128 *a, FloatParts128 *b,
    867                                      float_status *s, int flags);
    868
    869#define parts_minmax(A, B, S, F) \
    870    PARTS_GENERIC_64_128(minmax, A)(A, B, S, F)
    871
    872static int parts64_compare(FloatParts64 *a, FloatParts64 *b,
    873                           float_status *s, bool q);
    874static int parts128_compare(FloatParts128 *a, FloatParts128 *b,
    875                            float_status *s, bool q);
    876
    877#define parts_compare(A, B, S, Q) \
    878    PARTS_GENERIC_64_128(compare, A)(A, B, S, Q)
    879
    880static void parts64_scalbn(FloatParts64 *a, int n, float_status *s);
    881static void parts128_scalbn(FloatParts128 *a, int n, float_status *s);
    882
    883#define parts_scalbn(A, N, S) \
    884    PARTS_GENERIC_64_128(scalbn, A)(A, N, S)
    885
    886static void parts64_log2(FloatParts64 *a, float_status *s, const FloatFmt *f);
    887static void parts128_log2(FloatParts128 *a, float_status *s, const FloatFmt *f);
    888
    889#define parts_log2(A, S, F) \
    890    PARTS_GENERIC_64_128(log2, A)(A, S, F)
    891
    892/*
    893 * Helper functions for softfloat-parts.c.inc, per-size operations.
    894 */
    895
    896#define FRAC_GENERIC_64_128(NAME, P) \
    897    _Generic((P), FloatParts64 *: frac64_##NAME, \
    898                  FloatParts128 *: frac128_##NAME)
    899
    900#define FRAC_GENERIC_64_128_256(NAME, P) \
    901    _Generic((P), FloatParts64 *: frac64_##NAME, \
    902                  FloatParts128 *: frac128_##NAME, \
    903                  FloatParts256 *: frac256_##NAME)
    904
    905static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
    906{
    907    return uadd64_overflow(a->frac, b->frac, &r->frac);
    908}
    909
    910static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
    911{
    912    bool c = 0;
    913    r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
    914    r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
    915    return c;
    916}
    917
    918static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
    919{
    920    bool c = 0;
    921    r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
    922    r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
    923    r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
    924    r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
    925    return c;
    926}
    927
    928#define frac_add(R, A, B)  FRAC_GENERIC_64_128_256(add, R)(R, A, B)
    929
    930static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
    931{
    932    return uadd64_overflow(a->frac, c, &r->frac);
    933}
    934
    935static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
    936{
    937    c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);
    938    return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
    939}
    940
    941#define frac_addi(R, A, C)  FRAC_GENERIC_64_128(addi, R)(R, A, C)
    942
    943static void frac64_allones(FloatParts64 *a)
    944{
    945    a->frac = -1;
    946}
    947
    948static void frac128_allones(FloatParts128 *a)
    949{
    950    a->frac_hi = a->frac_lo = -1;
    951}
    952
    953#define frac_allones(A)  FRAC_GENERIC_64_128(allones, A)(A)
    954
    955static int frac64_cmp(FloatParts64 *a, FloatParts64 *b)
    956{
    957    return a->frac == b->frac ? 0 : a->frac < b->frac ? -1 : 1;
    958}
    959
    960static int frac128_cmp(FloatParts128 *a, FloatParts128 *b)
    961{
    962    uint64_t ta = a->frac_hi, tb = b->frac_hi;
    963    if (ta == tb) {
    964        ta = a->frac_lo, tb = b->frac_lo;
    965        if (ta == tb) {
    966            return 0;
    967        }
    968    }
    969    return ta < tb ? -1 : 1;
    970}
    971
    972#define frac_cmp(A, B)  FRAC_GENERIC_64_128(cmp, A)(A, B)
    973
    974static void frac64_clear(FloatParts64 *a)
    975{
    976    a->frac = 0;
    977}
    978
    979static void frac128_clear(FloatParts128 *a)
    980{
    981    a->frac_hi = a->frac_lo = 0;
    982}
    983
    984#define frac_clear(A)  FRAC_GENERIC_64_128(clear, A)(A)
    985
    986static bool frac64_div(FloatParts64 *a, FloatParts64 *b)
    987{
    988    uint64_t n1, n0, r, q;
    989    bool ret;
    990
    991    /*
    992     * We want a 2*N / N-bit division to produce exactly an N-bit
    993     * result, so that we do not lose any precision and so that we
    994     * do not have to renormalize afterward.  If A.frac < B.frac,
    995     * then division would produce an (N-1)-bit result; shift A left
    996     * by one to produce the an N-bit result, and return true to
    997     * decrement the exponent to match.
    998     *
    999     * The udiv_qrnnd algorithm that we're using requires normalization,
   1000     * i.e. the msb of the denominator must be set, which is already true.
   1001     */
   1002    ret = a->frac < b->frac;
   1003    if (ret) {
   1004        n0 = a->frac;
   1005        n1 = 0;
   1006    } else {
   1007        n0 = a->frac >> 1;
   1008        n1 = a->frac << 63;
   1009    }
   1010    q = udiv_qrnnd(&r, n0, n1, b->frac);
   1011
   1012    /* Set lsb if there is a remainder, to set inexact. */
   1013    a->frac = q | (r != 0);
   1014
   1015    return ret;
   1016}
   1017
   1018static bool frac128_div(FloatParts128 *a, FloatParts128 *b)
   1019{
   1020    uint64_t q0, q1, a0, a1, b0, b1;
   1021    uint64_t r0, r1, r2, r3, t0, t1, t2, t3;
   1022    bool ret = false;
   1023
   1024    a0 = a->frac_hi, a1 = a->frac_lo;
   1025    b0 = b->frac_hi, b1 = b->frac_lo;
   1026
   1027    ret = lt128(a0, a1, b0, b1);
   1028    if (!ret) {
   1029        a1 = shr_double(a0, a1, 1);
   1030        a0 = a0 >> 1;
   1031    }
   1032
   1033    /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
   1034    q0 = estimateDiv128To64(a0, a1, b0);
   1035
   1036    /*
   1037     * Estimate is high because B1 was not included (unless B1 == 0).
   1038     * Reduce quotient and increase remainder until remainder is non-negative.
   1039     * This loop will execute 0 to 2 times.
   1040     */
   1041    mul128By64To192(b0, b1, q0, &t0, &t1, &t2);
   1042    sub192(a0, a1, 0, t0, t1, t2, &r0, &r1, &r2);
   1043    while (r0 != 0) {
   1044        q0--;
   1045        add192(r0, r1, r2, 0, b0, b1, &r0, &r1, &r2);
   1046    }
   1047
   1048    /* Repeat using the remainder, producing a second word of quotient. */
   1049    q1 = estimateDiv128To64(r1, r2, b0);
   1050    mul128By64To192(b0, b1, q1, &t1, &t2, &t3);
   1051    sub192(r1, r2, 0, t1, t2, t3, &r1, &r2, &r3);
   1052    while (r1 != 0) {
   1053        q1--;
   1054        add192(r1, r2, r3, 0, b0, b1, &r1, &r2, &r3);
   1055    }
   1056
   1057    /* Any remainder indicates inexact; set sticky bit. */
   1058    q1 |= (r2 | r3) != 0;
   1059
   1060    a->frac_hi = q0;
   1061    a->frac_lo = q1;
   1062    return ret;
   1063}
   1064
   1065#define frac_div(A, B)  FRAC_GENERIC_64_128(div, A)(A, B)
   1066
   1067static bool frac64_eqz(FloatParts64 *a)
   1068{
   1069    return a->frac == 0;
   1070}
   1071
   1072static bool frac128_eqz(FloatParts128 *a)
   1073{
   1074    return (a->frac_hi | a->frac_lo) == 0;
   1075}
   1076
   1077#define frac_eqz(A)  FRAC_GENERIC_64_128(eqz, A)(A)
   1078
   1079static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
   1080{
   1081    mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
   1082}
   1083
   1084static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
   1085{
   1086    mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
   1087                &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
   1088}
   1089
   1090#define frac_mulw(R, A, B)  FRAC_GENERIC_64_128(mulw, A)(R, A, B)
   1091
   1092static void frac64_neg(FloatParts64 *a)
   1093{
   1094    a->frac = -a->frac;
   1095}
   1096
   1097static void frac128_neg(FloatParts128 *a)
   1098{
   1099    bool c = 0;
   1100    a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
   1101    a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
   1102}
   1103
   1104static void frac256_neg(FloatParts256 *a)
   1105{
   1106    bool c = 0;
   1107    a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
   1108    a->frac_lm = usub64_borrow(0, a->frac_lm, &c);
   1109    a->frac_hm = usub64_borrow(0, a->frac_hm, &c);
   1110    a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
   1111}
   1112
   1113#define frac_neg(A)  FRAC_GENERIC_64_128_256(neg, A)(A)
   1114
   1115static int frac64_normalize(FloatParts64 *a)
   1116{
   1117    if (a->frac) {
   1118        int shift = clz64(a->frac);
   1119        a->frac <<= shift;
   1120        return shift;
   1121    }
   1122    return 64;
   1123}
   1124
   1125static int frac128_normalize(FloatParts128 *a)
   1126{
   1127    if (a->frac_hi) {
   1128        int shl = clz64(a->frac_hi);
   1129        a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl);
   1130        a->frac_lo <<= shl;
   1131        return shl;
   1132    } else if (a->frac_lo) {
   1133        int shl = clz64(a->frac_lo);
   1134        a->frac_hi = a->frac_lo << shl;
   1135        a->frac_lo = 0;
   1136        return shl + 64;
   1137    }
   1138    return 128;
   1139}
   1140
   1141static int frac256_normalize(FloatParts256 *a)
   1142{
   1143    uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
   1144    uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
   1145    int ret, shl;
   1146
   1147    if (likely(a0)) {
   1148        shl = clz64(a0);
   1149        if (shl == 0) {
   1150            return 0;
   1151        }
   1152        ret = shl;
   1153    } else {
   1154        if (a1) {
   1155            ret = 64;
   1156            a0 = a1, a1 = a2, a2 = a3, a3 = 0;
   1157        } else if (a2) {
   1158            ret = 128;
   1159            a0 = a2, a1 = a3, a2 = 0, a3 = 0;
   1160        } else if (a3) {
   1161            ret = 192;
   1162            a0 = a3, a1 = 0, a2 = 0, a3 = 0;
   1163        } else {
   1164            ret = 256;
   1165            a0 = 0, a1 = 0, a2 = 0, a3 = 0;
   1166            goto done;
   1167        }
   1168        shl = clz64(a0);
   1169        if (shl == 0) {
   1170            goto done;
   1171        }
   1172        ret += shl;
   1173    }
   1174
   1175    a0 = shl_double(a0, a1, shl);
   1176    a1 = shl_double(a1, a2, shl);
   1177    a2 = shl_double(a2, a3, shl);
   1178    a3 <<= shl;
   1179
   1180 done:
   1181    a->frac_hi = a0;
   1182    a->frac_hm = a1;
   1183    a->frac_lm = a2;
   1184    a->frac_lo = a3;
   1185    return ret;
   1186}
   1187
   1188#define frac_normalize(A)  FRAC_GENERIC_64_128_256(normalize, A)(A)
   1189
   1190static void frac64_modrem(FloatParts64 *a, FloatParts64 *b, uint64_t *mod_quot)
   1191{
   1192    uint64_t a0, a1, b0, t0, t1, q, quot;
   1193    int exp_diff = a->exp - b->exp;
   1194    int shift;
   1195
   1196    a0 = a->frac;
   1197    a1 = 0;
   1198
   1199    if (exp_diff < -1) {
   1200        if (mod_quot) {
   1201            *mod_quot = 0;
   1202        }
   1203        return;
   1204    }
   1205    if (exp_diff == -1) {
   1206        a0 >>= 1;
   1207        exp_diff = 0;
   1208    }
   1209
   1210    b0 = b->frac;
   1211    quot = q = b0 <= a0;
   1212    if (q) {
   1213        a0 -= b0;
   1214    }
   1215
   1216    exp_diff -= 64;
   1217    while (exp_diff > 0) {
   1218        q = estimateDiv128To64(a0, a1, b0);
   1219        q = q > 2 ? q - 2 : 0;
   1220        mul64To128(b0, q, &t0, &t1);
   1221        sub128(a0, a1, t0, t1, &a0, &a1);
   1222        shortShift128Left(a0, a1, 62, &a0, &a1);
   1223        exp_diff -= 62;
   1224        quot = (quot << 62) + q;
   1225    }
   1226
   1227    exp_diff += 64;
   1228    if (exp_diff > 0) {
   1229        q = estimateDiv128To64(a0, a1, b0);
   1230        q = q > 2 ? (q - 2) >> (64 - exp_diff) : 0;
   1231        mul64To128(b0, q << (64 - exp_diff), &t0, &t1);
   1232        sub128(a0, a1, t0, t1, &a0, &a1);
   1233        shortShift128Left(0, b0, 64 - exp_diff, &t0, &t1);
   1234        while (le128(t0, t1, a0, a1)) {
   1235            ++q;
   1236            sub128(a0, a1, t0, t1, &a0, &a1);
   1237        }
   1238        quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
   1239    } else {
   1240        t0 = b0;
   1241        t1 = 0;
   1242    }
   1243
   1244    if (mod_quot) {
   1245        *mod_quot = quot;
   1246    } else {
   1247        sub128(t0, t1, a0, a1, &t0, &t1);
   1248        if (lt128(t0, t1, a0, a1) ||
   1249            (eq128(t0, t1, a0, a1) && (q & 1))) {
   1250            a0 = t0;
   1251            a1 = t1;
   1252            a->sign = !a->sign;
   1253        }
   1254    }
   1255
   1256    if (likely(a0)) {
   1257        shift = clz64(a0);
   1258        shortShift128Left(a0, a1, shift, &a0, &a1);
   1259    } else if (likely(a1)) {
   1260        shift = clz64(a1);
   1261        a0 = a1 << shift;
   1262        a1 = 0;
   1263        shift += 64;
   1264    } else {
   1265        a->cls = float_class_zero;
   1266        return;
   1267    }
   1268
   1269    a->exp = b->exp + exp_diff - shift;
   1270    a->frac = a0 | (a1 != 0);
   1271}
   1272
   1273static void frac128_modrem(FloatParts128 *a, FloatParts128 *b,
   1274                           uint64_t *mod_quot)
   1275{
   1276    uint64_t a0, a1, a2, b0, b1, t0, t1, t2, q, quot;
   1277    int exp_diff = a->exp - b->exp;
   1278    int shift;
   1279
   1280    a0 = a->frac_hi;
   1281    a1 = a->frac_lo;
   1282    a2 = 0;
   1283
   1284    if (exp_diff < -1) {
   1285        if (mod_quot) {
   1286            *mod_quot = 0;
   1287        }
   1288        return;
   1289    }
   1290    if (exp_diff == -1) {
   1291        shift128Right(a0, a1, 1, &a0, &a1);
   1292        exp_diff = 0;
   1293    }
   1294
   1295    b0 = b->frac_hi;
   1296    b1 = b->frac_lo;
   1297
   1298    quot = q = le128(b0, b1, a0, a1);
   1299    if (q) {
   1300        sub128(a0, a1, b0, b1, &a0, &a1);
   1301    }
   1302
   1303    exp_diff -= 64;
   1304    while (exp_diff > 0) {
   1305        q = estimateDiv128To64(a0, a1, b0);
   1306        q = q > 4 ? q - 4 : 0;
   1307        mul128By64To192(b0, b1, q, &t0, &t1, &t2);
   1308        sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
   1309        shortShift192Left(a0, a1, a2, 61, &a0, &a1, &a2);
   1310        exp_diff -= 61;
   1311        quot = (quot << 61) + q;
   1312    }
   1313
   1314    exp_diff += 64;
   1315    if (exp_diff > 0) {
   1316        q = estimateDiv128To64(a0, a1, b0);
   1317        q = q > 4 ? (q - 4) >> (64 - exp_diff) : 0;
   1318        mul128By64To192(b0, b1, q << (64 - exp_diff), &t0, &t1, &t2);
   1319        sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
   1320        shortShift192Left(0, b0, b1, 64 - exp_diff, &t0, &t1, &t2);
   1321        while (le192(t0, t1, t2, a0, a1, a2)) {
   1322            ++q;
   1323            sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
   1324        }
   1325        quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
   1326    } else {
   1327        t0 = b0;
   1328        t1 = b1;
   1329        t2 = 0;
   1330    }
   1331
   1332    if (mod_quot) {
   1333        *mod_quot = quot;
   1334    } else {
   1335        sub192(t0, t1, t2, a0, a1, a2, &t0, &t1, &t2);
   1336        if (lt192(t0, t1, t2, a0, a1, a2) ||
   1337            (eq192(t0, t1, t2, a0, a1, a2) && (q & 1))) {
   1338            a0 = t0;
   1339            a1 = t1;
   1340            a2 = t2;
   1341            a->sign = !a->sign;
   1342        }
   1343    }
   1344
   1345    if (likely(a0)) {
   1346        shift = clz64(a0);
   1347        shortShift192Left(a0, a1, a2, shift, &a0, &a1, &a2);
   1348    } else if (likely(a1)) {
   1349        shift = clz64(a1);
   1350        shortShift128Left(a1, a2, shift, &a0, &a1);
   1351        a2 = 0;
   1352        shift += 64;
   1353    } else if (likely(a2)) {
   1354        shift = clz64(a2);
   1355        a0 = a2 << shift;
   1356        a1 = a2 = 0;
   1357        shift += 128;
   1358    } else {
   1359        a->cls = float_class_zero;
   1360        return;
   1361    }
   1362
   1363    a->exp = b->exp + exp_diff - shift;
   1364    a->frac_hi = a0;
   1365    a->frac_lo = a1 | (a2 != 0);
   1366}
   1367
   1368#define frac_modrem(A, B, Q)  FRAC_GENERIC_64_128(modrem, A)(A, B, Q)
   1369
   1370static void frac64_shl(FloatParts64 *a, int c)
   1371{
   1372    a->frac <<= c;
   1373}
   1374
   1375static void frac128_shl(FloatParts128 *a, int c)
   1376{
   1377    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
   1378
   1379    if (c & 64) {
   1380        a0 = a1, a1 = 0;
   1381    }
   1382
   1383    c &= 63;
   1384    if (c) {
   1385        a0 = shl_double(a0, a1, c);
   1386        a1 = a1 << c;
   1387    }
   1388
   1389    a->frac_hi = a0;
   1390    a->frac_lo = a1;
   1391}
   1392
   1393#define frac_shl(A, C)  FRAC_GENERIC_64_128(shl, A)(A, C)
   1394
   1395static void frac64_shr(FloatParts64 *a, int c)
   1396{
   1397    a->frac >>= c;
   1398}
   1399
   1400static void frac128_shr(FloatParts128 *a, int c)
   1401{
   1402    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
   1403
   1404    if (c & 64) {
   1405        a1 = a0, a0 = 0;
   1406    }
   1407
   1408    c &= 63;
   1409    if (c) {
   1410        a1 = shr_double(a0, a1, c);
   1411        a0 = a0 >> c;
   1412    }
   1413
   1414    a->frac_hi = a0;
   1415    a->frac_lo = a1;
   1416}
   1417
   1418#define frac_shr(A, C)  FRAC_GENERIC_64_128(shr, A)(A, C)
   1419
   1420static void frac64_shrjam(FloatParts64 *a, int c)
   1421{
   1422    uint64_t a0 = a->frac;
   1423
   1424    if (likely(c != 0)) {
   1425        if (likely(c < 64)) {
   1426            a0 = (a0 >> c) | (shr_double(a0, 0, c) != 0);
   1427        } else {
   1428            a0 = a0 != 0;
   1429        }
   1430        a->frac = a0;
   1431    }
   1432}
   1433
   1434static void frac128_shrjam(FloatParts128 *a, int c)
   1435{
   1436    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
   1437    uint64_t sticky = 0;
   1438
   1439    if (unlikely(c == 0)) {
   1440        return;
   1441    } else if (likely(c < 64)) {
   1442        /* nothing */
   1443    } else if (likely(c < 128)) {
   1444        sticky = a1;
   1445        a1 = a0;
   1446        a0 = 0;
   1447        c &= 63;
   1448        if (c == 0) {
   1449            goto done;
   1450        }
   1451    } else {
   1452        sticky = a0 | a1;
   1453        a0 = a1 = 0;
   1454        goto done;
   1455    }
   1456
   1457    sticky |= shr_double(a1, 0, c);
   1458    a1 = shr_double(a0, a1, c);
   1459    a0 = a0 >> c;
   1460
   1461 done:
   1462    a->frac_lo = a1 | (sticky != 0);
   1463    a->frac_hi = a0;
   1464}
   1465
   1466static void frac256_shrjam(FloatParts256 *a, int c)
   1467{
   1468    uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
   1469    uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
   1470    uint64_t sticky = 0;
   1471
   1472    if (unlikely(c == 0)) {
   1473        return;
   1474    } else if (likely(c < 64)) {
   1475        /* nothing */
   1476    } else if (likely(c < 256)) {
   1477        if (unlikely(c & 128)) {
   1478            sticky |= a2 | a3;
   1479            a3 = a1, a2 = a0, a1 = 0, a0 = 0;
   1480        }
   1481        if (unlikely(c & 64)) {
   1482            sticky |= a3;
   1483            a3 = a2, a2 = a1, a1 = a0, a0 = 0;
   1484        }
   1485        c &= 63;
   1486        if (c == 0) {
   1487            goto done;
   1488        }
   1489    } else {
   1490        sticky = a0 | a1 | a2 | a3;
   1491        a0 = a1 = a2 = a3 = 0;
   1492        goto done;
   1493    }
   1494
   1495    sticky |= shr_double(a3, 0, c);
   1496    a3 = shr_double(a2, a3, c);
   1497    a2 = shr_double(a1, a2, c);
   1498    a1 = shr_double(a0, a1, c);
   1499    a0 = a0 >> c;
   1500
   1501 done:
   1502    a->frac_lo = a3 | (sticky != 0);
   1503    a->frac_lm = a2;
   1504    a->frac_hm = a1;
   1505    a->frac_hi = a0;
   1506}
   1507
   1508#define frac_shrjam(A, C)  FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
   1509
   1510static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
   1511{
   1512    return usub64_overflow(a->frac, b->frac, &r->frac);
   1513}
   1514
   1515static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
   1516{
   1517    bool c = 0;
   1518    r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
   1519    r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
   1520    return c;
   1521}
   1522
   1523static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
   1524{
   1525    bool c = 0;
   1526    r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
   1527    r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);
   1528    r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);
   1529    r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
   1530    return c;
   1531}
   1532
   1533#define frac_sub(R, A, B)  FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
   1534
   1535static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
   1536{
   1537    r->frac = a->frac_hi | (a->frac_lo != 0);
   1538}
   1539
   1540static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
   1541{
   1542    r->frac_hi = a->frac_hi;
   1543    r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0);
   1544}
   1545
   1546#define frac_truncjam(R, A)  FRAC_GENERIC_64_128(truncjam, R)(R, A)
   1547
   1548static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
   1549{
   1550    r->frac_hi = a->frac;
   1551    r->frac_lo = 0;
   1552}
   1553
   1554static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
   1555{
   1556    r->frac_hi = a->frac_hi;
   1557    r->frac_hm = a->frac_lo;
   1558    r->frac_lm = 0;
   1559    r->frac_lo = 0;
   1560}
   1561
   1562#define frac_widen(A, B)  FRAC_GENERIC_64_128(widen, B)(A, B)
   1563
   1564/*
   1565 * Reciprocal sqrt table.  1 bit of exponent, 6-bits of mantessa.
   1566 * From https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt_data.c
   1567 * and thus MIT licenced.
   1568 */
   1569static const uint16_t rsqrt_tab[128] = {
   1570    0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43,
   1571    0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b,
   1572    0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1,
   1573    0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430,
   1574    0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59,
   1575    0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925,
   1576    0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479,
   1577    0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040,
   1578    0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234,
   1579    0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2,
   1580    0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1,
   1581    0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192,
   1582    0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f,
   1583    0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4,
   1584    0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59,
   1585    0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560,
   1586};
   1587
   1588#define partsN(NAME)   glue(glue(glue(parts,N),_),NAME)
   1589#define FloatPartsN    glue(FloatParts,N)
   1590#define FloatPartsW    glue(FloatParts,W)
   1591
   1592#define N 64
   1593#define W 128
   1594
   1595#include "softfloat-parts-addsub.c.inc"
   1596#include "softfloat-parts.c.inc"
   1597
   1598#undef  N
   1599#undef  W
   1600#define N 128
   1601#define W 256
   1602
   1603#include "softfloat-parts-addsub.c.inc"
   1604#include "softfloat-parts.c.inc"
   1605
   1606#undef  N
   1607#undef  W
   1608#define N            256
   1609
   1610#include "softfloat-parts-addsub.c.inc"
   1611
   1612#undef  N
   1613#undef  W
   1614#undef  partsN
   1615#undef  FloatPartsN
   1616#undef  FloatPartsW
   1617
   1618/*
   1619 * Pack/unpack routines with a specific FloatFmt.
   1620 */
   1621
   1622static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
   1623                                      float_status *s, const FloatFmt *params)
   1624{
   1625    float16_unpack_raw(p, f);
   1626    parts_canonicalize(p, s, params);
   1627}
   1628
   1629static void float16_unpack_canonical(FloatParts64 *p, float16 f,
   1630                                     float_status *s)
   1631{
   1632    float16a_unpack_canonical(p, f, s, &float16_params);
   1633}
   1634
   1635static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
   1636                                      float_status *s)
   1637{
   1638    bfloat16_unpack_raw(p, f);
   1639    parts_canonicalize(p, s, &bfloat16_params);
   1640}
   1641
   1642static float16 float16a_round_pack_canonical(FloatParts64 *p,
   1643                                             float_status *s,
   1644                                             const FloatFmt *params)
   1645{
   1646    parts_uncanon(p, s, params);
   1647    return float16_pack_raw(p);
   1648}
   1649
   1650static float16 float16_round_pack_canonical(FloatParts64 *p,
   1651                                            float_status *s)
   1652{
   1653    return float16a_round_pack_canonical(p, s, &float16_params);
   1654}
   1655
   1656static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
   1657                                              float_status *s)
   1658{
   1659    parts_uncanon(p, s, &bfloat16_params);
   1660    return bfloat16_pack_raw(p);
   1661}
   1662
   1663static void float32_unpack_canonical(FloatParts64 *p, float32 f,
   1664                                     float_status *s)
   1665{
   1666    float32_unpack_raw(p, f);
   1667    parts_canonicalize(p, s, &float32_params);
   1668}
   1669
   1670static float32 float32_round_pack_canonical(FloatParts64 *p,
   1671                                            float_status *s)
   1672{
   1673    parts_uncanon(p, s, &float32_params);
   1674    return float32_pack_raw(p);
   1675}
   1676
   1677static void float64_unpack_canonical(FloatParts64 *p, float64 f,
   1678                                     float_status *s)
   1679{
   1680    float64_unpack_raw(p, f);
   1681    parts_canonicalize(p, s, &float64_params);
   1682}
   1683
   1684static float64 float64_round_pack_canonical(FloatParts64 *p,
   1685                                            float_status *s)
   1686{
   1687    parts_uncanon(p, s, &float64_params);
   1688    return float64_pack_raw(p);
   1689}
   1690
   1691static void float128_unpack_canonical(FloatParts128 *p, float128 f,
   1692                                      float_status *s)
   1693{
   1694    float128_unpack_raw(p, f);
   1695    parts_canonicalize(p, s, &float128_params);
   1696}
   1697
   1698static float128 float128_round_pack_canonical(FloatParts128 *p,
   1699                                              float_status *s)
   1700{
   1701    parts_uncanon(p, s, &float128_params);
   1702    return float128_pack_raw(p);
   1703}
   1704
   1705/* Returns false if the encoding is invalid. */
   1706static bool floatx80_unpack_canonical(FloatParts128 *p, floatx80 f,
   1707                                      float_status *s)
   1708{
   1709    /* Ensure rounding precision is set before beginning. */
   1710    switch (s->floatx80_rounding_precision) {
   1711    case floatx80_precision_x:
   1712    case floatx80_precision_d:
   1713    case floatx80_precision_s:
   1714        break;
   1715    default:
   1716        g_assert_not_reached();
   1717    }
   1718
   1719    if (unlikely(floatx80_invalid_encoding(f))) {
   1720        float_raise(float_flag_invalid, s);
   1721        return false;
   1722    }
   1723
   1724    floatx80_unpack_raw(p, f);
   1725
   1726    if (likely(p->exp != floatx80_params[floatx80_precision_x].exp_max)) {
   1727        parts_canonicalize(p, s, &floatx80_params[floatx80_precision_x]);
   1728    } else {
   1729        /* The explicit integer bit is ignored, after invalid checks. */
   1730        p->frac_hi &= MAKE_64BIT_MASK(0, 63);
   1731        p->cls = (p->frac_hi == 0 ? float_class_inf
   1732                  : parts_is_snan_frac(p->frac_hi, s)
   1733                  ? float_class_snan : float_class_qnan);
   1734    }
   1735    return true;
   1736}
   1737
   1738static floatx80 floatx80_round_pack_canonical(FloatParts128 *p,
   1739                                              float_status *s)
   1740{
   1741    const FloatFmt *fmt = &floatx80_params[s->floatx80_rounding_precision];
   1742    uint64_t frac;
   1743    int exp;
   1744
   1745    switch (p->cls) {
   1746    case float_class_normal:
   1747        if (s->floatx80_rounding_precision == floatx80_precision_x) {
   1748            parts_uncanon_normal(p, s, fmt);
   1749            frac = p->frac_hi;
   1750            exp = p->exp;
   1751        } else {
   1752            FloatParts64 p64;
   1753
   1754            p64.sign = p->sign;
   1755            p64.exp = p->exp;
   1756            frac_truncjam(&p64, p);
   1757            parts_uncanon_normal(&p64, s, fmt);
   1758            frac = p64.frac;
   1759            exp = p64.exp;
   1760        }
   1761        if (exp != fmt->exp_max) {
   1762            break;
   1763        }
   1764        /* rounded to inf -- fall through to set frac correctly */
   1765
   1766    case float_class_inf:
   1767        /* x86 and m68k differ in the setting of the integer bit. */
   1768        frac = floatx80_infinity_low;
   1769        exp = fmt->exp_max;
   1770        break;
   1771
   1772    case float_class_zero:
   1773        frac = 0;
   1774        exp = 0;
   1775        break;
   1776
   1777    case float_class_snan:
   1778    case float_class_qnan:
   1779        /* NaNs have the integer bit set. */
   1780        frac = p->frac_hi | (1ull << 63);
   1781        exp = fmt->exp_max;
   1782        break;
   1783
   1784    default:
   1785        g_assert_not_reached();
   1786    }
   1787
   1788    return packFloatx80(p->sign, exp, frac);
   1789}
   1790
   1791/*
   1792 * Addition and subtraction
   1793 */
   1794
   1795static float16 QEMU_FLATTEN
   1796float16_addsub(float16 a, float16 b, float_status *status, bool subtract)
   1797{
   1798    FloatParts64 pa, pb, *pr;
   1799
   1800    float16_unpack_canonical(&pa, a, status);
   1801    float16_unpack_canonical(&pb, b, status);
   1802    pr = parts_addsub(&pa, &pb, status, subtract);
   1803
   1804    return float16_round_pack_canonical(pr, status);
   1805}
   1806
   1807float16 float16_add(float16 a, float16 b, float_status *status)
   1808{
   1809    return float16_addsub(a, b, status, false);
   1810}
   1811
   1812float16 float16_sub(float16 a, float16 b, float_status *status)
   1813{
   1814    return float16_addsub(a, b, status, true);
   1815}
   1816
   1817static float32 QEMU_SOFTFLOAT_ATTR
   1818soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract)
   1819{
   1820    FloatParts64 pa, pb, *pr;
   1821
   1822    float32_unpack_canonical(&pa, a, status);
   1823    float32_unpack_canonical(&pb, b, status);
   1824    pr = parts_addsub(&pa, &pb, status, subtract);
   1825
   1826    return float32_round_pack_canonical(pr, status);
   1827}
   1828
   1829static float32 soft_f32_add(float32 a, float32 b, float_status *status)
   1830{
   1831    return soft_f32_addsub(a, b, status, false);
   1832}
   1833
   1834static float32 soft_f32_sub(float32 a, float32 b, float_status *status)
   1835{
   1836    return soft_f32_addsub(a, b, status, true);
   1837}
   1838
   1839static float64 QEMU_SOFTFLOAT_ATTR
   1840soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract)
   1841{
   1842    FloatParts64 pa, pb, *pr;
   1843
   1844    float64_unpack_canonical(&pa, a, status);
   1845    float64_unpack_canonical(&pb, b, status);
   1846    pr = parts_addsub(&pa, &pb, status, subtract);
   1847
   1848    return float64_round_pack_canonical(pr, status);
   1849}
   1850
   1851static float64 soft_f64_add(float64 a, float64 b, float_status *status)
   1852{
   1853    return soft_f64_addsub(a, b, status, false);
   1854}
   1855
   1856static float64 soft_f64_sub(float64 a, float64 b, float_status *status)
   1857{
   1858    return soft_f64_addsub(a, b, status, true);
   1859}
   1860
   1861static float hard_f32_add(float a, float b)
   1862{
   1863    return a + b;
   1864}
   1865
   1866static float hard_f32_sub(float a, float b)
   1867{
   1868    return a - b;
   1869}
   1870
   1871static double hard_f64_add(double a, double b)
   1872{
   1873    return a + b;
   1874}
   1875
   1876static double hard_f64_sub(double a, double b)
   1877{
   1878    return a - b;
   1879}
   1880
   1881static bool f32_addsubmul_post(union_float32 a, union_float32 b)
   1882{
   1883    if (QEMU_HARDFLOAT_2F32_USE_FP) {
   1884        return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
   1885    }
   1886    return !(float32_is_zero(a.s) && float32_is_zero(b.s));
   1887}
   1888
   1889static bool f64_addsubmul_post(union_float64 a, union_float64 b)
   1890{
   1891    if (QEMU_HARDFLOAT_2F64_USE_FP) {
   1892        return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
   1893    } else {
   1894        return !(float64_is_zero(a.s) && float64_is_zero(b.s));
   1895    }
   1896}
   1897
   1898static float32 float32_addsub(float32 a, float32 b, float_status *s,
   1899                              hard_f32_op2_fn hard, soft_f32_op2_fn soft)
   1900{
   1901    return float32_gen2(a, b, s, hard, soft,
   1902                        f32_is_zon2, f32_addsubmul_post);
   1903}
   1904
   1905static float64 float64_addsub(float64 a, float64 b, float_status *s,
   1906                              hard_f64_op2_fn hard, soft_f64_op2_fn soft)
   1907{
   1908    return float64_gen2(a, b, s, hard, soft,
   1909                        f64_is_zon2, f64_addsubmul_post);
   1910}
   1911
   1912float32 QEMU_FLATTEN
   1913float32_add(float32 a, float32 b, float_status *s)
   1914{
   1915    return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
   1916}
   1917
   1918float32 QEMU_FLATTEN
   1919float32_sub(float32 a, float32 b, float_status *s)
   1920{
   1921    return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
   1922}
   1923
   1924float64 QEMU_FLATTEN
   1925float64_add(float64 a, float64 b, float_status *s)
   1926{
   1927    return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
   1928}
   1929
   1930float64 QEMU_FLATTEN
   1931float64_sub(float64 a, float64 b, float_status *s)
   1932{
   1933    return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
   1934}
   1935
   1936static bfloat16 QEMU_FLATTEN
   1937bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)
   1938{
   1939    FloatParts64 pa, pb, *pr;
   1940
   1941    bfloat16_unpack_canonical(&pa, a, status);
   1942    bfloat16_unpack_canonical(&pb, b, status);
   1943    pr = parts_addsub(&pa, &pb, status, subtract);
   1944
   1945    return bfloat16_round_pack_canonical(pr, status);
   1946}
   1947
   1948bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
   1949{
   1950    return bfloat16_addsub(a, b, status, false);
   1951}
   1952
   1953bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
   1954{
   1955    return bfloat16_addsub(a, b, status, true);
   1956}
   1957
   1958static float128 QEMU_FLATTEN
   1959float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
   1960{
   1961    FloatParts128 pa, pb, *pr;
   1962
   1963    float128_unpack_canonical(&pa, a, status);
   1964    float128_unpack_canonical(&pb, b, status);
   1965    pr = parts_addsub(&pa, &pb, status, subtract);
   1966
   1967    return float128_round_pack_canonical(pr, status);
   1968}
   1969
   1970float128 float128_add(float128 a, float128 b, float_status *status)
   1971{
   1972    return float128_addsub(a, b, status, false);
   1973}
   1974
   1975float128 float128_sub(float128 a, float128 b, float_status *status)
   1976{
   1977    return float128_addsub(a, b, status, true);
   1978}
   1979
   1980static floatx80 QEMU_FLATTEN
   1981floatx80_addsub(floatx80 a, floatx80 b, float_status *status, bool subtract)
   1982{
   1983    FloatParts128 pa, pb, *pr;
   1984
   1985    if (!floatx80_unpack_canonical(&pa, a, status) ||
   1986        !floatx80_unpack_canonical(&pb, b, status)) {
   1987        return floatx80_default_nan(status);
   1988    }
   1989
   1990    pr = parts_addsub(&pa, &pb, status, subtract);
   1991    return floatx80_round_pack_canonical(pr, status);
   1992}
   1993
   1994floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
   1995{
   1996    return floatx80_addsub(a, b, status, false);
   1997}
   1998
   1999floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
   2000{
   2001    return floatx80_addsub(a, b, status, true);
   2002}
   2003
   2004/*
   2005 * Multiplication
   2006 */
   2007
   2008float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
   2009{
   2010    FloatParts64 pa, pb, *pr;
   2011
   2012    float16_unpack_canonical(&pa, a, status);
   2013    float16_unpack_canonical(&pb, b, status);
   2014    pr = parts_mul(&pa, &pb, status);
   2015
   2016    return float16_round_pack_canonical(pr, status);
   2017}
   2018
   2019static float32 QEMU_SOFTFLOAT_ATTR
   2020soft_f32_mul(float32 a, float32 b, float_status *status)
   2021{
   2022    FloatParts64 pa, pb, *pr;
   2023
   2024    float32_unpack_canonical(&pa, a, status);
   2025    float32_unpack_canonical(&pb, b, status);
   2026    pr = parts_mul(&pa, &pb, status);
   2027
   2028    return float32_round_pack_canonical(pr, status);
   2029}
   2030
   2031static float64 QEMU_SOFTFLOAT_ATTR
   2032soft_f64_mul(float64 a, float64 b, float_status *status)
   2033{
   2034    FloatParts64 pa, pb, *pr;
   2035
   2036    float64_unpack_canonical(&pa, a, status);
   2037    float64_unpack_canonical(&pb, b, status);
   2038    pr = parts_mul(&pa, &pb, status);
   2039
   2040    return float64_round_pack_canonical(pr, status);
   2041}
   2042
   2043static float hard_f32_mul(float a, float b)
   2044{
   2045    return a * b;
   2046}
   2047
   2048static double hard_f64_mul(double a, double b)
   2049{
   2050    return a * b;
   2051}
   2052
   2053float32 QEMU_FLATTEN
   2054float32_mul(float32 a, float32 b, float_status *s)
   2055{
   2056    return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
   2057                        f32_is_zon2, f32_addsubmul_post);
   2058}
   2059
   2060float64 QEMU_FLATTEN
   2061float64_mul(float64 a, float64 b, float_status *s)
   2062{
   2063    return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
   2064                        f64_is_zon2, f64_addsubmul_post);
   2065}
   2066
   2067bfloat16 QEMU_FLATTEN
   2068bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
   2069{
   2070    FloatParts64 pa, pb, *pr;
   2071
   2072    bfloat16_unpack_canonical(&pa, a, status);
   2073    bfloat16_unpack_canonical(&pb, b, status);
   2074    pr = parts_mul(&pa, &pb, status);
   2075
   2076    return bfloat16_round_pack_canonical(pr, status);
   2077}
   2078
   2079float128 QEMU_FLATTEN
   2080float128_mul(float128 a, float128 b, float_status *status)
   2081{
   2082    FloatParts128 pa, pb, *pr;
   2083
   2084    float128_unpack_canonical(&pa, a, status);
   2085    float128_unpack_canonical(&pb, b, status);
   2086    pr = parts_mul(&pa, &pb, status);
   2087
   2088    return float128_round_pack_canonical(pr, status);
   2089}
   2090
   2091floatx80 QEMU_FLATTEN
   2092floatx80_mul(floatx80 a, floatx80 b, float_status *status)
   2093{
   2094    FloatParts128 pa, pb, *pr;
   2095
   2096    if (!floatx80_unpack_canonical(&pa, a, status) ||
   2097        !floatx80_unpack_canonical(&pb, b, status)) {
   2098        return floatx80_default_nan(status);
   2099    }
   2100
   2101    pr = parts_mul(&pa, &pb, status);
   2102    return floatx80_round_pack_canonical(pr, status);
   2103}
   2104
   2105/*
   2106 * Fused multiply-add
   2107 */
   2108
   2109float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
   2110                                    int flags, float_status *status)
   2111{
   2112    FloatParts64 pa, pb, pc, *pr;
   2113
   2114    float16_unpack_canonical(&pa, a, status);
   2115    float16_unpack_canonical(&pb, b, status);
   2116    float16_unpack_canonical(&pc, c, status);
   2117    pr = parts_muladd(&pa, &pb, &pc, flags, status);
   2118
   2119    return float16_round_pack_canonical(pr, status);
   2120}
   2121
   2122static float32 QEMU_SOFTFLOAT_ATTR
   2123soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
   2124                float_status *status)
   2125{
   2126    FloatParts64 pa, pb, pc, *pr;
   2127
   2128    float32_unpack_canonical(&pa, a, status);
   2129    float32_unpack_canonical(&pb, b, status);
   2130    float32_unpack_canonical(&pc, c, status);
   2131    pr = parts_muladd(&pa, &pb, &pc, flags, status);
   2132
   2133    return float32_round_pack_canonical(pr, status);
   2134}
   2135
   2136static float64 QEMU_SOFTFLOAT_ATTR
   2137soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
   2138                float_status *status)
   2139{
   2140    FloatParts64 pa, pb, pc, *pr;
   2141
   2142    float64_unpack_canonical(&pa, a, status);
   2143    float64_unpack_canonical(&pb, b, status);
   2144    float64_unpack_canonical(&pc, c, status);
   2145    pr = parts_muladd(&pa, &pb, &pc, flags, status);
   2146
   2147    return float64_round_pack_canonical(pr, status);
   2148}
   2149
   2150static bool force_soft_fma;
   2151
   2152float32 QEMU_FLATTEN
   2153float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
   2154{
   2155    union_float32 ua, ub, uc, ur;
   2156
   2157    ua.s = xa;
   2158    ub.s = xb;
   2159    uc.s = xc;
   2160
   2161    if (unlikely(!can_use_fpu(s))) {
   2162        goto soft;
   2163    }
   2164    if (unlikely(flags & float_muladd_halve_result)) {
   2165        goto soft;
   2166    }
   2167
   2168    float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
   2169    if (unlikely(!f32_is_zon3(ua, ub, uc))) {
   2170        goto soft;
   2171    }
   2172
   2173    if (unlikely(force_soft_fma)) {
   2174        goto soft;
   2175    }
   2176
   2177    /*
   2178     * When (a || b) == 0, there's no need to check for under/over flow,
   2179     * since we know the addend is (normal || 0) and the product is 0.
   2180     */
   2181    if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
   2182        union_float32 up;
   2183        bool prod_sign;
   2184
   2185        prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
   2186        prod_sign ^= !!(flags & float_muladd_negate_product);
   2187        up.s = float32_set_sign(float32_zero, prod_sign);
   2188
   2189        if (flags & float_muladd_negate_c) {
   2190            uc.h = -uc.h;
   2191        }
   2192        ur.h = up.h + uc.h;
   2193    } else {
   2194        union_float32 ua_orig = ua;
   2195        union_float32 uc_orig = uc;
   2196
   2197        if (flags & float_muladd_negate_product) {
   2198            ua.h = -ua.h;
   2199        }
   2200        if (flags & float_muladd_negate_c) {
   2201            uc.h = -uc.h;
   2202        }
   2203
   2204        ur.h = fmaf(ua.h, ub.h, uc.h);
   2205
   2206        if (unlikely(f32_is_inf(ur))) {
   2207            float_raise(float_flag_overflow, s);
   2208        } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
   2209            ua = ua_orig;
   2210            uc = uc_orig;
   2211            goto soft;
   2212        }
   2213    }
   2214    if (flags & float_muladd_negate_result) {
   2215        return float32_chs(ur.s);
   2216    }
   2217    return ur.s;
   2218
   2219 soft:
   2220    return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
   2221}
   2222
   2223float64 QEMU_FLATTEN
   2224float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
   2225{
   2226    union_float64 ua, ub, uc, ur;
   2227
   2228    ua.s = xa;
   2229    ub.s = xb;
   2230    uc.s = xc;
   2231
   2232    if (unlikely(!can_use_fpu(s))) {
   2233        goto soft;
   2234    }
   2235    if (unlikely(flags & float_muladd_halve_result)) {
   2236        goto soft;
   2237    }
   2238
   2239    float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
   2240    if (unlikely(!f64_is_zon3(ua, ub, uc))) {
   2241        goto soft;
   2242    }
   2243
   2244    if (unlikely(force_soft_fma)) {
   2245        goto soft;
   2246    }
   2247
   2248    /*
   2249     * When (a || b) == 0, there's no need to check for under/over flow,
   2250     * since we know the addend is (normal || 0) and the product is 0.
   2251     */
   2252    if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
   2253        union_float64 up;
   2254        bool prod_sign;
   2255
   2256        prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
   2257        prod_sign ^= !!(flags & float_muladd_negate_product);
   2258        up.s = float64_set_sign(float64_zero, prod_sign);
   2259
   2260        if (flags & float_muladd_negate_c) {
   2261            uc.h = -uc.h;
   2262        }
   2263        ur.h = up.h + uc.h;
   2264    } else {
   2265        union_float64 ua_orig = ua;
   2266        union_float64 uc_orig = uc;
   2267
   2268        if (flags & float_muladd_negate_product) {
   2269            ua.h = -ua.h;
   2270        }
   2271        if (flags & float_muladd_negate_c) {
   2272            uc.h = -uc.h;
   2273        }
   2274
   2275        ur.h = fma(ua.h, ub.h, uc.h);
   2276
   2277        if (unlikely(f64_is_inf(ur))) {
   2278            float_raise(float_flag_overflow, s);
   2279        } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
   2280            ua = ua_orig;
   2281            uc = uc_orig;
   2282            goto soft;
   2283        }
   2284    }
   2285    if (flags & float_muladd_negate_result) {
   2286        return float64_chs(ur.s);
   2287    }
   2288    return ur.s;
   2289
   2290 soft:
   2291    return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
   2292}
   2293
   2294bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
   2295                                      int flags, float_status *status)
   2296{
   2297    FloatParts64 pa, pb, pc, *pr;
   2298
   2299    bfloat16_unpack_canonical(&pa, a, status);
   2300    bfloat16_unpack_canonical(&pb, b, status);
   2301    bfloat16_unpack_canonical(&pc, c, status);
   2302    pr = parts_muladd(&pa, &pb, &pc, flags, status);
   2303
   2304    return bfloat16_round_pack_canonical(pr, status);
   2305}
   2306
   2307float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
   2308                                      int flags, float_status *status)
   2309{
   2310    FloatParts128 pa, pb, pc, *pr;
   2311
   2312    float128_unpack_canonical(&pa, a, status);
   2313    float128_unpack_canonical(&pb, b, status);
   2314    float128_unpack_canonical(&pc, c, status);
   2315    pr = parts_muladd(&pa, &pb, &pc, flags, status);
   2316
   2317    return float128_round_pack_canonical(pr, status);
   2318}
   2319
   2320/*
   2321 * Division
   2322 */
   2323
   2324float16 float16_div(float16 a, float16 b, float_status *status)
   2325{
   2326    FloatParts64 pa, pb, *pr;
   2327
   2328    float16_unpack_canonical(&pa, a, status);
   2329    float16_unpack_canonical(&pb, b, status);
   2330    pr = parts_div(&pa, &pb, status);
   2331
   2332    return float16_round_pack_canonical(pr, status);
   2333}
   2334
   2335static float32 QEMU_SOFTFLOAT_ATTR
   2336soft_f32_div(float32 a, float32 b, float_status *status)
   2337{
   2338    FloatParts64 pa, pb, *pr;
   2339
   2340    float32_unpack_canonical(&pa, a, status);
   2341    float32_unpack_canonical(&pb, b, status);
   2342    pr = parts_div(&pa, &pb, status);
   2343
   2344    return float32_round_pack_canonical(pr, status);
   2345}
   2346
   2347static float64 QEMU_SOFTFLOAT_ATTR
   2348soft_f64_div(float64 a, float64 b, float_status *status)
   2349{
   2350    FloatParts64 pa, pb, *pr;
   2351
   2352    float64_unpack_canonical(&pa, a, status);
   2353    float64_unpack_canonical(&pb, b, status);
   2354    pr = parts_div(&pa, &pb, status);
   2355
   2356    return float64_round_pack_canonical(pr, status);
   2357}
   2358
   2359static float hard_f32_div(float a, float b)
   2360{
   2361    return a / b;
   2362}
   2363
   2364static double hard_f64_div(double a, double b)
   2365{
   2366    return a / b;
   2367}
   2368
   2369static bool f32_div_pre(union_float32 a, union_float32 b)
   2370{
   2371    if (QEMU_HARDFLOAT_2F32_USE_FP) {
   2372        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
   2373               fpclassify(b.h) == FP_NORMAL;
   2374    }
   2375    return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
   2376}
   2377
   2378static bool f64_div_pre(union_float64 a, union_float64 b)
   2379{
   2380    if (QEMU_HARDFLOAT_2F64_USE_FP) {
   2381        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
   2382               fpclassify(b.h) == FP_NORMAL;
   2383    }
   2384    return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
   2385}
   2386
   2387static bool f32_div_post(union_float32 a, union_float32 b)
   2388{
   2389    if (QEMU_HARDFLOAT_2F32_USE_FP) {
   2390        return fpclassify(a.h) != FP_ZERO;
   2391    }
   2392    return !float32_is_zero(a.s);
   2393}
   2394
   2395static bool f64_div_post(union_float64 a, union_float64 b)
   2396{
   2397    if (QEMU_HARDFLOAT_2F64_USE_FP) {
   2398        return fpclassify(a.h) != FP_ZERO;
   2399    }
   2400    return !float64_is_zero(a.s);
   2401}
   2402
   2403float32 QEMU_FLATTEN
   2404float32_div(float32 a, float32 b, float_status *s)
   2405{
   2406    return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
   2407                        f32_div_pre, f32_div_post);
   2408}
   2409
   2410float64 QEMU_FLATTEN
   2411float64_div(float64 a, float64 b, float_status *s)
   2412{
   2413    return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
   2414                        f64_div_pre, f64_div_post);
   2415}
   2416
   2417bfloat16 QEMU_FLATTEN
   2418bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
   2419{
   2420    FloatParts64 pa, pb, *pr;
   2421
   2422    bfloat16_unpack_canonical(&pa, a, status);
   2423    bfloat16_unpack_canonical(&pb, b, status);
   2424    pr = parts_div(&pa, &pb, status);
   2425
   2426    return bfloat16_round_pack_canonical(pr, status);
   2427}
   2428
   2429float128 QEMU_FLATTEN
   2430float128_div(float128 a, float128 b, float_status *status)
   2431{
   2432    FloatParts128 pa, pb, *pr;
   2433
   2434    float128_unpack_canonical(&pa, a, status);
   2435    float128_unpack_canonical(&pb, b, status);
   2436    pr = parts_div(&pa, &pb, status);
   2437
   2438    return float128_round_pack_canonical(pr, status);
   2439}
   2440
   2441floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
   2442{
   2443    FloatParts128 pa, pb, *pr;
   2444
   2445    if (!floatx80_unpack_canonical(&pa, a, status) ||
   2446        !floatx80_unpack_canonical(&pb, b, status)) {
   2447        return floatx80_default_nan(status);
   2448    }
   2449
   2450    pr = parts_div(&pa, &pb, status);
   2451    return floatx80_round_pack_canonical(pr, status);
   2452}
   2453
   2454/*
   2455 * Remainder
   2456 */
   2457
   2458float32 float32_rem(float32 a, float32 b, float_status *status)
   2459{
   2460    FloatParts64 pa, pb, *pr;
   2461
   2462    float32_unpack_canonical(&pa, a, status);
   2463    float32_unpack_canonical(&pb, b, status);
   2464    pr = parts_modrem(&pa, &pb, NULL, status);
   2465
   2466    return float32_round_pack_canonical(pr, status);
   2467}
   2468
   2469float64 float64_rem(float64 a, float64 b, float_status *status)
   2470{
   2471    FloatParts64 pa, pb, *pr;
   2472
   2473    float64_unpack_canonical(&pa, a, status);
   2474    float64_unpack_canonical(&pb, b, status);
   2475    pr = parts_modrem(&pa, &pb, NULL, status);
   2476
   2477    return float64_round_pack_canonical(pr, status);
   2478}
   2479
   2480float128 float128_rem(float128 a, float128 b, float_status *status)
   2481{
   2482    FloatParts128 pa, pb, *pr;
   2483
   2484    float128_unpack_canonical(&pa, a, status);
   2485    float128_unpack_canonical(&pb, b, status);
   2486    pr = parts_modrem(&pa, &pb, NULL, status);
   2487
   2488    return float128_round_pack_canonical(pr, status);
   2489}
   2490
   2491/*
   2492 * Returns the remainder of the extended double-precision floating-point value
   2493 * `a' with respect to the corresponding value `b'.
   2494 * If 'mod' is false, the operation is performed according to the IEC/IEEE
   2495 * Standard for Binary Floating-Point Arithmetic.  If 'mod' is true, return
   2496 * the remainder based on truncating the quotient toward zero instead and
   2497 * *quotient is set to the low 64 bits of the absolute value of the integer
   2498 * quotient.
   2499 */
   2500floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod,
   2501                         uint64_t *quotient, float_status *status)
   2502{
   2503    FloatParts128 pa, pb, *pr;
   2504
   2505    *quotient = 0;
   2506    if (!floatx80_unpack_canonical(&pa, a, status) ||
   2507        !floatx80_unpack_canonical(&pb, b, status)) {
   2508        return floatx80_default_nan(status);
   2509    }
   2510    pr = parts_modrem(&pa, &pb, mod ? quotient : NULL, status);
   2511
   2512    return floatx80_round_pack_canonical(pr, status);
   2513}
   2514
   2515floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
   2516{
   2517    uint64_t quotient;
   2518    return floatx80_modrem(a, b, false, &quotient, status);
   2519}
   2520
   2521floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
   2522{
   2523    uint64_t quotient;
   2524    return floatx80_modrem(a, b, true, &quotient, status);
   2525}
   2526
   2527/*
   2528 * Float to Float conversions
   2529 *
   2530 * Returns the result of converting one float format to another. The
   2531 * conversion is performed according to the IEC/IEEE Standard for
   2532 * Binary Floating-Point Arithmetic.
   2533 *
   2534 * Usually this only needs to take care of raising invalid exceptions
   2535 * and handling the conversion on NaNs.
   2536 */
   2537
   2538static void parts_float_to_ahp(FloatParts64 *a, float_status *s)
   2539{
   2540    switch (a->cls) {
   2541    case float_class_qnan:
   2542    case float_class_snan:
   2543        /*
   2544         * There is no NaN in the destination format.  Raise Invalid
   2545         * and return a zero with the sign of the input NaN.
   2546         */
   2547        float_raise(float_flag_invalid, s);
   2548        a->cls = float_class_zero;
   2549        break;
   2550
   2551    case float_class_inf:
   2552        /*
   2553         * There is no Inf in the destination format.  Raise Invalid
   2554         * and return the maximum normal with the correct sign.
   2555         */
   2556        float_raise(float_flag_invalid, s);
   2557        a->cls = float_class_normal;
   2558        a->exp = float16_params_ahp.exp_max;
   2559        a->frac = MAKE_64BIT_MASK(float16_params_ahp.frac_shift,
   2560                                  float16_params_ahp.frac_size + 1);
   2561        break;
   2562
   2563    case float_class_normal:
   2564    case float_class_zero:
   2565        break;
   2566
   2567    default:
   2568        g_assert_not_reached();
   2569    }
   2570}
   2571
   2572static void parts64_float_to_float(FloatParts64 *a, float_status *s)
   2573{
   2574    if (is_nan(a->cls)) {
   2575        parts_return_nan(a, s);
   2576    }
   2577}
   2578
   2579static void parts128_float_to_float(FloatParts128 *a, float_status *s)
   2580{
   2581    if (is_nan(a->cls)) {
   2582        parts_return_nan(a, s);
   2583    }
   2584}
   2585
   2586#define parts_float_to_float(P, S) \
   2587    PARTS_GENERIC_64_128(float_to_float, P)(P, S)
   2588
   2589static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b,
   2590                                        float_status *s)
   2591{
   2592    a->cls = b->cls;
   2593    a->sign = b->sign;
   2594    a->exp = b->exp;
   2595
   2596    if (a->cls == float_class_normal) {
   2597        frac_truncjam(a, b);
   2598    } else if (is_nan(a->cls)) {
   2599        /* Discard the low bits of the NaN. */
   2600        a->frac = b->frac_hi;
   2601        parts_return_nan(a, s);
   2602    }
   2603}
   2604
   2605static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
   2606                                       float_status *s)
   2607{
   2608    a->cls = b->cls;
   2609    a->sign = b->sign;
   2610    a->exp = b->exp;
   2611    frac_widen(a, b);
   2612
   2613    if (is_nan(a->cls)) {
   2614        parts_return_nan(a, s);
   2615    }
   2616}
   2617
   2618float32 float16_to_float32(float16 a, bool ieee, float_status *s)
   2619{
   2620    const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
   2621    FloatParts64 p;
   2622
   2623    float16a_unpack_canonical(&p, a, s, fmt16);
   2624    parts_float_to_float(&p, s);
   2625    return float32_round_pack_canonical(&p, s);
   2626}
   2627
   2628float64 float16_to_float64(float16 a, bool ieee, float_status *s)
   2629{
   2630    const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
   2631    FloatParts64 p;
   2632
   2633    float16a_unpack_canonical(&p, a, s, fmt16);
   2634    parts_float_to_float(&p, s);
   2635    return float64_round_pack_canonical(&p, s);
   2636}
   2637
   2638float16 float32_to_float16(float32 a, bool ieee, float_status *s)
   2639{
   2640    FloatParts64 p;
   2641    const FloatFmt *fmt;
   2642
   2643    float32_unpack_canonical(&p, a, s);
   2644    if (ieee) {
   2645        parts_float_to_float(&p, s);
   2646        fmt = &float16_params;
   2647    } else {
   2648        parts_float_to_ahp(&p, s);
   2649        fmt = &float16_params_ahp;
   2650    }
   2651    return float16a_round_pack_canonical(&p, s, fmt);
   2652}
   2653
   2654static float64 QEMU_SOFTFLOAT_ATTR
   2655soft_float32_to_float64(float32 a, float_status *s)
   2656{
   2657    FloatParts64 p;
   2658
   2659    float32_unpack_canonical(&p, a, s);
   2660    parts_float_to_float(&p, s);
   2661    return float64_round_pack_canonical(&p, s);
   2662}
   2663
   2664float64 float32_to_float64(float32 a, float_status *s)
   2665{
   2666    if (likely(float32_is_normal(a))) {
   2667        /* Widening conversion can never produce inexact results.  */
   2668        union_float32 uf;
   2669        union_float64 ud;
   2670        uf.s = a;
   2671        ud.h = uf.h;
   2672        return ud.s;
   2673    } else if (float32_is_zero(a)) {
   2674        return float64_set_sign(float64_zero, float32_is_neg(a));
   2675    } else {
   2676        return soft_float32_to_float64(a, s);
   2677    }
   2678}
   2679
   2680float16 float64_to_float16(float64 a, bool ieee, float_status *s)
   2681{
   2682    FloatParts64 p;
   2683    const FloatFmt *fmt;
   2684
   2685    float64_unpack_canonical(&p, a, s);
   2686    if (ieee) {
   2687        parts_float_to_float(&p, s);
   2688        fmt = &float16_params;
   2689    } else {
   2690        parts_float_to_ahp(&p, s);
   2691        fmt = &float16_params_ahp;
   2692    }
   2693    return float16a_round_pack_canonical(&p, s, fmt);
   2694}
   2695
   2696float32 float64_to_float32(float64 a, float_status *s)
   2697{
   2698    FloatParts64 p;
   2699
   2700    float64_unpack_canonical(&p, a, s);
   2701    parts_float_to_float(&p, s);
   2702    return float32_round_pack_canonical(&p, s);
   2703}
   2704
   2705float32 bfloat16_to_float32(bfloat16 a, float_status *s)
   2706{
   2707    FloatParts64 p;
   2708
   2709    bfloat16_unpack_canonical(&p, a, s);
   2710    parts_float_to_float(&p, s);
   2711    return float32_round_pack_canonical(&p, s);
   2712}
   2713
   2714float64 bfloat16_to_float64(bfloat16 a, float_status *s)
   2715{
   2716    FloatParts64 p;
   2717
   2718    bfloat16_unpack_canonical(&p, a, s);
   2719    parts_float_to_float(&p, s);
   2720    return float64_round_pack_canonical(&p, s);
   2721}
   2722
   2723bfloat16 float32_to_bfloat16(float32 a, float_status *s)
   2724{
   2725    FloatParts64 p;
   2726
   2727    float32_unpack_canonical(&p, a, s);
   2728    parts_float_to_float(&p, s);
   2729    return bfloat16_round_pack_canonical(&p, s);
   2730}
   2731
   2732bfloat16 float64_to_bfloat16(float64 a, float_status *s)
   2733{
   2734    FloatParts64 p;
   2735
   2736    float64_unpack_canonical(&p, a, s);
   2737    parts_float_to_float(&p, s);
   2738    return bfloat16_round_pack_canonical(&p, s);
   2739}
   2740
   2741float32 float128_to_float32(float128 a, float_status *s)
   2742{
   2743    FloatParts64 p64;
   2744    FloatParts128 p128;
   2745
   2746    float128_unpack_canonical(&p128, a, s);
   2747    parts_float_to_float_narrow(&p64, &p128, s);
   2748    return float32_round_pack_canonical(&p64, s);
   2749}
   2750
   2751float64 float128_to_float64(float128 a, float_status *s)
   2752{
   2753    FloatParts64 p64;
   2754    FloatParts128 p128;
   2755
   2756    float128_unpack_canonical(&p128, a, s);
   2757    parts_float_to_float_narrow(&p64, &p128, s);
   2758    return float64_round_pack_canonical(&p64, s);
   2759}
   2760
   2761float128 float32_to_float128(float32 a, float_status *s)
   2762{
   2763    FloatParts64 p64;
   2764    FloatParts128 p128;
   2765
   2766    float32_unpack_canonical(&p64, a, s);
   2767    parts_float_to_float_widen(&p128, &p64, s);
   2768    return float128_round_pack_canonical(&p128, s);
   2769}
   2770
   2771float128 float64_to_float128(float64 a, float_status *s)
   2772{
   2773    FloatParts64 p64;
   2774    FloatParts128 p128;
   2775
   2776    float64_unpack_canonical(&p64, a, s);
   2777    parts_float_to_float_widen(&p128, &p64, s);
   2778    return float128_round_pack_canonical(&p128, s);
   2779}
   2780
   2781float32 floatx80_to_float32(floatx80 a, float_status *s)
   2782{
   2783    FloatParts64 p64;
   2784    FloatParts128 p128;
   2785
   2786    if (floatx80_unpack_canonical(&p128, a, s)) {
   2787        parts_float_to_float_narrow(&p64, &p128, s);
   2788    } else {
   2789        parts_default_nan(&p64, s);
   2790    }
   2791    return float32_round_pack_canonical(&p64, s);
   2792}
   2793
   2794float64 floatx80_to_float64(floatx80 a, float_status *s)
   2795{
   2796    FloatParts64 p64;
   2797    FloatParts128 p128;
   2798
   2799    if (floatx80_unpack_canonical(&p128, a, s)) {
   2800        parts_float_to_float_narrow(&p64, &p128, s);
   2801    } else {
   2802        parts_default_nan(&p64, s);
   2803    }
   2804    return float64_round_pack_canonical(&p64, s);
   2805}
   2806
   2807float128 floatx80_to_float128(floatx80 a, float_status *s)
   2808{
   2809    FloatParts128 p;
   2810
   2811    if (floatx80_unpack_canonical(&p, a, s)) {
   2812        parts_float_to_float(&p, s);
   2813    } else {
   2814        parts_default_nan(&p, s);
   2815    }
   2816    return float128_round_pack_canonical(&p, s);
   2817}
   2818
   2819floatx80 float32_to_floatx80(float32 a, float_status *s)
   2820{
   2821    FloatParts64 p64;
   2822    FloatParts128 p128;
   2823
   2824    float32_unpack_canonical(&p64, a, s);
   2825    parts_float_to_float_widen(&p128, &p64, s);
   2826    return floatx80_round_pack_canonical(&p128, s);
   2827}
   2828
   2829floatx80 float64_to_floatx80(float64 a, float_status *s)
   2830{
   2831    FloatParts64 p64;
   2832    FloatParts128 p128;
   2833
   2834    float64_unpack_canonical(&p64, a, s);
   2835    parts_float_to_float_widen(&p128, &p64, s);
   2836    return floatx80_round_pack_canonical(&p128, s);
   2837}
   2838
   2839floatx80 float128_to_floatx80(float128 a, float_status *s)
   2840{
   2841    FloatParts128 p;
   2842
   2843    float128_unpack_canonical(&p, a, s);
   2844    parts_float_to_float(&p, s);
   2845    return floatx80_round_pack_canonical(&p, s);
   2846}
   2847
   2848/*
   2849 * Round to integral value
   2850 */
   2851
   2852float16 float16_round_to_int(float16 a, float_status *s)
   2853{
   2854    FloatParts64 p;
   2855
   2856    float16_unpack_canonical(&p, a, s);
   2857    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float16_params);
   2858    return float16_round_pack_canonical(&p, s);
   2859}
   2860
   2861float32 float32_round_to_int(float32 a, float_status *s)
   2862{
   2863    FloatParts64 p;
   2864
   2865    float32_unpack_canonical(&p, a, s);
   2866    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float32_params);
   2867    return float32_round_pack_canonical(&p, s);
   2868}
   2869
   2870float64 float64_round_to_int(float64 a, float_status *s)
   2871{
   2872    FloatParts64 p;
   2873
   2874    float64_unpack_canonical(&p, a, s);
   2875    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float64_params);
   2876    return float64_round_pack_canonical(&p, s);
   2877}
   2878
   2879bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
   2880{
   2881    FloatParts64 p;
   2882
   2883    bfloat16_unpack_canonical(&p, a, s);
   2884    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &bfloat16_params);
   2885    return bfloat16_round_pack_canonical(&p, s);
   2886}
   2887
   2888float128 float128_round_to_int(float128 a, float_status *s)
   2889{
   2890    FloatParts128 p;
   2891
   2892    float128_unpack_canonical(&p, a, s);
   2893    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float128_params);
   2894    return float128_round_pack_canonical(&p, s);
   2895}
   2896
   2897floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
   2898{
   2899    FloatParts128 p;
   2900
   2901    if (!floatx80_unpack_canonical(&p, a, status)) {
   2902        return floatx80_default_nan(status);
   2903    }
   2904
   2905    parts_round_to_int(&p, status->float_rounding_mode, 0, status,
   2906                       &floatx80_params[status->floatx80_rounding_precision]);
   2907    return floatx80_round_pack_canonical(&p, status);
   2908}
   2909
   2910/*
   2911 * Floating-point to signed integer conversions
   2912 */
   2913
   2914int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
   2915                              float_status *s)
   2916{
   2917    FloatParts64 p;
   2918
   2919    float16_unpack_canonical(&p, a, s);
   2920    return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
   2921}
   2922
   2923int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
   2924                                float_status *s)
   2925{
   2926    FloatParts64 p;
   2927
   2928    float16_unpack_canonical(&p, a, s);
   2929    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
   2930}
   2931
   2932int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
   2933                                float_status *s)
   2934{
   2935    FloatParts64 p;
   2936
   2937    float16_unpack_canonical(&p, a, s);
   2938    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
   2939}
   2940
   2941int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
   2942                                float_status *s)
   2943{
   2944    FloatParts64 p;
   2945
   2946    float16_unpack_canonical(&p, a, s);
   2947    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
   2948}
   2949
   2950int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
   2951                                float_status *s)
   2952{
   2953    FloatParts64 p;
   2954
   2955    float32_unpack_canonical(&p, a, s);
   2956    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
   2957}
   2958
   2959int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
   2960                                float_status *s)
   2961{
   2962    FloatParts64 p;
   2963
   2964    float32_unpack_canonical(&p, a, s);
   2965    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
   2966}
   2967
   2968int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
   2969                                float_status *s)
   2970{
   2971    FloatParts64 p;
   2972
   2973    float32_unpack_canonical(&p, a, s);
   2974    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
   2975}
   2976
   2977int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
   2978                                float_status *s)
   2979{
   2980    FloatParts64 p;
   2981
   2982    float64_unpack_canonical(&p, a, s);
   2983    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
   2984}
   2985
   2986int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
   2987                                float_status *s)
   2988{
   2989    FloatParts64 p;
   2990
   2991    float64_unpack_canonical(&p, a, s);
   2992    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
   2993}
   2994
   2995int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
   2996                                float_status *s)
   2997{
   2998    FloatParts64 p;
   2999
   3000    float64_unpack_canonical(&p, a, s);
   3001    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
   3002}
   3003
   3004int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
   3005                                 float_status *s)
   3006{
   3007    FloatParts64 p;
   3008
   3009    bfloat16_unpack_canonical(&p, a, s);
   3010    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
   3011}
   3012
   3013int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
   3014                                 float_status *s)
   3015{
   3016    FloatParts64 p;
   3017
   3018    bfloat16_unpack_canonical(&p, a, s);
   3019    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
   3020}
   3021
   3022int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
   3023                                 float_status *s)
   3024{
   3025    FloatParts64 p;
   3026
   3027    bfloat16_unpack_canonical(&p, a, s);
   3028    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
   3029}
   3030
   3031static int32_t float128_to_int32_scalbn(float128 a, FloatRoundMode rmode,
   3032                                        int scale, float_status *s)
   3033{
   3034    FloatParts128 p;
   3035
   3036    float128_unpack_canonical(&p, a, s);
   3037    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
   3038}
   3039
   3040static int64_t float128_to_int64_scalbn(float128 a, FloatRoundMode rmode,
   3041                                        int scale, float_status *s)
   3042{
   3043    FloatParts128 p;
   3044
   3045    float128_unpack_canonical(&p, a, s);
   3046    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
   3047}
   3048
   3049static int32_t floatx80_to_int32_scalbn(floatx80 a, FloatRoundMode rmode,
   3050                                        int scale, float_status *s)
   3051{
   3052    FloatParts128 p;
   3053
   3054    if (!floatx80_unpack_canonical(&p, a, s)) {
   3055        parts_default_nan(&p, s);
   3056    }
   3057    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
   3058}
   3059
   3060static int64_t floatx80_to_int64_scalbn(floatx80 a, FloatRoundMode rmode,
   3061                                        int scale, float_status *s)
   3062{
   3063    FloatParts128 p;
   3064
   3065    if (!floatx80_unpack_canonical(&p, a, s)) {
   3066        parts_default_nan(&p, s);
   3067    }
   3068    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
   3069}
   3070
   3071int8_t float16_to_int8(float16 a, float_status *s)
   3072{
   3073    return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
   3074}
   3075
   3076int16_t float16_to_int16(float16 a, float_status *s)
   3077{
   3078    return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
   3079}
   3080
   3081int32_t float16_to_int32(float16 a, float_status *s)
   3082{
   3083    return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
   3084}
   3085
   3086int64_t float16_to_int64(float16 a, float_status *s)
   3087{
   3088    return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
   3089}
   3090
   3091int16_t float32_to_int16(float32 a, float_status *s)
   3092{
   3093    return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
   3094}
   3095
   3096int32_t float32_to_int32(float32 a, float_status *s)
   3097{
   3098    return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
   3099}
   3100
   3101int64_t float32_to_int64(float32 a, float_status *s)
   3102{
   3103    return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
   3104}
   3105
   3106int16_t float64_to_int16(float64 a, float_status *s)
   3107{
   3108    return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
   3109}
   3110
   3111int32_t float64_to_int32(float64 a, float_status *s)
   3112{
   3113    return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
   3114}
   3115
   3116int64_t float64_to_int64(float64 a, float_status *s)
   3117{
   3118    return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
   3119}
   3120
   3121int32_t float128_to_int32(float128 a, float_status *s)
   3122{
   3123    return float128_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
   3124}
   3125
   3126int64_t float128_to_int64(float128 a, float_status *s)
   3127{
   3128    return float128_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
   3129}
   3130
   3131int32_t floatx80_to_int32(floatx80 a, float_status *s)
   3132{
   3133    return floatx80_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
   3134}
   3135
   3136int64_t floatx80_to_int64(floatx80 a, float_status *s)
   3137{
   3138    return floatx80_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
   3139}
   3140
   3141int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
   3142{
   3143    return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
   3144}
   3145
   3146int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
   3147{
   3148    return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
   3149}
   3150
   3151int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
   3152{
   3153    return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
   3154}
   3155
   3156int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
   3157{
   3158    return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
   3159}
   3160
   3161int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
   3162{
   3163    return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
   3164}
   3165
   3166int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
   3167{
   3168    return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
   3169}
   3170
   3171int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
   3172{
   3173    return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
   3174}
   3175
   3176int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
   3177{
   3178    return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
   3179}
   3180
   3181int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
   3182{
   3183    return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
   3184}
   3185
   3186int32_t float128_to_int32_round_to_zero(float128 a, float_status *s)
   3187{
   3188    return float128_to_int32_scalbn(a, float_round_to_zero, 0, s);
   3189}
   3190
   3191int64_t float128_to_int64_round_to_zero(float128 a, float_status *s)
   3192{
   3193    return float128_to_int64_scalbn(a, float_round_to_zero, 0, s);
   3194}
   3195
   3196int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *s)
   3197{
   3198    return floatx80_to_int32_scalbn(a, float_round_to_zero, 0, s);
   3199}
   3200
   3201int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *s)
   3202{
   3203    return floatx80_to_int64_scalbn(a, float_round_to_zero, 0, s);
   3204}
   3205
   3206int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
   3207{
   3208    return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
   3209}
   3210
   3211int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
   3212{
   3213    return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
   3214}
   3215
   3216int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
   3217{
   3218    return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
   3219}
   3220
   3221int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
   3222{
   3223    return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
   3224}
   3225
   3226int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
   3227{
   3228    return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
   3229}
   3230
   3231int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
   3232{
   3233    return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
   3234}
   3235
   3236/*
   3237 * Floating-point to unsigned integer conversions
   3238 */
   3239
   3240uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
   3241                                float_status *s)
   3242{
   3243    FloatParts64 p;
   3244
   3245    float16_unpack_canonical(&p, a, s);
   3246    return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
   3247}
   3248
   3249uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
   3250                                  float_status *s)
   3251{
   3252    FloatParts64 p;
   3253
   3254    float16_unpack_canonical(&p, a, s);
   3255    return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
   3256}
   3257
   3258uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
   3259                                  float_status *s)
   3260{
   3261    FloatParts64 p;
   3262
   3263    float16_unpack_canonical(&p, a, s);
   3264    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
   3265}
   3266
   3267uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
   3268                                  float_status *s)
   3269{
   3270    FloatParts64 p;
   3271
   3272    float16_unpack_canonical(&p, a, s);
   3273    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
   3274}
   3275
   3276uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
   3277                                  float_status *s)
   3278{
   3279    FloatParts64 p;
   3280
   3281    float32_unpack_canonical(&p, a, s);
   3282    return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
   3283}
   3284
   3285uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
   3286                                  float_status *s)
   3287{
   3288    FloatParts64 p;
   3289
   3290    float32_unpack_canonical(&p, a, s);
   3291    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
   3292}
   3293
   3294uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
   3295                                  float_status *s)
   3296{
   3297    FloatParts64 p;
   3298
   3299    float32_unpack_canonical(&p, a, s);
   3300    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
   3301}
   3302
   3303uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
   3304                                  float_status *s)
   3305{
   3306    FloatParts64 p;
   3307
   3308    float64_unpack_canonical(&p, a, s);
   3309    return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
   3310}
   3311
   3312uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
   3313                                  float_status *s)
   3314{
   3315    FloatParts64 p;
   3316
   3317    float64_unpack_canonical(&p, a, s);
   3318    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
   3319}
   3320
   3321uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
   3322                                  float_status *s)
   3323{
   3324    FloatParts64 p;
   3325
   3326    float64_unpack_canonical(&p, a, s);
   3327    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
   3328}
   3329
   3330uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
   3331                                   int scale, float_status *s)
   3332{
   3333    FloatParts64 p;
   3334
   3335    bfloat16_unpack_canonical(&p, a, s);
   3336    return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
   3337}
   3338
   3339uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
   3340                                   int scale, float_status *s)
   3341{
   3342    FloatParts64 p;
   3343
   3344    bfloat16_unpack_canonical(&p, a, s);
   3345    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
   3346}
   3347
   3348uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
   3349                                   int scale, float_status *s)
   3350{
   3351    FloatParts64 p;
   3352
   3353    bfloat16_unpack_canonical(&p, a, s);
   3354    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
   3355}
   3356
   3357static uint32_t float128_to_uint32_scalbn(float128 a, FloatRoundMode rmode,
   3358                                          int scale, float_status *s)
   3359{
   3360    FloatParts128 p;
   3361
   3362    float128_unpack_canonical(&p, a, s);
   3363    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
   3364}
   3365
   3366static uint64_t float128_to_uint64_scalbn(float128 a, FloatRoundMode rmode,
   3367                                          int scale, float_status *s)
   3368{
   3369    FloatParts128 p;
   3370
   3371    float128_unpack_canonical(&p, a, s);
   3372    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
   3373}
   3374
   3375uint8_t float16_to_uint8(float16 a, float_status *s)
   3376{
   3377    return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
   3378}
   3379
   3380uint16_t float16_to_uint16(float16 a, float_status *s)
   3381{
   3382    return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
   3383}
   3384
   3385uint32_t float16_to_uint32(float16 a, float_status *s)
   3386{
   3387    return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
   3388}
   3389
   3390uint64_t float16_to_uint64(float16 a, float_status *s)
   3391{
   3392    return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
   3393}
   3394
   3395uint16_t float32_to_uint16(float32 a, float_status *s)
   3396{
   3397    return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
   3398}
   3399
   3400uint32_t float32_to_uint32(float32 a, float_status *s)
   3401{
   3402    return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
   3403}
   3404
   3405uint64_t float32_to_uint64(float32 a, float_status *s)
   3406{
   3407    return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
   3408}
   3409
   3410uint16_t float64_to_uint16(float64 a, float_status *s)
   3411{
   3412    return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
   3413}
   3414
   3415uint32_t float64_to_uint32(float64 a, float_status *s)
   3416{
   3417    return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
   3418}
   3419
   3420uint64_t float64_to_uint64(float64 a, float_status *s)
   3421{
   3422    return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
   3423}
   3424
   3425uint32_t float128_to_uint32(float128 a, float_status *s)
   3426{
   3427    return float128_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
   3428}
   3429
   3430uint64_t float128_to_uint64(float128 a, float_status *s)
   3431{
   3432    return float128_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
   3433}
   3434
   3435uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
   3436{
   3437    return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
   3438}
   3439
   3440uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
   3441{
   3442    return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
   3443}
   3444
   3445uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
   3446{
   3447    return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
   3448}
   3449
   3450uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
   3451{
   3452    return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
   3453}
   3454
   3455uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
   3456{
   3457    return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
   3458}
   3459
   3460uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
   3461{
   3462    return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
   3463}
   3464
   3465uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
   3466{
   3467    return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
   3468}
   3469
   3470uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
   3471{
   3472    return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
   3473}
   3474
   3475uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
   3476{
   3477    return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
   3478}
   3479
   3480uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *s)
   3481{
   3482    return float128_to_uint32_scalbn(a, float_round_to_zero, 0, s);
   3483}
   3484
   3485uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *s)
   3486{
   3487    return float128_to_uint64_scalbn(a, float_round_to_zero, 0, s);
   3488}
   3489
   3490uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
   3491{
   3492    return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
   3493}
   3494
   3495uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
   3496{
   3497    return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
   3498}
   3499
   3500uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
   3501{
   3502    return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
   3503}
   3504
   3505uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
   3506{
   3507    return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
   3508}
   3509
   3510uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
   3511{
   3512    return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
   3513}
   3514
   3515uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
   3516{
   3517    return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
   3518}
   3519
   3520/*
   3521 * Signed integer to floating-point conversions
   3522 */
   3523
   3524float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
   3525{
   3526    FloatParts64 p;
   3527
   3528    parts_sint_to_float(&p, a, scale, status);
   3529    return float16_round_pack_canonical(&p, status);
   3530}
   3531
   3532float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
   3533{
   3534    return int64_to_float16_scalbn(a, scale, status);
   3535}
   3536
   3537float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
   3538{
   3539    return int64_to_float16_scalbn(a, scale, status);
   3540}
   3541
   3542float16 int64_to_float16(int64_t a, float_status *status)
   3543{
   3544    return int64_to_float16_scalbn(a, 0, status);
   3545}
   3546
   3547float16 int32_to_float16(int32_t a, float_status *status)
   3548{
   3549    return int64_to_float16_scalbn(a, 0, status);
   3550}
   3551
   3552float16 int16_to_float16(int16_t a, float_status *status)
   3553{
   3554    return int64_to_float16_scalbn(a, 0, status);
   3555}
   3556
   3557float16 int8_to_float16(int8_t a, float_status *status)
   3558{
   3559    return int64_to_float16_scalbn(a, 0, status);
   3560}
   3561
   3562float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
   3563{
   3564    FloatParts64 p;
   3565
   3566    /* Without scaling, there are no overflow concerns. */
   3567    if (likely(scale == 0) && can_use_fpu(status)) {
   3568        union_float32 ur;
   3569        ur.h = a;
   3570        return ur.s;
   3571    }
   3572
   3573    parts64_sint_to_float(&p, a, scale, status);
   3574    return float32_round_pack_canonical(&p, status);
   3575}
   3576
   3577float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
   3578{
   3579    return int64_to_float32_scalbn(a, scale, status);
   3580}
   3581
   3582float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
   3583{
   3584    return int64_to_float32_scalbn(a, scale, status);
   3585}
   3586
   3587float32 int64_to_float32(int64_t a, float_status *status)
   3588{
   3589    return int64_to_float32_scalbn(a, 0, status);
   3590}
   3591
   3592float32 int32_to_float32(int32_t a, float_status *status)
   3593{
   3594    return int64_to_float32_scalbn(a, 0, status);
   3595}
   3596
   3597float32 int16_to_float32(int16_t a, float_status *status)
   3598{
   3599    return int64_to_float32_scalbn(a, 0, status);
   3600}
   3601
   3602float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
   3603{
   3604    FloatParts64 p;
   3605
   3606    /* Without scaling, there are no overflow concerns. */
   3607    if (likely(scale == 0) && can_use_fpu(status)) {
   3608        union_float64 ur;
   3609        ur.h = a;
   3610        return ur.s;
   3611    }
   3612
   3613    parts_sint_to_float(&p, a, scale, status);
   3614    return float64_round_pack_canonical(&p, status);
   3615}
   3616
   3617float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
   3618{
   3619    return int64_to_float64_scalbn(a, scale, status);
   3620}
   3621
   3622float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
   3623{
   3624    return int64_to_float64_scalbn(a, scale, status);
   3625}
   3626
   3627float64 int64_to_float64(int64_t a, float_status *status)
   3628{
   3629    return int64_to_float64_scalbn(a, 0, status);
   3630}
   3631
   3632float64 int32_to_float64(int32_t a, float_status *status)
   3633{
   3634    return int64_to_float64_scalbn(a, 0, status);
   3635}
   3636
   3637float64 int16_to_float64(int16_t a, float_status *status)
   3638{
   3639    return int64_to_float64_scalbn(a, 0, status);
   3640}
   3641
   3642bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
   3643{
   3644    FloatParts64 p;
   3645
   3646    parts_sint_to_float(&p, a, scale, status);
   3647    return bfloat16_round_pack_canonical(&p, status);
   3648}
   3649
   3650bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
   3651{
   3652    return int64_to_bfloat16_scalbn(a, scale, status);
   3653}
   3654
   3655bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
   3656{
   3657    return int64_to_bfloat16_scalbn(a, scale, status);
   3658}
   3659
   3660bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
   3661{
   3662    return int64_to_bfloat16_scalbn(a, 0, status);
   3663}
   3664
   3665bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
   3666{
   3667    return int64_to_bfloat16_scalbn(a, 0, status);
   3668}
   3669
   3670bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
   3671{
   3672    return int64_to_bfloat16_scalbn(a, 0, status);
   3673}
   3674
   3675float128 int64_to_float128(int64_t a, float_status *status)
   3676{
   3677    FloatParts128 p;
   3678
   3679    parts_sint_to_float(&p, a, 0, status);
   3680    return float128_round_pack_canonical(&p, status);
   3681}
   3682
   3683float128 int32_to_float128(int32_t a, float_status *status)
   3684{
   3685    return int64_to_float128(a, status);
   3686}
   3687
   3688floatx80 int64_to_floatx80(int64_t a, float_status *status)
   3689{
   3690    FloatParts128 p;
   3691
   3692    parts_sint_to_float(&p, a, 0, status);
   3693    return floatx80_round_pack_canonical(&p, status);
   3694}
   3695
   3696floatx80 int32_to_floatx80(int32_t a, float_status *status)
   3697{
   3698    return int64_to_floatx80(a, status);
   3699}
   3700
   3701/*
   3702 * Unsigned Integer to floating-point conversions
   3703 */
   3704
   3705float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
   3706{
   3707    FloatParts64 p;
   3708
   3709    parts_uint_to_float(&p, a, scale, status);
   3710    return float16_round_pack_canonical(&p, status);
   3711}
   3712
   3713float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
   3714{
   3715    return uint64_to_float16_scalbn(a, scale, status);
   3716}
   3717
   3718float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
   3719{
   3720    return uint64_to_float16_scalbn(a, scale, status);
   3721}
   3722
   3723float16 uint64_to_float16(uint64_t a, float_status *status)
   3724{
   3725    return uint64_to_float16_scalbn(a, 0, status);
   3726}
   3727
   3728float16 uint32_to_float16(uint32_t a, float_status *status)
   3729{
   3730    return uint64_to_float16_scalbn(a, 0, status);
   3731}
   3732
   3733float16 uint16_to_float16(uint16_t a, float_status *status)
   3734{
   3735    return uint64_to_float16_scalbn(a, 0, status);
   3736}
   3737
   3738float16 uint8_to_float16(uint8_t a, float_status *status)
   3739{
   3740    return uint64_to_float16_scalbn(a, 0, status);
   3741}
   3742
   3743float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
   3744{
   3745    FloatParts64 p;
   3746
   3747    /* Without scaling, there are no overflow concerns. */
   3748    if (likely(scale == 0) && can_use_fpu(status)) {
   3749        union_float32 ur;
   3750        ur.h = a;
   3751        return ur.s;
   3752    }
   3753
   3754    parts_uint_to_float(&p, a, scale, status);
   3755    return float32_round_pack_canonical(&p, status);
   3756}
   3757
   3758float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
   3759{
   3760    return uint64_to_float32_scalbn(a, scale, status);
   3761}
   3762
   3763float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
   3764{
   3765    return uint64_to_float32_scalbn(a, scale, status);
   3766}
   3767
   3768float32 uint64_to_float32(uint64_t a, float_status *status)
   3769{
   3770    return uint64_to_float32_scalbn(a, 0, status);
   3771}
   3772
   3773float32 uint32_to_float32(uint32_t a, float_status *status)
   3774{
   3775    return uint64_to_float32_scalbn(a, 0, status);
   3776}
   3777
   3778float32 uint16_to_float32(uint16_t a, float_status *status)
   3779{
   3780    return uint64_to_float32_scalbn(a, 0, status);
   3781}
   3782
   3783float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
   3784{
   3785    FloatParts64 p;
   3786
   3787    /* Without scaling, there are no overflow concerns. */
   3788    if (likely(scale == 0) && can_use_fpu(status)) {
   3789        union_float64 ur;
   3790        ur.h = a;
   3791        return ur.s;
   3792    }
   3793
   3794    parts_uint_to_float(&p, a, scale, status);
   3795    return float64_round_pack_canonical(&p, status);
   3796}
   3797
   3798float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
   3799{
   3800    return uint64_to_float64_scalbn(a, scale, status);
   3801}
   3802
   3803float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
   3804{
   3805    return uint64_to_float64_scalbn(a, scale, status);
   3806}
   3807
   3808float64 uint64_to_float64(uint64_t a, float_status *status)
   3809{
   3810    return uint64_to_float64_scalbn(a, 0, status);
   3811}
   3812
   3813float64 uint32_to_float64(uint32_t a, float_status *status)
   3814{
   3815    return uint64_to_float64_scalbn(a, 0, status);
   3816}
   3817
   3818float64 uint16_to_float64(uint16_t a, float_status *status)
   3819{
   3820    return uint64_to_float64_scalbn(a, 0, status);
   3821}
   3822
   3823bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
   3824{
   3825    FloatParts64 p;
   3826
   3827    parts_uint_to_float(&p, a, scale, status);
   3828    return bfloat16_round_pack_canonical(&p, status);
   3829}
   3830
   3831bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
   3832{
   3833    return uint64_to_bfloat16_scalbn(a, scale, status);
   3834}
   3835
   3836bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
   3837{
   3838    return uint64_to_bfloat16_scalbn(a, scale, status);
   3839}
   3840
   3841bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
   3842{
   3843    return uint64_to_bfloat16_scalbn(a, 0, status);
   3844}
   3845
   3846bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
   3847{
   3848    return uint64_to_bfloat16_scalbn(a, 0, status);
   3849}
   3850
   3851bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
   3852{
   3853    return uint64_to_bfloat16_scalbn(a, 0, status);
   3854}
   3855
   3856float128 uint64_to_float128(uint64_t a, float_status *status)
   3857{
   3858    FloatParts128 p;
   3859
   3860    parts_uint_to_float(&p, a, 0, status);
   3861    return float128_round_pack_canonical(&p, status);
   3862}
   3863
   3864/*
   3865 * Minimum and maximum
   3866 */
   3867
   3868static float16 float16_minmax(float16 a, float16 b, float_status *s, int flags)
   3869{
   3870    FloatParts64 pa, pb, *pr;
   3871
   3872    float16_unpack_canonical(&pa, a, s);
   3873    float16_unpack_canonical(&pb, b, s);
   3874    pr = parts_minmax(&pa, &pb, s, flags);
   3875
   3876    return float16_round_pack_canonical(pr, s);
   3877}
   3878
   3879static bfloat16 bfloat16_minmax(bfloat16 a, bfloat16 b,
   3880                                float_status *s, int flags)
   3881{
   3882    FloatParts64 pa, pb, *pr;
   3883
   3884    bfloat16_unpack_canonical(&pa, a, s);
   3885    bfloat16_unpack_canonical(&pb, b, s);
   3886    pr = parts_minmax(&pa, &pb, s, flags);
   3887
   3888    return bfloat16_round_pack_canonical(pr, s);
   3889}
   3890
   3891static float32 float32_minmax(float32 a, float32 b, float_status *s, int flags)
   3892{
   3893    FloatParts64 pa, pb, *pr;
   3894
   3895    float32_unpack_canonical(&pa, a, s);
   3896    float32_unpack_canonical(&pb, b, s);
   3897    pr = parts_minmax(&pa, &pb, s, flags);
   3898
   3899    return float32_round_pack_canonical(pr, s);
   3900}
   3901
   3902static float64 float64_minmax(float64 a, float64 b, float_status *s, int flags)
   3903{
   3904    FloatParts64 pa, pb, *pr;
   3905
   3906    float64_unpack_canonical(&pa, a, s);
   3907    float64_unpack_canonical(&pb, b, s);
   3908    pr = parts_minmax(&pa, &pb, s, flags);
   3909
   3910    return float64_round_pack_canonical(pr, s);
   3911}
   3912
   3913static float128 float128_minmax(float128 a, float128 b,
   3914                                float_status *s, int flags)
   3915{
   3916    FloatParts128 pa, pb, *pr;
   3917
   3918    float128_unpack_canonical(&pa, a, s);
   3919    float128_unpack_canonical(&pb, b, s);
   3920    pr = parts_minmax(&pa, &pb, s, flags);
   3921
   3922    return float128_round_pack_canonical(pr, s);
   3923}
   3924
   3925#define MINMAX_1(type, name, flags) \
   3926    type type##_##name(type a, type b, float_status *s) \
   3927    { return type##_minmax(a, b, s, flags); }
   3928
   3929#define MINMAX_2(type) \
   3930    MINMAX_1(type, max, 0)                                      \
   3931    MINMAX_1(type, maxnum, minmax_isnum)                        \
   3932    MINMAX_1(type, maxnummag, minmax_isnum | minmax_ismag)      \
   3933    MINMAX_1(type, min, minmax_ismin)                           \
   3934    MINMAX_1(type, minnum, minmax_ismin | minmax_isnum)         \
   3935    MINMAX_1(type, minnummag, minmax_ismin | minmax_isnum | minmax_ismag)
   3936
   3937MINMAX_2(float16)
   3938MINMAX_2(bfloat16)
   3939MINMAX_2(float32)
   3940MINMAX_2(float64)
   3941MINMAX_2(float128)
   3942
   3943#undef MINMAX_1
   3944#undef MINMAX_2
   3945
   3946/*
   3947 * Floating point compare
   3948 */
   3949
   3950static FloatRelation QEMU_FLATTEN
   3951float16_do_compare(float16 a, float16 b, float_status *s, bool is_quiet)
   3952{
   3953    FloatParts64 pa, pb;
   3954
   3955    float16_unpack_canonical(&pa, a, s);
   3956    float16_unpack_canonical(&pb, b, s);
   3957    return parts_compare(&pa, &pb, s, is_quiet);
   3958}
   3959
   3960FloatRelation float16_compare(float16 a, float16 b, float_status *s)
   3961{
   3962    return float16_do_compare(a, b, s, false);
   3963}
   3964
   3965FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
   3966{
   3967    return float16_do_compare(a, b, s, true);
   3968}
   3969
   3970static FloatRelation QEMU_SOFTFLOAT_ATTR
   3971float32_do_compare(float32 a, float32 b, float_status *s, bool is_quiet)
   3972{
   3973    FloatParts64 pa, pb;
   3974
   3975    float32_unpack_canonical(&pa, a, s);
   3976    float32_unpack_canonical(&pb, b, s);
   3977    return parts_compare(&pa, &pb, s, is_quiet);
   3978}
   3979
   3980static FloatRelation QEMU_FLATTEN
   3981float32_hs_compare(float32 xa, float32 xb, float_status *s, bool is_quiet)
   3982{
   3983    union_float32 ua, ub;
   3984
   3985    ua.s = xa;
   3986    ub.s = xb;
   3987
   3988    if (QEMU_NO_HARDFLOAT) {
   3989        goto soft;
   3990    }
   3991
   3992    float32_input_flush2(&ua.s, &ub.s, s);
   3993    if (isgreaterequal(ua.h, ub.h)) {
   3994        if (isgreater(ua.h, ub.h)) {
   3995            return float_relation_greater;
   3996        }
   3997        return float_relation_equal;
   3998    }
   3999    if (likely(isless(ua.h, ub.h))) {
   4000        return float_relation_less;
   4001    }
   4002    /*
   4003     * The only condition remaining is unordered.
   4004     * Fall through to set flags.
   4005     */
   4006 soft:
   4007    return float32_do_compare(ua.s, ub.s, s, is_quiet);
   4008}
   4009
   4010FloatRelation float32_compare(float32 a, float32 b, float_status *s)
   4011{
   4012    return float32_hs_compare(a, b, s, false);
   4013}
   4014
   4015FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
   4016{
   4017    return float32_hs_compare(a, b, s, true);
   4018}
   4019
   4020static FloatRelation QEMU_SOFTFLOAT_ATTR
   4021float64_do_compare(float64 a, float64 b, float_status *s, bool is_quiet)
   4022{
   4023    FloatParts64 pa, pb;
   4024
   4025    float64_unpack_canonical(&pa, a, s);
   4026    float64_unpack_canonical(&pb, b, s);
   4027    return parts_compare(&pa, &pb, s, is_quiet);
   4028}
   4029
   4030static FloatRelation QEMU_FLATTEN
   4031float64_hs_compare(float64 xa, float64 xb, float_status *s, bool is_quiet)
   4032{
   4033    union_float64 ua, ub;
   4034
   4035    ua.s = xa;
   4036    ub.s = xb;
   4037
   4038    if (QEMU_NO_HARDFLOAT) {
   4039        goto soft;
   4040    }
   4041
   4042    float64_input_flush2(&ua.s, &ub.s, s);
   4043    if (isgreaterequal(ua.h, ub.h)) {
   4044        if (isgreater(ua.h, ub.h)) {
   4045            return float_relation_greater;
   4046        }
   4047        return float_relation_equal;
   4048    }
   4049    if (likely(isless(ua.h, ub.h))) {
   4050        return float_relation_less;
   4051    }
   4052    /*
   4053     * The only condition remaining is unordered.
   4054     * Fall through to set flags.
   4055     */
   4056 soft:
   4057    return float64_do_compare(ua.s, ub.s, s, is_quiet);
   4058}
   4059
   4060FloatRelation float64_compare(float64 a, float64 b, float_status *s)
   4061{
   4062    return float64_hs_compare(a, b, s, false);
   4063}
   4064
   4065FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
   4066{
   4067    return float64_hs_compare(a, b, s, true);
   4068}
   4069
   4070static FloatRelation QEMU_FLATTEN
   4071bfloat16_do_compare(bfloat16 a, bfloat16 b, float_status *s, bool is_quiet)
   4072{
   4073    FloatParts64 pa, pb;
   4074
   4075    bfloat16_unpack_canonical(&pa, a, s);
   4076    bfloat16_unpack_canonical(&pb, b, s);
   4077    return parts_compare(&pa, &pb, s, is_quiet);
   4078}
   4079
   4080FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
   4081{
   4082    return bfloat16_do_compare(a, b, s, false);
   4083}
   4084
   4085FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
   4086{
   4087    return bfloat16_do_compare(a, b, s, true);
   4088}
   4089
   4090static FloatRelation QEMU_FLATTEN
   4091float128_do_compare(float128 a, float128 b, float_status *s, bool is_quiet)
   4092{
   4093    FloatParts128 pa, pb;
   4094
   4095    float128_unpack_canonical(&pa, a, s);
   4096    float128_unpack_canonical(&pb, b, s);
   4097    return parts_compare(&pa, &pb, s, is_quiet);
   4098}
   4099
   4100FloatRelation float128_compare(float128 a, float128 b, float_status *s)
   4101{
   4102    return float128_do_compare(a, b, s, false);
   4103}
   4104
   4105FloatRelation float128_compare_quiet(float128 a, float128 b, float_status *s)
   4106{
   4107    return float128_do_compare(a, b, s, true);
   4108}
   4109
   4110static FloatRelation QEMU_FLATTEN
   4111floatx80_do_compare(floatx80 a, floatx80 b, float_status *s, bool is_quiet)
   4112{
   4113    FloatParts128 pa, pb;
   4114
   4115    if (!floatx80_unpack_canonical(&pa, a, s) ||
   4116        !floatx80_unpack_canonical(&pb, b, s)) {
   4117        return float_relation_unordered;
   4118    }
   4119    return parts_compare(&pa, &pb, s, is_quiet);
   4120}
   4121
   4122FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *s)
   4123{
   4124    return floatx80_do_compare(a, b, s, false);
   4125}
   4126
   4127FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *s)
   4128{
   4129    return floatx80_do_compare(a, b, s, true);
   4130}
   4131
   4132/*
   4133 * Scale by 2**N
   4134 */
   4135
   4136float16 float16_scalbn(float16 a, int n, float_status *status)
   4137{
   4138    FloatParts64 p;
   4139
   4140    float16_unpack_canonical(&p, a, status);
   4141    parts_scalbn(&p, n, status);
   4142    return float16_round_pack_canonical(&p, status);
   4143}
   4144
   4145float32 float32_scalbn(float32 a, int n, float_status *status)
   4146{
   4147    FloatParts64 p;
   4148
   4149    float32_unpack_canonical(&p, a, status);
   4150    parts_scalbn(&p, n, status);
   4151    return float32_round_pack_canonical(&p, status);
   4152}
   4153
   4154float64 float64_scalbn(float64 a, int n, float_status *status)
   4155{
   4156    FloatParts64 p;
   4157
   4158    float64_unpack_canonical(&p, a, status);
   4159    parts_scalbn(&p, n, status);
   4160    return float64_round_pack_canonical(&p, status);
   4161}
   4162
   4163bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
   4164{
   4165    FloatParts64 p;
   4166
   4167    bfloat16_unpack_canonical(&p, a, status);
   4168    parts_scalbn(&p, n, status);
   4169    return bfloat16_round_pack_canonical(&p, status);
   4170}
   4171
   4172float128 float128_scalbn(float128 a, int n, float_status *status)
   4173{
   4174    FloatParts128 p;
   4175
   4176    float128_unpack_canonical(&p, a, status);
   4177    parts_scalbn(&p, n, status);
   4178    return float128_round_pack_canonical(&p, status);
   4179}
   4180
   4181floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
   4182{
   4183    FloatParts128 p;
   4184
   4185    if (!floatx80_unpack_canonical(&p, a, status)) {
   4186        return floatx80_default_nan(status);
   4187    }
   4188    parts_scalbn(&p, n, status);
   4189    return floatx80_round_pack_canonical(&p, status);
   4190}
   4191
   4192/*
   4193 * Square Root
   4194 */
   4195
   4196float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
   4197{
   4198    FloatParts64 p;
   4199
   4200    float16_unpack_canonical(&p, a, status);
   4201    parts_sqrt(&p, status, &float16_params);
   4202    return float16_round_pack_canonical(&p, status);
   4203}
   4204
   4205static float32 QEMU_SOFTFLOAT_ATTR
   4206soft_f32_sqrt(float32 a, float_status *status)
   4207{
   4208    FloatParts64 p;
   4209
   4210    float32_unpack_canonical(&p, a, status);
   4211    parts_sqrt(&p, status, &float32_params);
   4212    return float32_round_pack_canonical(&p, status);
   4213}
   4214
   4215static float64 QEMU_SOFTFLOAT_ATTR
   4216soft_f64_sqrt(float64 a, float_status *status)
   4217{
   4218    FloatParts64 p;
   4219
   4220    float64_unpack_canonical(&p, a, status);
   4221    parts_sqrt(&p, status, &float64_params);
   4222    return float64_round_pack_canonical(&p, status);
   4223}
   4224
   4225float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
   4226{
   4227    union_float32 ua, ur;
   4228
   4229    ua.s = xa;
   4230    if (unlikely(!can_use_fpu(s))) {
   4231        goto soft;
   4232    }
   4233
   4234    float32_input_flush1(&ua.s, s);
   4235    if (QEMU_HARDFLOAT_1F32_USE_FP) {
   4236        if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
   4237                       fpclassify(ua.h) == FP_ZERO) ||
   4238                     signbit(ua.h))) {
   4239            goto soft;
   4240        }
   4241    } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
   4242                        float32_is_neg(ua.s))) {
   4243        goto soft;
   4244    }
   4245    ur.h = sqrtf(ua.h);
   4246    return ur.s;
   4247
   4248 soft:
   4249    return soft_f32_sqrt(ua.s, s);
   4250}
   4251
   4252float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
   4253{
   4254    union_float64 ua, ur;
   4255
   4256    ua.s = xa;
   4257    if (unlikely(!can_use_fpu(s))) {
   4258        goto soft;
   4259    }
   4260
   4261    float64_input_flush1(&ua.s, s);
   4262    if (QEMU_HARDFLOAT_1F64_USE_FP) {
   4263        if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
   4264                       fpclassify(ua.h) == FP_ZERO) ||
   4265                     signbit(ua.h))) {
   4266            goto soft;
   4267        }
   4268    } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
   4269                        float64_is_neg(ua.s))) {
   4270        goto soft;
   4271    }
   4272    ur.h = sqrt(ua.h);
   4273    return ur.s;
   4274
   4275 soft:
   4276    return soft_f64_sqrt(ua.s, s);
   4277}
   4278
   4279bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
   4280{
   4281    FloatParts64 p;
   4282
   4283    bfloat16_unpack_canonical(&p, a, status);
   4284    parts_sqrt(&p, status, &bfloat16_params);
   4285    return bfloat16_round_pack_canonical(&p, status);
   4286}
   4287
   4288float128 QEMU_FLATTEN float128_sqrt(float128 a, float_status *status)
   4289{
   4290    FloatParts128 p;
   4291
   4292    float128_unpack_canonical(&p, a, status);
   4293    parts_sqrt(&p, status, &float128_params);
   4294    return float128_round_pack_canonical(&p, status);
   4295}
   4296
   4297floatx80 floatx80_sqrt(floatx80 a, float_status *s)
   4298{
   4299    FloatParts128 p;
   4300
   4301    if (!floatx80_unpack_canonical(&p, a, s)) {
   4302        return floatx80_default_nan(s);
   4303    }
   4304    parts_sqrt(&p, s, &floatx80_params[s->floatx80_rounding_precision]);
   4305    return floatx80_round_pack_canonical(&p, s);
   4306}
   4307
   4308/*
   4309 * log2
   4310 */
   4311float32 float32_log2(float32 a, float_status *status)
   4312{
   4313    FloatParts64 p;
   4314
   4315    float32_unpack_canonical(&p, a, status);
   4316    parts_log2(&p, status, &float32_params);
   4317    return float32_round_pack_canonical(&p, status);
   4318}
   4319
   4320float64 float64_log2(float64 a, float_status *status)
   4321{
   4322    FloatParts64 p;
   4323
   4324    float64_unpack_canonical(&p, a, status);
   4325    parts_log2(&p, status, &float64_params);
   4326    return float64_round_pack_canonical(&p, status);
   4327}
   4328
   4329/*----------------------------------------------------------------------------
   4330| The pattern for a default generated NaN.
   4331*----------------------------------------------------------------------------*/
   4332
   4333float16 float16_default_nan(float_status *status)
   4334{
   4335    FloatParts64 p;
   4336
   4337    parts_default_nan(&p, status);
   4338    p.frac >>= float16_params.frac_shift;
   4339    return float16_pack_raw(&p);
   4340}
   4341
   4342float32 float32_default_nan(float_status *status)
   4343{
   4344    FloatParts64 p;
   4345
   4346    parts_default_nan(&p, status);
   4347    p.frac >>= float32_params.frac_shift;
   4348    return float32_pack_raw(&p);
   4349}
   4350
   4351float64 float64_default_nan(float_status *status)
   4352{
   4353    FloatParts64 p;
   4354
   4355    parts_default_nan(&p, status);
   4356    p.frac >>= float64_params.frac_shift;
   4357    return float64_pack_raw(&p);
   4358}
   4359
   4360float128 float128_default_nan(float_status *status)
   4361{
   4362    FloatParts128 p;
   4363
   4364    parts_default_nan(&p, status);
   4365    frac_shr(&p, float128_params.frac_shift);
   4366    return float128_pack_raw(&p);
   4367}
   4368
   4369bfloat16 bfloat16_default_nan(float_status *status)
   4370{
   4371    FloatParts64 p;
   4372
   4373    parts_default_nan(&p, status);
   4374    p.frac >>= bfloat16_params.frac_shift;
   4375    return bfloat16_pack_raw(&p);
   4376}
   4377
   4378/*----------------------------------------------------------------------------
   4379| Returns a quiet NaN from a signalling NaN for the floating point value `a'.
   4380*----------------------------------------------------------------------------*/
   4381
   4382float16 float16_silence_nan(float16 a, float_status *status)
   4383{
   4384    FloatParts64 p;
   4385
   4386    float16_unpack_raw(&p, a);
   4387    p.frac <<= float16_params.frac_shift;
   4388    parts_silence_nan(&p, status);
   4389    p.frac >>= float16_params.frac_shift;
   4390    return float16_pack_raw(&p);
   4391}
   4392
   4393float32 float32_silence_nan(float32 a, float_status *status)
   4394{
   4395    FloatParts64 p;
   4396
   4397    float32_unpack_raw(&p, a);
   4398    p.frac <<= float32_params.frac_shift;
   4399    parts_silence_nan(&p, status);
   4400    p.frac >>= float32_params.frac_shift;
   4401    return float32_pack_raw(&p);
   4402}
   4403
   4404float64 float64_silence_nan(float64 a, float_status *status)
   4405{
   4406    FloatParts64 p;
   4407
   4408    float64_unpack_raw(&p, a);
   4409    p.frac <<= float64_params.frac_shift;
   4410    parts_silence_nan(&p, status);
   4411    p.frac >>= float64_params.frac_shift;
   4412    return float64_pack_raw(&p);
   4413}
   4414
   4415bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
   4416{
   4417    FloatParts64 p;
   4418
   4419    bfloat16_unpack_raw(&p, a);
   4420    p.frac <<= bfloat16_params.frac_shift;
   4421    parts_silence_nan(&p, status);
   4422    p.frac >>= bfloat16_params.frac_shift;
   4423    return bfloat16_pack_raw(&p);
   4424}
   4425
   4426float128 float128_silence_nan(float128 a, float_status *status)
   4427{
   4428    FloatParts128 p;
   4429
   4430    float128_unpack_raw(&p, a);
   4431    frac_shl(&p, float128_params.frac_shift);
   4432    parts_silence_nan(&p, status);
   4433    frac_shr(&p, float128_params.frac_shift);
   4434    return float128_pack_raw(&p);
   4435}
   4436
   4437/*----------------------------------------------------------------------------
   4438| If `a' is denormal and we are in flush-to-zero mode then set the
   4439| input-denormal exception and return zero. Otherwise just return the value.
   4440*----------------------------------------------------------------------------*/
   4441
   4442static bool parts_squash_denormal(FloatParts64 p, float_status *status)
   4443{
   4444    if (p.exp == 0 && p.frac != 0) {
   4445        float_raise(float_flag_input_denormal, status);
   4446        return true;
   4447    }
   4448
   4449    return false;
   4450}
   4451
   4452float16 float16_squash_input_denormal(float16 a, float_status *status)
   4453{
   4454    if (status->flush_inputs_to_zero) {
   4455        FloatParts64 p;
   4456
   4457        float16_unpack_raw(&p, a);
   4458        if (parts_squash_denormal(p, status)) {
   4459            return float16_set_sign(float16_zero, p.sign);
   4460        }
   4461    }
   4462    return a;
   4463}
   4464
   4465float32 float32_squash_input_denormal(float32 a, float_status *status)
   4466{
   4467    if (status->flush_inputs_to_zero) {
   4468        FloatParts64 p;
   4469
   4470        float32_unpack_raw(&p, a);
   4471        if (parts_squash_denormal(p, status)) {
   4472            return float32_set_sign(float32_zero, p.sign);
   4473        }
   4474    }
   4475    return a;
   4476}
   4477
   4478float64 float64_squash_input_denormal(float64 a, float_status *status)
   4479{
   4480    if (status->flush_inputs_to_zero) {
   4481        FloatParts64 p;
   4482
   4483        float64_unpack_raw(&p, a);
   4484        if (parts_squash_denormal(p, status)) {
   4485            return float64_set_sign(float64_zero, p.sign);
   4486        }
   4487    }
   4488    return a;
   4489}
   4490
   4491bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
   4492{
   4493    if (status->flush_inputs_to_zero) {
   4494        FloatParts64 p;
   4495
   4496        bfloat16_unpack_raw(&p, a);
   4497        if (parts_squash_denormal(p, status)) {
   4498            return bfloat16_set_sign(bfloat16_zero, p.sign);
   4499        }
   4500    }
   4501    return a;
   4502}
   4503
   4504/*----------------------------------------------------------------------------
   4505| Normalizes the subnormal extended double-precision floating-point value
   4506| represented by the denormalized significand `aSig'.  The normalized exponent
   4507| and significand are stored at the locations pointed to by `zExpPtr' and
   4508| `zSigPtr', respectively.
   4509*----------------------------------------------------------------------------*/
   4510
   4511void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
   4512                                uint64_t *zSigPtr)
   4513{
   4514    int8_t shiftCount;
   4515
   4516    shiftCount = clz64(aSig);
   4517    *zSigPtr = aSig<<shiftCount;
   4518    *zExpPtr = 1 - shiftCount;
   4519}
   4520
   4521/*----------------------------------------------------------------------------
   4522| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
   4523| and extended significand formed by the concatenation of `zSig0' and `zSig1',
   4524| and returns the proper extended double-precision floating-point value
   4525| corresponding to the abstract input.  Ordinarily, the abstract value is
   4526| rounded and packed into the extended double-precision format, with the
   4527| inexact exception raised if the abstract input cannot be represented
   4528| exactly.  However, if the abstract value is too large, the overflow and
   4529| inexact exceptions are raised and an infinity or maximal finite value is
   4530| returned.  If the abstract value is too small, the input value is rounded to
   4531| a subnormal number, and the underflow and inexact exceptions are raised if
   4532| the abstract input cannot be represented exactly as a subnormal extended
   4533| double-precision floating-point number.
   4534|     If `roundingPrecision' is floatx80_precision_s or floatx80_precision_d,
   4535| the result is rounded to the same number of bits as single or double
   4536| precision, respectively.  Otherwise, the result is rounded to the full
   4537| precision of the extended double-precision format.
   4538|     The input significand must be normalized or smaller.  If the input
   4539| significand is not normalized, `zExp' must be 0; in that case, the result
   4540| returned is a subnormal number, and it must not require rounding.  The
   4541| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
   4542| Floating-Point Arithmetic.
   4543*----------------------------------------------------------------------------*/
   4544
   4545floatx80 roundAndPackFloatx80(FloatX80RoundPrec roundingPrecision, bool zSign,
   4546                              int32_t zExp, uint64_t zSig0, uint64_t zSig1,
   4547                              float_status *status)
   4548{
   4549    FloatRoundMode roundingMode;
   4550    bool roundNearestEven, increment, isTiny;
   4551    int64_t roundIncrement, roundMask, roundBits;
   4552
   4553    roundingMode = status->float_rounding_mode;
   4554    roundNearestEven = ( roundingMode == float_round_nearest_even );
   4555    switch (roundingPrecision) {
   4556    case floatx80_precision_x:
   4557        goto precision80;
   4558    case floatx80_precision_d:
   4559        roundIncrement = UINT64_C(0x0000000000000400);
   4560        roundMask = UINT64_C(0x00000000000007FF);
   4561        break;
   4562    case floatx80_precision_s:
   4563        roundIncrement = UINT64_C(0x0000008000000000);
   4564        roundMask = UINT64_C(0x000000FFFFFFFFFF);
   4565        break;
   4566    default:
   4567        g_assert_not_reached();
   4568    }
   4569    zSig0 |= ( zSig1 != 0 );
   4570    switch (roundingMode) {
   4571    case float_round_nearest_even:
   4572    case float_round_ties_away:
   4573        break;
   4574    case float_round_to_zero:
   4575        roundIncrement = 0;
   4576        break;
   4577    case float_round_up:
   4578        roundIncrement = zSign ? 0 : roundMask;
   4579        break;
   4580    case float_round_down:
   4581        roundIncrement = zSign ? roundMask : 0;
   4582        break;
   4583    default:
   4584        abort();
   4585    }
   4586    roundBits = zSig0 & roundMask;
   4587    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
   4588        if (    ( 0x7FFE < zExp )
   4589             || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
   4590           ) {
   4591            goto overflow;
   4592        }
   4593        if ( zExp <= 0 ) {
   4594            if (status->flush_to_zero) {
   4595                float_raise(float_flag_output_denormal, status);
   4596                return packFloatx80(zSign, 0, 0);
   4597            }
   4598            isTiny = status->tininess_before_rounding
   4599                  || (zExp < 0 )
   4600                  || (zSig0 <= zSig0 + roundIncrement);
   4601            shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
   4602            zExp = 0;
   4603            roundBits = zSig0 & roundMask;
   4604            if (isTiny && roundBits) {
   4605                float_raise(float_flag_underflow, status);
   4606            }
   4607            if (roundBits) {
   4608                float_raise(float_flag_inexact, status);
   4609            }
   4610            zSig0 += roundIncrement;
   4611            if ( (int64_t) zSig0 < 0 ) zExp = 1;
   4612            roundIncrement = roundMask + 1;
   4613            if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
   4614                roundMask |= roundIncrement;
   4615            }
   4616            zSig0 &= ~ roundMask;
   4617            return packFloatx80( zSign, zExp, zSig0 );
   4618        }
   4619    }
   4620    if (roundBits) {
   4621        float_raise(float_flag_inexact, status);
   4622    }
   4623    zSig0 += roundIncrement;
   4624    if ( zSig0 < roundIncrement ) {
   4625        ++zExp;
   4626        zSig0 = UINT64_C(0x8000000000000000);
   4627    }
   4628    roundIncrement = roundMask + 1;
   4629    if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
   4630        roundMask |= roundIncrement;
   4631    }
   4632    zSig0 &= ~ roundMask;
   4633    if ( zSig0 == 0 ) zExp = 0;
   4634    return packFloatx80( zSign, zExp, zSig0 );
   4635 precision80:
   4636    switch (roundingMode) {
   4637    case float_round_nearest_even:
   4638    case float_round_ties_away:
   4639        increment = ((int64_t)zSig1 < 0);
   4640        break;
   4641    case float_round_to_zero:
   4642        increment = 0;
   4643        break;
   4644    case float_round_up:
   4645        increment = !zSign && zSig1;
   4646        break;
   4647    case float_round_down:
   4648        increment = zSign && zSig1;
   4649        break;
   4650    default:
   4651        abort();
   4652    }
   4653    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
   4654        if (    ( 0x7FFE < zExp )
   4655             || (    ( zExp == 0x7FFE )
   4656                  && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
   4657                  && increment
   4658                )
   4659           ) {
   4660            roundMask = 0;
   4661 overflow:
   4662            float_raise(float_flag_overflow | float_flag_inexact, status);
   4663            if (    ( roundingMode == float_round_to_zero )
   4664                 || ( zSign && ( roundingMode == float_round_up ) )
   4665                 || ( ! zSign && ( roundingMode == float_round_down ) )
   4666               ) {
   4667                return packFloatx80( zSign, 0x7FFE, ~ roundMask );
   4668            }
   4669            return packFloatx80(zSign,
   4670                                floatx80_infinity_high,
   4671                                floatx80_infinity_low);
   4672        }
   4673        if ( zExp <= 0 ) {
   4674            isTiny = status->tininess_before_rounding
   4675                  || (zExp < 0)
   4676                  || !increment
   4677                  || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
   4678            shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
   4679            zExp = 0;
   4680            if (isTiny && zSig1) {
   4681                float_raise(float_flag_underflow, status);
   4682            }
   4683            if (zSig1) {
   4684                float_raise(float_flag_inexact, status);
   4685            }
   4686            switch (roundingMode) {
   4687            case float_round_nearest_even:
   4688            case float_round_ties_away:
   4689                increment = ((int64_t)zSig1 < 0);
   4690                break;
   4691            case float_round_to_zero:
   4692                increment = 0;
   4693                break;
   4694            case float_round_up:
   4695                increment = !zSign && zSig1;
   4696                break;
   4697            case float_round_down:
   4698                increment = zSign && zSig1;
   4699                break;
   4700            default:
   4701                abort();
   4702            }
   4703            if ( increment ) {
   4704                ++zSig0;
   4705                if (!(zSig1 << 1) && roundNearestEven) {
   4706                    zSig0 &= ~1;
   4707                }
   4708                if ( (int64_t) zSig0 < 0 ) zExp = 1;
   4709            }
   4710            return packFloatx80( zSign, zExp, zSig0 );
   4711        }
   4712    }
   4713    if (zSig1) {
   4714        float_raise(float_flag_inexact, status);
   4715    }
   4716    if ( increment ) {
   4717        ++zSig0;
   4718        if ( zSig0 == 0 ) {
   4719            ++zExp;
   4720            zSig0 = UINT64_C(0x8000000000000000);
   4721        }
   4722        else {
   4723            if (!(zSig1 << 1) && roundNearestEven) {
   4724                zSig0 &= ~1;
   4725            }
   4726        }
   4727    }
   4728    else {
   4729        if ( zSig0 == 0 ) zExp = 0;
   4730    }
   4731    return packFloatx80( zSign, zExp, zSig0 );
   4732
   4733}
   4734
   4735/*----------------------------------------------------------------------------
   4736| Takes an abstract floating-point value having sign `zSign', exponent
   4737| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
   4738| and returns the proper extended double-precision floating-point value
   4739| corresponding to the abstract input.  This routine is just like
   4740| `roundAndPackFloatx80' except that the input significand does not have to be
   4741| normalized.
   4742*----------------------------------------------------------------------------*/
   4743
   4744floatx80 normalizeRoundAndPackFloatx80(FloatX80RoundPrec roundingPrecision,
   4745                                       bool zSign, int32_t zExp,
   4746                                       uint64_t zSig0, uint64_t zSig1,
   4747                                       float_status *status)
   4748{
   4749    int8_t shiftCount;
   4750
   4751    if ( zSig0 == 0 ) {
   4752        zSig0 = zSig1;
   4753        zSig1 = 0;
   4754        zExp -= 64;
   4755    }
   4756    shiftCount = clz64(zSig0);
   4757    shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
   4758    zExp -= shiftCount;
   4759    return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
   4760                                zSig0, zSig1, status);
   4761
   4762}
   4763
   4764/*----------------------------------------------------------------------------
   4765| Returns the binary exponential of the single-precision floating-point value
   4766| `a'. The operation is performed according to the IEC/IEEE Standard for
   4767| Binary Floating-Point Arithmetic.
   4768|
   4769| Uses the following identities:
   4770|
   4771| 1. -------------------------------------------------------------------------
   4772|      x    x*ln(2)
   4773|     2  = e
   4774|
   4775| 2. -------------------------------------------------------------------------
   4776|                      2     3     4     5           n
   4777|      x        x     x     x     x     x           x
   4778|     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
   4779|               1!    2!    3!    4!    5!          n!
   4780*----------------------------------------------------------------------------*/
   4781
   4782static const float64 float32_exp2_coefficients[15] =
   4783{
   4784    const_float64( 0x3ff0000000000000ll ), /*  1 */
   4785    const_float64( 0x3fe0000000000000ll ), /*  2 */
   4786    const_float64( 0x3fc5555555555555ll ), /*  3 */
   4787    const_float64( 0x3fa5555555555555ll ), /*  4 */
   4788    const_float64( 0x3f81111111111111ll ), /*  5 */
   4789    const_float64( 0x3f56c16c16c16c17ll ), /*  6 */
   4790    const_float64( 0x3f2a01a01a01a01all ), /*  7 */
   4791    const_float64( 0x3efa01a01a01a01all ), /*  8 */
   4792    const_float64( 0x3ec71de3a556c734ll ), /*  9 */
   4793    const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
   4794    const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
   4795    const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
   4796    const_float64( 0x3de6124613a86d09ll ), /* 13 */
   4797    const_float64( 0x3da93974a8c07c9dll ), /* 14 */
   4798    const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
   4799};
   4800
   4801float32 float32_exp2(float32 a, float_status *status)
   4802{
   4803    FloatParts64 xp, xnp, tp, rp;
   4804    int i;
   4805
   4806    float32_unpack_canonical(&xp, a, status);
   4807    if (unlikely(xp.cls != float_class_normal)) {
   4808        switch (xp.cls) {
   4809        case float_class_snan:
   4810        case float_class_qnan:
   4811            parts_return_nan(&xp, status);
   4812            return float32_round_pack_canonical(&xp, status);
   4813        case float_class_inf:
   4814            return xp.sign ? float32_zero : a;
   4815        case float_class_zero:
   4816            return float32_one;
   4817        default:
   4818            break;
   4819        }
   4820        g_assert_not_reached();
   4821    }
   4822
   4823    float_raise(float_flag_inexact, status);
   4824
   4825    float64_unpack_canonical(&tp, float64_ln2, status);
   4826    xp = *parts_mul(&xp, &tp, status);
   4827    xnp = xp;
   4828
   4829    float64_unpack_canonical(&rp, float64_one, status);
   4830    for (i = 0 ; i < 15 ; i++) {
   4831        float64_unpack_canonical(&tp, float32_exp2_coefficients[i], status);
   4832        rp = *parts_muladd(&tp, &xp, &rp, 0, status);
   4833        xnp = *parts_mul(&xnp, &xp, status);
   4834    }
   4835
   4836    return float32_round_pack_canonical(&rp, status);
   4837}
   4838
   4839/*----------------------------------------------------------------------------
   4840| Rounds the extended double-precision floating-point value `a'
   4841| to the precision provided by floatx80_rounding_precision and returns the
   4842| result as an extended double-precision floating-point value.
   4843| The operation is performed according to the IEC/IEEE Standard for Binary
   4844| Floating-Point Arithmetic.
   4845*----------------------------------------------------------------------------*/
   4846
   4847floatx80 floatx80_round(floatx80 a, float_status *status)
   4848{
   4849    FloatParts128 p;
   4850
   4851    if (!floatx80_unpack_canonical(&p, a, status)) {
   4852        return floatx80_default_nan(status);
   4853    }
   4854    return floatx80_round_pack_canonical(&p, status);
   4855}
   4856
   4857static void __attribute__((constructor)) softfloat_init(void)
   4858{
   4859    union_float64 ua, ub, uc, ur;
   4860
   4861    if (QEMU_NO_HARDFLOAT) {
   4862        return;
   4863    }
   4864    /*
   4865     * Test that the host's FMA is not obviously broken. For example,
   4866     * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
   4867     *   https://sourceware.org/bugzilla/show_bug.cgi?id=13304
   4868     */
   4869    ua.s = 0x0020000000000001ULL;
   4870    ub.s = 0x3ca0000000000000ULL;
   4871    uc.s = 0x0020000000000000ULL;
   4872    ur.h = fma(ua.h, ub.h, uc.h);
   4873    if (ur.s != 0x0020000000000001ULL) {
   4874        force_soft_fma = true;
   4875    }
   4876}