cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

softfloat.c (22912B)


      1/*
      2 * Floating point emulation support for subnormalised numbers on SH4
      3 * architecture This file is derived from the SoftFloat IEC/IEEE
      4 * Floating-point Arithmetic Package, Release 2 the original license of
      5 * which is reproduced below.
      6 *
      7 * ========================================================================
      8 *
      9 * This C source file is part of the SoftFloat IEC/IEEE Floating-point
     10 * Arithmetic Package, Release 2.
     11 *
     12 * Written by John R. Hauser.  This work was made possible in part by the
     13 * International Computer Science Institute, located at Suite 600, 1947 Center
     14 * Street, Berkeley, California 94704.  Funding was partially provided by the
     15 * National Science Foundation under grant MIP-9311980.  The original version
     16 * of this code was written as part of a project to build a fixed-point vector
     17 * processor in collaboration with the University of California at Berkeley,
     18 * overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
     19 * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
     20 * arithmetic/softfloat.html'.
     21 *
     22 * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
     23 * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
     24 * TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
     25 * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
     26 * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
     27 *
     28 * Derivative works are acceptable, even for commercial purposes, so long as
     29 * (1) they include prominent notice that the work is derivative, and (2) they
     30 * include prominent notice akin to these three paragraphs for those parts of
     31 * this code that are retained.
     32 *
     33 * ========================================================================
     34 *
     35 * SH4 modifications by Ismail Dhaoui <ismail.dhaoui@st.com>
     36 * and Kamel Khelifi <kamel.khelifi@st.com>
     37 */
     38#include <linux/kernel.h>
     39#include <cpu/fpu.h>
     40#include <asm/div64.h>
     41
     42#define LIT64( a ) a##LL
     43
     44typedef char flag;
     45typedef unsigned char uint8;
     46typedef signed char int8;
     47typedef int uint16;
     48typedef int int16;
     49typedef unsigned int uint32;
     50typedef signed int int32;
     51
     52typedef unsigned long long int bits64;
     53typedef signed long long int sbits64;
     54
     55typedef unsigned char bits8;
     56typedef signed char sbits8;
     57typedef unsigned short int bits16;
     58typedef signed short int sbits16;
     59typedef unsigned int bits32;
     60typedef signed int sbits32;
     61
     62typedef unsigned long long int uint64;
     63typedef signed long long int int64;
     64
     65typedef unsigned long int float32;
     66typedef unsigned long long float64;
     67
     68extern void float_raise(unsigned int flags);	/* in fpu.c */
     69extern int float_rounding_mode(void);	/* in fpu.c */
     70
     71bits64 extractFloat64Frac(float64 a);
     72flag extractFloat64Sign(float64 a);
     73int16 extractFloat64Exp(float64 a);
     74int16 extractFloat32Exp(float32 a);
     75flag extractFloat32Sign(float32 a);
     76bits32 extractFloat32Frac(float32 a);
     77float64 packFloat64(flag zSign, int16 zExp, bits64 zSig);
     78void shift64RightJamming(bits64 a, int16 count, bits64 * zPtr);
     79float32 packFloat32(flag zSign, int16 zExp, bits32 zSig);
     80void shift32RightJamming(bits32 a, int16 count, bits32 * zPtr);
     81float64 float64_sub(float64 a, float64 b);
     82float32 float32_sub(float32 a, float32 b);
     83float32 float32_add(float32 a, float32 b);
     84float64 float64_add(float64 a, float64 b);
     85float64 float64_div(float64 a, float64 b);
     86float32 float32_div(float32 a, float32 b);
     87float32 float32_mul(float32 a, float32 b);
     88float64 float64_mul(float64 a, float64 b);
     89float32 float64_to_float32(float64 a);
     90void add128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr,
     91		   bits64 * z1Ptr);
     92void sub128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr,
     93		   bits64 * z1Ptr);
     94void mul64To128(bits64 a, bits64 b, bits64 * z0Ptr, bits64 * z1Ptr);
     95
     96static int8 countLeadingZeros32(bits32 a);
     97static int8 countLeadingZeros64(bits64 a);
     98static float64 normalizeRoundAndPackFloat64(flag zSign, int16 zExp,
     99					    bits64 zSig);
    100static float64 subFloat64Sigs(float64 a, float64 b, flag zSign);
    101static float64 addFloat64Sigs(float64 a, float64 b, flag zSign);
    102static float32 roundAndPackFloat32(flag zSign, int16 zExp, bits32 zSig);
    103static float32 normalizeRoundAndPackFloat32(flag zSign, int16 zExp,
    104					    bits32 zSig);
    105static float64 roundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig);
    106static float32 subFloat32Sigs(float32 a, float32 b, flag zSign);
    107static float32 addFloat32Sigs(float32 a, float32 b, flag zSign);
    108static void normalizeFloat64Subnormal(bits64 aSig, int16 * zExpPtr,
    109				      bits64 * zSigPtr);
    110static bits64 estimateDiv128To64(bits64 a0, bits64 a1, bits64 b);
    111static void normalizeFloat32Subnormal(bits32 aSig, int16 * zExpPtr,
    112				      bits32 * zSigPtr);
    113
    114bits64 extractFloat64Frac(float64 a)
    115{
    116	return a & LIT64(0x000FFFFFFFFFFFFF);
    117}
    118
    119flag extractFloat64Sign(float64 a)
    120{
    121	return a >> 63;
    122}
    123
    124int16 extractFloat64Exp(float64 a)
    125{
    126	return (a >> 52) & 0x7FF;
    127}
    128
    129int16 extractFloat32Exp(float32 a)
    130{
    131	return (a >> 23) & 0xFF;
    132}
    133
    134flag extractFloat32Sign(float32 a)
    135{
    136	return a >> 31;
    137}
    138
    139bits32 extractFloat32Frac(float32 a)
    140{
    141	return a & 0x007FFFFF;
    142}
    143
    144float64 packFloat64(flag zSign, int16 zExp, bits64 zSig)
    145{
    146	return (((bits64) zSign) << 63) + (((bits64) zExp) << 52) + zSig;
    147}
    148
    149void shift64RightJamming(bits64 a, int16 count, bits64 * zPtr)
    150{
    151	bits64 z;
    152
    153	if (count == 0) {
    154		z = a;
    155	} else if (count < 64) {
    156		z = (a >> count) | ((a << ((-count) & 63)) != 0);
    157	} else {
    158		z = (a != 0);
    159	}
    160	*zPtr = z;
    161}
    162
    163static int8 countLeadingZeros32(bits32 a)
    164{
    165	static const int8 countLeadingZerosHigh[] = {
    166		8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
    167		3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    168		2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    169		2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    170		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    171		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    172		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    173		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    174		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    175		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    176		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    177		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    178		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    179		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    180		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    181		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    182	};
    183	int8 shiftCount;
    184
    185	shiftCount = 0;
    186	if (a < 0x10000) {
    187		shiftCount += 16;
    188		a <<= 16;
    189	}
    190	if (a < 0x1000000) {
    191		shiftCount += 8;
    192		a <<= 8;
    193	}
    194	shiftCount += countLeadingZerosHigh[a >> 24];
    195	return shiftCount;
    196
    197}
    198
    199static int8 countLeadingZeros64(bits64 a)
    200{
    201	int8 shiftCount;
    202
    203	shiftCount = 0;
    204	if (a < ((bits64) 1) << 32) {
    205		shiftCount += 32;
    206	} else {
    207		a >>= 32;
    208	}
    209	shiftCount += countLeadingZeros32(a);
    210	return shiftCount;
    211
    212}
    213
    214static float64 normalizeRoundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig)
    215{
    216	int8 shiftCount;
    217
    218	shiftCount = countLeadingZeros64(zSig) - 1;
    219	return roundAndPackFloat64(zSign, zExp - shiftCount,
    220				   zSig << shiftCount);
    221
    222}
    223
    224static float64 subFloat64Sigs(float64 a, float64 b, flag zSign)
    225{
    226	int16 aExp, bExp, zExp;
    227	bits64 aSig, bSig, zSig;
    228	int16 expDiff;
    229
    230	aSig = extractFloat64Frac(a);
    231	aExp = extractFloat64Exp(a);
    232	bSig = extractFloat64Frac(b);
    233	bExp = extractFloat64Exp(b);
    234	expDiff = aExp - bExp;
    235	aSig <<= 10;
    236	bSig <<= 10;
    237	if (0 < expDiff)
    238		goto aExpBigger;
    239	if (expDiff < 0)
    240		goto bExpBigger;
    241	if (aExp == 0) {
    242		aExp = 1;
    243		bExp = 1;
    244	}
    245	if (bSig < aSig)
    246		goto aBigger;
    247	if (aSig < bSig)
    248		goto bBigger;
    249	return packFloat64(float_rounding_mode() == FPSCR_RM_ZERO, 0, 0);
    250      bExpBigger:
    251	if (bExp == 0x7FF) {
    252		return packFloat64(zSign ^ 1, 0x7FF, 0);
    253	}
    254	if (aExp == 0) {
    255		++expDiff;
    256	} else {
    257		aSig |= LIT64(0x4000000000000000);
    258	}
    259	shift64RightJamming(aSig, -expDiff, &aSig);
    260	bSig |= LIT64(0x4000000000000000);
    261      bBigger:
    262	zSig = bSig - aSig;
    263	zExp = bExp;
    264	zSign ^= 1;
    265	goto normalizeRoundAndPack;
    266      aExpBigger:
    267	if (aExp == 0x7FF) {
    268		return a;
    269	}
    270	if (bExp == 0) {
    271		--expDiff;
    272	} else {
    273		bSig |= LIT64(0x4000000000000000);
    274	}
    275	shift64RightJamming(bSig, expDiff, &bSig);
    276	aSig |= LIT64(0x4000000000000000);
    277      aBigger:
    278	zSig = aSig - bSig;
    279	zExp = aExp;
    280      normalizeRoundAndPack:
    281	--zExp;
    282	return normalizeRoundAndPackFloat64(zSign, zExp, zSig);
    283
    284}
    285static float64 addFloat64Sigs(float64 a, float64 b, flag zSign)
    286{
    287	int16 aExp, bExp, zExp;
    288	bits64 aSig, bSig, zSig;
    289	int16 expDiff;
    290
    291	aSig = extractFloat64Frac(a);
    292	aExp = extractFloat64Exp(a);
    293	bSig = extractFloat64Frac(b);
    294	bExp = extractFloat64Exp(b);
    295	expDiff = aExp - bExp;
    296	aSig <<= 9;
    297	bSig <<= 9;
    298	if (0 < expDiff) {
    299		if (aExp == 0x7FF) {
    300			return a;
    301		}
    302		if (bExp == 0) {
    303			--expDiff;
    304		} else {
    305			bSig |= LIT64(0x2000000000000000);
    306		}
    307		shift64RightJamming(bSig, expDiff, &bSig);
    308		zExp = aExp;
    309	} else if (expDiff < 0) {
    310		if (bExp == 0x7FF) {
    311			return packFloat64(zSign, 0x7FF, 0);
    312		}
    313		if (aExp == 0) {
    314			++expDiff;
    315		} else {
    316			aSig |= LIT64(0x2000000000000000);
    317		}
    318		shift64RightJamming(aSig, -expDiff, &aSig);
    319		zExp = bExp;
    320	} else {
    321		if (aExp == 0x7FF) {
    322			return a;
    323		}
    324		if (aExp == 0)
    325			return packFloat64(zSign, 0, (aSig + bSig) >> 9);
    326		zSig = LIT64(0x4000000000000000) + aSig + bSig;
    327		zExp = aExp;
    328		goto roundAndPack;
    329	}
    330	aSig |= LIT64(0x2000000000000000);
    331	zSig = (aSig + bSig) << 1;
    332	--zExp;
    333	if ((sbits64) zSig < 0) {
    334		zSig = aSig + bSig;
    335		++zExp;
    336	}
    337      roundAndPack:
    338	return roundAndPackFloat64(zSign, zExp, zSig);
    339
    340}
    341
    342float32 packFloat32(flag zSign, int16 zExp, bits32 zSig)
    343{
    344	return (((bits32) zSign) << 31) + (((bits32) zExp) << 23) + zSig;
    345}
    346
    347void shift32RightJamming(bits32 a, int16 count, bits32 * zPtr)
    348{
    349	bits32 z;
    350	if (count == 0) {
    351		z = a;
    352	} else if (count < 32) {
    353		z = (a >> count) | ((a << ((-count) & 31)) != 0);
    354	} else {
    355		z = (a != 0);
    356	}
    357	*zPtr = z;
    358}
    359
    360static float32 roundAndPackFloat32(flag zSign, int16 zExp, bits32 zSig)
    361{
    362	flag roundNearestEven;
    363	int8 roundIncrement, roundBits;
    364	flag isTiny;
    365
    366	/* SH4 has only 2 rounding modes - round to nearest and round to zero */
    367	roundNearestEven = (float_rounding_mode() == FPSCR_RM_NEAREST);
    368	roundIncrement = 0x40;
    369	if (!roundNearestEven) {
    370		roundIncrement = 0;
    371	}
    372	roundBits = zSig & 0x7F;
    373	if (0xFD <= (bits16) zExp) {
    374		if ((0xFD < zExp)
    375		    || ((zExp == 0xFD)
    376			&& ((sbits32) (zSig + roundIncrement) < 0))
    377		    ) {
    378			float_raise(FPSCR_CAUSE_OVERFLOW | FPSCR_CAUSE_INEXACT);
    379			return packFloat32(zSign, 0xFF,
    380					   0) - (roundIncrement == 0);
    381		}
    382		if (zExp < 0) {
    383			isTiny = (zExp < -1)
    384			    || (zSig + roundIncrement < 0x80000000);
    385			shift32RightJamming(zSig, -zExp, &zSig);
    386			zExp = 0;
    387			roundBits = zSig & 0x7F;
    388			if (isTiny && roundBits)
    389				float_raise(FPSCR_CAUSE_UNDERFLOW);
    390		}
    391	}
    392	if (roundBits)
    393		float_raise(FPSCR_CAUSE_INEXACT);
    394	zSig = (zSig + roundIncrement) >> 7;
    395	zSig &= ~(((roundBits ^ 0x40) == 0) & roundNearestEven);
    396	if (zSig == 0)
    397		zExp = 0;
    398	return packFloat32(zSign, zExp, zSig);
    399
    400}
    401
    402static float32 normalizeRoundAndPackFloat32(flag zSign, int16 zExp, bits32 zSig)
    403{
    404	int8 shiftCount;
    405
    406	shiftCount = countLeadingZeros32(zSig) - 1;
    407	return roundAndPackFloat32(zSign, zExp - shiftCount,
    408				   zSig << shiftCount);
    409}
    410
    411static float64 roundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig)
    412{
    413	flag roundNearestEven;
    414	int16 roundIncrement, roundBits;
    415	flag isTiny;
    416
    417	/* SH4 has only 2 rounding modes - round to nearest and round to zero */
    418	roundNearestEven = (float_rounding_mode() == FPSCR_RM_NEAREST);
    419	roundIncrement = 0x200;
    420	if (!roundNearestEven) {
    421		roundIncrement = 0;
    422	}
    423	roundBits = zSig & 0x3FF;
    424	if (0x7FD <= (bits16) zExp) {
    425		if ((0x7FD < zExp)
    426		    || ((zExp == 0x7FD)
    427			&& ((sbits64) (zSig + roundIncrement) < 0))
    428		    ) {
    429			float_raise(FPSCR_CAUSE_OVERFLOW | FPSCR_CAUSE_INEXACT);
    430			return packFloat64(zSign, 0x7FF,
    431					   0) - (roundIncrement == 0);
    432		}
    433		if (zExp < 0) {
    434			isTiny = (zExp < -1)
    435			    || (zSig + roundIncrement <
    436				LIT64(0x8000000000000000));
    437			shift64RightJamming(zSig, -zExp, &zSig);
    438			zExp = 0;
    439			roundBits = zSig & 0x3FF;
    440			if (isTiny && roundBits)
    441				float_raise(FPSCR_CAUSE_UNDERFLOW);
    442		}
    443	}
    444	if (roundBits)
    445		float_raise(FPSCR_CAUSE_INEXACT);
    446	zSig = (zSig + roundIncrement) >> 10;
    447	zSig &= ~(((roundBits ^ 0x200) == 0) & roundNearestEven);
    448	if (zSig == 0)
    449		zExp = 0;
    450	return packFloat64(zSign, zExp, zSig);
    451
    452}
    453
    454static float32 subFloat32Sigs(float32 a, float32 b, flag zSign)
    455{
    456	int16 aExp, bExp, zExp;
    457	bits32 aSig, bSig, zSig;
    458	int16 expDiff;
    459
    460	aSig = extractFloat32Frac(a);
    461	aExp = extractFloat32Exp(a);
    462	bSig = extractFloat32Frac(b);
    463	bExp = extractFloat32Exp(b);
    464	expDiff = aExp - bExp;
    465	aSig <<= 7;
    466	bSig <<= 7;
    467	if (0 < expDiff)
    468		goto aExpBigger;
    469	if (expDiff < 0)
    470		goto bExpBigger;
    471	if (aExp == 0) {
    472		aExp = 1;
    473		bExp = 1;
    474	}
    475	if (bSig < aSig)
    476		goto aBigger;
    477	if (aSig < bSig)
    478		goto bBigger;
    479	return packFloat32(float_rounding_mode() == FPSCR_RM_ZERO, 0, 0);
    480      bExpBigger:
    481	if (bExp == 0xFF) {
    482		return packFloat32(zSign ^ 1, 0xFF, 0);
    483	}
    484	if (aExp == 0) {
    485		++expDiff;
    486	} else {
    487		aSig |= 0x40000000;
    488	}
    489	shift32RightJamming(aSig, -expDiff, &aSig);
    490	bSig |= 0x40000000;
    491      bBigger:
    492	zSig = bSig - aSig;
    493	zExp = bExp;
    494	zSign ^= 1;
    495	goto normalizeRoundAndPack;
    496      aExpBigger:
    497	if (aExp == 0xFF) {
    498		return a;
    499	}
    500	if (bExp == 0) {
    501		--expDiff;
    502	} else {
    503		bSig |= 0x40000000;
    504	}
    505	shift32RightJamming(bSig, expDiff, &bSig);
    506	aSig |= 0x40000000;
    507      aBigger:
    508	zSig = aSig - bSig;
    509	zExp = aExp;
    510      normalizeRoundAndPack:
    511	--zExp;
    512	return normalizeRoundAndPackFloat32(zSign, zExp, zSig);
    513
    514}
    515
    516static float32 addFloat32Sigs(float32 a, float32 b, flag zSign)
    517{
    518	int16 aExp, bExp, zExp;
    519	bits32 aSig, bSig, zSig;
    520	int16 expDiff;
    521
    522	aSig = extractFloat32Frac(a);
    523	aExp = extractFloat32Exp(a);
    524	bSig = extractFloat32Frac(b);
    525	bExp = extractFloat32Exp(b);
    526	expDiff = aExp - bExp;
    527	aSig <<= 6;
    528	bSig <<= 6;
    529	if (0 < expDiff) {
    530		if (aExp == 0xFF) {
    531			return a;
    532		}
    533		if (bExp == 0) {
    534			--expDiff;
    535		} else {
    536			bSig |= 0x20000000;
    537		}
    538		shift32RightJamming(bSig, expDiff, &bSig);
    539		zExp = aExp;
    540	} else if (expDiff < 0) {
    541		if (bExp == 0xFF) {
    542			return packFloat32(zSign, 0xFF, 0);
    543		}
    544		if (aExp == 0) {
    545			++expDiff;
    546		} else {
    547			aSig |= 0x20000000;
    548		}
    549		shift32RightJamming(aSig, -expDiff, &aSig);
    550		zExp = bExp;
    551	} else {
    552		if (aExp == 0xFF) {
    553			return a;
    554		}
    555		if (aExp == 0)
    556			return packFloat32(zSign, 0, (aSig + bSig) >> 6);
    557		zSig = 0x40000000 + aSig + bSig;
    558		zExp = aExp;
    559		goto roundAndPack;
    560	}
    561	aSig |= 0x20000000;
    562	zSig = (aSig + bSig) << 1;
    563	--zExp;
    564	if ((sbits32) zSig < 0) {
    565		zSig = aSig + bSig;
    566		++zExp;
    567	}
    568      roundAndPack:
    569	return roundAndPackFloat32(zSign, zExp, zSig);
    570
    571}
    572
    573float64 float64_sub(float64 a, float64 b)
    574{
    575	flag aSign, bSign;
    576
    577	aSign = extractFloat64Sign(a);
    578	bSign = extractFloat64Sign(b);
    579	if (aSign == bSign) {
    580		return subFloat64Sigs(a, b, aSign);
    581	} else {
    582		return addFloat64Sigs(a, b, aSign);
    583	}
    584
    585}
    586
    587float32 float32_sub(float32 a, float32 b)
    588{
    589	flag aSign, bSign;
    590
    591	aSign = extractFloat32Sign(a);
    592	bSign = extractFloat32Sign(b);
    593	if (aSign == bSign) {
    594		return subFloat32Sigs(a, b, aSign);
    595	} else {
    596		return addFloat32Sigs(a, b, aSign);
    597	}
    598
    599}
    600
    601float32 float32_add(float32 a, float32 b)
    602{
    603	flag aSign, bSign;
    604
    605	aSign = extractFloat32Sign(a);
    606	bSign = extractFloat32Sign(b);
    607	if (aSign == bSign) {
    608		return addFloat32Sigs(a, b, aSign);
    609	} else {
    610		return subFloat32Sigs(a, b, aSign);
    611	}
    612
    613}
    614
    615float64 float64_add(float64 a, float64 b)
    616{
    617	flag aSign, bSign;
    618
    619	aSign = extractFloat64Sign(a);
    620	bSign = extractFloat64Sign(b);
    621	if (aSign == bSign) {
    622		return addFloat64Sigs(a, b, aSign);
    623	} else {
    624		return subFloat64Sigs(a, b, aSign);
    625	}
    626}
    627
    628static void
    629normalizeFloat64Subnormal(bits64 aSig, int16 * zExpPtr, bits64 * zSigPtr)
    630{
    631	int8 shiftCount;
    632
    633	shiftCount = countLeadingZeros64(aSig) - 11;
    634	*zSigPtr = aSig << shiftCount;
    635	*zExpPtr = 1 - shiftCount;
    636}
    637
    638void add128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr,
    639		   bits64 * z1Ptr)
    640{
    641	bits64 z1;
    642
    643	z1 = a1 + b1;
    644	*z1Ptr = z1;
    645	*z0Ptr = a0 + b0 + (z1 < a1);
    646}
    647
    648void
    649sub128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr,
    650       bits64 * z1Ptr)
    651{
    652	*z1Ptr = a1 - b1;
    653	*z0Ptr = a0 - b0 - (a1 < b1);
    654}
    655
    656static bits64 estimateDiv128To64(bits64 a0, bits64 a1, bits64 b)
    657{
    658	bits64 b0, b1;
    659	bits64 rem0, rem1, term0, term1;
    660	bits64 z, tmp;
    661	if (b <= a0)
    662		return LIT64(0xFFFFFFFFFFFFFFFF);
    663	b0 = b >> 32;
    664	tmp = a0;
    665	do_div(tmp, b0);
    666
    667	z = (b0 << 32 <= a0) ? LIT64(0xFFFFFFFF00000000) : tmp << 32;
    668	mul64To128(b, z, &term0, &term1);
    669	sub128(a0, a1, term0, term1, &rem0, &rem1);
    670	while (((sbits64) rem0) < 0) {
    671		z -= LIT64(0x100000000);
    672		b1 = b << 32;
    673		add128(rem0, rem1, b0, b1, &rem0, &rem1);
    674	}
    675	rem0 = (rem0 << 32) | (rem1 >> 32);
    676	tmp = rem0;
    677	do_div(tmp, b0);
    678	z |= (b0 << 32 <= rem0) ? 0xFFFFFFFF : tmp;
    679	return z;
    680}
    681
    682void mul64To128(bits64 a, bits64 b, bits64 * z0Ptr, bits64 * z1Ptr)
    683{
    684	bits32 aHigh, aLow, bHigh, bLow;
    685	bits64 z0, zMiddleA, zMiddleB, z1;
    686
    687	aLow = a;
    688	aHigh = a >> 32;
    689	bLow = b;
    690	bHigh = b >> 32;
    691	z1 = ((bits64) aLow) * bLow;
    692	zMiddleA = ((bits64) aLow) * bHigh;
    693	zMiddleB = ((bits64) aHigh) * bLow;
    694	z0 = ((bits64) aHigh) * bHigh;
    695	zMiddleA += zMiddleB;
    696	z0 += (((bits64) (zMiddleA < zMiddleB)) << 32) + (zMiddleA >> 32);
    697	zMiddleA <<= 32;
    698	z1 += zMiddleA;
    699	z0 += (z1 < zMiddleA);
    700	*z1Ptr = z1;
    701	*z0Ptr = z0;
    702
    703}
    704
    705static void normalizeFloat32Subnormal(bits32 aSig, int16 * zExpPtr,
    706				      bits32 * zSigPtr)
    707{
    708	int8 shiftCount;
    709
    710	shiftCount = countLeadingZeros32(aSig) - 8;
    711	*zSigPtr = aSig << shiftCount;
    712	*zExpPtr = 1 - shiftCount;
    713
    714}
    715
    716float64 float64_div(float64 a, float64 b)
    717{
    718	flag aSign, bSign, zSign;
    719	int16 aExp, bExp, zExp;
    720	bits64 aSig, bSig, zSig;
    721	bits64 rem0, rem1;
    722	bits64 term0, term1;
    723
    724	aSig = extractFloat64Frac(a);
    725	aExp = extractFloat64Exp(a);
    726	aSign = extractFloat64Sign(a);
    727	bSig = extractFloat64Frac(b);
    728	bExp = extractFloat64Exp(b);
    729	bSign = extractFloat64Sign(b);
    730	zSign = aSign ^ bSign;
    731	if (aExp == 0x7FF) {
    732		if (bExp == 0x7FF) {
    733		}
    734		return packFloat64(zSign, 0x7FF, 0);
    735	}
    736	if (bExp == 0x7FF) {
    737		return packFloat64(zSign, 0, 0);
    738	}
    739	if (bExp == 0) {
    740		if (bSig == 0) {
    741			if ((aExp | aSig) == 0) {
    742				float_raise(FPSCR_CAUSE_INVALID);
    743			}
    744			return packFloat64(zSign, 0x7FF, 0);
    745		}
    746		normalizeFloat64Subnormal(bSig, &bExp, &bSig);
    747	}
    748	if (aExp == 0) {
    749		if (aSig == 0)
    750			return packFloat64(zSign, 0, 0);
    751		normalizeFloat64Subnormal(aSig, &aExp, &aSig);
    752	}
    753	zExp = aExp - bExp + 0x3FD;
    754	aSig = (aSig | LIT64(0x0010000000000000)) << 10;
    755	bSig = (bSig | LIT64(0x0010000000000000)) << 11;
    756	if (bSig <= (aSig + aSig)) {
    757		aSig >>= 1;
    758		++zExp;
    759	}
    760	zSig = estimateDiv128To64(aSig, 0, bSig);
    761	if ((zSig & 0x1FF) <= 2) {
    762		mul64To128(bSig, zSig, &term0, &term1);
    763		sub128(aSig, 0, term0, term1, &rem0, &rem1);
    764		while ((sbits64) rem0 < 0) {
    765			--zSig;
    766			add128(rem0, rem1, 0, bSig, &rem0, &rem1);
    767		}
    768		zSig |= (rem1 != 0);
    769	}
    770	return roundAndPackFloat64(zSign, zExp, zSig);
    771
    772}
    773
    774float32 float32_div(float32 a, float32 b)
    775{
    776	flag aSign, bSign, zSign;
    777	int16 aExp, bExp, zExp;
    778	bits32 aSig, bSig;
    779	uint64_t zSig;
    780
    781	aSig = extractFloat32Frac(a);
    782	aExp = extractFloat32Exp(a);
    783	aSign = extractFloat32Sign(a);
    784	bSig = extractFloat32Frac(b);
    785	bExp = extractFloat32Exp(b);
    786	bSign = extractFloat32Sign(b);
    787	zSign = aSign ^ bSign;
    788	if (aExp == 0xFF) {
    789		if (bExp == 0xFF) {
    790		}
    791		return packFloat32(zSign, 0xFF, 0);
    792	}
    793	if (bExp == 0xFF) {
    794		return packFloat32(zSign, 0, 0);
    795	}
    796	if (bExp == 0) {
    797		if (bSig == 0) {
    798			return packFloat32(zSign, 0xFF, 0);
    799		}
    800		normalizeFloat32Subnormal(bSig, &bExp, &bSig);
    801	}
    802	if (aExp == 0) {
    803		if (aSig == 0)
    804			return packFloat32(zSign, 0, 0);
    805		normalizeFloat32Subnormal(aSig, &aExp, &aSig);
    806	}
    807	zExp = aExp - bExp + 0x7D;
    808	aSig = (aSig | 0x00800000) << 7;
    809	bSig = (bSig | 0x00800000) << 8;
    810	if (bSig <= (aSig + aSig)) {
    811		aSig >>= 1;
    812		++zExp;
    813	}
    814	zSig = (((bits64) aSig) << 32);
    815	do_div(zSig, bSig);
    816
    817	if ((zSig & 0x3F) == 0) {
    818		zSig |= (((bits64) bSig) * zSig != ((bits64) aSig) << 32);
    819	}
    820	return roundAndPackFloat32(zSign, zExp, (bits32)zSig);
    821
    822}
    823
    824float32 float32_mul(float32 a, float32 b)
    825{
    826	char aSign, bSign, zSign;
    827	int aExp, bExp, zExp;
    828	unsigned int aSig, bSig;
    829	unsigned long long zSig64;
    830	unsigned int zSig;
    831
    832	aSig = extractFloat32Frac(a);
    833	aExp = extractFloat32Exp(a);
    834	aSign = extractFloat32Sign(a);
    835	bSig = extractFloat32Frac(b);
    836	bExp = extractFloat32Exp(b);
    837	bSign = extractFloat32Sign(b);
    838	zSign = aSign ^ bSign;
    839	if (aExp == 0) {
    840		if (aSig == 0)
    841			return packFloat32(zSign, 0, 0);
    842		normalizeFloat32Subnormal(aSig, &aExp, &aSig);
    843	}
    844	if (bExp == 0) {
    845		if (bSig == 0)
    846			return packFloat32(zSign, 0, 0);
    847		normalizeFloat32Subnormal(bSig, &bExp, &bSig);
    848	}
    849	if ((bExp == 0xff && bSig == 0) || (aExp == 0xff && aSig == 0))
    850		return roundAndPackFloat32(zSign, 0xff, 0);
    851
    852	zExp = aExp + bExp - 0x7F;
    853	aSig = (aSig | 0x00800000) << 7;
    854	bSig = (bSig | 0x00800000) << 8;
    855	shift64RightJamming(((unsigned long long)aSig) * bSig, 32, &zSig64);
    856	zSig = zSig64;
    857	if (0 <= (signed int)(zSig << 1)) {
    858		zSig <<= 1;
    859		--zExp;
    860	}
    861	return roundAndPackFloat32(zSign, zExp, zSig);
    862
    863}
    864
    865float64 float64_mul(float64 a, float64 b)
    866{
    867	char aSign, bSign, zSign;
    868	int aExp, bExp, zExp;
    869	unsigned long long int aSig, bSig, zSig0, zSig1;
    870
    871	aSig = extractFloat64Frac(a);
    872	aExp = extractFloat64Exp(a);
    873	aSign = extractFloat64Sign(a);
    874	bSig = extractFloat64Frac(b);
    875	bExp = extractFloat64Exp(b);
    876	bSign = extractFloat64Sign(b);
    877	zSign = aSign ^ bSign;
    878
    879	if (aExp == 0) {
    880		if (aSig == 0)
    881			return packFloat64(zSign, 0, 0);
    882		normalizeFloat64Subnormal(aSig, &aExp, &aSig);
    883	}
    884	if (bExp == 0) {
    885		if (bSig == 0)
    886			return packFloat64(zSign, 0, 0);
    887		normalizeFloat64Subnormal(bSig, &bExp, &bSig);
    888	}
    889	if ((aExp == 0x7ff && aSig == 0) || (bExp == 0x7ff && bSig == 0))
    890		return roundAndPackFloat64(zSign, 0x7ff, 0);
    891
    892	zExp = aExp + bExp - 0x3FF;
    893	aSig = (aSig | 0x0010000000000000LL) << 10;
    894	bSig = (bSig | 0x0010000000000000LL) << 11;
    895	mul64To128(aSig, bSig, &zSig0, &zSig1);
    896	zSig0 |= (zSig1 != 0);
    897	if (0 <= (signed long long int)(zSig0 << 1)) {
    898		zSig0 <<= 1;
    899		--zExp;
    900	}
    901	return roundAndPackFloat64(zSign, zExp, zSig0);
    902}
    903
    904/*
    905 * -------------------------------------------------------------------------------
    906 *  Returns the result of converting the double-precision floating-point value
    907 *  `a' to the single-precision floating-point format.  The conversion is
    908 *  performed according to the IEC/IEEE Standard for Binary Floating-point
    909 *  Arithmetic.
    910 *  -------------------------------------------------------------------------------
    911 *  */
    912float32 float64_to_float32(float64 a)
    913{
    914    flag aSign;
    915    int16 aExp;
    916    bits64 aSig;
    917    bits32 zSig;
    918
    919    aSig = extractFloat64Frac( a );
    920    aExp = extractFloat64Exp( a );
    921    aSign = extractFloat64Sign( a );
    922
    923    shift64RightJamming( aSig, 22, &aSig );
    924    zSig = aSig;
    925    if ( aExp || zSig ) {
    926        zSig |= 0x40000000;
    927        aExp -= 0x381;
    928    }
    929    return roundAndPackFloat32(aSign, aExp, zSig);
    930}