cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

softfloat-macros (24305B)


      1
      2/*
      3===============================================================================
      4
      5This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
      6Arithmetic Package, Release 2.
      7
      8Written by John R. Hauser.  This work was made possible in part by the
      9International Computer Science Institute, located at Suite 600, 1947 Center
     10Street, Berkeley, California 94704.  Funding was partially provided by the
     11National Science Foundation under grant MIP-9311980.  The original version
     12of this code was written as part of a project to build a fixed-point vector
     13processor in collaboration with the University of California at Berkeley,
     14overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
     15is available through the web page
     16http://www.jhauser.us/arithmetic/SoftFloat-2b/SoftFloat-source.txt
     17
     18THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
     19has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
     20TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
     21PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
     22AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
     23
     24Derivative works are acceptable, even for commercial purposes, so long as
     25(1) they include prominent notice that the work is derivative, and (2) they
     26include prominent notice akin to these three paragraphs for those parts of
     27this code that are retained.
     28
     29===============================================================================
     30*/
     31
     32/*
     33-------------------------------------------------------------------------------
     34Shifts `a' right by the number of bits given in `count'.  If any nonzero
     35bits are shifted off, they are ``jammed'' into the least significant bit of
     36the result by setting the least significant bit to 1.  The value of `count'
     37can be arbitrarily large; in particular, if `count' is greater than 32, the
     38result will be either 0 or 1, depending on whether `a' is zero or nonzero.
     39The result is stored in the location pointed to by `zPtr'.
     40-------------------------------------------------------------------------------
     41*/
     42INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )
     43{
     44    bits32 z;
     45    if ( count == 0 ) {
     46        z = a;
     47    }
     48    else if ( count < 32 ) {
     49        z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );
     50    }
     51    else {
     52        z = ( a != 0 );
     53    }
     54    *zPtr = z;
     55}
     56
     57/*
     58-------------------------------------------------------------------------------
     59Shifts `a' right by the number of bits given in `count'.  If any nonzero
     60bits are shifted off, they are ``jammed'' into the least significant bit of
     61the result by setting the least significant bit to 1.  The value of `count'
     62can be arbitrarily large; in particular, if `count' is greater than 64, the
     63result will be either 0 or 1, depending on whether `a' is zero or nonzero.
     64The result is stored in the location pointed to by `zPtr'.
     65-------------------------------------------------------------------------------
     66*/
     67INLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr )
     68{
     69    bits64 z;
     70
     71 __asm__("@shift64RightJamming -- start");   
     72    if ( count == 0 ) {
     73        z = a;
     74    }
     75    else if ( count < 64 ) {
     76        z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 );
     77    }
     78    else {
     79        z = ( a != 0 );
     80    }
     81 __asm__("@shift64RightJamming -- end");   
     82    *zPtr = z;
     83}
     84
     85/*
     86-------------------------------------------------------------------------------
     87Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
     88_plus_ the number of bits given in `count'.  The shifted result is at most
     8964 nonzero bits; this is stored at the location pointed to by `z0Ptr'.  The
     90bits shifted off form a second 64-bit result as follows:  The _last_ bit
     91shifted off is the most-significant bit of the extra result, and the other
     9263 bits of the extra result are all zero if and only if _all_but_the_last_
     93bits shifted off were all zero.  This extra result is stored in the location
     94pointed to by `z1Ptr'.  The value of `count' can be arbitrarily large.
     95    (This routine makes more sense if `a0' and `a1' are considered to form a
     96fixed-point value with binary point between `a0' and `a1'.  This fixed-point
     97value is shifted right by the number of bits given in `count', and the
     98integer part of the result is returned at the location pointed to by
     99`z0Ptr'.  The fractional part of the result may be slightly corrupted as
    100described above, and is returned at the location pointed to by `z1Ptr'.)
    101-------------------------------------------------------------------------------
    102*/
    103INLINE void
    104 shift64ExtraRightJamming(
    105     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
    106{
    107    bits64 z0, z1;
    108    int8 negCount = ( - count ) & 63;
    109
    110    if ( count == 0 ) {
    111        z1 = a1;
    112        z0 = a0;
    113    }
    114    else if ( count < 64 ) {
    115        z1 = ( a0<<negCount ) | ( a1 != 0 );
    116        z0 = a0>>count;
    117    }
    118    else {
    119        if ( count == 64 ) {
    120            z1 = a0 | ( a1 != 0 );
    121        }
    122        else {
    123            z1 = ( ( a0 | a1 ) != 0 );
    124        }
    125        z0 = 0;
    126    }
    127    *z1Ptr = z1;
    128    *z0Ptr = z0;
    129
    130}
    131
    132/*
    133-------------------------------------------------------------------------------
    134Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
    135number of bits given in `count'.  Any bits shifted off are lost.  The value
    136of `count' can be arbitrarily large; in particular, if `count' is greater
    137than 128, the result will be 0.  The result is broken into two 64-bit pieces
    138which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
    139-------------------------------------------------------------------------------
    140*/
    141INLINE void
    142 shift128Right(
    143     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
    144{
    145    bits64 z0, z1;
    146    int8 negCount = ( - count ) & 63;
    147
    148    if ( count == 0 ) {
    149        z1 = a1;
    150        z0 = a0;
    151    }
    152    else if ( count < 64 ) {
    153        z1 = ( a0<<negCount ) | ( a1>>count );
    154        z0 = a0>>count;
    155    }
    156    else {
    157        z1 = ( count < 64 ) ? ( a0>>( count & 63 ) ) : 0;
    158        z0 = 0;
    159    }
    160    *z1Ptr = z1;
    161    *z0Ptr = z0;
    162
    163}
    164
    165/*
    166-------------------------------------------------------------------------------
    167Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
    168number of bits given in `count'.  If any nonzero bits are shifted off, they
    169are ``jammed'' into the least significant bit of the result by setting the
    170least significant bit to 1.  The value of `count' can be arbitrarily large;
    171in particular, if `count' is greater than 128, the result will be either 0
    172or 1, depending on whether the concatenation of `a0' and `a1' is zero or
    173nonzero.  The result is broken into two 64-bit pieces which are stored at
    174the locations pointed to by `z0Ptr' and `z1Ptr'.
    175-------------------------------------------------------------------------------
    176*/
    177INLINE void
    178 shift128RightJamming(
    179     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
    180{
    181    bits64 z0, z1;
    182    int8 negCount = ( - count ) & 63;
    183
    184    if ( count == 0 ) {
    185        z1 = a1;
    186        z0 = a0;
    187    }
    188    else if ( count < 64 ) {
    189        z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );
    190        z0 = a0>>count;
    191    }
    192    else {
    193        if ( count == 64 ) {
    194            z1 = a0 | ( a1 != 0 );
    195        }
    196        else if ( count < 128 ) {
    197            z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );
    198        }
    199        else {
    200            z1 = ( ( a0 | a1 ) != 0 );
    201        }
    202        z0 = 0;
    203    }
    204    *z1Ptr = z1;
    205    *z0Ptr = z0;
    206
    207}
    208
    209/*
    210-------------------------------------------------------------------------------
    211Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
    212by 64 _plus_ the number of bits given in `count'.  The shifted result is
    213at most 128 nonzero bits; these are broken into two 64-bit pieces which are
    214stored at the locations pointed to by `z0Ptr' and `z1Ptr'.  The bits shifted
    215off form a third 64-bit result as follows:  The _last_ bit shifted off is
    216the most-significant bit of the extra result, and the other 63 bits of the
    217extra result are all zero if and only if _all_but_the_last_ bits shifted off
    218were all zero.  This extra result is stored in the location pointed to by
    219`z2Ptr'.  The value of `count' can be arbitrarily large.
    220    (This routine makes more sense if `a0', `a1', and `a2' are considered
    221to form a fixed-point value with binary point between `a1' and `a2'.  This
    222fixed-point value is shifted right by the number of bits given in `count',
    223and the integer part of the result is returned at the locations pointed to
    224by `z0Ptr' and `z1Ptr'.  The fractional part of the result may be slightly
    225corrupted as described above, and is returned at the location pointed to by
    226`z2Ptr'.)
    227-------------------------------------------------------------------------------
    228*/
    229INLINE void
    230 shift128ExtraRightJamming(
    231     bits64 a0,
    232     bits64 a1,
    233     bits64 a2,
    234     int16 count,
    235     bits64 *z0Ptr,
    236     bits64 *z1Ptr,
    237     bits64 *z2Ptr
    238 )
    239{
    240    bits64 z0, z1, z2;
    241    int8 negCount = ( - count ) & 63;
    242
    243    if ( count == 0 ) {
    244        z2 = a2;
    245        z1 = a1;
    246        z0 = a0;
    247    }
    248    else {
    249        if ( count < 64 ) {
    250            z2 = a1<<negCount;
    251            z1 = ( a0<<negCount ) | ( a1>>count );
    252            z0 = a0>>count;
    253        }
    254        else {
    255            if ( count == 64 ) {
    256                z2 = a1;
    257                z1 = a0;
    258            }
    259            else {
    260                a2 |= a1;
    261                if ( count < 128 ) {
    262                    z2 = a0<<negCount;
    263                    z1 = a0>>( count & 63 );
    264                }
    265                else {
    266                    z2 = ( count == 128 ) ? a0 : ( a0 != 0 );
    267                    z1 = 0;
    268                }
    269            }
    270            z0 = 0;
    271        }
    272        z2 |= ( a2 != 0 );
    273    }
    274    *z2Ptr = z2;
    275    *z1Ptr = z1;
    276    *z0Ptr = z0;
    277
    278}
    279
    280/*
    281-------------------------------------------------------------------------------
    282Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
    283number of bits given in `count'.  Any bits shifted off are lost.  The value
    284of `count' must be less than 64.  The result is broken into two 64-bit
    285pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
    286-------------------------------------------------------------------------------
    287*/
    288INLINE void
    289 shortShift128Left(
    290     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
    291{
    292
    293    *z1Ptr = a1<<count;
    294    *z0Ptr =
    295        ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );
    296
    297}
    298
    299/*
    300-------------------------------------------------------------------------------
    301Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
    302by the number of bits given in `count'.  Any bits shifted off are lost.
    303The value of `count' must be less than 64.  The result is broken into three
    30464-bit pieces which are stored at the locations pointed to by `z0Ptr',
    305`z1Ptr', and `z2Ptr'.
    306-------------------------------------------------------------------------------
    307*/
    308INLINE void
    309 shortShift192Left(
    310     bits64 a0,
    311     bits64 a1,
    312     bits64 a2,
    313     int16 count,
    314     bits64 *z0Ptr,
    315     bits64 *z1Ptr,
    316     bits64 *z2Ptr
    317 )
    318{
    319    bits64 z0, z1, z2;
    320    int8 negCount;
    321
    322    z2 = a2<<count;
    323    z1 = a1<<count;
    324    z0 = a0<<count;
    325    if ( 0 < count ) {
    326        negCount = ( ( - count ) & 63 );
    327        z1 |= a2>>negCount;
    328        z0 |= a1>>negCount;
    329    }
    330    *z2Ptr = z2;
    331    *z1Ptr = z1;
    332    *z0Ptr = z0;
    333
    334}
    335
    336/*
    337-------------------------------------------------------------------------------
    338Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
    339value formed by concatenating `b0' and `b1'.  Addition is modulo 2^128, so
    340any carry out is lost.  The result is broken into two 64-bit pieces which
    341are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
    342-------------------------------------------------------------------------------
    343*/
    344INLINE void
    345 add128(
    346     bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
    347{
    348    bits64 z1;
    349
    350    z1 = a1 + b1;
    351    *z1Ptr = z1;
    352    *z0Ptr = a0 + b0 + ( z1 < a1 );
    353
    354}
    355
    356/*
    357-------------------------------------------------------------------------------
    358Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
    359192-bit value formed by concatenating `b0', `b1', and `b2'.  Addition is
    360modulo 2^192, so any carry out is lost.  The result is broken into three
    36164-bit pieces which are stored at the locations pointed to by `z0Ptr',
    362`z1Ptr', and `z2Ptr'.
    363-------------------------------------------------------------------------------
    364*/
    365INLINE void
    366 add192(
    367     bits64 a0,
    368     bits64 a1,
    369     bits64 a2,
    370     bits64 b0,
    371     bits64 b1,
    372     bits64 b2,
    373     bits64 *z0Ptr,
    374     bits64 *z1Ptr,
    375     bits64 *z2Ptr
    376 )
    377{
    378    bits64 z0, z1, z2;
    379    int8 carry0, carry1;
    380
    381    z2 = a2 + b2;
    382    carry1 = ( z2 < a2 );
    383    z1 = a1 + b1;
    384    carry0 = ( z1 < a1 );
    385    z0 = a0 + b0;
    386    z1 += carry1;
    387    z0 += ( z1 < carry1 );
    388    z0 += carry0;
    389    *z2Ptr = z2;
    390    *z1Ptr = z1;
    391    *z0Ptr = z0;
    392
    393}
    394
    395/*
    396-------------------------------------------------------------------------------
    397Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
    398128-bit value formed by concatenating `a0' and `a1'.  Subtraction is modulo
    3992^128, so any borrow out (carry out) is lost.  The result is broken into two
    40064-bit pieces which are stored at the locations pointed to by `z0Ptr' and
    401`z1Ptr'.
    402-------------------------------------------------------------------------------
    403*/
    404INLINE void
    405 sub128(
    406     bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
    407{
    408
    409    *z1Ptr = a1 - b1;
    410    *z0Ptr = a0 - b0 - ( a1 < b1 );
    411
    412}
    413
    414/*
    415-------------------------------------------------------------------------------
    416Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
    417from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
    418Subtraction is modulo 2^192, so any borrow out (carry out) is lost.  The
    419result is broken into three 64-bit pieces which are stored at the locations
    420pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
    421-------------------------------------------------------------------------------
    422*/
    423INLINE void
    424 sub192(
    425     bits64 a0,
    426     bits64 a1,
    427     bits64 a2,
    428     bits64 b0,
    429     bits64 b1,
    430     bits64 b2,
    431     bits64 *z0Ptr,
    432     bits64 *z1Ptr,
    433     bits64 *z2Ptr
    434 )
    435{
    436    bits64 z0, z1, z2;
    437    int8 borrow0, borrow1;
    438
    439    z2 = a2 - b2;
    440    borrow1 = ( a2 < b2 );
    441    z1 = a1 - b1;
    442    borrow0 = ( a1 < b1 );
    443    z0 = a0 - b0;
    444    z0 -= ( z1 < borrow1 );
    445    z1 -= borrow1;
    446    z0 -= borrow0;
    447    *z2Ptr = z2;
    448    *z1Ptr = z1;
    449    *z0Ptr = z0;
    450
    451}
    452
    453/*
    454-------------------------------------------------------------------------------
    455Multiplies `a' by `b' to obtain a 128-bit product.  The product is broken
    456into two 64-bit pieces which are stored at the locations pointed to by
    457`z0Ptr' and `z1Ptr'.
    458-------------------------------------------------------------------------------
    459*/
    460INLINE void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr )
    461{
    462    bits32 aHigh, aLow, bHigh, bLow;
    463    bits64 z0, zMiddleA, zMiddleB, z1;
    464
    465    aLow = a;
    466    aHigh = a>>32;
    467    bLow = b;
    468    bHigh = b>>32;
    469    z1 = ( (bits64) aLow ) * bLow;
    470    zMiddleA = ( (bits64) aLow ) * bHigh;
    471    zMiddleB = ( (bits64) aHigh ) * bLow;
    472    z0 = ( (bits64) aHigh ) * bHigh;
    473    zMiddleA += zMiddleB;
    474    z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );
    475    zMiddleA <<= 32;
    476    z1 += zMiddleA;
    477    z0 += ( z1 < zMiddleA );
    478    *z1Ptr = z1;
    479    *z0Ptr = z0;
    480
    481}
    482
    483/*
    484-------------------------------------------------------------------------------
    485Multiplies the 128-bit value formed by concatenating `a0' and `a1' by `b' to
    486obtain a 192-bit product.  The product is broken into three 64-bit pieces
    487which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
    488`z2Ptr'.
    489-------------------------------------------------------------------------------
    490*/
    491INLINE void
    492 mul128By64To192(
    493     bits64 a0,
    494     bits64 a1,
    495     bits64 b,
    496     bits64 *z0Ptr,
    497     bits64 *z1Ptr,
    498     bits64 *z2Ptr
    499 )
    500{
    501    bits64 z0, z1, z2, more1;
    502
    503    mul64To128( a1, b, &z1, &z2 );
    504    mul64To128( a0, b, &z0, &more1 );
    505    add128( z0, more1, 0, z1, &z0, &z1 );
    506    *z2Ptr = z2;
    507    *z1Ptr = z1;
    508    *z0Ptr = z0;
    509
    510}
    511
    512/*
    513-------------------------------------------------------------------------------
    514Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
    515128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
    516product.  The product is broken into four 64-bit pieces which are stored at
    517the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
    518-------------------------------------------------------------------------------
    519*/
    520INLINE void
    521 mul128To256(
    522     bits64 a0,
    523     bits64 a1,
    524     bits64 b0,
    525     bits64 b1,
    526     bits64 *z0Ptr,
    527     bits64 *z1Ptr,
    528     bits64 *z2Ptr,
    529     bits64 *z3Ptr
    530 )
    531{
    532    bits64 z0, z1, z2, z3;
    533    bits64 more1, more2;
    534
    535    mul64To128( a1, b1, &z2, &z3 );
    536    mul64To128( a1, b0, &z1, &more2 );
    537    add128( z1, more2, 0, z2, &z1, &z2 );
    538    mul64To128( a0, b0, &z0, &more1 );
    539    add128( z0, more1, 0, z1, &z0, &z1 );
    540    mul64To128( a0, b1, &more1, &more2 );
    541    add128( more1, more2, 0, z2, &more1, &z2 );
    542    add128( z0, z1, 0, more1, &z0, &z1 );
    543    *z3Ptr = z3;
    544    *z2Ptr = z2;
    545    *z1Ptr = z1;
    546    *z0Ptr = z0;
    547
    548}
    549
    550/*
    551-------------------------------------------------------------------------------
    552Returns an approximation to the 64-bit integer quotient obtained by dividing
    553`b' into the 128-bit value formed by concatenating `a0' and `a1'.  The
    554divisor `b' must be at least 2^63.  If q is the exact quotient truncated
    555toward zero, the approximation returned lies between q and q + 2 inclusive.
    556If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
    557unsigned integer is returned.
    558-------------------------------------------------------------------------------
    559*/
    560static bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b )
    561{
    562    bits64 b0, b1;
    563    bits64 rem0, rem1, term0, term1;
    564    bits64 z;
    565    if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF );
    566    b0 = b>>32;  /* hence b0 is 32 bits wide now */
    567    if ( b0<<32 <= a0 ) {
    568        z = LIT64( 0xFFFFFFFF00000000 );
    569    }  else {
    570        z = a0;
    571        do_div( z, b0 );
    572        z <<= 32;
    573    }
    574    mul64To128( b, z, &term0, &term1 );
    575    sub128( a0, a1, term0, term1, &rem0, &rem1 );
    576    while ( ( (sbits64) rem0 ) < 0 ) {
    577        z -= LIT64( 0x100000000 );
    578        b1 = b<<32;
    579        add128( rem0, rem1, b0, b1, &rem0, &rem1 );
    580    }
    581    rem0 = ( rem0<<32 ) | ( rem1>>32 );
    582    if ( b0<<32 <= rem0 ) {
    583        z |= 0xFFFFFFFF;
    584    } else {
    585        do_div( rem0, b0 );
    586        z |= rem0;
    587    }
    588    return z;
    589
    590}
    591
    592/*
    593-------------------------------------------------------------------------------
    594Returns an approximation to the square root of the 32-bit significand given
    595by `a'.  Considered as an integer, `a' must be at least 2^31.  If bit 0 of
    596`aExp' (the least significant bit) is 1, the integer returned approximates
    5972^31*sqrt(`a'/2^31), where `a' is considered an integer.  If bit 0 of `aExp'
    598is 0, the integer returned approximates 2^31*sqrt(`a'/2^30).  In either
    599case, the approximation returned lies strictly within +/-2 of the exact
    600value.
    601-------------------------------------------------------------------------------
    602*/
    603static bits32 estimateSqrt32( int16 aExp, bits32 a )
    604{
    605    static const bits16 sqrtOddAdjustments[] = {
    606        0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
    607        0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
    608    };
    609    static const bits16 sqrtEvenAdjustments[] = {
    610        0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
    611        0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
    612    };
    613    int8 index;
    614    bits32 z;
    615    bits64 A;
    616
    617    index = ( a>>27 ) & 15;
    618    if ( aExp & 1 ) {
    619        z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ index ];
    620        z = ( ( a / z )<<14 ) + ( z<<15 );
    621        a >>= 1;
    622    }
    623    else {
    624        z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ index ];
    625        z = a / z + z;
    626        z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
    627        if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 );
    628    }
    629    A = ( (bits64) a )<<31;
    630    do_div( A, z );
    631    return ( (bits32) A ) + ( z>>1 );
    632
    633}
    634
    635/*
    636-------------------------------------------------------------------------------
    637Returns the number of leading 0 bits before the most-significant 1 bit
    638of `a'.  If `a' is zero, 32 is returned.
    639-------------------------------------------------------------------------------
    640*/
    641static int8 countLeadingZeros32( bits32 a )
    642{
    643    static const int8 countLeadingZerosHigh[] = {
    644        8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
    645        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    646        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    647        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    648        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    649        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    650        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    651        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    652        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    653        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    654        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    655        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    656        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    657        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    658        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    659        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    660    };
    661    int8 shiftCount;
    662
    663    shiftCount = 0;
    664    if ( a < 0x10000 ) {
    665        shiftCount += 16;
    666        a <<= 16;
    667    }
    668    if ( a < 0x1000000 ) {
    669        shiftCount += 8;
    670        a <<= 8;
    671    }
    672    shiftCount += countLeadingZerosHigh[ a>>24 ];
    673    return shiftCount;
    674
    675}
    676
    677/*
    678-------------------------------------------------------------------------------
    679Returns the number of leading 0 bits before the most-significant 1 bit
    680of `a'.  If `a' is zero, 64 is returned.
    681-------------------------------------------------------------------------------
    682*/
    683static int8 countLeadingZeros64( bits64 a )
    684{
    685    int8 shiftCount;
    686
    687    shiftCount = 0;
    688    if ( a < ( (bits64) 1 )<<32 ) {
    689        shiftCount += 32;
    690    }
    691    else {
    692        a >>= 32;
    693    }
    694    shiftCount += countLeadingZeros32( a );
    695    return shiftCount;
    696
    697}
    698
    699/*
    700-------------------------------------------------------------------------------
    701Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
    702is equal to the 128-bit value formed by concatenating `b0' and `b1'.
    703Otherwise, returns 0.
    704-------------------------------------------------------------------------------
    705*/
    706INLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
    707{
    708
    709    return ( a0 == b0 ) && ( a1 == b1 );
    710
    711}
    712
    713/*
    714-------------------------------------------------------------------------------
    715Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
    716than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
    717Otherwise, returns 0.
    718-------------------------------------------------------------------------------
    719*/
    720INLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
    721{
    722
    723    return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );
    724
    725}
    726
    727/*
    728-------------------------------------------------------------------------------
    729Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
    730than the 128-bit value formed by concatenating `b0' and `b1'.  Otherwise,
    731returns 0.
    732-------------------------------------------------------------------------------
    733*/
    734INLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
    735{
    736
    737    return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );
    738
    739}
    740
    741/*
    742-------------------------------------------------------------------------------
    743Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is
    744not equal to the 128-bit value formed by concatenating `b0' and `b1'.
    745Otherwise, returns 0.
    746-------------------------------------------------------------------------------
    747*/
    748INLINE flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
    749{
    750
    751    return ( a0 != b0 ) || ( a1 != b1 );
    752
    753}
    754