akaros/kern/arch/riscv/softfloat-macros.h
<<
>>
Prefs
   1
   2/*============================================================================

   3

   4This C source fragment is part of the SoftFloat IEC/IEEE Floating-point

   5Arithmetic Package, Release 2b.

   6

   7Written by John R. Hauser.  This work was made possible in part by the

   8International Computer Science Institute, located at Suite 600, 1947 Center

   9Street, Berkeley, California 94704.  Funding was partially provided by the

  10National Science Foundation under grant MIP-9311980.  The original version

  11of this code was written as part of a project to build a fixed-point vector

  12processor in collaboration with the University of California at Berkeley,

  13overseen by Profs. Nelson Morgan and John Wawrzynek.  More information

  14is available through the Web page `http://www.cs.berkeley.edu/~jhauser/

  15arithmetic/SoftFloat.html'.

  16

  17THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has

  18been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES

  19RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS

  20AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,

  21COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE

  22EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE

  23INSTITUTE (possibly via similar legal notice) AGAINST ALL LOSSES, COSTS, OR

  24OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.

  25

  26Derivative works are acceptable, even for commercial purposes, so long as

  27(1) the source code for the derivative work includes prominent notice that

  28the work is derivative, and (2) the source code includes prominent notice with

  29these four paragraphs for those parts of this code that are retained.

  30

  31=============================================================================*/
  32
  33/*----------------------------------------------------------------------------

  34| Shifts `a' right by the number of bits given in `count'.  If any nonzero

  35| bits are shifted off, they are ``jammed'' into the least significant bit of

  36| the result by setting the least significant bit to 1.  The value of `count'

  37| can be arbitrarily large; in particular, if `count' is greater than 32, the

  38| result will be either 0 or 1, depending on whether `a' is zero or nonzero.

  39| The result is stored in the location pointed to by `zPtr'.

  40*----------------------------------------------------------------------------*/
  41
  42INLINE void shift32RightJamming( bits32 a, int16_t count, bits32 *zPtr )
  43{
  44    bits32 z;
  45
  46    if ( count == 0 ) {
  47        z = a;
  48    }
  49    else if ( count < 32 ) {
  50        z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );
  51    }
  52    else {
  53        z = ( a != 0 );
  54    }
  55    *zPtr = z;
  56
  57}
  58
  59/*----------------------------------------------------------------------------

  60| Shifts `a' right by the number of bits given in `count'.  If any nonzero

  61| bits are shifted off, they are ``jammed'' into the least significant bit of

  62| the result by setting the least significant bit to 1.  The value of `count'

  63| can be arbitrarily large; in particular, if `count' is greater than 64, the

  64| result will be either 0 or 1, depending on whether `a' is zero or nonzero.

  65| The result is stored in the location pointed to by `zPtr'.

  66*----------------------------------------------------------------------------*/
  67
  68INLINE void shift64RightJamming( bits64 a, int16_t count, bits64 *zPtr )
  69{
  70    bits64 z;
  71
  72    if ( count == 0 ) {
  73        z = a;
  74    }
  75    else if ( count < 64 ) {
  76        z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 );
  77    }
  78    else {
  79        z = ( a != 0 );
  80    }
  81    *zPtr = z;
  82
  83}
  84
  85/*----------------------------------------------------------------------------

  86| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64

  87| _plus_ the number of bits given in `count'.  The shifted result is at most

  88| 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'.  The

  89| bits shifted off form a second 64-bit result as follows:  The _last_ bit

  90| shifted off is the most-significant bit of the extra result, and the other

  91| 63 bits of the extra result are all zero if and only if _all_but_the_last_

  92| bits shifted off were all zero.  This extra result is stored in the location

  93| pointed to by `z1Ptr'.  The value of `count' can be arbitrarily large.

  94|     (This routine makes more sense if `a0' and `a1' are considered to form

  95| a fixed-point value with binary point between `a0' and `a1'.  This fixed-

  96| point value is shifted right by the number of bits given in `count', and

  97| the integer part of the result is returned at the location pointed to by

  98| `z0Ptr'.  The fractional part of the result may be slightly corrupted as

  99| described above, and is returned at the location pointed to by `z1Ptr'.)

 100*----------------------------------------------------------------------------*/
 101
 102INLINE void
 103 shift64ExtraRightJamming(
 104     bits64 a0, bits64 a1, int16_t count, bits64 *z0Ptr, bits64 *z1Ptr )
 105{
 106    bits64 z0, z1;
 107    int8_t negCount = ( - count ) & 63;
 108
 109    if ( count == 0 ) {
 110        z1 = a1;
 111        z0 = a0;
 112    }
 113    else if ( count < 64 ) {
 114        z1 = ( a0<<negCount ) | ( a1 != 0 );
 115        z0 = a0>>count;
 116    }
 117    else {
 118        if ( count == 64 ) {
 119            z1 = a0 | ( a1 != 0 );
 120        }
 121        else {
 122            z1 = ( ( a0 | a1 ) != 0 );
 123        }
 124        z0 = 0;
 125    }
 126    *z1Ptr = z1;
 127    *z0Ptr = z0;
 128
 129}
 130
 131/*----------------------------------------------------------------------------

 132| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the

 133| number of bits given in `count'.  Any bits shifted off are lost.  The value

 134| of `count' can be arbitrarily large; in particular, if `count' is greater

 135| than 128, the result will be 0.  The result is broken into two 64-bit pieces

 136| which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.

 137*----------------------------------------------------------------------------*/
 138
 139INLINE void
 140 shift128Right(
 141     bits64 a0, bits64 a1, int16_t count, bits64 *z0Ptr, bits64 *z1Ptr )
 142{
 143    bits64 z0, z1;
 144    int8_t negCount = ( - count ) & 63;
 145
 146    if ( count == 0 ) {
 147        z1 = a1;
 148        z0 = a0;
 149    }
 150    else if ( count < 64 ) {
 151        z1 = ( a0<<negCount ) | ( a1>>count );
 152        z0 = a0>>count;
 153    }
 154    else {
 155        z1 = ( count < 64 ) ? ( a0>>( count & 63 ) ) : 0;
 156        z0 = 0;
 157    }
 158    *z1Ptr = z1;
 159    *z0Ptr = z0;
 160
 161}
 162
 163/*----------------------------------------------------------------------------

 164| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the

 165| number of bits given in `count'.  If any nonzero bits are shifted off, they

 166| are ``jammed'' into the least significant bit of the result by setting the

 167| least significant bit to 1.  The value of `count' can be arbitrarily large;

 168| in particular, if `count' is greater than 128, the result will be either

 169| 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or

 170| nonzero.  The result is broken into two 64-bit pieces which are stored at

 171| the locations pointed to by `z0Ptr' and `z1Ptr'.

 172*----------------------------------------------------------------------------*/
 173
 174INLINE void
 175 shift128RightJamming(
 176     bits64 a0, bits64 a1, int16_t count, bits64 *z0Ptr, bits64 *z1Ptr )
 177{
 178    bits64 z0, z1;
 179    int8_t negCount = ( - count ) & 63;
 180
 181    if ( count == 0 ) {
 182        z1 = a1;
 183        z0 = a0;
 184    }
 185    else if ( count < 64 ) {
 186        z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );
 187        z0 = a0>>count;
 188    }
 189    else {
 190        if ( count == 64 ) {
 191            z1 = a0 | ( a1 != 0 );
 192        }
 193        else if ( count < 128 ) {
 194            z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );
 195        }
 196        else {
 197            z1 = ( ( a0 | a1 ) != 0 );
 198        }
 199        z0 = 0;
 200    }
 201    *z1Ptr = z1;
 202    *z0Ptr = z0;
 203
 204}
 205
 206/*----------------------------------------------------------------------------

 207| Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right

 208| by 64 _plus_ the number of bits given in `count'.  The shifted result is

 209| at most 128 nonzero bits; these are broken into two 64-bit pieces which are

 210| stored at the locations pointed to by `z0Ptr' and `z1Ptr'.  The bits shifted

 211| off form a third 64-bit result as follows:  The _last_ bit shifted off is

 212| the most-significant bit of the extra result, and the other 63 bits of the

 213| extra result are all zero if and only if _all_but_the_last_ bits shifted off

 214| were all zero.  This extra result is stored in the location pointed to by

 215| `z2Ptr'.  The value of `count' can be arbitrarily large.

 216|     (This routine makes more sense if `a0', `a1', and `a2' are considered

 217| to form a fixed-point value with binary point between `a1' and `a2'.  This

 218| fixed-point value is shifted right by the number of bits given in `count',

 219| and the integer part of the result is returned at the locations pointed to

 220| by `z0Ptr' and `z1Ptr'.  The fractional part of the result may be slightly

 221| corrupted as described above, and is returned at the location pointed to by

 222| `z2Ptr'.)

 223*----------------------------------------------------------------------------*/
 224
 225INLINE void
 226 shift128ExtraRightJamming(
 227     bits64 a0,
 228     bits64 a1,
 229     bits64 a2,
 230     int16_t count,
 231     bits64 *z0Ptr,
 232     bits64 *z1Ptr,
 233     bits64 *z2Ptr
 234 )
 235{
 236    bits64 z0, z1, z2;
 237    int8_t negCount = ( - count ) & 63;
 238
 239    if ( count == 0 ) {
 240        z2 = a2;
 241        z1 = a1;
 242        z0 = a0;
 243    }
 244    else {
 245        if ( count < 64 ) {
 246            z2 = a1<<negCount;
 247            z1 = ( a0<<negCount ) | ( a1>>count );
 248            z0 = a0>>count;
 249        }
 250        else {
 251            if ( count == 64 ) {
 252                z2 = a1;
 253                z1 = a0;
 254            }
 255            else {
 256                a2 |= a1;
 257                if ( count < 128 ) {
 258                    z2 = a0<<negCount;
 259                    z1 = a0>>( count & 63 );
 260                }
 261                else {
 262                    z2 = ( count == 128 ) ? a0 : ( a0 != 0 );
 263                    z1 = 0;
 264                }
 265            }
 266            z0 = 0;
 267        }
 268        z2 |= ( a2 != 0 );
 269    }
 270    *z2Ptr = z2;
 271    *z1Ptr = z1;
 272    *z0Ptr = z0;
 273
 274}
 275
 276/*----------------------------------------------------------------------------

 277| Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the

 278| number of bits given in `count'.  Any bits shifted off are lost.  The value

 279| of `count' must be less than 64.  The result is broken into two 64-bit

 280| pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.

 281*----------------------------------------------------------------------------*/
 282
 283INLINE void
 284 shortShift128Left(
 285     bits64 a0, bits64 a1, int16_t count, bits64 *z0Ptr, bits64 *z1Ptr )
 286{
 287
 288    *z1Ptr = a1<<count;
 289    *z0Ptr =
 290        ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );
 291
 292}
 293
 294/*----------------------------------------------------------------------------

 295| Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left

 296| by the number of bits given in `count'.  Any bits shifted off are lost.

 297| The value of `count' must be less than 64.  The result is broken into three

 298| 64-bit pieces which are stored at the locations pointed to by `z0Ptr',

 299| `z1Ptr', and `z2Ptr'.

 300*----------------------------------------------------------------------------*/
 301
 302INLINE void
 303 shortShift192Left(
 304     bits64 a0,
 305     bits64 a1,
 306     bits64 a2,
 307     int16_t count,
 308     bits64 *z0Ptr,
 309     bits64 *z1Ptr,
 310     bits64 *z2Ptr
 311 )
 312{
 313    bits64 z0, z1, z2;
 314    int8_t negCount;
 315
 316    z2 = a2<<count;
 317    z1 = a1<<count;
 318    z0 = a0<<count;
 319    if ( 0 < count ) {
 320        negCount = ( ( - count ) & 63 );
 321        z1 |= a2>>negCount;
 322        z0 |= a1>>negCount;
 323    }
 324    *z2Ptr = z2;
 325    *z1Ptr = z1;
 326    *z0Ptr = z0;
 327
 328}
 329
 330/*----------------------------------------------------------------------------

 331| Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit

 332| value formed by concatenating `b0' and `b1'.  Addition is modulo 2^128, so

 333| any carry out is lost.  The result is broken into two 64-bit pieces which

 334| are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.

 335*----------------------------------------------------------------------------*/
 336
 337INLINE void
 338 add128(
 339     bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
 340{
 341    bits64 z1;
 342
 343    z1 = a1 + b1;
 344    *z1Ptr = z1;
 345    *z0Ptr = a0 + b0 + ( z1 < a1 );
 346
 347}
 348
 349/*----------------------------------------------------------------------------

 350| Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the

 351| 192-bit value formed by concatenating `b0', `b1', and `b2'.  Addition is

 352| modulo 2^192, so any carry out is lost.  The result is broken into three

 353| 64-bit pieces which are stored at the locations pointed to by `z0Ptr',

 354| `z1Ptr', and `z2Ptr'.

 355*----------------------------------------------------------------------------*/
 356
 357INLINE void
 358 add192(
 359     bits64 a0,
 360     bits64 a1,
 361     bits64 a2,
 362     bits64 b0,
 363     bits64 b1,
 364     bits64 b2,
 365     bits64 *z0Ptr,
 366     bits64 *z1Ptr,
 367     bits64 *z2Ptr
 368 )
 369{
 370    bits64 z0, z1, z2;
 371    int8_t carry0, carry1;
 372
 373    z2 = a2 + b2;
 374    carry1 = ( z2 < a2 );
 375    z1 = a1 + b1;
 376    carry0 = ( z1 < a1 );
 377    z0 = a0 + b0;
 378    z1 += carry1;
 379    z0 += ( z1 < carry1 );
 380    z0 += carry0;
 381    *z2Ptr = z2;
 382    *z1Ptr = z1;
 383    *z0Ptr = z0;
 384
 385}
 386
 387/*----------------------------------------------------------------------------

 388| Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the

 389| 128-bit value formed by concatenating `a0' and `a1'.  Subtraction is modulo

 390| 2^128, so any borrow out (carry out) is lost.  The result is broken into two

 391| 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and

 392| `z1Ptr'.

 393*----------------------------------------------------------------------------*/
 394
 395INLINE void
 396 sub128(
 397     bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
 398{
 399
 400    *z1Ptr = a1 - b1;
 401    *z0Ptr = a0 - b0 - ( a1 < b1 );
 402
 403}
 404
 405/*----------------------------------------------------------------------------

 406| Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'

 407| from the 192-bit value formed by concatenating `a0', `a1', and `a2'.

 408| Subtraction is modulo 2^192, so any borrow out (carry out) is lost.  The

 409| result is broken into three 64-bit pieces which are stored at the locations

 410| pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.

 411*----------------------------------------------------------------------------*/
 412
 413INLINE void
 414 sub192(
 415     bits64 a0,
 416     bits64 a1,
 417     bits64 a2,
 418     bits64 b0,
 419     bits64 b1,
 420     bits64 b2,
 421     bits64 *z0Ptr,
 422     bits64 *z1Ptr,
 423     bits64 *z2Ptr
 424 )
 425{
 426    bits64 z0, z1, z2;
 427    int8_t borrow0, borrow1;
 428
 429    z2 = a2 - b2;
 430    borrow1 = ( a2 < b2 );
 431    z1 = a1 - b1;
 432    borrow0 = ( a1 < b1 );
 433    z0 = a0 - b0;
 434    z0 -= ( z1 < borrow1 );
 435    z1 -= borrow1;
 436    z0 -= borrow0;
 437    *z2Ptr = z2;
 438    *z1Ptr = z1;
 439    *z0Ptr = z0;
 440
 441}
 442
 443/*----------------------------------------------------------------------------

 444| Multiplies `a' by `b' to obtain a 128-bit product.  The product is broken

 445| into two 64-bit pieces which are stored at the locations pointed to by

 446| `z0Ptr' and `z1Ptr'.

 447*----------------------------------------------------------------------------*/
 448
 449INLINE void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr )
 450{
 451    bits32 aHigh, aLow, bHigh, bLow;
 452    bits64 z0, zMiddleA, zMiddleB, z1;
 453
 454    aLow = a;
 455    aHigh = a>>32;
 456    bLow = b;
 457    bHigh = b>>32;
 458    z1 = ( (bits64) aLow ) * bLow;
 459    zMiddleA = ( (bits64) aLow ) * bHigh;
 460    zMiddleB = ( (bits64) aHigh ) * bLow;
 461    z0 = ( (bits64) aHigh ) * bHigh;
 462    zMiddleA += zMiddleB;
 463    z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );
 464    zMiddleA <<= 32;
 465    z1 += zMiddleA;
 466    z0 += ( z1 < zMiddleA );
 467    *z1Ptr = z1;
 468    *z0Ptr = z0;
 469
 470}
 471
 472/*----------------------------------------------------------------------------

 473| Multiplies the 128-bit value formed by concatenating `a0' and `a1' by

 474| `b' to obtain a 192-bit product.  The product is broken into three 64-bit

 475| pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and

 476| `z2Ptr'.

 477*----------------------------------------------------------------------------*/
 478
 479INLINE void
 480 mul128By64To192(
 481     bits64 a0,
 482     bits64 a1,
 483     bits64 b,
 484     bits64 *z0Ptr,
 485     bits64 *z1Ptr,
 486     bits64 *z2Ptr
 487 )
 488{
 489    bits64 z0, z1, z2, more1;
 490
 491    mul64To128( a1, b, &z1, &z2 );
 492    mul64To128( a0, b, &z0, &more1 );
 493    add128( z0, more1, 0, z1, &z0, &z1 );
 494    *z2Ptr = z2;
 495    *z1Ptr = z1;
 496    *z0Ptr = z0;
 497
 498}
 499
 500/*----------------------------------------------------------------------------

 501| Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the

 502| 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit

 503| product.  The product is broken into four 64-bit pieces which are stored at

 504| the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.

 505*----------------------------------------------------------------------------*/
 506
 507INLINE void
 508 mul128To256(
 509     bits64 a0,
 510     bits64 a1,
 511     bits64 b0,
 512     bits64 b1,
 513     bits64 *z0Ptr,
 514     bits64 *z1Ptr,
 515     bits64 *z2Ptr,
 516     bits64 *z3Ptr
 517 )
 518{
 519    bits64 z0, z1, z2, z3;
 520    bits64 more1, more2;
 521
 522    mul64To128( a1, b1, &z2, &z3 );
 523    mul64To128( a1, b0, &z1, &more2 );
 524    add128( z1, more2, 0, z2, &z1, &z2 );
 525    mul64To128( a0, b0, &z0, &more1 );
 526    add128( z0, more1, 0, z1, &z0, &z1 );
 527    mul64To128( a0, b1, &more1, &more2 );
 528    add128( more1, more2, 0, z2, &more1, &z2 );
 529    add128( z0, z1, 0, more1, &z0, &z1 );
 530    *z3Ptr = z3;
 531    *z2Ptr = z2;
 532    *z1Ptr = z1;
 533    *z0Ptr = z0;
 534
 535}
 536
 537/*----------------------------------------------------------------------------

 538| Returns an approximation to the 64-bit integer quotient obtained by dividing

 539| `b' into the 128-bit value formed by concatenating `a0' and `a1'.  The

 540| divisor `b' must be at least 2^63.  If q is the exact quotient truncated

 541| toward zero, the approximation returned lies between q and q + 2 inclusive.

 542| If the exact quotient q is larger than 64 bits, the maximum positive 64-bit

 543| unsigned integer is returned.

 544*----------------------------------------------------------------------------*/
 545
 546static bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b )
 547{
 548    bits64 b0, b1;
 549    bits64 rem0, rem1, term0, term1;
 550    bits64 z;
 551
 552    if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF );
 553    b0 = b>>32;
 554    z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32;
 555    mul64To128( b, z, &term0, &term1 );
 556    sub128( a0, a1, term0, term1, &rem0, &rem1 );
 557    while ( ( (sbits64) rem0 ) < 0 ) {
 558        z -= LIT64( 0x100000000 );
 559        b1 = b<<32;
 560        add128( rem0, rem1, b0, b1, &rem0, &rem1 );
 561    }
 562    rem0 = ( rem0<<32 ) | ( rem1>>32 );
 563    z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0;
 564    return z;
 565
 566}
 567
 568/*----------------------------------------------------------------------------

 569| Returns an approximation to the square root of the 32-bit significand given

 570| by `a'.  Considered as an integer, `a' must be at least 2^31.  If bit 0 of

 571| `aExp' (the least significant bit) is 1, the integer returned approximates

 572| 2^31*sqrt(`a'/2^31), where `a' is considered an integer.  If bit 0 of `aExp'

 573| is 0, the integer returned approximates 2^31*sqrt(`a'/2^30).  In either

 574| case, the approximation returned lies strictly within +/-2 of the exact

 575| value.

 576*----------------------------------------------------------------------------*/
 577
 578static bits32 estimateSqrt32( int16_t aExp, bits32 a )
 579{
 580    static const bits16 sqrtOddAdjustments[] = {
 581        0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
 582        0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
 583    };
 584    static const bits16 sqrtEvenAdjustments[] = {
 585        0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
 586        0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
 587    };
 588    int8_t index;
 589    bits32 z;
 590
 591    index = ( a>>27 ) & 15;
 592    if ( aExp & 1 ) {
 593        z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ index ];
 594        z = ( ( a / z )<<14 ) + ( z<<15 );
 595        a >>= 1;
 596    }
 597    else {
 598        z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ index ];
 599        z = a / z + z;
 600        z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
 601        if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 );
 602    }
 603    return ( (bits32) ( ( ( (bits64) a )<<31 ) / z ) ) + ( z>>1 );
 604
 605}
 606
 607/*----------------------------------------------------------------------------

 608| Returns the number of leading 0 bits before the most-significant 1 bit of

 609| `a'.  If `a' is zero, 32 is returned.

 610*----------------------------------------------------------------------------*/
 611
 612static int8_t countLeadingZeros32( bits32 a )
 613{
 614    static const int8_t countLeadingZerosHigh[] = {
 615        8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
 616        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 617        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 618        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 619        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 620        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 621        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 622        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 623        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 624        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 625        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 626        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 627        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 628        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 629        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 630        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 631    };
 632    int8_t shiftCount;
 633
 634    shiftCount = 0;
 635    if ( a < 0x10000 ) {
 636        shiftCount += 16;
 637        a <<= 16;
 638    }
 639    if ( a < 0x1000000 ) {
 640        shiftCount += 8;
 641        a <<= 8;
 642    }
 643    shiftCount += countLeadingZerosHigh[ a>>24 ];
 644    return shiftCount;
 645
 646}
 647
 648/*----------------------------------------------------------------------------

 649| Returns the number of leading 0 bits before the most-significant 1 bit of

 650| `a'.  If `a' is zero, 64 is returned.

 651*----------------------------------------------------------------------------*/
 652
 653static int8_t countLeadingZeros64( bits64 a )
 654{
 655    int8_t shiftCount;
 656
 657    shiftCount = 0;
 658    if ( a < ( (bits64) 1 )<<32 ) {
 659        shiftCount += 32;
 660    }
 661    else {
 662        a >>= 32;
 663    }
 664    shiftCount += countLeadingZeros32( a );
 665    return shiftCount;
 666
 667}
 668
 669/*----------------------------------------------------------------------------

 670| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'

 671| is equal to the 128-bit value formed by concatenating `b0' and `b1'.

 672| Otherwise, returns 0.

 673*----------------------------------------------------------------------------*/
 674
 675INLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
 676{
 677
 678    return ( a0 == b0 ) && ( a1 == b1 );
 679
 680}
 681
 682/*----------------------------------------------------------------------------

 683| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less

 684| than or equal to the 128-bit value formed by concatenating `b0' and `b1'.

 685| Otherwise, returns 0.

 686*----------------------------------------------------------------------------*/
 687
 688INLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
 689{
 690
 691    return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );
 692
 693}
 694
 695/*----------------------------------------------------------------------------

 696| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less

 697| than the 128-bit value formed by concatenating `b0' and `b1'.  Otherwise,

 698| returns 0.

 699*----------------------------------------------------------------------------*/
 700
 701INLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
 702{
 703
 704    return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );
 705
 706}
 707
 708/*----------------------------------------------------------------------------

 709| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is

 710| not equal to the 128-bit value formed by concatenating `b0' and `b1'.

 711| Otherwise, returns 0.

 712*----------------------------------------------------------------------------*/
 713
 714INLINE flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
 715{
 716
 717    return ( a0 != b0 ) || ( a1 != b1 );
 718
 719}
 720
 721