wm_sqrt.S (11040B)
1/* SPDX-License-Identifier: GPL-2.0 */ 2 .file "wm_sqrt.S" 3/*---------------------------------------------------------------------------+ 4 | wm_sqrt.S | 5 | | 6 | Fixed point arithmetic square root evaluation. | 7 | | 8 | Copyright (C) 1992,1993,1995,1997 | 9 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | 10 | Australia. E-mail billm@suburbia.net | 11 | | 12 | Call from C as: | 13 | int wm_sqrt(FPU_REG *n, unsigned int control_word) | 14 | | 15 +---------------------------------------------------------------------------*/ 16 17/*---------------------------------------------------------------------------+ 18 | wm_sqrt(FPU_REG *n, unsigned int control_word) | 19 | returns the square root of n in n. | 20 | | 21 | Use Newton's method to compute the square root of a number, which must | 22 | be in the range [1.0 .. 4.0), to 64 bits accuracy. | 23 | Does not check the sign or tag of the argument. | 24 | Sets the exponent, but not the sign or tag of the result. | 25 | | 26 | The guess is kept in %esi:%edi | 27 +---------------------------------------------------------------------------*/ 28 29#include "exception.h" 30#include "fpu_emu.h" 31 32 33#ifndef NON_REENTRANT_FPU 34/* Local storage on the stack: */ 35#define FPU_accum_3 -4(%ebp) /* ms word */ 36#define FPU_accum_2 -8(%ebp) 37#define FPU_accum_1 -12(%ebp) 38#define FPU_accum_0 -16(%ebp) 39 40/* 41 * The de-normalised argument: 42 * sq_2 sq_1 sq_0 43 * b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0 44 * ^ binary point here 45 */ 46#define FPU_fsqrt_arg_2 -20(%ebp) /* ms word */ 47#define FPU_fsqrt_arg_1 -24(%ebp) 48#define FPU_fsqrt_arg_0 -28(%ebp) /* ls word, at most the ms bit is set */ 49 50#else 51/* Local storage in a static area: */ 52.data 53 .align 4,0 54FPU_accum_3: 55 .long 0 /* ms word */ 56FPU_accum_2: 57 .long 0 58FPU_accum_1: 59 .long 0 60FPU_accum_0: 61 .long 0 62 63/* The de-normalised argument: 64 sq_2 sq_1 sq_0 65 b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0 66 ^ binary point here 67 */ 68FPU_fsqrt_arg_2: 69 .long 0 /* ms word */ 70FPU_fsqrt_arg_1: 71 .long 0 72FPU_fsqrt_arg_0: 73 .long 0 /* ls word, at most the ms bit is set */ 74#endif /* NON_REENTRANT_FPU */ 75 76 77.text 78SYM_FUNC_START(wm_sqrt) 79 pushl %ebp 80 movl %esp,%ebp 81#ifndef NON_REENTRANT_FPU 82 subl $28,%esp 83#endif /* NON_REENTRANT_FPU */ 84 pushl %esi 85 pushl %edi 86 pushl %ebx 87 88 movl PARAM1,%esi 89 90 movl SIGH(%esi),%eax 91 movl SIGL(%esi),%ecx 92 xorl %edx,%edx 93 94/* We use a rough linear estimate for the first guess.. */ 95 96 cmpw EXP_BIAS,EXP(%esi) 97 jnz sqrt_arg_ge_2 98 99 shrl $1,%eax /* arg is in the range [1.0 .. 2.0) */ 100 rcrl $1,%ecx 101 rcrl $1,%edx 102 103sqrt_arg_ge_2: 104/* From here on, n is never accessed directly again until it is 105 replaced by the answer. */ 106 107 movl %eax,FPU_fsqrt_arg_2 /* ms word of n */ 108 movl %ecx,FPU_fsqrt_arg_1 109 movl %edx,FPU_fsqrt_arg_0 110 111/* Make a linear first estimate */ 112 shrl $1,%eax 113 addl $0x40000000,%eax 114 movl $0xaaaaaaaa,%ecx 115 mull %ecx 116 shll %edx /* max result was 7fff... */ 117 testl $0x80000000,%edx /* but min was 3fff... */ 118 jnz sqrt_prelim_no_adjust 119 120 movl $0x80000000,%edx /* round up */ 121 122sqrt_prelim_no_adjust: 123 movl %edx,%esi /* Our first guess */ 124 125/* We have now computed (approx) (2 + x) / 3, which forms the basis 126 for a few iterations of Newton's method */ 127 128 movl FPU_fsqrt_arg_2,%ecx /* ms word */ 129 130/* 131 * From our initial estimate, three iterations are enough to get us 132 * to 30 bits or so. This will then allow two iterations at better 133 * precision to complete the process. 134 */ 135 136/* Compute (g + n/g)/2 at each iteration (g is the guess). */ 137 shrl %ecx /* Doing this first will prevent a divide */ 138 /* overflow later. */ 139 140 movl %ecx,%edx /* msw of the arg / 2 */ 141 divl %esi /* current estimate */ 142 shrl %esi /* divide by 2 */ 143 addl %eax,%esi /* the new estimate */ 144 145 movl %ecx,%edx 146 divl %esi 147 shrl %esi 148 addl %eax,%esi 149 150 movl %ecx,%edx 151 divl %esi 152 shrl %esi 153 addl %eax,%esi 154 155/* 156 * Now that an estimate accurate to about 30 bits has been obtained (in %esi), 157 * we improve it to 60 bits or so. 158 * 159 * The strategy from now on is to compute new estimates from 160 * guess := guess + (n - guess^2) / (2 * guess) 161 */ 162 163/* First, find the square of the guess */ 164 movl %esi,%eax 165 mull %esi 166/* guess^2 now in %edx:%eax */ 167 168 movl FPU_fsqrt_arg_1,%ecx 169 subl %ecx,%eax 170 movl FPU_fsqrt_arg_2,%ecx /* ms word of normalized n */ 171 sbbl %ecx,%edx 172 jnc sqrt_stage_2_positive 173 174/* Subtraction gives a negative result, 175 negate the result before division. */ 176 notl %edx 177 notl %eax 178 addl $1,%eax 179 adcl $0,%edx 180 181 divl %esi 182 movl %eax,%ecx 183 184 movl %edx,%eax 185 divl %esi 186 jmp sqrt_stage_2_finish 187 188sqrt_stage_2_positive: 189 divl %esi 190 movl %eax,%ecx 191 192 movl %edx,%eax 193 divl %esi 194 195 notl %ecx 196 notl %eax 197 addl $1,%eax 198 adcl $0,%ecx 199 200sqrt_stage_2_finish: 201 sarl $1,%ecx /* divide by 2 */ 202 rcrl $1,%eax 203 204 /* Form the new estimate in %esi:%edi */ 205 movl %eax,%edi 206 addl %ecx,%esi 207 208 jnz sqrt_stage_2_done /* result should be [1..2) */ 209 210#ifdef PARANOID 211/* It should be possible to get here only if the arg is ffff....ffff */ 212 cmpl $0xffffffff,FPU_fsqrt_arg_1 213 jnz sqrt_stage_2_error 214#endif /* PARANOID */ 215 216/* The best rounded result. */ 217 xorl %eax,%eax 218 decl %eax 219 movl %eax,%edi 220 movl %eax,%esi 221 movl $0x7fffffff,%eax 222 jmp sqrt_round_result 223 224#ifdef PARANOID 225sqrt_stage_2_error: 226 pushl EX_INTERNAL|0x213 227 call EXCEPTION 228#endif /* PARANOID */ 229 230sqrt_stage_2_done: 231 232/* Now the square root has been computed to better than 60 bits. */ 233 234/* Find the square of the guess. */ 235 movl %edi,%eax /* ls word of guess */ 236 mull %edi 237 movl %edx,FPU_accum_1 238 239 movl %esi,%eax 240 mull %esi 241 movl %edx,FPU_accum_3 242 movl %eax,FPU_accum_2 243 244 movl %edi,%eax 245 mull %esi 246 addl %eax,FPU_accum_1 247 adcl %edx,FPU_accum_2 248 adcl $0,FPU_accum_3 249 250/* movl %esi,%eax */ 251/* mull %edi */ 252 addl %eax,FPU_accum_1 253 adcl %edx,FPU_accum_2 254 adcl $0,FPU_accum_3 255 256/* guess^2 now in FPU_accum_3:FPU_accum_2:FPU_accum_1 */ 257 258 movl FPU_fsqrt_arg_0,%eax /* get normalized n */ 259 subl %eax,FPU_accum_1 260 movl FPU_fsqrt_arg_1,%eax 261 sbbl %eax,FPU_accum_2 262 movl FPU_fsqrt_arg_2,%eax /* ms word of normalized n */ 263 sbbl %eax,FPU_accum_3 264 jnc sqrt_stage_3_positive 265 266/* Subtraction gives a negative result, 267 negate the result before division */ 268 notl FPU_accum_1 269 notl FPU_accum_2 270 notl FPU_accum_3 271 addl $1,FPU_accum_1 272 adcl $0,FPU_accum_2 273 274#ifdef PARANOID 275 adcl $0,FPU_accum_3 /* This must be zero */ 276 jz sqrt_stage_3_no_error 277 278sqrt_stage_3_error: 279 pushl EX_INTERNAL|0x207 280 call EXCEPTION 281 282sqrt_stage_3_no_error: 283#endif /* PARANOID */ 284 285 movl FPU_accum_2,%edx 286 movl FPU_accum_1,%eax 287 divl %esi 288 movl %eax,%ecx 289 290 movl %edx,%eax 291 divl %esi 292 293 sarl $1,%ecx /* divide by 2 */ 294 rcrl $1,%eax 295 296 /* prepare to round the result */ 297 298 addl %ecx,%edi 299 adcl $0,%esi 300 301 jmp sqrt_stage_3_finished 302 303sqrt_stage_3_positive: 304 movl FPU_accum_2,%edx 305 movl FPU_accum_1,%eax 306 divl %esi 307 movl %eax,%ecx 308 309 movl %edx,%eax 310 divl %esi 311 312 sarl $1,%ecx /* divide by 2 */ 313 rcrl $1,%eax 314 315 /* prepare to round the result */ 316 317 notl %eax /* Negate the correction term */ 318 notl %ecx 319 addl $1,%eax 320 adcl $0,%ecx /* carry here ==> correction == 0 */ 321 adcl $0xffffffff,%esi 322 323 addl %ecx,%edi 324 adcl $0,%esi 325 326sqrt_stage_3_finished: 327 328/* 329 * The result in %esi:%edi:%esi should be good to about 90 bits here, 330 * and the rounding information here does not have sufficient accuracy 331 * in a few rare cases. 332 */ 333 cmpl $0xffffffe0,%eax 334 ja sqrt_near_exact_x 335 336 cmpl $0x00000020,%eax 337 jb sqrt_near_exact 338 339 cmpl $0x7fffffe0,%eax 340 jb sqrt_round_result 341 342 cmpl $0x80000020,%eax 343 jb sqrt_get_more_precision 344 345sqrt_round_result: 346/* Set up for rounding operations */ 347 movl %eax,%edx 348 movl %esi,%eax 349 movl %edi,%ebx 350 movl PARAM1,%edi 351 movw EXP_BIAS,EXP(%edi) /* Result is in [1.0 .. 2.0) */ 352 jmp fpu_reg_round 353 354 355sqrt_near_exact_x: 356/* First, the estimate must be rounded up. */ 357 addl $1,%edi 358 adcl $0,%esi 359 360sqrt_near_exact: 361/* 362 * This is an easy case because x^1/2 is monotonic. 363 * We need just find the square of our estimate, compare it 364 * with the argument, and deduce whether our estimate is 365 * above, below, or exact. We use the fact that the estimate 366 * is known to be accurate to about 90 bits. 367 */ 368 movl %edi,%eax /* ls word of guess */ 369 mull %edi 370 movl %edx,%ebx /* 2nd ls word of square */ 371 movl %eax,%ecx /* ls word of square */ 372 373 movl %edi,%eax 374 mull %esi 375 addl %eax,%ebx 376 addl %eax,%ebx 377 378#ifdef PARANOID 379 cmp $0xffffffb0,%ebx 380 jb sqrt_near_exact_ok 381 382 cmp $0x00000050,%ebx 383 ja sqrt_near_exact_ok 384 385 pushl EX_INTERNAL|0x214 386 call EXCEPTION 387 388sqrt_near_exact_ok: 389#endif /* PARANOID */ 390 391 or %ebx,%ebx 392 js sqrt_near_exact_small 393 394 jnz sqrt_near_exact_large 395 396 or %ebx,%edx 397 jnz sqrt_near_exact_large 398 399/* Our estimate is exactly the right answer */ 400 xorl %eax,%eax 401 jmp sqrt_round_result 402 403sqrt_near_exact_small: 404/* Our estimate is too small */ 405 movl $0x000000ff,%eax 406 jmp sqrt_round_result 407 408sqrt_near_exact_large: 409/* Our estimate is too large, we need to decrement it */ 410 subl $1,%edi 411 sbbl $0,%esi 412 movl $0xffffff00,%eax 413 jmp sqrt_round_result 414 415 416sqrt_get_more_precision: 417/* This case is almost the same as the above, except we start 418 with an extra bit of precision in the estimate. */ 419 stc /* The extra bit. */ 420 rcll $1,%edi /* Shift the estimate left one bit */ 421 rcll $1,%esi 422 423 movl %edi,%eax /* ls word of guess */ 424 mull %edi 425 movl %edx,%ebx /* 2nd ls word of square */ 426 movl %eax,%ecx /* ls word of square */ 427 428 movl %edi,%eax 429 mull %esi 430 addl %eax,%ebx 431 addl %eax,%ebx 432 433/* Put our estimate back to its original value */ 434 stc /* The ms bit. */ 435 rcrl $1,%esi /* Shift the estimate left one bit */ 436 rcrl $1,%edi 437 438#ifdef PARANOID 439 cmp $0xffffff60,%ebx 440 jb sqrt_more_prec_ok 441 442 cmp $0x000000a0,%ebx 443 ja sqrt_more_prec_ok 444 445 pushl EX_INTERNAL|0x215 446 call EXCEPTION 447 448sqrt_more_prec_ok: 449#endif /* PARANOID */ 450 451 or %ebx,%ebx 452 js sqrt_more_prec_small 453 454 jnz sqrt_more_prec_large 455 456 or %ebx,%ecx 457 jnz sqrt_more_prec_large 458 459/* Our estimate is exactly the right answer */ 460 movl $0x80000000,%eax 461 jmp sqrt_round_result 462 463sqrt_more_prec_small: 464/* Our estimate is too small */ 465 movl $0x800000ff,%eax 466 jmp sqrt_round_result 467 468sqrt_more_prec_large: 469/* Our estimate is too large */ 470 movl $0x7fffff00,%eax 471 jmp sqrt_round_result 472SYM_FUNC_END(wm_sqrt)