cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

wm_sqrt.S (11040B)


      1/* SPDX-License-Identifier: GPL-2.0 */
      2	.file	"wm_sqrt.S"
      3/*---------------------------------------------------------------------------+
      4 |  wm_sqrt.S                                                                |
      5 |                                                                           |
      6 | Fixed point arithmetic square root evaluation.                            |
      7 |                                                                           |
      8 | Copyright (C) 1992,1993,1995,1997                                         |
      9 |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
     10 |                       Australia.  E-mail billm@suburbia.net               |
     11 |                                                                           |
     12 | Call from C as:                                                           |
     13 |    int wm_sqrt(FPU_REG *n, unsigned int control_word)                     |
     14 |                                                                           |
     15 +---------------------------------------------------------------------------*/
     16
     17/*---------------------------------------------------------------------------+
     18 |  wm_sqrt(FPU_REG *n, unsigned int control_word)                           |
     19 |    returns the square root of n in n.                                     |
     20 |                                                                           |
     21 |  Use Newton's method to compute the square root of a number, which must   |
     22 |  be in the range  [1.0 .. 4.0),  to 64 bits accuracy.                     |
     23 |  Does not check the sign or tag of the argument.                          |
     24 |  Sets the exponent, but not the sign or tag of the result.                |
     25 |                                                                           |
     26 |  The guess is kept in %esi:%edi                                           |
     27 +---------------------------------------------------------------------------*/
     28
     29#include "exception.h"
     30#include "fpu_emu.h"
     31
     32
     33#ifndef NON_REENTRANT_FPU
     34/*	Local storage on the stack: */
     35#define FPU_accum_3	-4(%ebp)	/* ms word */
     36#define FPU_accum_2	-8(%ebp)
     37#define FPU_accum_1	-12(%ebp)
     38#define FPU_accum_0	-16(%ebp)
     39
     40/*
     41 * The de-normalised argument:
     42 *                  sq_2                  sq_1              sq_0
     43 *        b b b b b b b ... b b b   b b b .... b b b   b 0 0 0 ... 0
     44 *           ^ binary point here
     45 */
     46#define FPU_fsqrt_arg_2	-20(%ebp)	/* ms word */
     47#define FPU_fsqrt_arg_1	-24(%ebp)
     48#define FPU_fsqrt_arg_0	-28(%ebp)	/* ls word, at most the ms bit is set */
     49
     50#else
     51/*	Local storage in a static area: */
     52.data
     53	.align 4,0
     54FPU_accum_3:
     55	.long	0		/* ms word */
     56FPU_accum_2:
     57	.long	0
     58FPU_accum_1:
     59	.long	0
     60FPU_accum_0:
     61	.long	0
     62
     63/* The de-normalised argument:
     64                    sq_2                  sq_1              sq_0
     65          b b b b b b b ... b b b   b b b .... b b b   b 0 0 0 ... 0
     66             ^ binary point here
     67 */
     68FPU_fsqrt_arg_2:
     69	.long	0		/* ms word */
     70FPU_fsqrt_arg_1:
     71	.long	0
     72FPU_fsqrt_arg_0:
     73	.long	0		/* ls word, at most the ms bit is set */
     74#endif /* NON_REENTRANT_FPU */ 
     75
     76
     77.text
     78SYM_FUNC_START(wm_sqrt)
     79	pushl	%ebp
     80	movl	%esp,%ebp
     81#ifndef NON_REENTRANT_FPU
     82	subl	$28,%esp
     83#endif /* NON_REENTRANT_FPU */
     84	pushl	%esi
     85	pushl	%edi
     86	pushl	%ebx
     87
     88	movl	PARAM1,%esi
     89
     90	movl	SIGH(%esi),%eax
     91	movl	SIGL(%esi),%ecx
     92	xorl	%edx,%edx
     93
     94/* We use a rough linear estimate for the first guess.. */
     95
     96	cmpw	EXP_BIAS,EXP(%esi)
     97	jnz	sqrt_arg_ge_2
     98
     99	shrl	$1,%eax			/* arg is in the range  [1.0 .. 2.0) */
    100	rcrl	$1,%ecx
    101	rcrl	$1,%edx
    102
    103sqrt_arg_ge_2:
    104/* From here on, n is never accessed directly again until it is
    105   replaced by the answer. */
    106
    107	movl	%eax,FPU_fsqrt_arg_2		/* ms word of n */
    108	movl	%ecx,FPU_fsqrt_arg_1
    109	movl	%edx,FPU_fsqrt_arg_0
    110
    111/* Make a linear first estimate */
    112	shrl	$1,%eax
    113	addl	$0x40000000,%eax
    114	movl	$0xaaaaaaaa,%ecx
    115	mull	%ecx
    116	shll	%edx			/* max result was 7fff... */
    117	testl	$0x80000000,%edx	/* but min was 3fff... */
    118	jnz	sqrt_prelim_no_adjust
    119
    120	movl	$0x80000000,%edx	/* round up */
    121
    122sqrt_prelim_no_adjust:
    123	movl	%edx,%esi	/* Our first guess */
    124
    125/* We have now computed (approx)   (2 + x) / 3, which forms the basis
    126   for a few iterations of Newton's method */
    127
    128	movl	FPU_fsqrt_arg_2,%ecx	/* ms word */
    129
    130/*
    131 * From our initial estimate, three iterations are enough to get us
    132 * to 30 bits or so. This will then allow two iterations at better
    133 * precision to complete the process.
    134 */
    135
    136/* Compute  (g + n/g)/2  at each iteration (g is the guess). */
    137	shrl	%ecx		/* Doing this first will prevent a divide */
    138				/* overflow later. */
    139
    140	movl	%ecx,%edx	/* msw of the arg / 2 */
    141	divl	%esi		/* current estimate */
    142	shrl	%esi		/* divide by 2 */
    143	addl	%eax,%esi	/* the new estimate */
    144
    145	movl	%ecx,%edx
    146	divl	%esi
    147	shrl	%esi
    148	addl	%eax,%esi
    149
    150	movl	%ecx,%edx
    151	divl	%esi
    152	shrl	%esi
    153	addl	%eax,%esi
    154
    155/*
    156 * Now that an estimate accurate to about 30 bits has been obtained (in %esi),
    157 * we improve it to 60 bits or so.
    158 *
    159 * The strategy from now on is to compute new estimates from
    160 *      guess := guess + (n - guess^2) / (2 * guess)
    161 */
    162
    163/* First, find the square of the guess */
    164	movl	%esi,%eax
    165	mull	%esi
    166/* guess^2 now in %edx:%eax */
    167
    168	movl	FPU_fsqrt_arg_1,%ecx
    169	subl	%ecx,%eax
    170	movl	FPU_fsqrt_arg_2,%ecx	/* ms word of normalized n */
    171	sbbl	%ecx,%edx
    172	jnc	sqrt_stage_2_positive
    173
    174/* Subtraction gives a negative result,
    175   negate the result before division. */
    176	notl	%edx
    177	notl	%eax
    178	addl	$1,%eax
    179	adcl	$0,%edx
    180
    181	divl	%esi
    182	movl	%eax,%ecx
    183
    184	movl	%edx,%eax
    185	divl	%esi
    186	jmp	sqrt_stage_2_finish
    187
    188sqrt_stage_2_positive:
    189	divl	%esi
    190	movl	%eax,%ecx
    191
    192	movl	%edx,%eax
    193	divl	%esi
    194
    195	notl	%ecx
    196	notl	%eax
    197	addl	$1,%eax
    198	adcl	$0,%ecx
    199
    200sqrt_stage_2_finish:
    201	sarl	$1,%ecx		/* divide by 2 */
    202	rcrl	$1,%eax
    203
    204	/* Form the new estimate in %esi:%edi */
    205	movl	%eax,%edi
    206	addl	%ecx,%esi
    207
    208	jnz	sqrt_stage_2_done	/* result should be [1..2) */
    209
    210#ifdef PARANOID
    211/* It should be possible to get here only if the arg is ffff....ffff */
    212	cmpl	$0xffffffff,FPU_fsqrt_arg_1
    213	jnz	sqrt_stage_2_error
    214#endif /* PARANOID */
    215
    216/* The best rounded result. */
    217	xorl	%eax,%eax
    218	decl	%eax
    219	movl	%eax,%edi
    220	movl	%eax,%esi
    221	movl	$0x7fffffff,%eax
    222	jmp	sqrt_round_result
    223
    224#ifdef PARANOID
    225sqrt_stage_2_error:
    226	pushl	EX_INTERNAL|0x213
    227	call	EXCEPTION
    228#endif /* PARANOID */ 
    229
    230sqrt_stage_2_done:
    231
    232/* Now the square root has been computed to better than 60 bits. */
    233
    234/* Find the square of the guess. */
    235	movl	%edi,%eax		/* ls word of guess */
    236	mull	%edi
    237	movl	%edx,FPU_accum_1
    238
    239	movl	%esi,%eax
    240	mull	%esi
    241	movl	%edx,FPU_accum_3
    242	movl	%eax,FPU_accum_2
    243
    244	movl	%edi,%eax
    245	mull	%esi
    246	addl	%eax,FPU_accum_1
    247	adcl	%edx,FPU_accum_2
    248	adcl	$0,FPU_accum_3
    249
    250/*	movl	%esi,%eax */
    251/*	mull	%edi */
    252	addl	%eax,FPU_accum_1
    253	adcl	%edx,FPU_accum_2
    254	adcl	$0,FPU_accum_3
    255
    256/* guess^2 now in FPU_accum_3:FPU_accum_2:FPU_accum_1 */
    257
    258	movl	FPU_fsqrt_arg_0,%eax		/* get normalized n */
    259	subl	%eax,FPU_accum_1
    260	movl	FPU_fsqrt_arg_1,%eax
    261	sbbl	%eax,FPU_accum_2
    262	movl	FPU_fsqrt_arg_2,%eax		/* ms word of normalized n */
    263	sbbl	%eax,FPU_accum_3
    264	jnc	sqrt_stage_3_positive
    265
    266/* Subtraction gives a negative result,
    267   negate the result before division */
    268	notl	FPU_accum_1
    269	notl	FPU_accum_2
    270	notl	FPU_accum_3
    271	addl	$1,FPU_accum_1
    272	adcl	$0,FPU_accum_2
    273
    274#ifdef PARANOID
    275	adcl	$0,FPU_accum_3	/* This must be zero */
    276	jz	sqrt_stage_3_no_error
    277
    278sqrt_stage_3_error:
    279	pushl	EX_INTERNAL|0x207
    280	call	EXCEPTION
    281
    282sqrt_stage_3_no_error:
    283#endif /* PARANOID */
    284
    285	movl	FPU_accum_2,%edx
    286	movl	FPU_accum_1,%eax
    287	divl	%esi
    288	movl	%eax,%ecx
    289
    290	movl	%edx,%eax
    291	divl	%esi
    292
    293	sarl	$1,%ecx		/* divide by 2 */
    294	rcrl	$1,%eax
    295
    296	/* prepare to round the result */
    297
    298	addl	%ecx,%edi
    299	adcl	$0,%esi
    300
    301	jmp	sqrt_stage_3_finished
    302
    303sqrt_stage_3_positive:
    304	movl	FPU_accum_2,%edx
    305	movl	FPU_accum_1,%eax
    306	divl	%esi
    307	movl	%eax,%ecx
    308
    309	movl	%edx,%eax
    310	divl	%esi
    311
    312	sarl	$1,%ecx		/* divide by 2 */
    313	rcrl	$1,%eax
    314
    315	/* prepare to round the result */
    316
    317	notl	%eax		/* Negate the correction term */
    318	notl	%ecx
    319	addl	$1,%eax
    320	adcl	$0,%ecx		/* carry here ==> correction == 0 */
    321	adcl	$0xffffffff,%esi
    322
    323	addl	%ecx,%edi
    324	adcl	$0,%esi
    325
    326sqrt_stage_3_finished:
    327
    328/*
    329 * The result in %esi:%edi:%esi should be good to about 90 bits here,
    330 * and the rounding information here does not have sufficient accuracy
    331 * in a few rare cases.
    332 */
    333	cmpl	$0xffffffe0,%eax
    334	ja	sqrt_near_exact_x
    335
    336	cmpl	$0x00000020,%eax
    337	jb	sqrt_near_exact
    338
    339	cmpl	$0x7fffffe0,%eax
    340	jb	sqrt_round_result
    341
    342	cmpl	$0x80000020,%eax
    343	jb	sqrt_get_more_precision
    344
    345sqrt_round_result:
    346/* Set up for rounding operations */
    347	movl	%eax,%edx
    348	movl	%esi,%eax
    349	movl	%edi,%ebx
    350	movl	PARAM1,%edi
    351	movw	EXP_BIAS,EXP(%edi)	/* Result is in  [1.0 .. 2.0) */
    352	jmp	fpu_reg_round
    353
    354
    355sqrt_near_exact_x:
    356/* First, the estimate must be rounded up. */
    357	addl	$1,%edi
    358	adcl	$0,%esi
    359
    360sqrt_near_exact:
    361/*
    362 * This is an easy case because x^1/2 is monotonic.
    363 * We need just find the square of our estimate, compare it
    364 * with the argument, and deduce whether our estimate is
    365 * above, below, or exact. We use the fact that the estimate
    366 * is known to be accurate to about 90 bits.
    367 */
    368	movl	%edi,%eax		/* ls word of guess */
    369	mull	%edi
    370	movl	%edx,%ebx		/* 2nd ls word of square */
    371	movl	%eax,%ecx		/* ls word of square */
    372
    373	movl	%edi,%eax
    374	mull	%esi
    375	addl	%eax,%ebx
    376	addl	%eax,%ebx
    377
    378#ifdef PARANOID
    379	cmp	$0xffffffb0,%ebx
    380	jb	sqrt_near_exact_ok
    381
    382	cmp	$0x00000050,%ebx
    383	ja	sqrt_near_exact_ok
    384
    385	pushl	EX_INTERNAL|0x214
    386	call	EXCEPTION
    387
    388sqrt_near_exact_ok:
    389#endif /* PARANOID */ 
    390
    391	or	%ebx,%ebx
    392	js	sqrt_near_exact_small
    393
    394	jnz	sqrt_near_exact_large
    395
    396	or	%ebx,%edx
    397	jnz	sqrt_near_exact_large
    398
    399/* Our estimate is exactly the right answer */
    400	xorl	%eax,%eax
    401	jmp	sqrt_round_result
    402
    403sqrt_near_exact_small:
    404/* Our estimate is too small */
    405	movl	$0x000000ff,%eax
    406	jmp	sqrt_round_result
    407	
    408sqrt_near_exact_large:
    409/* Our estimate is too large, we need to decrement it */
    410	subl	$1,%edi
    411	sbbl	$0,%esi
    412	movl	$0xffffff00,%eax
    413	jmp	sqrt_round_result
    414
    415
    416sqrt_get_more_precision:
    417/* This case is almost the same as the above, except we start
    418   with an extra bit of precision in the estimate. */
    419	stc			/* The extra bit. */
    420	rcll	$1,%edi		/* Shift the estimate left one bit */
    421	rcll	$1,%esi
    422
    423	movl	%edi,%eax		/* ls word of guess */
    424	mull	%edi
    425	movl	%edx,%ebx		/* 2nd ls word of square */
    426	movl	%eax,%ecx		/* ls word of square */
    427
    428	movl	%edi,%eax
    429	mull	%esi
    430	addl	%eax,%ebx
    431	addl	%eax,%ebx
    432
    433/* Put our estimate back to its original value */
    434	stc			/* The ms bit. */
    435	rcrl	$1,%esi		/* Shift the estimate left one bit */
    436	rcrl	$1,%edi
    437
    438#ifdef PARANOID
    439	cmp	$0xffffff60,%ebx
    440	jb	sqrt_more_prec_ok
    441
    442	cmp	$0x000000a0,%ebx
    443	ja	sqrt_more_prec_ok
    444
    445	pushl	EX_INTERNAL|0x215
    446	call	EXCEPTION
    447
    448sqrt_more_prec_ok:
    449#endif /* PARANOID */ 
    450
    451	or	%ebx,%ebx
    452	js	sqrt_more_prec_small
    453
    454	jnz	sqrt_more_prec_large
    455
    456	or	%ebx,%ecx
    457	jnz	sqrt_more_prec_large
    458
    459/* Our estimate is exactly the right answer */
    460	movl	$0x80000000,%eax
    461	jmp	sqrt_round_result
    462
    463sqrt_more_prec_small:
    464/* Our estimate is too small */
    465	movl	$0x800000ff,%eax
    466	jmp	sqrt_round_result
    467	
    468sqrt_more_prec_large:
    469/* Our estimate is too large */
    470	movl	$0x7fffff00,%eax
    471	jmp	sqrt_round_result
    472SYM_FUNC_END(wm_sqrt)