cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

sacos.S (2827B)


      1|
      2|	sacos.sa 3.3 12/19/90
      3|
      4|	Description: The entry point sAcos computes the inverse cosine of
      5|		an input argument; sAcosd does the same except for denormalized
      6|		input.
      7|
      8|	Input: Double-extended number X in location pointed to
      9|		by address register a0.
     10|
     11|	Output: The value arccos(X) returned in floating-point register Fp0.
     12|
     13|	Accuracy and Monotonicity: The returned result is within 3 ulps in
     14|		64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
     15|		result is subsequently rounded to double precision. The
     16|		result is provably monotonic in double precision.
     17|
     18|	Speed: The program sCOS takes approximately 310 cycles.
     19|
     20|	Algorithm:
     21|
     22|	ACOS
     23|	1. If |X| >= 1, go to 3.
     24|
     25|	2. (|X| < 1) Calculate acos(X) by
     26|		z := (1-X) / (1+X)
     27|		acos(X) = 2 * atan( sqrt(z) ).
     28|		Exit.
     29|
     30|	3. If |X| > 1, go to 5.
     31|
     32|	4. (|X| = 1) If X > 0, return 0. Otherwise, return Pi. Exit.
     33|
     34|	5. (|X| > 1) Generate an invalid operation by 0 * infinity.
     35|		Exit.
     36|
     37
     38|		Copyright (C) Motorola, Inc. 1990
     39|			All Rights Reserved
     40|
     41|       For details on the license for this file, please see the
     42|       file, README, in this same directory.
     43
     44|SACOS	idnt	2,1 | Motorola 040 Floating Point Software Package
     45
     46	|section	8
     47
     48PI:	.long 0x40000000,0xC90FDAA2,0x2168C235,0x00000000
     49PIBY2:	.long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000
     50
     51	|xref	t_operr
     52	|xref	t_frcinx
     53	|xref	satan
     54
     55	.global	sacosd
     56sacosd:
     57|--ACOS(X) = PI/2 FOR DENORMALIZED X
     58	fmovel		%d1,%fpcr		| ...load user's rounding mode/precision
     59	fmovex		PIBY2,%fp0
     60	bra		t_frcinx
     61
     62	.global	sacos
     63sacos:
     64	fmovex		(%a0),%fp0	| ...LOAD INPUT
     65
     66	movel		(%a0),%d0		| ...pack exponent with upper 16 fraction
     67	movew		4(%a0),%d0
     68	andil		#0x7FFFFFFF,%d0
     69	cmpil		#0x3FFF8000,%d0
     70	bges		ACOSBIG
     71
     72|--THIS IS THE USUAL CASE, |X| < 1
     73|--ACOS(X) = 2 * ATAN(	SQRT( (1-X)/(1+X) )	)
     74
     75	fmoves		#0x3F800000,%fp1
     76	faddx		%fp0,%fp1		| ...1+X
     77	fnegx		%fp0		| ... -X
     78	fadds		#0x3F800000,%fp0	| ...1-X
     79	fdivx		%fp1,%fp0		| ...(1-X)/(1+X)
     80	fsqrtx		%fp0		| ...SQRT((1-X)/(1+X))
     81	fmovemx	%fp0-%fp0,(%a0)	| ...overwrite input
     82	movel		%d1,-(%sp)	|save original users fpcr
     83	clrl		%d1
     84	bsr		satan		| ...ATAN(SQRT([1-X]/[1+X]))
     85	fmovel		(%sp)+,%fpcr	|restore users exceptions
     86	faddx		%fp0,%fp0		| ...2 * ATAN( STUFF )
     87	bra		t_frcinx
     88
     89ACOSBIG:
     90	fabsx		%fp0
     91	fcmps		#0x3F800000,%fp0
     92	fbgt		t_operr		|cause an operr exception
     93
     94|--|X| = 1, ACOS(X) = 0 OR PI
     95	movel		(%a0),%d0		| ...pack exponent with upper 16 fraction
     96	movew		4(%a0),%d0
     97	cmpl		#0,%d0		|D0 has original exponent+fraction
     98	bgts		ACOSP1
     99
    100|--X = -1
    101|Returns PI and inexact exception
    102	fmovex		PI,%fp0
    103	fmovel		%d1,%FPCR
    104	fadds		#0x00800000,%fp0	|cause an inexact exception to be put
    105|					;into the 040 - will not trap until next
    106|					;fp inst.
    107	bra		t_frcinx
    108
    109ACOSP1:
    110	fmovel		%d1,%FPCR
    111	fmoves		#0x00000000,%fp0
    112	rts				|Facos ; of +1 is exact
    113
    114	|end