cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

polynom_Xsig.S (4056B)


      1/* SPDX-License-Identifier: GPL-2.0 */
      2/*---------------------------------------------------------------------------+
      3 |  polynomial_Xsig.S                                                        |
      4 |                                                                           |
      5 | Fixed point arithmetic polynomial evaluation.                             |
      6 |                                                                           |
      7 | Copyright (C) 1992,1993,1994,1995                                         |
      8 |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
      9 |                       Australia.  E-mail billm@jacobi.maths.monash.edu.au |
     10 |                                                                           |
     11 | Call from C as:                                                           |
     12 |   void polynomial_Xsig(Xsig *accum, unsigned long long x,                 |
     13 |                        unsigned long long terms[], int n)                 |
     14 |                                                                           |
     15 | Computes:                                                                 |
     16 | terms[0] + (terms[1] + (terms[2] + ... + (terms[n-1]*x)*x)*x)*x) ... )*x  |
     17 | and adds the result to the 12 byte Xsig.                                  |
     18 | The terms[] are each 8 bytes, but all computation is performed to 12 byte |
     19 | precision.                                                                |
     20 |                                                                           |
     21 | This function must be used carefully: most overflow of intermediate       |
     22 | results is controlled, but overflow of the result is not.                 |
     23 |                                                                           |
     24 +---------------------------------------------------------------------------*/
     25	.file	"polynomial_Xsig.S"
     26
     27#include "fpu_emu.h"
     28
     29
     30#define	TERM_SIZE	$8
     31#define	SUM_MS		-20(%ebp)	/* sum ms long */
     32#define SUM_MIDDLE	-24(%ebp)	/* sum middle long */
     33#define	SUM_LS		-28(%ebp)	/* sum ls long */
     34#define	ACCUM_MS	-4(%ebp)	/* accum ms long */
     35#define	ACCUM_MIDDLE	-8(%ebp)	/* accum middle long */
     36#define	ACCUM_LS	-12(%ebp)	/* accum ls long */
     37#define OVERFLOWED      -16(%ebp)	/* addition overflow flag */
     38
     39.text
     40SYM_FUNC_START(polynomial_Xsig)
     41	pushl	%ebp
     42	movl	%esp,%ebp
     43	subl	$32,%esp
     44	pushl	%esi
     45	pushl	%edi
     46	pushl	%ebx
     47
     48	movl	PARAM2,%esi		/* x */
     49	movl	PARAM3,%edi		/* terms */
     50
     51	movl	TERM_SIZE,%eax
     52	mull	PARAM4			/* n */
     53	addl	%eax,%edi
     54
     55	movl	4(%edi),%edx		/* terms[n] */
     56	movl	%edx,SUM_MS
     57	movl	(%edi),%edx		/* terms[n] */
     58	movl	%edx,SUM_MIDDLE
     59	xor	%eax,%eax
     60	movl	%eax,SUM_LS
     61	movb	%al,OVERFLOWED
     62
     63	subl	TERM_SIZE,%edi
     64	decl	PARAM4
     65	js	L_accum_done
     66
     67L_accum_loop:
     68	xor	%eax,%eax
     69	movl	%eax,ACCUM_MS
     70	movl	%eax,ACCUM_MIDDLE
     71
     72	movl	SUM_MIDDLE,%eax
     73	mull	(%esi)			/* x ls long */
     74	movl	%edx,ACCUM_LS
     75
     76	movl	SUM_MIDDLE,%eax
     77	mull	4(%esi)			/* x ms long */
     78	addl	%eax,ACCUM_LS
     79	adcl	%edx,ACCUM_MIDDLE
     80	adcl	$0,ACCUM_MS
     81
     82	movl	SUM_MS,%eax
     83	mull	(%esi)			/* x ls long */
     84	addl	%eax,ACCUM_LS
     85	adcl	%edx,ACCUM_MIDDLE
     86	adcl	$0,ACCUM_MS
     87
     88	movl	SUM_MS,%eax
     89	mull	4(%esi)			/* x ms long */
     90	addl	%eax,ACCUM_MIDDLE
     91	adcl	%edx,ACCUM_MS
     92
     93	testb	$0xff,OVERFLOWED
     94	jz	L_no_overflow
     95
     96	movl	(%esi),%eax
     97	addl	%eax,ACCUM_MIDDLE
     98	movl	4(%esi),%eax
     99	adcl	%eax,ACCUM_MS		/* This could overflow too */
    100
    101L_no_overflow:
    102
    103/*
    104 * Now put the sum of next term and the accumulator
    105 * into the sum register
    106 */
    107	movl	ACCUM_LS,%eax
    108	addl	(%edi),%eax		/* term ls long */
    109	movl	%eax,SUM_LS
    110	movl	ACCUM_MIDDLE,%eax
    111	adcl	(%edi),%eax		/* term ls long */
    112	movl	%eax,SUM_MIDDLE
    113	movl	ACCUM_MS,%eax
    114	adcl	4(%edi),%eax		/* term ms long */
    115	movl	%eax,SUM_MS
    116	sbbb	%al,%al
    117	movb	%al,OVERFLOWED		/* Used in the next iteration */
    118
    119	subl	TERM_SIZE,%edi
    120	decl	PARAM4
    121	jns	L_accum_loop
    122
    123L_accum_done:
    124	movl	PARAM1,%edi		/* accum */
    125	movl	SUM_LS,%eax
    126	addl	%eax,(%edi)
    127	movl	SUM_MIDDLE,%eax
    128	adcl	%eax,4(%edi)
    129	movl	SUM_MS,%eax
    130	adcl	%eax,8(%edi)
    131
    132	popl	%ebx
    133	popl	%edi
    134	popl	%esi
    135	leave
    136	RET
    137SYM_FUNC_END(polynomial_Xsig)