polynom_Xsig.S (4056B)
1/* SPDX-License-Identifier: GPL-2.0 */ 2/*---------------------------------------------------------------------------+ 3 | polynomial_Xsig.S | 4 | | 5 | Fixed point arithmetic polynomial evaluation. | 6 | | 7 | Copyright (C) 1992,1993,1994,1995 | 8 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | 9 | Australia. E-mail billm@jacobi.maths.monash.edu.au | 10 | | 11 | Call from C as: | 12 | void polynomial_Xsig(Xsig *accum, unsigned long long x, | 13 | unsigned long long terms[], int n) | 14 | | 15 | Computes: | 16 | terms[0] + (terms[1] + (terms[2] + ... + (terms[n-1]*x)*x)*x)*x) ... )*x | 17 | and adds the result to the 12 byte Xsig. | 18 | The terms[] are each 8 bytes, but all computation is performed to 12 byte | 19 | precision. | 20 | | 21 | This function must be used carefully: most overflow of intermediate | 22 | results is controlled, but overflow of the result is not. | 23 | | 24 +---------------------------------------------------------------------------*/ 25 .file "polynomial_Xsig.S" 26 27#include "fpu_emu.h" 28 29 30#define TERM_SIZE $8 31#define SUM_MS -20(%ebp) /* sum ms long */ 32#define SUM_MIDDLE -24(%ebp) /* sum middle long */ 33#define SUM_LS -28(%ebp) /* sum ls long */ 34#define ACCUM_MS -4(%ebp) /* accum ms long */ 35#define ACCUM_MIDDLE -8(%ebp) /* accum middle long */ 36#define ACCUM_LS -12(%ebp) /* accum ls long */ 37#define OVERFLOWED -16(%ebp) /* addition overflow flag */ 38 39.text 40SYM_FUNC_START(polynomial_Xsig) 41 pushl %ebp 42 movl %esp,%ebp 43 subl $32,%esp 44 pushl %esi 45 pushl %edi 46 pushl %ebx 47 48 movl PARAM2,%esi /* x */ 49 movl PARAM3,%edi /* terms */ 50 51 movl TERM_SIZE,%eax 52 mull PARAM4 /* n */ 53 addl %eax,%edi 54 55 movl 4(%edi),%edx /* terms[n] */ 56 movl %edx,SUM_MS 57 movl (%edi),%edx /* terms[n] */ 58 movl %edx,SUM_MIDDLE 59 xor %eax,%eax 60 movl %eax,SUM_LS 61 movb %al,OVERFLOWED 62 63 subl TERM_SIZE,%edi 64 decl PARAM4 65 js L_accum_done 66 67L_accum_loop: 68 xor %eax,%eax 69 movl %eax,ACCUM_MS 70 movl %eax,ACCUM_MIDDLE 71 72 movl SUM_MIDDLE,%eax 73 mull (%esi) /* x ls long */ 74 movl %edx,ACCUM_LS 75 76 movl SUM_MIDDLE,%eax 77 mull 4(%esi) /* x ms long */ 78 addl %eax,ACCUM_LS 79 adcl %edx,ACCUM_MIDDLE 80 adcl $0,ACCUM_MS 81 82 movl SUM_MS,%eax 83 mull (%esi) /* x ls long */ 84 addl %eax,ACCUM_LS 85 adcl %edx,ACCUM_MIDDLE 86 adcl $0,ACCUM_MS 87 88 movl SUM_MS,%eax 89 mull 4(%esi) /* x ms long */ 90 addl %eax,ACCUM_MIDDLE 91 adcl %edx,ACCUM_MS 92 93 testb $0xff,OVERFLOWED 94 jz L_no_overflow 95 96 movl (%esi),%eax 97 addl %eax,ACCUM_MIDDLE 98 movl 4(%esi),%eax 99 adcl %eax,ACCUM_MS /* This could overflow too */ 100 101L_no_overflow: 102 103/* 104 * Now put the sum of next term and the accumulator 105 * into the sum register 106 */ 107 movl ACCUM_LS,%eax 108 addl (%edi),%eax /* term ls long */ 109 movl %eax,SUM_LS 110 movl ACCUM_MIDDLE,%eax 111 adcl (%edi),%eax /* term ls long */ 112 movl %eax,SUM_MIDDLE 113 movl ACCUM_MS,%eax 114 adcl 4(%edi),%eax /* term ms long */ 115 movl %eax,SUM_MS 116 sbbb %al,%al 117 movb %al,OVERFLOWED /* Used in the next iteration */ 118 119 subl TERM_SIZE,%edi 120 decl PARAM4 121 jns L_accum_loop 122 123L_accum_done: 124 movl PARAM1,%edi /* accum */ 125 movl SUM_LS,%eax 126 addl %eax,(%edi) 127 movl SUM_MIDDLE,%eax 128 adcl %eax,4(%edi) 129 movl SUM_MS,%eax 130 adcl %eax,8(%edi) 131 132 popl %ebx 133 popl %edi 134 popl %esi 135 leave 136 RET 137SYM_FUNC_END(polynomial_Xsig)