1 | /* SPDX-License-Identifier: GPL-2.0 */ |
2 | /*---------------------------------------------------------------------------+ |
3 | | polynomial_Xsig.S | |
4 | | | |
5 | | Fixed point arithmetic polynomial evaluation. | |
6 | | | |
7 | | Copyright (C) 1992,1993,1994,1995 | |
8 | | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | |
9 | | Australia. E-mail billm@jacobi.maths.monash.edu.au | |
10 | | | |
11 | | Call from C as: | |
12 | | void polynomial_Xsig(Xsig *accum, const unsigned long long *x, | |
13 | | const unsigned long long terms[], int n) | |
14 | | | |
15 | | Computes (n is the index of the highest term, so n+1 terms are read): | |
16 | | terms[0] + (terms[1] + ... + (terms[n-1] + terms[n]*x)*x ... )*x | |
17 | | and adds the result to the 12 byte Xsig. | |
18 | | The terms[] are each 8 bytes, but all computation is performed to 12 byte | |
19 | | precision. | |
20 | | | |
21 | | This function must be used carefully: most overflow of intermediate | |
22 | | results is controlled, but overflow of the result is not. | |
23 | | | |
24 | +---------------------------------------------------------------------------*/ |
25 | .file "polynomial_Xsig.S" |
26 | |
27 | #include "fpu_emu.h" |
28 | |
29 | |
/*
 * Local frame, addressed from %ebp.
 *
 *   ACCUM_*  96-bit product of the running sum and x for the current step
 *   SUM_*    96-bit running sum (only the MS and MIDDLE longs are fed
 *            back into the next multiplication)
 */
#define	TERM_SIZE	$8		/* bytes per terms[] entry */

#define	ACCUM_MS	-4(%ebp)	/* accum ms long */
#define	ACCUM_MIDDLE	-8(%ebp)	/* accum middle long */
#define	ACCUM_LS	-12(%ebp)	/* accum ls long */
#define	OVERFLOWED	-16(%ebp)	/* non-zero: last sum carried out */
#define	SUM_MS		-20(%ebp)	/* sum ms long */
#define	SUM_MIDDLE	-24(%ebp)	/* sum middle long */
#define	SUM_LS		-28(%ebp)	/* sum ls long */

.text
/*
 * Register use:
 *   %esi  -> x (ls long at 0, ms long at 4)
 *   %edi  -> term being added next; walks from terms[n] down to terms[0],
 *            then is reused as the accum pointer
 *   %eax, %edx  scratch (clobbered by mull)
 * PARAM4 (n) is decremented in place and serves as the loop counter.
 */
SYM_FUNC_START(polynomial_Xsig)
	pushl	%ebp
	movl	%esp,%ebp
	subl	$32,%esp		/* locals live at -4 .. -28(%ebp) */
	pushl	%esi
	pushl	%edi
	pushl	%ebx			/* saved/restored; not used below */

	movl	PARAM2,%esi		/* x */
	movl	PARAM3,%edi		/* terms */

	movl	TERM_SIZE,%eax
	mull	PARAM4			/* n * TERM_SIZE */
	addl	%eax,%edi		/* %edi -> terms[n] */

	/* Seed the sum with the highest term: SUM = terms[n] : 0 */
	movl	(%edi),%eax
	movl	%eax,SUM_MIDDLE
	movl	4(%edi),%eax
	movl	%eax,SUM_MS
	movl	$0,SUM_LS
	movb	$0,OVERFLOWED

L_next_term:
	subl	TERM_SIZE,%edi		/* step down to the next lower term */
	decl	PARAM4
	js	L_poly_done		/* every term has been folded in */

	/*
	 * ACCUM = (SUM_MS:SUM_MIDDLE * x) >> 32, held in 96 bits.
	 * The ms*ms partial product lands on MIDDLE:MS, the two cross
	 * products on LS:MIDDLE, and only the high long of the ls*ls
	 * partial product is kept (in LS).
	 */
	movl	SUM_MS,%eax
	mull	4(%esi)			/* sum ms * x ms */
	movl	%eax,ACCUM_MIDDLE
	movl	%edx,ACCUM_MS

	movl	SUM_MIDDLE,%eax
	mull	(%esi)			/* sum middle * x ls */
	movl	%edx,ACCUM_LS

	movl	SUM_MIDDLE,%eax
	mull	4(%esi)			/* sum middle * x ms */
	addl	%eax,ACCUM_LS
	adcl	%edx,ACCUM_MIDDLE
	adcl	$0,ACCUM_MS

	movl	SUM_MS,%eax
	mull	(%esi)			/* sum ms * x ls */
	addl	%eax,ACCUM_LS
	adcl	%edx,ACCUM_MIDDLE
	adcl	$0,ACCUM_MS

	/*
	 * If the previous addition carried out of SUM_MS, that lost bit
	 * multiplied by x is worth x itself at the MIDDLE:MS position.
	 */
	cmpb	$0,OVERFLOWED
	je	L_carry_folded

	movl	(%esi),%eax
	addl	%eax,ACCUM_MIDDLE
	movl	4(%esi),%eax
	adcl	%eax,ACCUM_MS		/* This could overflow too */

L_carry_folded:
	/*
	 * SUM = ACCUM + term.  The three additions form one carry chain;
	 * the movl instructions in between leave the carry flag alone.
	 *
	 * NOTE(review): the term's ls long is added at both the LS and the
	 * MIDDLE position.  A 64-bit term would be expected to line up
	 * with MIDDLE:MS only -- confirm the intent before changing this.
	 */
	movl	ACCUM_LS,%edx
	addl	(%edi),%edx		/* term ls long */
	movl	%edx,SUM_LS
	movl	ACCUM_MIDDLE,%edx
	adcl	(%edi),%edx		/* term ls long */
	movl	%edx,SUM_MIDDLE
	movl	ACCUM_MS,%edx
	adcl	4(%edi),%edx		/* term ms long */
	movl	%edx,SUM_MS
	setc	OVERFLOWED		/* remembered for the next step */

	jmp	L_next_term

L_poly_done:
	/* Add the 96-bit sum into the caller's Xsig, ls long first. */
	movl	PARAM1,%edi		/* accum */
	movl	SUM_LS,%eax
	addl	%eax,(%edi)
	movl	SUM_MIDDLE,%eax
	adcl	%eax,4(%edi)
	movl	SUM_MS,%eax
	adcl	%eax,8(%edi)		/* carry out of the result is dropped */

	popl	%ebx
	popl	%edi
	popl	%esi
	leave
	RET
SYM_FUNC_END(polynomial_Xsig)
138 | |