1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /*---------------------------------------------------------------------------+ |
3 | | poly_sin.c | |
4 | | | |
5 | | Computation of an approximation of the sin function and the cosine | |
6 | | function by a polynomial. | |
7 | | | |
8 | | Copyright (C) 1992,1993,1994,1997,1999 | |
9 | | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | |
10 | | E-mail billm@melbpc.org.au | |
11 | | | |
12 | | | |
13 | +---------------------------------------------------------------------------*/ |
14 | |
15 | #include "exception.h" |
16 | #include "reg_constant.h" |
17 | #include "fpu_emu.h" |
18 | #include "fpu_system.h" |
19 | #include "control_w.h" |
20 | #include "poly.h" |
21 | |
22 | #define N_COEFF_P 4 |
23 | #define N_COEFF_N 4 |
24 | |
25 | static const unsigned long long pos_terms_l[N_COEFF_P] = { |
26 | 0xaaaaaaaaaaaaaaabLL, |
27 | 0x00d00d00d00cf906LL, |
28 | 0x000006b99159a8bbLL, |
29 | 0x000000000d7392e6LL |
30 | }; |
31 | |
32 | static const unsigned long long neg_terms_l[N_COEFF_N] = { |
33 | 0x2222222222222167LL, |
34 | 0x0002e3bc74aab624LL, |
35 | 0x0000000b09229062LL, |
36 | 0x00000000000c7973LL |
37 | }; |
38 | |
39 | #define N_COEFF_PH 4 |
40 | #define N_COEFF_NH 4 |
41 | static const unsigned long long pos_terms_h[N_COEFF_PH] = { |
42 | 0x0000000000000000LL, |
43 | 0x05b05b05b05b0406LL, |
44 | 0x000049f93edd91a9LL, |
45 | 0x00000000c9c9ed62LL |
46 | }; |
47 | |
48 | static const unsigned long long neg_terms_h[N_COEFF_NH] = { |
49 | 0xaaaaaaaaaaaaaa98LL, |
50 | 0x001a01a01a019064LL, |
51 | 0x0000008f76c68a77LL, |
52 | 0x0000000000d58f5eLL |
53 | }; |
54 | |
55 | /*--- poly_sine() -----------------------------------------------------------+ |
56 | | | |
57 | +---------------------------------------------------------------------------*/ |
58 | void poly_sine(FPU_REG *st0_ptr) |
59 | { |
60 | int exponent, echange; |
61 | Xsig accumulator, argSqrd, argTo4; |
62 | unsigned long fix_up, adj; |
63 | unsigned long long fixed_arg; |
64 | FPU_REG result; |
65 | |
66 | exponent = exponent(st0_ptr); |
67 | |
68 | accumulator.lsw = accumulator.midw = accumulator.msw = 0; |
69 | |
70 | /* Split into two ranges, for arguments below and above 1.0 */ |
71 | /* The boundary between upper and lower is approx 0.88309101259 */ |
72 | if ((exponent < -1) |
73 | || ((exponent == -1) && (st0_ptr->sigh <= 0xe21240aa))) { |
74 | /* The argument is <= 0.88309101259 */ |
75 | |
76 | argSqrd.msw = st0_ptr->sigh; |
77 | argSqrd.midw = st0_ptr->sigl; |
78 | argSqrd.lsw = 0; |
79 | mul64_Xsig(&argSqrd, mult: &significand(st0_ptr)); |
80 | shr_Xsig(&argSqrd, n: 2 * (-1 - exponent)); |
81 | argTo4.msw = argSqrd.msw; |
82 | argTo4.midw = argSqrd.midw; |
83 | argTo4.lsw = argSqrd.lsw; |
84 | mul_Xsig_Xsig(dest: &argTo4, mult: &argTo4); |
85 | |
86 | polynomial_Xsig(&accumulator, x: &XSIG_LL(argTo4), terms: neg_terms_l, |
87 | N_COEFF_N - 1); |
88 | mul_Xsig_Xsig(dest: &accumulator, mult: &argSqrd); |
89 | negate_Xsig(x: &accumulator); |
90 | |
91 | polynomial_Xsig(&accumulator, x: &XSIG_LL(argTo4), terms: pos_terms_l, |
92 | N_COEFF_P - 1); |
93 | |
94 | shr_Xsig(&accumulator, n: 2); /* Divide by four */ |
95 | accumulator.msw |= 0x80000000; /* Add 1.0 */ |
96 | |
97 | mul64_Xsig(&accumulator, mult: &significand(st0_ptr)); |
98 | mul64_Xsig(&accumulator, mult: &significand(st0_ptr)); |
99 | mul64_Xsig(&accumulator, mult: &significand(st0_ptr)); |
100 | |
101 | /* Divide by four, FPU_REG compatible, etc */ |
102 | exponent = 3 * exponent; |
103 | |
104 | /* The minimum exponent difference is 3 */ |
105 | shr_Xsig(&accumulator, exponent(st0_ptr) - exponent); |
106 | |
107 | negate_Xsig(x: &accumulator); |
108 | XSIG_LL(accumulator) += significand(st0_ptr); |
109 | |
110 | echange = round_Xsig(&accumulator); |
111 | |
112 | setexponentpos(&result, exponent(st0_ptr) + echange); |
113 | } else { |
114 | /* The argument is > 0.88309101259 */ |
115 | /* We use sin(st(0)) = cos(pi/2-st(0)) */ |
116 | |
117 | fixed_arg = significand(st0_ptr); |
118 | |
119 | if (exponent == 0) { |
120 | /* The argument is >= 1.0 */ |
121 | |
122 | /* Put the binary point at the left. */ |
123 | fixed_arg <<= 1; |
124 | } |
125 | /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */ |
126 | fixed_arg = 0x921fb54442d18469LL - fixed_arg; |
127 | /* There is a special case which arises due to rounding, to fix here. */ |
128 | if (fixed_arg == 0xffffffffffffffffLL) |
129 | fixed_arg = 0; |
130 | |
131 | XSIG_LL(argSqrd) = fixed_arg; |
132 | argSqrd.lsw = 0; |
133 | mul64_Xsig(&argSqrd, mult: &fixed_arg); |
134 | |
135 | XSIG_LL(argTo4) = XSIG_LL(argSqrd); |
136 | argTo4.lsw = argSqrd.lsw; |
137 | mul_Xsig_Xsig(dest: &argTo4, mult: &argTo4); |
138 | |
139 | polynomial_Xsig(&accumulator, x: &XSIG_LL(argTo4), terms: neg_terms_h, |
140 | N_COEFF_NH - 1); |
141 | mul_Xsig_Xsig(dest: &accumulator, mult: &argSqrd); |
142 | negate_Xsig(x: &accumulator); |
143 | |
144 | polynomial_Xsig(&accumulator, x: &XSIG_LL(argTo4), terms: pos_terms_h, |
145 | N_COEFF_PH - 1); |
146 | negate_Xsig(x: &accumulator); |
147 | |
148 | mul64_Xsig(&accumulator, mult: &fixed_arg); |
149 | mul64_Xsig(&accumulator, mult: &fixed_arg); |
150 | |
151 | shr_Xsig(&accumulator, n: 3); |
152 | negate_Xsig(x: &accumulator); |
153 | |
154 | add_Xsig_Xsig(dest: &accumulator, x2: &argSqrd); |
155 | |
156 | shr_Xsig(&accumulator, n: 1); |
157 | |
158 | accumulator.lsw |= 1; /* A zero accumulator here would cause problems */ |
159 | negate_Xsig(x: &accumulator); |
160 | |
161 | /* The basic computation is complete. Now fix the answer to |
162 | compensate for the error due to the approximation used for |
163 | pi/2 |
164 | */ |
165 | |
166 | /* This has an exponent of -65 */ |
167 | fix_up = 0x898cc517; |
168 | /* The fix-up needs to be improved for larger args */ |
169 | if (argSqrd.msw & 0xffc00000) { |
170 | /* Get about 32 bit precision in these: */ |
171 | fix_up -= mul_32_32(arg1: 0x898cc517, arg2: argSqrd.msw) / 6; |
172 | } |
173 | fix_up = mul_32_32(arg1: fix_up, LL_MSW(fixed_arg)); |
174 | |
175 | adj = accumulator.lsw; /* temp save */ |
176 | accumulator.lsw -= fix_up; |
177 | if (accumulator.lsw > adj) |
178 | XSIG_LL(accumulator)--; |
179 | |
180 | echange = round_Xsig(&accumulator); |
181 | |
182 | setexponentpos(&result, echange - 1); |
183 | } |
184 | |
185 | significand(&result) = XSIG_LL(accumulator); |
186 | setsign(&result, getsign(st0_ptr)); |
187 | FPU_copy_to_reg0(r: &result, TAG_Valid); |
188 | |
189 | #ifdef PARANOID |
190 | if ((exponent(&result) >= 0) |
191 | && (significand(&result) > 0x8000000000000000LL)) { |
192 | EXCEPTION(EX_INTERNAL | 0x150); |
193 | } |
194 | #endif /* PARANOID */ |
195 | |
196 | } |
197 | |
198 | /*--- poly_cos() ------------------------------------------------------------+ |
199 | | | |
200 | +---------------------------------------------------------------------------*/ |
201 | void poly_cos(FPU_REG *st0_ptr) |
202 | { |
203 | FPU_REG result; |
204 | long int exponent, exp2, echange; |
205 | Xsig accumulator, argSqrd, fix_up, argTo4; |
206 | unsigned long long fixed_arg; |
207 | |
208 | #ifdef PARANOID |
209 | if ((exponent(st0_ptr) > 0) |
210 | || ((exponent(st0_ptr) == 0) |
211 | && (significand(st0_ptr) > 0xc90fdaa22168c234LL))) { |
212 | EXCEPTION(EX_Invalid); |
213 | FPU_copy_to_reg0(&CONST_QNaN, TAG_Special); |
214 | return; |
215 | } |
216 | #endif /* PARANOID */ |
217 | |
218 | exponent = exponent(st0_ptr); |
219 | |
220 | accumulator.lsw = accumulator.midw = accumulator.msw = 0; |
221 | |
222 | if ((exponent < -1) |
223 | || ((exponent == -1) && (st0_ptr->sigh <= 0xb00d6f54))) { |
224 | /* arg is < 0.687705 */ |
225 | |
226 | argSqrd.msw = st0_ptr->sigh; |
227 | argSqrd.midw = st0_ptr->sigl; |
228 | argSqrd.lsw = 0; |
229 | mul64_Xsig(&argSqrd, mult: &significand(st0_ptr)); |
230 | |
231 | if (exponent < -1) { |
232 | /* shift the argument right by the required places */ |
233 | shr_Xsig(&argSqrd, n: 2 * (-1 - exponent)); |
234 | } |
235 | |
236 | argTo4.msw = argSqrd.msw; |
237 | argTo4.midw = argSqrd.midw; |
238 | argTo4.lsw = argSqrd.lsw; |
239 | mul_Xsig_Xsig(dest: &argTo4, mult: &argTo4); |
240 | |
241 | polynomial_Xsig(&accumulator, x: &XSIG_LL(argTo4), terms: neg_terms_h, |
242 | N_COEFF_NH - 1); |
243 | mul_Xsig_Xsig(dest: &accumulator, mult: &argSqrd); |
244 | negate_Xsig(x: &accumulator); |
245 | |
246 | polynomial_Xsig(&accumulator, x: &XSIG_LL(argTo4), terms: pos_terms_h, |
247 | N_COEFF_PH - 1); |
248 | negate_Xsig(x: &accumulator); |
249 | |
250 | mul64_Xsig(&accumulator, mult: &significand(st0_ptr)); |
251 | mul64_Xsig(&accumulator, mult: &significand(st0_ptr)); |
252 | shr_Xsig(&accumulator, n: -2 * (1 + exponent)); |
253 | |
254 | shr_Xsig(&accumulator, n: 3); |
255 | negate_Xsig(x: &accumulator); |
256 | |
257 | add_Xsig_Xsig(dest: &accumulator, x2: &argSqrd); |
258 | |
259 | shr_Xsig(&accumulator, n: 1); |
260 | |
261 | /* It doesn't matter if accumulator is all zero here, the |
262 | following code will work ok */ |
263 | negate_Xsig(x: &accumulator); |
264 | |
265 | if (accumulator.lsw & 0x80000000) |
266 | XSIG_LL(accumulator)++; |
267 | if (accumulator.msw == 0) { |
268 | /* The result is 1.0 */ |
269 | FPU_copy_to_reg0(r: &CONST_1, TAG_Valid); |
270 | return; |
271 | } else { |
272 | significand(&result) = XSIG_LL(accumulator); |
273 | |
274 | /* will be a valid positive nr with expon = -1 */ |
275 | setexponentpos(&result, -1); |
276 | } |
277 | } else { |
278 | fixed_arg = significand(st0_ptr); |
279 | |
280 | if (exponent == 0) { |
281 | /* The argument is >= 1.0 */ |
282 | |
283 | /* Put the binary point at the left. */ |
284 | fixed_arg <<= 1; |
285 | } |
286 | /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */ |
287 | fixed_arg = 0x921fb54442d18469LL - fixed_arg; |
288 | /* There is a special case which arises due to rounding, to fix here. */ |
289 | if (fixed_arg == 0xffffffffffffffffLL) |
290 | fixed_arg = 0; |
291 | |
292 | exponent = -1; |
293 | exp2 = -1; |
294 | |
295 | /* A shift is needed here only for a narrow range of arguments, |
296 | i.e. for fixed_arg approx 2^-32, but we pick up more... */ |
297 | if (!(LL_MSW(fixed_arg) & 0xffff0000)) { |
298 | fixed_arg <<= 16; |
299 | exponent -= 16; |
300 | exp2 -= 16; |
301 | } |
302 | |
303 | XSIG_LL(argSqrd) = fixed_arg; |
304 | argSqrd.lsw = 0; |
305 | mul64_Xsig(&argSqrd, mult: &fixed_arg); |
306 | |
307 | if (exponent < -1) { |
308 | /* shift the argument right by the required places */ |
309 | shr_Xsig(&argSqrd, n: 2 * (-1 - exponent)); |
310 | } |
311 | |
312 | argTo4.msw = argSqrd.msw; |
313 | argTo4.midw = argSqrd.midw; |
314 | argTo4.lsw = argSqrd.lsw; |
315 | mul_Xsig_Xsig(dest: &argTo4, mult: &argTo4); |
316 | |
317 | polynomial_Xsig(&accumulator, x: &XSIG_LL(argTo4), terms: neg_terms_l, |
318 | N_COEFF_N - 1); |
319 | mul_Xsig_Xsig(dest: &accumulator, mult: &argSqrd); |
320 | negate_Xsig(x: &accumulator); |
321 | |
322 | polynomial_Xsig(&accumulator, x: &XSIG_LL(argTo4), terms: pos_terms_l, |
323 | N_COEFF_P - 1); |
324 | |
325 | shr_Xsig(&accumulator, n: 2); /* Divide by four */ |
326 | accumulator.msw |= 0x80000000; /* Add 1.0 */ |
327 | |
328 | mul64_Xsig(&accumulator, mult: &fixed_arg); |
329 | mul64_Xsig(&accumulator, mult: &fixed_arg); |
330 | mul64_Xsig(&accumulator, mult: &fixed_arg); |
331 | |
332 | /* Divide by four, FPU_REG compatible, etc */ |
333 | exponent = 3 * exponent; |
334 | |
335 | /* The minimum exponent difference is 3 */ |
336 | shr_Xsig(&accumulator, n: exp2 - exponent); |
337 | |
338 | negate_Xsig(x: &accumulator); |
339 | XSIG_LL(accumulator) += fixed_arg; |
340 | |
341 | /* The basic computation is complete. Now fix the answer to |
342 | compensate for the error due to the approximation used for |
343 | pi/2 |
344 | */ |
345 | |
346 | /* This has an exponent of -65 */ |
347 | XSIG_LL(fix_up) = 0x898cc51701b839a2ll; |
348 | fix_up.lsw = 0; |
349 | |
350 | /* The fix-up needs to be improved for larger args */ |
351 | if (argSqrd.msw & 0xffc00000) { |
352 | /* Get about 32 bit precision in these: */ |
353 | fix_up.msw -= mul_32_32(arg1: 0x898cc517, arg2: argSqrd.msw) / 2; |
354 | fix_up.msw += mul_32_32(arg1: 0x898cc517, arg2: argTo4.msw) / 24; |
355 | } |
356 | |
357 | exp2 += norm_Xsig(&accumulator); |
358 | shr_Xsig(&accumulator, n: 1); /* Prevent overflow */ |
359 | exp2++; |
360 | shr_Xsig(&fix_up, n: 65 + exp2); |
361 | |
362 | add_Xsig_Xsig(dest: &accumulator, x2: &fix_up); |
363 | |
364 | echange = round_Xsig(&accumulator); |
365 | |
366 | setexponentpos(&result, exp2 + echange); |
367 | significand(&result) = XSIG_LL(accumulator); |
368 | } |
369 | |
370 | FPU_copy_to_reg0(r: &result, TAG_Valid); |
371 | |
372 | #ifdef PARANOID |
373 | if ((exponent(&result) >= 0) |
374 | && (significand(&result) > 0x8000000000000000LL)) { |
375 | EXCEPTION(EX_INTERNAL | 0x151); |
376 | } |
377 | #endif /* PARANOID */ |
378 | |
379 | } |
380 | |