poly_sin.c source code [linux/arch/x86/math-emu/poly_sin.c]

1	// SPDX-License-Identifier: GPL-2.0
2	/---------------------------------------------------------------------------+*
3	\| poly_sin.c \|
4	\| \|
5	\| Computation of an approximation of the sin function and the cosine \|
6	\| function by a polynomial. \|
7	\| \|
8	\| Copyright (C) 1992,1993,1994,1997,1999 \|
9	\| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia \|
10	\| E-mail billm@melbpc.org.au \|
11	\| \|
12	\| \|
13	+---------------------------------------------------------------------------/*
14
15	#include "exception.h"
16	#include "reg_constant.h"
17	#include "fpu_emu.h"
18	#include "fpu_system.h"
19	#include "control_w.h"
20	#include "poly.h"
21
22	#define N_COEFF_P 4
23	#define N_COEFF_N 4
24
25	static const unsigned long long pos_terms_l[N_COEFF_P] = {
26	`0xaaaaaaaaaaaaaaabLL`,
27	`0x00d00d00d00cf906LL`,
28	`0x000006b99159a8bbLL`,
29	`0x000000000d7392e6LL`
30	};
31
32	static const unsigned long long neg_terms_l[N_COEFF_N] = {
33	`0x2222222222222167LL`,
34	`0x0002e3bc74aab624LL`,
35	`0x0000000b09229062LL`,
36	`0x00000000000c7973LL`
37	};
38
39	#define N_COEFF_PH 4
40	#define N_COEFF_NH 4
41	static const unsigned long long pos_terms_h[N_COEFF_PH] = {
42	`0x0000000000000000LL`,
43	`0x05b05b05b05b0406LL`,
44	`0x000049f93edd91a9LL`,
45	`0x00000000c9c9ed62LL`
46	};
47
48	static const unsigned long long neg_terms_h[N_COEFF_NH] = {
49	`0xaaaaaaaaaaaaaa98LL`,
50	`0x001a01a01a019064LL`,
51	`0x0000008f76c68a77LL`,
52	`0x0000000000d58f5eLL`
53	};
54
55	/--- poly_sine() -----------------------------------------------------------+*
56	\| \|
57	+---------------------------------------------------------------------------/*
58	void poly_sine(FPU_REG *st0_ptr)
59	{
60	int exponent, echange;
61	Xsig accumulator, argSqrd, argTo4;
62	unsigned long fix_up, adj;
63	unsigned long long fixed_arg;
64	FPU_REG result;
65
66	exponent = exponent(st0_ptr);
67
68	accumulator.lsw = accumulator.midw = accumulator.msw = `0`;
69
70	/ Split into two ranges, for arguments below and above 1.0 /
71	/ The boundary between upper and lower is approx 0.88309101259 /
72	if ((exponent < -`1`)
73	\|\| ((exponent == -`1`) && (st0_ptr->sigh <= `0xe21240aa`))) {
74	/ The argument is <= 0.88309101259 /
75
76	argSqrd.msw = st0_ptr->sigh;
77	argSqrd.midw = st0_ptr->sigl;
78	argSqrd.lsw = `0`;
79	mul64_Xsig(&argSqrd, mult: &significand(st0_ptr));
80	shr_Xsig(&argSqrd, n: `2` * (-`1` - exponent));
81	argTo4.msw = argSqrd.msw;
82	argTo4.midw = argSqrd.midw;
83	argTo4.lsw = argSqrd.lsw;
84	mul_Xsig_Xsig(dest: &argTo4, mult: &argTo4);
85
86	polynomial_Xsig(&accumulator, x: &XSIG_LL(argTo4), terms: neg_terms_l,
87	N_COEFF_N - `1`);
88	mul_Xsig_Xsig(dest: &accumulator, mult: &argSqrd);
89	negate_Xsig(x: &accumulator);
90
91	polynomial_Xsig(&accumulator, x: &XSIG_LL(argTo4), terms: pos_terms_l,
92	N_COEFF_P - `1`);
93
94	shr_Xsig(&accumulator, n: `2`); / Divide by four /
95	accumulator.msw \|= `0x80000000`; / Add 1.0 /
96
97	mul64_Xsig(&accumulator, mult: &significand(st0_ptr));
98	mul64_Xsig(&accumulator, mult: &significand(st0_ptr));
99	mul64_Xsig(&accumulator, mult: &significand(st0_ptr));
100
101	/ Divide by four, FPU_REG compatible, etc /
102	exponent = `3` * exponent;
103
104	/ The minimum exponent difference is 3 /
105	shr_Xsig(&accumulator, exponent(st0_ptr) - exponent);
106
107	negate_Xsig(x: &accumulator);
108	XSIG_LL(accumulator) += significand(st0_ptr);
109
110	echange = round_Xsig(&accumulator);
111
112	setexponentpos(&result, exponent(st0_ptr) + echange);
113	} else {
114	/ The argument is > 0.88309101259 /
115	/ We use sin(st(0)) = cos(pi/2-st(0)) /
116
117	fixed_arg = significand(st0_ptr);
118
119	if (exponent == `0`) {
120	/ The argument is >= 1.0 /
121
122	/ Put the binary point at the left. /
123	fixed_arg <<= `1`;
124	}
125	/ pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 /
126	fixed_arg = `0x921fb54442d18469LL` - fixed_arg;
127	/ There is a special case which arises due to rounding, to fix here. /
128	if (fixed_arg == `0xffffffffffffffffLL`)
129	fixed_arg = `0`;
130
131	XSIG_LL(argSqrd) = fixed_arg;
132	argSqrd.lsw = `0`;
133	mul64_Xsig(&argSqrd, mult: &fixed_arg);
134
135	XSIG_LL(argTo4) = XSIG_LL(argSqrd);
136	argTo4.lsw = argSqrd.lsw;
137	mul_Xsig_Xsig(dest: &argTo4, mult: &argTo4);
138
139	polynomial_Xsig(&accumulator, x: &XSIG_LL(argTo4), terms: neg_terms_h,
140	N_COEFF_NH - `1`);
141	mul_Xsig_Xsig(dest: &accumulator, mult: &argSqrd);
142	negate_Xsig(x: &accumulator);
143
144	polynomial_Xsig(&accumulator, x: &XSIG_LL(argTo4), terms: pos_terms_h,
145	N_COEFF_PH - `1`);
146	negate_Xsig(x: &accumulator);
147
148	mul64_Xsig(&accumulator, mult: &fixed_arg);
149	mul64_Xsig(&accumulator, mult: &fixed_arg);
150
151	shr_Xsig(&accumulator, n: `3`);
152	negate_Xsig(x: &accumulator);
153
154	add_Xsig_Xsig(dest: &accumulator, x2: &argSqrd);
155
156	shr_Xsig(&accumulator, n: `1`);
157
158	accumulator.lsw \|= `1`; / A zero accumulator here would cause problems /
159	negate_Xsig(x: &accumulator);
160
161	/ The basic computation is complete. Now fix the answer to*
162	compensate for the error due to the approximation used for
163	pi/2
164	*/
165
166	/ This has an exponent of -65 /
167	fix_up = `0x898cc517`;
168	/ The fix-up needs to be improved for larger args /
169	if (argSqrd.msw & `0xffc00000`) {
170	/ Get about 32 bit precision in these: /
171	fix_up -= mul_32_32(arg1: `0x898cc517`, arg2: argSqrd.msw) / `6`;
172	}
173	fix_up = mul_32_32(arg1: fix_up, LL_MSW(fixed_arg));
174
175	adj = accumulator.lsw; / temp save /
176	accumulator.lsw -= fix_up;
177	if (accumulator.lsw > adj)
178	XSIG_LL(accumulator)--;
179
180	echange = round_Xsig(&accumulator);
181
182	setexponentpos(&result, echange - `1`);
183	}
184
185	significand(&result) = XSIG_LL(accumulator);
186	setsign(&result, getsign(st0_ptr));
187	FPU_copy_to_reg0(r: &result, TAG_Valid);
188
189	#ifdef PARANOID
190	if ((exponent(&result) >= `0`)
191	&& (significand(&result) > `0x8000000000000000LL`)) {
192	EXCEPTION(EX_INTERNAL \| `0x150`);
193	}
194	#endif /* PARANOID */
195
196	}
197
198	/--- poly_cos() ------------------------------------------------------------+*
199	\| \|
200	+---------------------------------------------------------------------------/*
201	void poly_cos(FPU_REG *st0_ptr)
202	{
203	FPU_REG result;
204	long int exponent, exp2, echange;
205	Xsig accumulator, argSqrd, fix_up, argTo4;
206	unsigned long long fixed_arg;
207
208	#ifdef PARANOID
209	if ((exponent(st0_ptr) > `0`)
210	\|\| ((exponent(st0_ptr) == `0`)
211	&& (significand(st0_ptr) > `0xc90fdaa22168c234LL`))) {
212	EXCEPTION(EX_Invalid);
213	FPU_copy_to_reg0(&CONST_QNaN, TAG_Special);
214	return;
215	}
216	#endif /* PARANOID */
217
218	exponent = exponent(st0_ptr);
219
220	accumulator.lsw = accumulator.midw = accumulator.msw = `0`;
221
222	if ((exponent < -`1`)
223	\|\| ((exponent == -`1`) && (st0_ptr->sigh <= `0xb00d6f54`))) {
224	/ arg is < 0.687705 /
225
226	argSqrd.msw = st0_ptr->sigh;
227	argSqrd.midw = st0_ptr->sigl;
228	argSqrd.lsw = `0`;
229	mul64_Xsig(&argSqrd, mult: &significand(st0_ptr));
230
231	if (exponent < -`1`) {
232	/ shift the argument right by the required places /
233	shr_Xsig(&argSqrd, n: `2` * (-`1` - exponent));
234	}
235
236	argTo4.msw = argSqrd.msw;
237	argTo4.midw = argSqrd.midw;
238	argTo4.lsw = argSqrd.lsw;
239	mul_Xsig_Xsig(dest: &argTo4, mult: &argTo4);
240
241	polynomial_Xsig(&accumulator, x: &XSIG_LL(argTo4), terms: neg_terms_h,
242	N_COEFF_NH - `1`);
243	mul_Xsig_Xsig(dest: &accumulator, mult: &argSqrd);
244	negate_Xsig(x: &accumulator);
245
246	polynomial_Xsig(&accumulator, x: &XSIG_LL(argTo4), terms: pos_terms_h,
247	N_COEFF_PH - `1`);
248	negate_Xsig(x: &accumulator);
249
250	mul64_Xsig(&accumulator, mult: &significand(st0_ptr));
251	mul64_Xsig(&accumulator, mult: &significand(st0_ptr));
252	shr_Xsig(&accumulator, n: -`2` * (`1` + exponent));
253
254	shr_Xsig(&accumulator, n: `3`);
255	negate_Xsig(x: &accumulator);
256
257	add_Xsig_Xsig(dest: &accumulator, x2: &argSqrd);
258
259	shr_Xsig(&accumulator, n: `1`);
260
261	/ It doesn't matter if accumulator is all zero here, the*
262	following code will work ok /*
263	negate_Xsig(x: &accumulator);
264
265	if (accumulator.lsw & `0x80000000`)
266	XSIG_LL(accumulator)++;
267	if (accumulator.msw == `0`) {
268	/ The result is 1.0 /
269	FPU_copy_to_reg0(r: &CONST_1, TAG_Valid);
270	return;
271	} else {
272	significand(&result) = XSIG_LL(accumulator);
273
274	/ will be a valid positive nr with expon = -1 /
275	setexponentpos(&result, -`1`);
276	}
277	} else {
278	fixed_arg = significand(st0_ptr);
279
280	if (exponent == `0`) {
281	/ The argument is >= 1.0 /
282
283	/ Put the binary point at the left. /
284	fixed_arg <<= `1`;
285	}
286	/ pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 /
287	fixed_arg = `0x921fb54442d18469LL` - fixed_arg;
288	/ There is a special case which arises due to rounding, to fix here. /
289	if (fixed_arg == `0xffffffffffffffffLL`)
290	fixed_arg = `0`;
291
292	exponent = -`1`;
293	exp2 = -`1`;
294
295	/ A shift is needed here only for a narrow range of arguments,*
296	i.e. for fixed_arg approx 2^-32, but we pick up more... /*
297	if (!(LL_MSW(fixed_arg) & `0xffff0000`)) {
298	fixed_arg <<= `16`;
299	exponent -= `16`;
300	exp2 -= `16`;
301	}
302
303	XSIG_LL(argSqrd) = fixed_arg;
304	argSqrd.lsw = `0`;
305	mul64_Xsig(&argSqrd, mult: &fixed_arg);
306
307	if (exponent < -`1`) {
308	/ shift the argument right by the required places /
309	shr_Xsig(&argSqrd, n: `2` * (-`1` - exponent));
310	}
311
312	argTo4.msw = argSqrd.msw;
313	argTo4.midw = argSqrd.midw;
314	argTo4.lsw = argSqrd.lsw;
315	mul_Xsig_Xsig(dest: &argTo4, mult: &argTo4);
316
317	polynomial_Xsig(&accumulator, x: &XSIG_LL(argTo4), terms: neg_terms_l,
318	N_COEFF_N - `1`);
319	mul_Xsig_Xsig(dest: &accumulator, mult: &argSqrd);
320	negate_Xsig(x: &accumulator);
321
322	polynomial_Xsig(&accumulator, x: &XSIG_LL(argTo4), terms: pos_terms_l,
323	N_COEFF_P - `1`);
324
325	shr_Xsig(&accumulator, n: `2`); / Divide by four /
326	accumulator.msw \|= `0x80000000`; / Add 1.0 /
327
328	mul64_Xsig(&accumulator, mult: &fixed_arg);
329	mul64_Xsig(&accumulator, mult: &fixed_arg);
330	mul64_Xsig(&accumulator, mult: &fixed_arg);
331
332	/ Divide by four, FPU_REG compatible, etc /
333	exponent = `3` * exponent;
334
335	/ The minimum exponent difference is 3 /
336	shr_Xsig(&accumulator, n: exp2 - exponent);
337
338	negate_Xsig(x: &accumulator);
339	XSIG_LL(accumulator) += fixed_arg;
340
341	/ The basic computation is complete. Now fix the answer to*
342	compensate for the error due to the approximation used for
343	pi/2
344	*/
345
346	/ This has an exponent of -65 /
347	XSIG_LL(fix_up) = `0x898cc51701b839a2ll`;
348	fix_up.lsw = `0`;
349
350	/ The fix-up needs to be improved for larger args /
351	if (argSqrd.msw & `0xffc00000`) {
352	/ Get about 32 bit precision in these: /
353	fix_up.msw -= mul_32_32(arg1: `0x898cc517`, arg2: argSqrd.msw) / `2`;
354	fix_up.msw += mul_32_32(arg1: `0x898cc517`, arg2: argTo4.msw) / `24`;
355	}
356
357	exp2 += norm_Xsig(&accumulator);
358	shr_Xsig(&accumulator, n: `1`); / Prevent overflow /
359	exp2++;
360	shr_Xsig(&fix_up, n: `65` + exp2);
361
362	add_Xsig_Xsig(dest: &accumulator, x2: &fix_up);
363
364	echange = round_Xsig(&accumulator);
365
366	setexponentpos(&result, exp2 + echange);
367	significand(&result) = XSIG_LL(accumulator);
368	}
369
370	FPU_copy_to_reg0(r: &result, TAG_Valid);
371
372	#ifdef PARANOID
373	if ((exponent(&result) >= `0`)
374	&& (significand(&result) > `0x8000000000000000LL`)) {
375	EXCEPTION(EX_INTERNAL \| `0x151`);
376	}
377	#endif /* PARANOID */
378
379	}
380

source code of linux/arch/x86/math-emu/poly_sin.c