s_sincosf.h source code [glibc/sysdeps/ieee754/flt-32/s_sincosf.h]

1	/ Used by sinf, cosf and sincosf functions.*
2	Copyright (C) 2018-2022 Free Software Foundation, Inc.
3	This file is part of the GNU C Library.
4
5	The GNU C Library is free software; you can redistribute it and/or
6	modify it under the terms of the GNU Lesser General Public
7	License as published by the Free Software Foundation; either
8	version 2.1 of the License, or (at your option) any later version.
9
10	The GNU C Library is distributed in the hope that it will be useful,
11	but WITHOUT ANY WARRANTY; without even the implied warranty of
12	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13	Lesser General Public License for more details.
14
15	You should have received a copy of the GNU Lesser General Public
16	License along with the GNU C Library; if not, see
17	<https://www.gnu.org/licenses/>. /*
18
19	#include <stdint.h>
20	#include <math.h>
21	#include "math_config.h"
22	#include <sincosf_poly.h>
23
24	/ 2PI * 2^-64. /
25	static const double pi63 = `0x1.921FB54442D18p-62`;
26	/ PI / 4. /
27	static const float pio4 = `0x1.921FB6p-1f`;
28
29	/ Polynomial data (the cosine polynomial is negated in the 2nd entry). /
30	extern const sincos_t __sincosf_table[`2`] attribute_hidden;
31
32	/ Table with 4/PI to 192 bit precision. /
33	extern const uint32_t __inv_pio4[] attribute_hidden;
34
35	/ Top 12 bits of the float representation with the sign bit cleared. /
36	static inline uint32_t
37	abstop12 (float x)
38	{
39	return (asuint (f: x) >> `20`) & `0x7ff`;
40	}
41
42	/ Fast range reduction using single multiply-subtract. Return the modulo of*
43	X as a value between -PI/4 and PI/4 and store the quadrant in NP.
44	The values for PI/2 and 2/PI are accessed via P. Since PI/2 as a double
45	is accurate to 55 bits and the worst-case cancellation happens at 6 PI/4,*
46	the result is accurate for \|X\| <= 120.0. /*
47	static inline double
48	reduce_fast (double x, const sincos_t p, int* *np)
49	{
50	double r;
51	#if TOINT_INTRINSICS
52	/ Use fast round and lround instructions when available. /
53	r = x * p->hpi_inv;
54	*np = converttoint (r);
55	return x - roundtoint (r) * p->hpi;
56	#else
57	/ Use scaled float to int conversion with explicit rounding.*
58	hpi_inv is prescaled by 2^24 so the quadrant ends up in bits 24..31.
59	This avoids inaccuracies introduced by truncating negative values. /*
60	r = x * p->hpi_inv;
61	int n = ((int32_t)r + `0x800000`) >> `24`;
62	*np = n;
63	return x - n * p->hpi;
64	#endif
65	}
66
67	/ Reduce the range of XI to a multiple of PI/2 using fast integer arithmetic.*
68	XI is a reinterpreted float and must be >= 2.0f (the sign bit is ignored).
69	Return the modulo between -PI/4 and PI/4 and store the quadrant in NP.
70	Reduction uses a table of 4/PI with 192 bits of precision. A 32x96->128 bit
71	multiply computes the exact 2.62-bit fixed-point modulo. Since the result
72	can have at most 29 leading zeros after the binary point, the double
73	precision result is accurate to 33 bits. /*
74	static inline double
75	reduce_large (uint32_t xi, int *np)
76	{
77	const uint32_t *arr = &__inv_pio4[(xi >> `26`) & `15`];
78	int shift = (xi >> `23`) & `7`;
79	uint64_t n, res0, res1, res2;
80
81	xi = (xi & `0xffffff`) \| `0x800000`;
82	xi <<= shift;
83
84	res0 = xi * arr[`0`];
85	res1 = (uint64_t)xi * arr[`4`];
86	res2 = (uint64_t)xi * arr[`8`];
87	res0 = (res2 >> `32`) \| (res0 << `32`);
88	res0 += res1;
89
90	n = (res0 + (`1ULL` << `61`)) >> `62`;
91	res0 -= n << `62`;
92	double x = (int64_t)res0;
93	*np = n;
94	return x * pi63;
95	}
96

source code of glibc/sysdeps/ieee754/flt-32/s_sincosf.h