e_expl.c source code [glibc/sysdeps/ieee754/ldbl-128ibm/e_expl.c]

1	/ Quad-precision floating point e^x.*
2	Copyright (C) 1999-2022 Free Software Foundation, Inc.
3	This file is part of the GNU C Library.
4
5	The GNU C Library is free software; you can redistribute it and/or
6	modify it under the terms of the GNU Lesser General Public
7	License as published by the Free Software Foundation; either
8	version 2.1 of the License, or (at your option) any later version.
9
10	The GNU C Library is distributed in the hope that it will be useful,
11	but WITHOUT ANY WARRANTY; without even the implied warranty of
12	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13	Lesser General Public License for more details.
14
15	You should have received a copy of the GNU Lesser General Public
16	License along with the GNU C Library; if not, see
17	<https://www.gnu.org/licenses/>. /*
18
19	/ The basic design here is from*
20	Abraham Ziv, "Fast Evaluation of Elementary Mathematical Functions with
21	Correctly Rounded Last Bit", ACM Trans. Math. Soft., 17 (3), September 1991,
22	pp. 410-423.
23
24	We work with number pairs where the first number is the high part and
25	the second one is the low part. Arithmetic with the high part numbers must
26	be exact, without any roundoff errors.
27
28	The input value, X, is written as
29	X = n ln(2)_0 + arg1[t1]_0 + arg2[t2]_0 + x*
30	- n ln(2)_1 + arg1[t1]_1 + arg2[t2]_1 + xl*
31
32	where:
33	- n is an integer, 16384 >= n >= -16495;
34	- ln(2)_0 is the first 93 bits of ln(2), and \|ln(2)_0-ln(2)-ln(2)_1\| < 2^-205
35	- t1 is an integer, 89 >= t1 >= -89
36	- t2 is an integer, 65 >= t2 >= -65
37	- \|arg1[t1]-t1/256.0\| < 2^-53
38	- \|arg2[t2]-t2/32768.0\| < 2^-53
39	- x + xl is whatever is left, \|x + xl\| < 2^-16 + 2^-53
40
41	Then e^x is approximated as
42
43	e^x = 2^n_1 ( 2^n_0 e^(arg1[t1]_0 + arg1[t1]_1) e^(arg2[t2]_0 + arg2[t2]_1)
44	+ 2^n_0 e^(arg1[t1]_0 + arg1[t1]_1) e^(arg2[t2]_0 + arg2[t2]_1)
45	* p (x + xl + n * ln(2)_1))
46	where:
47	- p(x) is a polynomial approximating e(x)-1
48	- e^(arg1[t1]_0 + arg1[t1]_1) is obtained from a table
49	- e^(arg2[t2]_0 + arg2[t2]_1) likewise
50	- n_1 + n_0 = n, so that \|n_0\| < -LDBL_MIN_EXP-1.
51
52	If it happens that n_1 == 0 (this is the usual case), that multiplication
53	is omitted.
54	*/
55
56	#ifndef _GNU_SOURCE
57	#define _GNU_SOURCE
58	#endif
59	#include <float.h>
60	#include <ieee754.h>
61	#include <math.h>
62	#include <fenv.h>
63	#include <inttypes.h>
64	#include <math_private.h>
65	#include <fenv_private.h>
66	#include <libm-alias-finite.h>
67
68	#include "t_expl.h"
69
70	static const long double C[] = {
71	/ Smallest integer x for which e^x overflows. /
72	#define himark C[0]
73	`709.78271289338399678773454114191496482L`,
74
75	/ Largest integer x for which e^x underflows. /
76	#define lomark C[1]
77	-`744.44007192138126231410729844608163411L`,
78
79	/ 3x2^96 /
80	#define THREEp96 C[2]
81	`59421121885698253195157962752.0L`,
82
83	/ 3x2^103 /
84	#define THREEp103 C[3]
85	`30423614405477505635920876929024.0L`,
86
87	/ 3x2^111 /
88	#define THREEp111 C[4]
89	`7788445287802241442795744493830144.0L`,
90
91	/ 1/ln(2) /
92	#define M_1_LN2 C[5]
93	`1.44269504088896340735992468100189204L`,
94
95	/ first 93 bits of ln(2) /
96	#define M_LN2_0 C[6]
97	`0.693147180559945309417232121457981864L`,
98
99	/ ln2_0 - ln(2) /
100	#define M_LN2_1 C[7]
101	-`1.94704509238074995158795957333327386E-31L`,
102
103	/ very small number /
104	#define TINY C[8]
105	`1.0e-308L`,
106
107	/ 2^16383 /
108	#define TWO1023 C[9]
109	`8.988465674311579538646525953945123668E+307L`,
110
111	/ 256 /
112	#define TWO8 C[10]
113	`256.0L`,
114
115	/ 32768 /
116	#define TWO15 C[11]
117	`32768.0L`,
118
119	/ Chebyshev polynom coefficients for (exp(x)-1)/x /
120	#define P1 C[12]
121	#define P2 C[13]
122	#define P3 C[14]
123	#define P4 C[15]
124	#define P5 C[16]
125	#define P6 C[17]
126	`0.5L`,
127	`1.66666666666666666666666666666666683E-01L`,
128	`4.16666666666666666666654902320001674E-02L`,
129	`8.33333333333333333333314659767198461E-03L`,
130	`1.38888888889899438565058018857254025E-03L`,
131	`1.98412698413981650382436541785404286E-04L`,
132	};
133
134	/ Avoid local PLT entry use from (int) roundl (...) being converted*
135	to a call to lroundl in the case of 32-bit long and roundl not
136	inlined. /*
137	long int lroundl (long double) asm ("__lroundl");
138
139	long double
140	__ieee754_expl (long double x)
141	{
142	long double result, x22;
143	union ibm_extended_long_double ex2_u, scale_u;
144	int unsafe;
145
146	/ Check for usual case. /
147	if (isless (x, himark) && isgreater (x, lomark))
148	{
149	int tval1, tval2, n_i, exponent2;
150	long double n, xl;
151
152	SET_RESTORE_ROUND (FE_TONEAREST);
153
154	n = roundl (x*M_1_LN2);
155	x = x-n*M_LN2_0;
156	xl = n*M_LN2_1;
157
158	tval1 = roundl (x*TWO8);
159	x -= __expl_table[T_EXPL_ARG1+`2`*tval1];
160	xl -= __expl_table[T_EXPL_ARG1+`2`*tval1+`1`];
161
162	tval2 = roundl (x*TWO15);
163	x -= __expl_table[T_EXPL_ARG2+`2`*tval2];
164	xl -= __expl_table[T_EXPL_ARG2+`2`*tval2+`1`];
165
166	x = x + xl;
167
168	/ Compute ex2 = 2^n_0 e^(argtable[tval1]) e^(argtable[tval2]). /
169	ex2_u.ld = (__expl_table[T_EXPL_RES1 + tval1]
170	* __expl_table[T_EXPL_RES2 + tval2]);
171	n_i = (int)n;
172	/ 'unsafe' is 1 iff n_1 != 0. /
173	unsafe = fabsl(x: n_i) >= -LDBL_MIN_EXP - `1`;
174	ex2_u.d[`0`].ieee.exponent += n_i >> unsafe;
175	/ Fortunately, there are no subnormal lowpart doubles in*
176	__expl_table, only normal values and zeros.
177	But after scaling it can be subnormal. /*
178	exponent2 = ex2_u.d[`1`].ieee.exponent + (n_i >> unsafe);
179	if (ex2_u.d[`1`].ieee.exponent == `0`)
180	/ assert ((ex2_u.d[1].ieee.mantissa0\|ex2_u.d[1].ieee.mantissa1) == 0) /;
181	else if (exponent2 > `0`)
182	ex2_u.d[`1`].ieee.exponent = exponent2;
183	else if (exponent2 <= -`54`)
184	{
185	ex2_u.d[`1`].ieee.exponent = `0`;
186	ex2_u.d[`1`].ieee.mantissa0 = `0`;
187	ex2_u.d[`1`].ieee.mantissa1 = `0`;
188	}
189	else
190	{
191	static const double
192	two54 = `1.80143985094819840000e+16`, / 4350000000000000 /
193	twom54 = `5.55111512312578270212e-17`; / 3C90000000000000 /
194	ex2_u.d[`1`].d *= two54;
195	ex2_u.d[`1`].ieee.exponent += n_i >> unsafe;
196	ex2_u.d[`1`].d *= twom54;
197	}
198
199	/ Compute scale = 2^n_1. /
200	scale_u.ld = `1.0L`;
201	scale_u.d[`0`].ieee.exponent += n_i - (n_i >> unsafe);
202
203	/ Approximate e^x2 - 1, using a seventh-degree polynomial,*
204	with maximum error in [-2^-16-2^-53,2^-16+2^-53]
205	less than 4.8e-39. /*
206	x22 = x + xx(P1+x(P2+x(P3+x(P4+x(P5+x*P6)))));
207
208	/ Now we can test whether the result is ultimate or if we are unsure.*
209	In the later case we should probably call a mpn based routine to give
210	the ultimate result.
211	Empirically, this routine is already ultimate in about 99.9986% of
212	cases, the test below for the round to nearest case will be false
213	in ~ 99.9963% of cases.
214	Without proc2 routine maximum error which has been seen is
215	0.5000262 ulp.
216
217	union ieee854_long_double ex3_u;
218
219	#ifdef FE_TONEAREST
220	fesetround (FE_TONEAREST);
221	#endif
222	ex3_u.d = (result - ex2_u.d) - x22 ex2_u.d;*
223	ex2_u.d = result;
224	ex3_u.ieee.exponent += LDBL_MANT_DIG + 15 + IEEE854_LONG_DOUBLE_BIAS
225	- ex2_u.ieee.exponent;
226	n_i = abs (ex3_u.d);
227	n_i = (n_i + 1) / 2;
228	fesetenv (&oldenv);
229	#ifdef FE_TONEAREST
230	if (fegetround () == FE_TONEAREST)
231	n_i -= 0x4000;
232	#endif
233	if (!n_i) {
234	return __ieee754_expl_proc2 (origx);
235	}
236	*/
237	}
238	/ Exceptional cases: /
239	else if (isless (x, himark))
240	{
241	if (isinf (x))
242	/ e^-inf == 0, with no error. /
243	return `0`;
244	else
245	/ Underflow /
246	return TINY * TINY;
247	}
248	else
249	/ Return x, if x is a NaN or Inf; or overflow, otherwise. /
250	return TWO1023*x;
251
252	result = x22 * ex2_u.ld + ex2_u.ld;
253	if (!unsafe)
254	return result;
255	return result * scale_u.ld;
256	}
257	libm_alias_finite (__ieee754_expl, __expl)
258

source code of glibc/sysdeps/ieee754/ldbl-128ibm/e_expl.c