s_erfl.c source code [glibc/sysdeps/ieee754/ldbl-96/s_erfl.c]

1	/*
2	* ====================================================
3	* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
4	*
5	* Developed at SunPro, a Sun Microsystems, Inc. business.
6	* Permission to use, copy, modify, and distribute this
7	* software is freely granted, provided that this notice
8	* is preserved.
9	* ====================================================
10	*/
11
12	/ Long double expansions are*
13	Copyright (C) 2001 Stephen L. Moshier <moshier@na-net.ornl.gov>
14	and are incorporated herein by permission of the author. The author
15	reserves the right to distribute this material elsewhere under different
16	copying permissions. These modifications are distributed here under
17	the following terms:
18
19	This library is free software; you can redistribute it and/or
20	modify it under the terms of the GNU Lesser General Public
21	License as published by the Free Software Foundation; either
22	version 2.1 of the License, or (at your option) any later version.
23
24	This library is distributed in the hope that it will be useful,
25	but WITHOUT ANY WARRANTY; without even the implied warranty of
26	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
27	Lesser General Public License for more details.
28
29	You should have received a copy of the GNU Lesser General Public
30	License along with this library; if not, see
31	<https://www.gnu.org/licenses/>. /*
32
33	/ double erf(double x)*
34	* double erfc(double x)
35	* x
36	* 2 \|\
37	* erf(x) = --------- \| exp(-t*t)dt
38	* sqrt(pi) \\|
39	* 0
40	*
41	* erfc(x) = 1-erf(x)
42	* Note that
43	* erf(-x) = -erf(x)
44	* erfc(-x) = 2 - erfc(x)
45	*
46	* Method:
47	* 1. For \|x\| in [0, 0.84375]
48	* erf(x) = x + x*R(x^2)
49	* erfc(x) = 1 - erf(x) if x in [-.84375,0.25]
50	* = 0.5 + ((0.5-x)-x*R) if x in [0.25,0.84375]
51	* Remark. The formula is derived by noting
52	* erf(x) = (2/sqrt(pi))*(x - x^3/3 + x^5/10 - x^7/42 + ....)
53	* and that
54	* 2/sqrt(pi) = 1.128379167095512573896158903121545171688
55	* is close to one. The interval is chosen because the fix
56	* point of erf(x) is near 0.6174 (i.e., erf(x)=x when x is
57	* near 0.6174), and by some experiment, 0.84375 is chosen to
58	* guarantee the error is less than one ulp for erf.
59	*
60	* 2. For \|x\| in [0.84375,1.25], let s = \|x\| - 1, and
61	* c = 0.84506291151 rounded to single (24 bits)
62	* erf(x) = sign(x) * (c + P1(s)/Q1(s))
63	* erfc(x) = (1-c) - P1(s)/Q1(s) if x > 0
64	* 1+(c+P1(s)/Q1(s)) if x < 0
65	* Remark: here we use the taylor series expansion at x=1.
66	* erf(1+s) = erf(1) + s*Poly(s)
67	* = 0.845.. + P1(s)/Q1(s)
68	* Note that \|P1/Q1\|< 0.078 for x in [0.84375,1.25]
69	*
70	* 3. For x in [1.25,1/0.35(~2.857143)],
71	* erfc(x) = (1/x)exp(-xx-0.5625+R1(z)/S1(z))
72	* z=1/x^2
73	* erf(x) = 1 - erfc(x)
74	*
75	* 4. For x in [1/0.35,107]
76	* erfc(x) = (1/x)exp(-xx-0.5625+R2/S2) if x > 0
77	* = 2.0 - (1/x)exp(-xx-0.5625+R2(z)/S2(z))
78	* if -6.666<x<0
79	* = 2.0 - tiny (if x <= -6.666)
80	* z=1/x^2
81	* erf(x) = sign(x)*(1.0 - erfc(x)) if x < 6.666, else
82	* erf(x) = sign(x)*(1.0 - tiny)
83	* Note1:
84	* To compute exp(-x*x-0.5625+R/S), let s be a single
85	* precision number and s := x; then
86	* -xx = -ss + (s-x)*(s+x)
87	* exp(-x*x-0.5626+R/S) =
88	* exp(-ss-0.5625)exp((s-x)*(s+x)+R/S);
89	* Note2:
90	* Here 4 and 5 make use of the asymptotic series
91	* exp(-x*x)
92	* erfc(x) ~ ---------- * ( 1 + Poly(1/x^2) )
93	* x*sqrt(pi)
94	*
95	* 5. For inf > x >= 107
96	* erf(x) = sign(x) *(1 - tiny) (raise inexact)
97	* erfc(x) = tiny*tiny (raise underflow) if x > 0
98	* = 2 - tiny if x<0
99	*
100	* 7. Special case:
101	* erf(0) = 0, erf(inf) = 1, erf(-inf) = -1,
102	* erfc(0) = 1, erfc(inf) = 0, erfc(-inf) = 2,
103	* erfc/erf(NaN) is NaN
104	*/
105
106
107	#include <errno.h>
108	#include <float.h>
109	#include <math.h>
110	#include <math_private.h>
111	#include <math-underflow.h>
112	#include <libm-alias-ldouble.h>
113
114	static const long double
115	tiny = `1e-4931L`,
116	half = `0.5L`,
117	one = `1.0L`,
118	two = `2.0L`,
119	/ c = (float)0.84506291151 /
120	erx = `0.845062911510467529296875L`,
121	/*
122	* Coefficients for approximation to erf on [0,0.84375]
123	*/
124	/ 2/sqrt(pi) - 1 /
125	efx = `1.2837916709551257389615890312154517168810E-1L`,
126
127	pp[`6`] = {
128	`1.122751350964552113068262337278335028553E6L`,
129	-`2.808533301997696164408397079650699163276E6L`,
130	-`3.314325479115357458197119660818768924100E5L`,
131	-`6.848684465326256109712135497895525446398E4L`,
132	-`2.657817695110739185591505062971929859314E3L`,
133	-`1.655310302737837556654146291646499062882E2L`,
134	},
135
136	qq[`6`] = {
137	`8.745588372054466262548908189000448124232E6L`,
138	`3.746038264792471129367533128637019611485E6L`,
139	`7.066358783162407559861156173539693900031E5L`,
140	`7.448928604824620999413120955705448117056E4L`,
141	`4.511583986730994111992253980546131408924E3L`,
142	`1.368902937933296323345610240009071254014E2L`,
143	/ 1.000000000000000000000000000000000000000E0 /
144	},
145
146	/*
147	* Coefficients for approximation to erf in [0.84375,1.25]
148	*/
149	/ erf(x+1) = 0.845062911510467529296875 + pa(x)/qa(x)*
150	-0.15625 <= x <= +.25
151	Peak relative error 8.5e-22 /*
152
153	pa[`8`] = {
154	-`1.076952146179812072156734957705102256059E0L`,
155	`1.884814957770385593365179835059971587220E2L`,
156	-`5.339153975012804282890066622962070115606E1L`,
157	`4.435910679869176625928504532109635632618E1L`,
158	`1.683219516032328828278557309642929135179E1L`,
159	-`2.360236618396952560064259585299045804293E0L`,
160	`1.852230047861891953244413872297940938041E0L`,
161	`9.394994446747752308256773044667843200719E-2L`,
162	},
163
164	qa[`7`] = {
165	`4.559263722294508998149925774781887811255E2L`,
166	`3.289248982200800575749795055149780689738E2L`,
167	`2.846070965875643009598627918383314457912E2L`,
168	`1.398715859064535039433275722017479994465E2L`,
169	`6.060190733759793706299079050985358190726E1L`,
170	`2.078695677795422351040502569964299664233E1L`,
171	`4.641271134150895940966798357442234498546E0L`,
172	/ 1.000000000000000000000000000000000000000E0 /
173	},
174
175	/*
176	* Coefficients for approximation to erfc in [1.25,1/0.35]
177	*/
178	/ erfc(1/x) = x exp (-1/x^2 - 0.5625 + ra(x^2)/sa(x^2))*
179	1/2.85711669921875 < 1/x < 1/1.25
180	Peak relative error 3.1e-21 /*
181
182	ra[] = {
183	`1.363566591833846324191000679620738857234E-1L`,
184	`1.018203167219873573808450274314658434507E1L`,
185	`1.862359362334248675526472871224778045594E2L`,
186	`1.411622588180721285284945138667933330348E3L`,
187	`5.088538459741511988784440103218342840478E3L`,
188	`8.928251553922176506858267311750789273656E3L`,
189	`7.264436000148052545243018622742770549982E3L`,
190	`2.387492459664548651671894725748959751119E3L`,
191	`2.220916652813908085449221282808458466556E2L`,
192	},
193
194	sa[] = {
195	-`1.382234625202480685182526402169222331847E1L`,
196	-`3.315638835627950255832519203687435946482E2L`,
197	-`2.949124863912936259747237164260785326692E3L`,
198	-`1.246622099070875940506391433635999693661E4L`,
199	-`2.673079795851665428695842853070996219632E4L`,
200	-`2.880269786660559337358397106518918220991E4L`,
201	-`1.450600228493968044773354186390390823713E4L`,
202	-`2.874539731125893533960680525192064277816E3L`,
203	-`1.402241261419067750237395034116942296027E2L`,
204	/ 1.000000000000000000000000000000000000000E0 /
205	},
206	/*
207	* Coefficients for approximation to erfc in [1/.35,107]
208	*/
209	/ erfc(1/x) = x exp (-1/x^2 - 0.5625 + rb(x^2)/sb(x^2))*
210	1/6.6666259765625 < 1/x < 1/2.85711669921875
211	Peak relative error 4.2e-22 /*
212	rb[] = {
213	-`4.869587348270494309550558460786501252369E-5L`,
214	-`4.030199390527997378549161722412466959403E-3L`,
215	-`9.434425866377037610206443566288917589122E-2L`,
216	-`9.319032754357658601200655161585539404155E-1L`,
217	-`4.273788174307459947350256581445442062291E0L`,
218	-`8.842289940696150508373541814064198259278E0L`,
219	-`7.069215249419887403187988144752613025255E0L`,
220	-`1.401228723639514787920274427443330704764E0L`,
221	},
222
223	sb[] = {
224	`4.936254964107175160157544545879293019085E-3L`,
225	`1.583457624037795744377163924895349412015E-1L`,
226	`1.850647991850328356622940552450636420484E0L`,
227	`9.927611557279019463768050710008450625415E0L`,
228	`2.531667257649436709617165336779212114570E1L`,
229	`2.869752886406743386458304052862814690045E1L`,
230	`1.182059497870819562441683560749192539345E1L`,
231	/ 1.000000000000000000000000000000000000000E0 /
232	},
233	/ erfc(1/x) = x exp (-1/x^2 - 0.5625 + rc(x^2)/sc(x^2))*
234	1/107 <= 1/x <= 1/6.6666259765625
235	Peak relative error 1.1e-21 /*
236	rc[] = {
237	-`8.299617545269701963973537248996670806850E-5L`,
238	-`6.243845685115818513578933902532056244108E-3L`,
239	-`1.141667210620380223113693474478394397230E-1L`,
240	-`7.521343797212024245375240432734425789409E-1L`,
241	-`1.765321928311155824664963633786967602934E0L`,
242	-`1.029403473103215800456761180695263439188E0L`,
243	},
244
245	sc[] = {
246	`8.413244363014929493035952542677768808601E-3L`,
247	`2.065114333816877479753334599639158060979E-1L`,
248	`1.639064941530797583766364412782135680148E0L`,
249	`4.936788463787115555582319302981666347450E0L`,
250	`5.005177727208955487404729933261347679090E0L`,
251	/ 1.000000000000000000000000000000000000000E0 /
252	};
253
254	long double
255	__erfl (long double x)
256	{
257	long double R, S, P, Q, s, y, z, r;
258	int32_t ix, i;
259	uint32_t se, i0, i1;
260
261	GET_LDOUBLE_WORDS (se, i0, i1, x);
262	ix = se & `0x7fff`;
263
264	if (ix >= `0x7fff`)
265	{ / erf(nan)=nan /
266	i = ((se & `0xffff`) >> `15`) << `1`;
267	return (long double) (`1` - i) + one / x; / erf(+-inf)=+-1 /
268	}
269
270	ix = (ix << `16`) \| (i0 >> `16`);
271	if (ix < `0x3ffed800`) / \|x\|<0.84375 /
272	{
273	if (ix < `0x3fde8000`) / \|x\|<2*-33 /*
274	{
275	if (ix < `0x00080000`)
276	{
277	/ Avoid spurious underflow. /
278	long double ret = `0.0625` * (`16.0` * x + (`16.0` * efx) * x);
279	math_check_force_underflow (ret);
280	return ret;
281	}
282	return x + efx * x;
283	}
284	z = x * x;
285	r = pp[`0`] + z * (pp[`1`]
286	+ z * (pp[`2`] + z * (pp[`3`] + z * (pp[`4`] + z * pp[`5`]))));
287	s = qq[`0`] + z * (qq[`1`]
288	+ z * (qq[`2`] + z * (qq[`3`] + z * (qq[`4`] + z * (qq[`5`] + z)))));
289	y = r / s;
290	return x + x * y;
291	}
292	if (ix < `0x3fffa000`) / 1.25 /
293	{ / 0.84375 <= \|x\| < 1.25 /
294	s = fabsl (x: x) - one;
295	P = pa[`0`] + s * (pa[`1`] + s * (pa[`2`]
296	+ s * (pa[`3`] + s * (pa[`4`] + s * (pa[`5`] + s * (pa[`6`] + s * pa[`7`]))))));
297	Q = qa[`0`] + s * (qa[`1`] + s * (qa[`2`]
298	+ s * (qa[`3`] + s * (qa[`4`] + s * (qa[`5`] + s * (qa[`6`] + s))))));
299	if ((se & `0x8000`) == `0`)
300	return erx + P / Q;
301	else
302	return -erx - P / Q;
303	}
304	if (ix >= `0x4001d555`) / 6.6666259765625 /
305	{ / inf>\|x\|>=6.666 /
306	if ((se & `0x8000`) == `0`)
307	return one - tiny;
308	else
309	return tiny - one;
310	}
311	x = fabsl (x: x);
312	s = one / (x * x);
313	if (ix < `0x4000b6db`) / 2.85711669921875 /
314	{
315	R = ra[`0`] + s * (ra[`1`] + s * (ra[`2`] + s * (ra[`3`] + s * (ra[`4`] +
316	s * (ra[`5`] + s * (ra[`6`] + s * (ra[`7`] + s * ra[`8`])))))));
317	S = sa[`0`] + s * (sa[`1`] + s * (sa[`2`] + s * (sa[`3`] + s * (sa[`4`] +
318	s * (sa[`5`] + s * (sa[`6`] + s * (sa[`7`] + s * (sa[`8`] + s))))))));
319	}
320	else
321	{ / \|x\| >= 1/0.35 /
322	R = rb[`0`] + s * (rb[`1`] + s * (rb[`2`] + s * (rb[`3`] + s * (rb[`4`] +
323	s * (rb[`5`] + s * (rb[`6`] + s * rb[`7`]))))));
324	S = sb[`0`] + s * (sb[`1`] + s * (sb[`2`] + s * (sb[`3`] + s * (sb[`4`] +
325	s * (sb[`5`] + s * (sb[`6`] + s))))));
326	}
327	z = x;
328	GET_LDOUBLE_WORDS (i, i0, i1, z);
329	i1 = `0`;
330	SET_LDOUBLE_WORDS (z, i, i0, i1);
331	r =
332	__ieee754_expl (-z * z - `0.5625`) * __ieee754_expl ((z - x) * (z + x) +
333	R / S);
334	if ((se & `0x8000`) == `0`)
335	return one - r / x;
336	else
337	return r / x - one;
338	}
339
340	libm_alias_ldouble (__erf, erf)
341	long double
342	__erfcl (long double x)
343	{
344	int32_t hx, ix;
345	long double R, S, P, Q, s, y, z, r;
346	uint32_t se, i0, i1;
347
348	GET_LDOUBLE_WORDS (se, i0, i1, x);
349	ix = se & `0x7fff`;
350	if (ix >= `0x7fff`)
351	{ / erfc(nan)=nan /
352	/ erfc(+-inf)=0,2 /
353	return (long double) (((se & `0xffff`) >> `15`) << `1`) + one / x;
354	}
355
356	ix = (ix << `16`) \| (i0 >> `16`);
357	if (ix < `0x3ffed800`) / \|x\|<0.84375 /
358	{
359	if (ix < `0x3fbe0000`) / \|x\|<2*-65 /*
360	return one - x;
361	z = x * x;
362	r = pp[`0`] + z * (pp[`1`]
363	+ z * (pp[`2`] + z * (pp[`3`] + z * (pp[`4`] + z * pp[`5`]))));
364	s = qq[`0`] + z * (qq[`1`]
365	+ z * (qq[`2`] + z * (qq[`3`] + z * (qq[`4`] + z * (qq[`5`] + z)))));
366	y = r / s;
367	if (ix < `0x3ffd8000`) / x<1/4 /
368	{
369	return one - (x + x * y);
370	}
371	else
372	{
373	r = x * y;
374	r += (x - half);
375	return half - r;
376	}
377	}
378	if (ix < `0x3fffa000`) / 1.25 /
379	{ / 0.84375 <= \|x\| < 1.25 /
380	s = fabsl (x: x) - one;
381	P = pa[`0`] + s * (pa[`1`] + s * (pa[`2`]
382	+ s * (pa[`3`] + s * (pa[`4`] + s * (pa[`5`] + s * (pa[`6`] + s * pa[`7`]))))));
383	Q = qa[`0`] + s * (qa[`1`] + s * (qa[`2`]
384	+ s * (qa[`3`] + s * (qa[`4`] + s * (qa[`5`] + s * (qa[`6`] + s))))));
385	if ((se & `0x8000`) == `0`)
386	{
387	z = one - erx;
388	return z - P / Q;
389	}
390	else
391	{
392	z = erx + P / Q;
393	return one + z;
394	}
395	}
396	if (ix < `0x4005d600`) / 107 /
397	{ / \|x\|<107 /
398	x = fabsl (x: x);
399	s = one / (x * x);
400	if (ix < `0x4000b6db`) / 2.85711669921875 /
401	{ / \|x\| < 1/.35 ~ 2.857143 /
402	R = ra[`0`] + s * (ra[`1`] + s * (ra[`2`] + s * (ra[`3`] + s * (ra[`4`] +
403	s * (ra[`5`] + s * (ra[`6`] + s * (ra[`7`] + s * ra[`8`])))))));
404	S = sa[`0`] + s * (sa[`1`] + s * (sa[`2`] + s * (sa[`3`] + s * (sa[`4`] +
405	s * (sa[`5`] + s * (sa[`6`] + s * (sa[`7`] + s * (sa[`8`] + s))))))));
406	}
407	else if (ix < `0x4001d555`) / 6.6666259765625 /
408	{ / 6.666 > \|x\| >= 1/.35 ~ 2.857143 /
409	R = rb[`0`] + s * (rb[`1`] + s * (rb[`2`] + s * (rb[`3`] + s * (rb[`4`] +
410	s * (rb[`5`] + s * (rb[`6`] + s * rb[`7`]))))));
411	S = sb[`0`] + s * (sb[`1`] + s * (sb[`2`] + s * (sb[`3`] + s * (sb[`4`] +
412	s * (sb[`5`] + s * (sb[`6`] + s))))));
413	}
414	else
415	{ / \|x\| >= 6.666 /
416	if (se & `0x8000`)
417	return two - tiny; / x < -6.666 /
418
419	R = rc[`0`] + s * (rc[`1`] + s * (rc[`2`] + s * (rc[`3`] +
420	s * (rc[`4`] + s * rc[`5`]))));
421	S = sc[`0`] + s * (sc[`1`] + s * (sc[`2`] + s * (sc[`3`] +
422	s * (sc[`4`] + s))));
423	}
424	z = x;
425	GET_LDOUBLE_WORDS (hx, i0, i1, z);
426	i1 = `0`;
427	i0 &= `0xffffff00`;
428	SET_LDOUBLE_WORDS (z, hx, i0, i1);
429	r = __ieee754_expl (-z * z - `0.5625`) *
430	__ieee754_expl ((z - x) * (z + x) + R / S);
431	if ((se & `0x8000`) == `0`)
432	{
433	long double ret = r / x;
434	if (ret == `0`)
435	__set_errno (ERANGE);
436	return ret;
437	}
438	else
439	return two - r / x;
440	}
441	else
442	{
443	if ((se & `0x8000`) == `0`)
444	{
445	__set_errno (ERANGE);
446	return tiny * tiny;
447	}
448	else
449	return two - tiny;
450	}
451	}
452
453	libm_alias_ldouble (__erfc, erfc)
454

source code of glibc/sysdeps/ieee754/ldbl-96/s_erfl.c