e_powl.c source code [glibc/sysdeps/ieee754/ldbl-128/e_powl.c]

1	/*
2	* ====================================================
3	* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
4	*
5	* Developed at SunPro, a Sun Microsystems, Inc. business.
6	* Permission to use, copy, modify, and distribute this
7	* software is freely granted, provided that this notice
8	* is preserved.
9	* ====================================================
10	*/
11
/* Expansions and modifications for 128-bit long double are
   Copyright (C) 2001 Stephen L. Moshier <moshier@na-net.ornl.gov>
   and are incorporated herein by permission of the author.  The author
   reserves the right to distribute this material elsewhere under different
   copying permissions.  These modifications are distributed here under
   the following terms:

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License as published by the Free Software Foundation; either
    version 2.1 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, see
    <https://www.gnu.org/licenses/>.  */
32
/* __ieee754_powl(x,y) return x**y
 *
 *                    n
 * Method:  Let x =  2   * (1+f)
 *      1. Compute and return log2(x) in two pieces:
 *              log2(x) = w1 + w2,
 *         where w1 has 113-53 = 60 bit trailing zeros.
 *      2. Perform y*log2(x) = n+y' by simulating multi-precision
 *         arithmetic, where |y'|<=0.5.
 *      3. Return x**y = 2**n*exp(y'*log2)
 *
 * Special cases:
 *      1.  (anything) ** 0  is 1
 *      2.  (anything) ** 1  is itself
 *      3.  (anything except 1) ** NAN is NAN
 *      4.  NAN ** (anything except 0) is NAN
 *      5.  +-(|x| > 1) **  +INF is +INF
 *      6.  +-(|x| > 1) **  -INF is +0
 *      7.  +-(|x| < 1) **  +INF is +0
 *      8.  +-(|x| < 1) **  -INF is +INF
 *      9.  +1 ** (anything, including quiet NAN) is 1;
 *          -1 ** +-INF is 1
 *      10. +0 ** (+anything except 0, NAN)               is +0
 *      11. -0 ** (+anything except 0, NAN, odd integer)  is +0
 *      12. +0 ** (-anything except 0, NAN)               is +INF
 *      13. -0 ** (-anything except 0, NAN, odd integer)  is +INF
 *      14. -0 ** (odd integer) = -( +0 ** (odd integer) )
 *      15. +INF ** (+anything except 0,NAN) is +INF
 *      16. +INF ** (-anything except 0,NAN) is +0
 *      17. -INF ** (anything)  = -0 ** (-anything)
 *      18. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer)
 *      19. (-anything except 0 and inf) ** (non-integer) is NAN
 *
 */
66
67	#include <math.h>
68	#include <math-barriers.h>
69	#include <math_private.h>
70	#include <libm-alias-finite.h>
71
72	static const _Float128 bp[] = {
73	`1`,
74	L(`1.5`),
75	};
76
77	/ log_2(1.5) /
78	static const _Float128 dp_h[] = {
79	`0.0`,
80	L(`5.8496250072115607565592654282227158546448E-1`)
81	};
82
83	/ Low part of log_2(1.5) /
84	static const _Float128 dp_l[] = {
85	`0.0`,
86	L(`1.0579781240112554492329533686862998106046E-16`)
87	};
88
89	static const _Float128 zero = `0`,
90	one = `1`,
91	two = `2`,
92	two113 = L(`1.0384593717069655257060992658440192E34`),
93	huge = L(`1.0e3000`),
94	tiny = L(`1.0e-3000`);
95
96	/ 3/2 log x = 3 z + z^3 + z^3 (z^2 R(z^2))*
97	z = (x-1)/(x+1)
98	1 <= x <= 1.25
99	Peak relative error 2.3e-37 /*
100	static const _Float128 LN[] =
101	{
102	L(-`3.0779177200290054398792536829702930623200E1`),
103	L(`6.5135778082209159921251824580292116201640E1`),
104	L(-`4.6312921812152436921591152809994014413540E1`),
105	L(`1.2510208195629420304615674658258363295208E1`),
106	L(-`9.9266909031921425609179910128531667336670E-1`)
107	};
108	static const _Float128 LD[] =
109	{
110	L(-`5.129862866715009066465422805058933131960E1`),
111	L(`1.452015077564081884387441590064272782044E2`),
112	L(-`1.524043275549860505277434040464085593165E2`),
113	L(`7.236063513651544224319663428634139768808E1`),
114	L(-`1.494198912340228235853027849917095580053E1`)
115	/ 1.0E0 /
116	};
117
118	/ exp(x) = 1 + x - x / (1 - 2 / (x - x^2 R(x^2)))*
119	0 <= x <= 0.5
120	Peak relative error 5.7e-38 /*
121	static const _Float128 PN[] =
122	{
123	L(`5.081801691915377692446852383385968225675E8`),
124	L(`9.360895299872484512023336636427675327355E6`),
125	L(`4.213701282274196030811629773097579432957E4`),
126	L(`5.201006511142748908655720086041570288182E1`),
127	L(`9.088368420359444263703202925095675982530E-3`),
128	};
129	static const _Float128 PD[] =
130	{
131	L(`3.049081015149226615468111430031590411682E9`),
132	L(`1.069833887183886839966085436512368982758E8`),
133	L(`8.259257717868875207333991924545445705394E5`),
134	L(`1.872583833284143212651746812884298360922E3`),
135	/ 1.0E0 /
136	};
137
138	static const _Float128
139	/ ln 2 /
140	lg2 = L(`6.9314718055994530941723212145817656807550E-1`),
141	lg2_h = L(`6.9314718055994528622676398299518041312695E-1`),
142	lg2_l = L(`2.3190468138462996154948554638754786504121E-17`),
143	ovt = L(`8.0085662595372944372e-0017`),
144	/ 2/(3log(2)) /*
145	cp = L(`9.6179669392597560490661645400126142495110E-1`),
146	cp_h = L(`9.6179669392597555432899980587535537779331E-1`),
147	cp_l = L(`5.0577616648125906047157785230014751039424E-17`);
148
149	_Float128
150	__ieee754_powl (_Float128 x, _Float128 y)
151	{
152	_Float128 z, ax, z_h, z_l, p_h, p_l;
153	_Float128 y1, t1, t2, r, s, sgn, t, u, v, w;
154	_Float128 s2, s_h, s_l, t_h, t_l, ay;
155	int32_t i, j, k, yisint, n;
156	uint32_t ix, iy;
157	int32_t hx, hy;
158	ieee854_long_double_shape_type o, p, q;
159
160	p.value = x;
161	hx = p.parts32.w0;
162	ix = hx & `0x7fffffff`;
163
164	q.value = y;
165	hy = q.parts32.w0;
166	iy = hy & `0x7fffffff`;
167
168
169	/ y==zero: x*0 = 1 /*
170	if ((iy \| q.parts32.w1 \| q.parts32.w2 \| q.parts32.w3) == `0`
171	&& !issignaling (x))
172	return one;
173
174	/ 1.0y = 1; -1.0+-Inf = 1 /
175	if (x == one && !issignaling (y))
176	return one;
177	if (x == -`1` && iy == `0x7fff0000`
178	&& (q.parts32.w1 \| q.parts32.w2 \| q.parts32.w3) == `0`)
179	return one;
180
181	/ +-NaN return x+y /
182	if ((ix > `0x7fff0000`)
183	\|\| ((ix == `0x7fff0000`)
184	&& ((p.parts32.w1 \| p.parts32.w2 \| p.parts32.w3) != `0`))
185	\|\| (iy > `0x7fff0000`)
186	\|\| ((iy == `0x7fff0000`)
187	&& ((q.parts32.w1 \| q.parts32.w2 \| q.parts32.w3) != `0`)))
188	return x + y;
189
190	/ determine if y is an odd int when x < 0*
191	* yisint = 0 ... y is not an integer
192	* yisint = 1 ... y is an odd int
193	* yisint = 2 ... y is an even int
194	*/
195	yisint = `0`;
196	if (hx < `0`)
197	{
198	if (iy >= `0x40700000`) / 2^113 /
199	yisint = `2`; / even integer y /
200	else if (iy >= `0x3fff0000`) / 1.0 /
201	{
202	if (floorl (y) == y)
203	{
204	z = `0.5` * y;
205	if (floorl (z) == z)
206	yisint = `2`;
207	else
208	yisint = `1`;
209	}
210	}
211	}
212
213	/ special value of y /
214	if ((q.parts32.w1 \| q.parts32.w2 \| q.parts32.w3) == `0`)
215	{
216	if (iy == `0x7fff0000`) / y is +-inf /
217	{
218	if (((ix - `0x3fff0000`) \| p.parts32.w1 \| p.parts32.w2 \| p.parts32.w3)
219	== `0`)
220	return y - y; / +-1*inf is NaN /*
221	else if (ix >= `0x3fff0000`) / (\|x\|>1)*+-inf = inf,0 /*
222	return (hy >= `0`) ? y : zero;
223	else / (\|x\|<1)*-,+inf = inf,0 /*
224	return (hy < `0`) ? -y : zero;
225	}
226	if (iy == `0x3fff0000`)
227	{ / y is +-1 /
228	if (hy < `0`)
229	return one / x;
230	else
231	return x;
232	}
233	if (hy == `0x40000000`)
234	return x * x; / y is 2 /
235	if (hy == `0x3ffe0000`)
236	{ / y is 0.5 /
237	if (hx >= `0`) / x >= +0 /
238	return sqrtl (x);
239	}
240	}
241
242	ax = fabsl (x);
243	/ special value of x /
244	if ((p.parts32.w1 \| p.parts32.w2 \| p.parts32.w3) == `0`)
245	{
246	if (ix == `0x7fff0000` \|\| ix == `0` \|\| ix == `0x3fff0000`)
247	{
248	z = ax; /x is +-0,+-inf,+-1 /
249	if (hy < `0`)
250	z = one / z; / z = (1/\|x\|) /
251	if (hx < `0`)
252	{
253	if (((ix - `0x3fff0000`) \| yisint) == `0`)
254	{
255	z = (z - z) / (z - z); / (-1)*non-int is NaN /*
256	}
257	else if (yisint == `1`)
258	z = -z; / (x<0)odd = -(\|x\|odd) /
259	}
260	return z;
261	}
262	}
263
264	/ (x<0)*(non-int) is NaN /*
265	if (((((uint32_t) hx >> `31`) - `1`) \| yisint) == `0`)
266	return (x - x) / (x - x);
267
268	/ sgn (sign of result -ve*odd) = -1 else = 1 /*
269	sgn = one;
270	if (((((uint32_t) hx >> `31`) - `1`) \| (yisint - `1`)) == `0`)
271	sgn = -one; / (-ve)*(odd int) /*
272
273	/ \|y\| is huge.*
274	2^-16495 = 1/2 of smallest representable value.
275	If (1 - 1/131072)^y underflows, y > 1.4986e9 /*
276	if (iy > `0x401d654b`)
277	{
278	/ if (1 - 2^-113)^y underflows, y > 1.1873e38 /
279	if (iy > `0x407d654b`)
280	{
281	if (ix <= `0x3ffeffff`)
282	return (hy < `0`) ? huge * huge : tiny * tiny;
283	if (ix >= `0x3fff0000`)
284	return (hy > `0`) ? huge * huge : tiny * tiny;
285	}
286	/ over/underflow if x is not close to one /
287	if (ix < `0x3ffeffff`)
288	return (hy < `0`) ? sgn * huge * huge : sgn * tiny * tiny;
289	if (ix > `0x3fff0000`)
290	return (hy > `0`) ? sgn * huge * huge : sgn * tiny * tiny;
291	}
292
293	ay = y > `0` ? y : -y;
294	if (ay < `0x1p-128`)
295	y = y < `0` ? -`0x1p-128` : `0x1p-128`;
296
297	n = `0`;
298	/ take care subnormal number /
299	if (ix < `0x00010000`)
300	{
301	ax *= two113;
302	n -= `113`;
303	o.value = ax;
304	ix = o.parts32.w0;
305	}
306	n += ((ix) >> `16`) - `0x3fff`;
307	j = ix & `0x0000ffff`;
308	/ determine interval /
309	ix = j \| `0x3fff0000`; / normalize ix /
310	if (j <= `0x3988`)
311	k = `0`; / \|x\|<sqrt(3/2) /
312	else if (j < `0xbb67`)
313	k = `1`; / \|x\|<sqrt(3) /
314	else
315	{
316	k = `0`;
317	n += `1`;
318	ix -= `0x00010000`;
319	}
320
321	o.value = ax;
322	o.parts32.w0 = ix;
323	ax = o.value;
324
325	/ compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) /
326	u = ax - bp[k]; / bp[0]=1.0, bp[1]=1.5 /
327	v = one / (ax + bp[k]);
328	s = u * v;
329	s_h = s;
330
331	o.value = s_h;
332	o.parts32.w3 = `0`;
333	o.parts32.w2 &= `0xf8000000`;
334	s_h = o.value;
335	/ t_h=ax+bp[k] High /
336	t_h = ax + bp[k];
337	o.value = t_h;
338	o.parts32.w3 = `0`;
339	o.parts32.w2 &= `0xf8000000`;
340	t_h = o.value;
341	t_l = ax - (t_h - bp[k]);
342	s_l = v * ((u - s_h * t_h) - s_h * t_l);
343	/ compute log(ax) /
344	s2 = s * s;
345	u = LN[`0`] + s2 * (LN[`1`] + s2 * (LN[`2`] + s2 * (LN[`3`] + s2 * LN[`4`])));
346	v = LD[`0`] + s2 * (LD[`1`] + s2 * (LD[`2`] + s2 * (LD[`3`] + s2 * (LD[`4`] + s2))));
347	r = s2 * s2 * u / v;
348	r += s_l * (s_h + s);
349	s2 = s_h * s_h;
350	t_h = `3.0` + s2 + r;
351	o.value = t_h;
352	o.parts32.w3 = `0`;
353	o.parts32.w2 &= `0xf8000000`;
354	t_h = o.value;
355	t_l = r - ((t_h - `3.0`) - s2);
356	/ u+v = s(1+...) /*
357	u = s_h * t_h;
358	v = s_l * t_h + t_l * s;
359	/ 2/(3log2)(s+...) /*
360	p_h = u + v;
361	o.value = p_h;
362	o.parts32.w3 = `0`;
363	o.parts32.w2 &= `0xf8000000`;
364	p_h = o.value;
365	p_l = v - (p_h - u);
366	z_h = cp_h * p_h; / cp_h+cp_l = 2/(3log2) /*
367	z_l = cp_l * p_h + p_l * cp + dp_l[k];
368	/ log2(ax) = (s+..)2/(3log2) = n + dp_h + z_h + z_l /
369	t = (_Float128) n;
370	t1 = (((z_h + z_l) + dp_h[k]) + t);
371	o.value = t1;
372	o.parts32.w3 = `0`;
373	o.parts32.w2 &= `0xf8000000`;
374	t1 = o.value;
375	t2 = z_l - (((t1 - t) - dp_h[k]) - z_h);
376
377	/ split up y into y1+y2 and compute (y1+y2)(t1+t2) /*
378	y1 = y;
379	o.value = y1;
380	o.parts32.w3 = `0`;
381	o.parts32.w2 &= `0xf8000000`;
382	y1 = o.value;
383	p_l = (y - y1) * t1 + y * t2;
384	p_h = y1 * t1;
385	z = p_l + p_h;
386	o.value = z;
387	j = o.parts32.w0;
388	if (j >= `0x400d0000`) / z >= 16384 /
389	{
390	/ if z > 16384 /
391	if (((j - `0x400d0000`) \| o.parts32.w1 \| o.parts32.w2 \| o.parts32.w3) != `0`)
392	return sgn * huge * huge; / overflow /
393	else
394	{
395	if (p_l + ovt > z - p_h)
396	return sgn * huge * huge; / overflow /
397	}
398	}
399	else if ((j & `0x7fffffff`) >= `0x400d01b9`) / z <= -16495 /
400	{
401	/ z < -16495 /
402	if (((j - `0xc00d01bc`) \| o.parts32.w1 \| o.parts32.w2 \| o.parts32.w3)
403	!= `0`)
404	return sgn * tiny * tiny; / underflow /
405	else
406	{
407	if (p_l <= z - p_h)
408	return sgn * tiny * tiny; / underflow /
409	}
410	}
411	/ compute 2*(p_h+p_l) /*
412	i = j & `0x7fffffff`;
413	k = (i >> `16`) - `0x3fff`;
414	n = `0`;
415	if (i > `0x3ffe0000`)
416	{ / if \|z\| > 0.5, set n = [z+0.5] /
417	n = floorl (z + L(`0.5`));
418	t = n;
419	p_h -= t;
420	}
421	t = p_l + p_h;
422	o.value = t;
423	o.parts32.w3 = `0`;
424	o.parts32.w2 &= `0xf8000000`;
425	t = o.value;
426	u = t * lg2_h;
427	v = (p_l - (t - p_h)) * lg2 + t * lg2_l;
428	z = u + v;
429	w = v - (z - u);
430	/ exp(z) /
431	t = z * z;
432	u = PN[`0`] + t * (PN[`1`] + t * (PN[`2`] + t * (PN[`3`] + t * PN[`4`])));
433	v = PD[`0`] + t * (PD[`1`] + t * (PD[`2`] + t * (PD[`3`] + t)));
434	t1 = z - t * u / v;
435	r = (z * t1) / (t1 - two) - (w + z * w);
436	z = one - (r - z);
437	o.value = z;
438	j = o.parts32.w0;
439	j += (n << `16`);
440	if ((j >> `16`) <= `0`)
441	{
442	z = __scalbnl (z, n); / subnormal output /
443	_Float128 force_underflow = z * z;
444	math_force_eval (force_underflow);
445	}
446	else
447	{
448	o.parts32.w0 = j;
449	z = o.value;
450	}
451	return sgn * z;
452	}
453	libm_alias_finite (__ieee754_powl, __powl)
454

source code of glibc/sysdeps/ieee754/ldbl-128/e_powl.c