s_fmal.c source code [glibc/sysdeps/ieee754/ldbl-128ibm/s_fmal.c]

1	/ Compute x * y + z as ternary operation.*
2	Copyright (C) 2011-2022 Free Software Foundation, Inc.
3	This file is part of the GNU C Library.
4
5	The GNU C Library is free software; you can redistribute it and/or
6	modify it under the terms of the GNU Lesser General Public
7	License as published by the Free Software Foundation; either
8	version 2.1 of the License, or (at your option) any later version.
9
10	The GNU C Library is distributed in the hope that it will be useful,
11	but WITHOUT ANY WARRANTY; without even the implied warranty of
12	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13	Lesser General Public License for more details.
14
15	You should have received a copy of the GNU Lesser General Public
16	License along with the GNU C Library; if not, see
17	<https://www.gnu.org/licenses/>. /*
18
19	#define NO_MATH_REDIRECT
20	#include <fenv.h>
21	#include <float.h>
22	#include <math.h>
23	#include <math-barriers.h>
24	#include <math_private.h>
25	#include <fenv_private.h>
26	#include <math-underflow.h>
27	#include <math_ldbl_opt.h>
28	#include <mul_split.h>
29	#include <stdlib.h>
30
31	/ Calculate X + Y exactly and store the result in HI + LO. It is*
32	given that \|X\| >= \|Y\| and the values are small enough that no
33	overflow occurs. /*
34
35	static void
36	add_split (double hi, double* lo, double* x, double y)
37	{
38	/ Apply Dekker's algorithm. /
39	*hi = x + y;
40	lo = (x - hi) + y;
41	}
42
43	/ Value with extended range, used in intermediate computations. /
44	typedef struct
45	{
46	/ Value in [0.5, 1), as from frexp, or 0. /
47	double val;
48	/ Exponent of power of 2 it is multiplied by, or 0 for zero. /
49	int exp;
50	} ext_val;
51
52	/ Store D as an ext_val value. /
53
54	static void
55	store_ext_val (ext_val v, double* d)
56	{
57	v->val = __frexp (x: d, exponent: &v->exp);
58	}
59
60	/ Store X * Y as ext_val values V0 and V1. /
61
62	static void
63	mul_ext_val (ext_val v0, ext_val v1, double x, double y)
64	{
65	int xexp, yexp;
66	x = __frexp (x: x, exponent: &xexp);
67	y = __frexp (x: y, exponent: &yexp);
68	double hi, lo;
69	mul_split (hi: &hi, lo: &lo, x, y);
70	store_ext_val (v: v0, d: hi);
71	if (hi != `0`)
72	v0->exp += xexp + yexp;
73	store_ext_val (v: v1, d: lo);
74	if (lo != `0`)
75	v1->exp += xexp + yexp;
76	}
77
78	/ Compare absolute values of ext_val values pointed to by P and Q for*
79	qsort. /*
80
81	static int
82	compare (const void p, const* void *q)
83	{
84	const ext_val *pe = p;
85	const ext_val *qe = q;
86	if (pe->val == `0`)
87	return qe->val == `0` ? `0` : -`1`;
88	else if (qe->val == `0`)
89	return `1`;
90	else if (pe->exp < qe->exp)
91	return -`1`;
92	else if (pe->exp > qe->exp)
93	return `1`;
94	else
95	{
96	double pd = fabs (x: pe->val);
97	double qd = fabs (x: qe->val);
98	if (pd < qd)
99	return -`1`;
100	else if (pd == qd)
101	return `0`;
102	else
103	return `1`;
104	}
105	}
106
107	/ Calculate X + Y exactly, storing the high part in X (rounded to
108	nearest) and the low part in Y. It is given that \|X\| >= \|Y\|. /
109
110	static void
111	add_split_ext (ext_val x, ext_val y)
112	{
113	int xexp = x->exp, yexp = y->exp;
114	if (y->val == `0` \|\| xexp - yexp > `53`)
115	return;
116	double hi = x->val;
117	double lo = __scalbn (x: y->val, n: yexp - xexp);
118	add_split (hi: &hi, lo: &lo, x: hi, y: lo);
119	store_ext_val (v: x, d: hi);
120	if (hi != `0`)
121	x->exp += xexp;
122	store_ext_val (v: y, d: lo);
123	if (lo != `0`)
124	y->exp += xexp;
125	}
126
127	long double
128	__fmal (long double x, long double y, long double z)
129	{
130	double xhi, xlo, yhi, ylo, zhi, zlo;
131	int64_t hx, hy, hz;
132	int xexp, yexp, zexp;
133	double scale_val;
134	int scale_exp;
135	ldbl_unpack (x, &xhi, &xlo);
136	EXTRACT_WORDS64 (hx, xhi);
137	xexp = (hx & `0x7ff0000000000000LL`) >> `52`;
138	ldbl_unpack (y, &yhi, &ylo);
139	EXTRACT_WORDS64 (hy, yhi);
140	yexp = (hy & `0x7ff0000000000000LL`) >> `52`;
141	ldbl_unpack (z, &zhi, &zlo);
142	EXTRACT_WORDS64 (hz, zhi);
143	zexp = (hz & `0x7ff0000000000000LL`) >> `52`;
144
145	/ If z is Inf or NaN, but x and y are finite, avoid any exceptions*
146	from computing x y. /
147	if (zexp == `0x7ff` && xexp != `0x7ff` && yexp != `0x7ff`)
148	return (z + x) + y;
149
150	/ If z is zero and x are y are nonzero, compute the result as x * y*
151	to avoid the wrong sign of a zero result if x y underflows to*
152	0. /*
153	if (z == `0` && x != `0` && y != `0`)
154	return x * y;
155
156	/ If x or y or z is Inf/NaN, or if x * y is zero, compute as x * y*
157	+ z. /*
158	if (xexp == `0x7ff` \|\| yexp == `0x7ff` \|\| zexp == `0x7ff`
159	\|\| x == `0` \|\| y == `0`)
160	return (x * y) + z;
161
162	{
163	SET_RESTORE_ROUND (FE_TONEAREST);
164
165	ext_val vals[`10`];
166	store_ext_val (v: &vals[`0`], d: zhi);
167	store_ext_val (v: &vals[`1`], d: zlo);
168	mul_ext_val (v0: &vals[`2`], v1: &vals[`3`], x: xhi, y: yhi);
169	mul_ext_val (v0: &vals[`4`], v1: &vals[`5`], x: xhi, y: ylo);
170	mul_ext_val (v0: &vals[`6`], v1: &vals[`7`], x: xlo, y: yhi);
171	mul_ext_val (v0: &vals[`8`], v1: &vals[`9`], x: xlo, y: ylo);
172	qsort (base: vals, nmemb: `10`, size: sizeof (ext_val), compar: compare);
173	/ Add up the values so that each element of VALS has absolute*
174	value at most equal to the last set bit of the next nonzero
175	element. /*
176	for (size_t i = `0`; i <= `8`; i++)
177	{
178	add_split_ext (x: &vals[i + `1`], y: &vals[i]);
179	qsort (base: vals + i + `1`, nmemb: `9` - i, size: sizeof (ext_val), compar: compare);
180	}
181	/ Add up the values in the other direction, so that each element*
182	of VALS has absolute value less than 5ulp of the next
183	value. /*
184	size_t dstpos = `9`;
185	for (size_t i = `1`; i <= `9`; i++)
186	{
187	if (vals[dstpos].val == `0`)
188	{
189	vals[dstpos] = vals[`9` - i];
190	vals[`9` - i].val = `0`;
191	vals[`9` - i].exp = `0`;
192	}
193	else
194	{
195	add_split_ext (x: &vals[dstpos], y: &vals[`9` - i]);
196	if (vals[`9` - i].val != `0`)
197	{
198	if (`9` - i < dstpos - `1`)
199	{
200	vals[dstpos - `1`] = vals[`9` - i];
201	vals[`9` - i].val = `0`;
202	vals[`9` - i].exp = `0`;
203	}
204	dstpos--;
205	}
206	}
207	}
208	/ If the result is an exact zero, it results from adding two*
209	values with opposite signs; recompute in the original rounding
210	mode. /*
211	if (vals[`9`].val == `0`)
212	goto zero_out;
213	/ Adding the top three values will now give a result as accurate*
214	as the underlying long double arithmetic. /*
215	add_split_ext (x: &vals[`9`], y: &vals[`8`]);
216	if (compare (p: &vals[`8`], q: &vals[`7`]) < `0`)
217	{
218	ext_val tmp = vals[`7`];
219	vals[`7`] = vals[`8`];
220	vals[`8`] = tmp;
221	}
222	add_split_ext (x: &vals[`8`], y: &vals[`7`]);
223	add_split_ext (x: &vals[`9`], y: &vals[`8`]);
224	if (vals[`9`].exp > DBL_MAX_EXP \|\| vals[`9`].exp < DBL_MIN_EXP)
225	{
226	/ Overflow or underflow, with the result depending on the*
227	original rounding mode, but not on the low part computed
228	here. /*
229	scale_val = vals[`9`].val;
230	scale_exp = vals[`9`].exp;
231	goto scale_out;
232	}
233	double hi = __scalbn (x: vals[`9`].val, n: vals[`9`].exp);
234	double lo = __scalbn (x: vals[`8`].val, n: vals[`8`].exp);
235	/ It is possible that the low part became subnormal and was*
236	rounded so that the result is no longer canonical. /*
237	ldbl_canonicalize (&hi, &lo);
238	long double ret = ldbl_pack (hi, lo);
239	math_check_force_underflow (ret);
240	return ret;
241	}
242
243	scale_out:
244	scale_val = math_opt_barrier (scale_val);
245	scale_val = __scalbn (x: scale_val, n: scale_exp);
246	if (fabs (x: scale_val) == DBL_MAX)
247	return copysignl (LDBL_MAX, y: scale_val);
248	math_check_force_underflow (scale_val);
249	return scale_val;
250
251	zero_out:;
252	double zero = `0.0`;
253	zero = math_opt_barrier (zero);
254	return zero - zero;
255	}
256	#if IS_IN (libm)
257	long_double_symbol (libm, __fmal, fmal);
258	#else
259	long_double_symbol (libc, __fmal, fmal);
260	#endif
261

source code of glibc/sysdeps/ieee754/ldbl-128ibm/s_fmal.c