s_fma.c source code [glibc/sysdeps/ieee754/dbl-64/s_fma.c]

1	/ Compute x * y + z as ternary operation.*
2	Copyright (C) 2010-2022 Free Software Foundation, Inc.
3	This file is part of the GNU C Library.
4
5	The GNU C Library is free software; you can redistribute it and/or
6	modify it under the terms of the GNU Lesser General Public
7	License as published by the Free Software Foundation; either
8	version 2.1 of the License, or (at your option) any later version.
9
10	The GNU C Library is distributed in the hope that it will be useful,
11	but WITHOUT ANY WARRANTY; without even the implied warranty of
12	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13	Lesser General Public License for more details.
14
15	You should have received a copy of the GNU Lesser General Public
16	License along with the GNU C Library; if not, see
17	<https://www.gnu.org/licenses/>. /*
18
19	#define NO_MATH_REDIRECT
20	#include <float.h>
21	#define dfmal __hide_dfmal
22	#define f32xfmaf64 __hide_f32xfmaf64
23	#include <math.h>
24	#undef dfmal
25	#undef f32xfmaf64
26	#include <fenv.h>
27	#include <ieee754.h>
28	#include <math-barriers.h>
29	#include <fenv_private.h>
30	#include <libm-alias-double.h>
31	#include <math-narrow-alias.h>
32	#include <tininess.h>
33	#include <math-use-builtins.h>
34
35	/ This implementation uses rounding to odd to avoid problems with*
36	double rounding. See a paper by Boldo and Melquiond:
37	http://www.lri.fr/~melquion/doc/08-tc.pdf /*
38
39	double
40	__fma (double x, double y, double z)
41	{
42	#if USE_FMA_BUILTIN
43	return __builtin_fma (x, y, z);
44	#else
45	/ Use generic implementation. /
46	union ieee754_double u, v, w;
47	int adjust = `0`;
48	u.d = x;
49	v.d = y;
50	w.d = z;
51	if (__builtin_expect (u.ieee.exponent + v.ieee.exponent
52	>= `0x7ff` + IEEE754_DOUBLE_BIAS - DBL_MANT_DIG, `0`)
53	\|\| __builtin_expect (u.ieee.exponent >= `0x7ff` - DBL_MANT_DIG, `0`)
54	\|\| __builtin_expect (v.ieee.exponent >= `0x7ff` - DBL_MANT_DIG, `0`)
55	\|\| __builtin_expect (w.ieee.exponent >= `0x7ff` - DBL_MANT_DIG, `0`)
56	\|\| __builtin_expect (u.ieee.exponent + v.ieee.exponent
57	<= IEEE754_DOUBLE_BIAS + DBL_MANT_DIG, `0`))
58	{
59	/ If z is Inf, but x and y are finite, the result should be*
60	z rather than NaN. /*
61	if (w.ieee.exponent == `0x7ff`
62	&& u.ieee.exponent != `0x7ff`
63	&& v.ieee.exponent != `0x7ff`)
64	return (z + x) + y;
65	/ If z is zero and x are y are nonzero, compute the result*
66	as x y to avoid the wrong sign of a zero result if x * y*
67	underflows to 0. /*
68	if (z == `0` && x != `0` && y != `0`)
69	return x * y;
70	/ If x or y or z is Inf/NaN, or if x * y is zero, compute as*
71	x y + z. /
72	if (u.ieee.exponent == `0x7ff`
73	\|\| v.ieee.exponent == `0x7ff`
74	\|\| w.ieee.exponent == `0x7ff`
75	\|\| x == `0`
76	\|\| y == `0`)
77	return x * y + z;
78	/ If fma will certainly overflow, compute as x * y. /
79	if (u.ieee.exponent + v.ieee.exponent > `0x7ff` + IEEE754_DOUBLE_BIAS)
80	return x * y;
81	/ If x * y is less than 1/4 of DBL_TRUE_MIN, neither the*
82	result nor whether there is underflow depends on its exact
83	value, only on its sign. /*
84	if (u.ieee.exponent + v.ieee.exponent
85	< IEEE754_DOUBLE_BIAS - DBL_MANT_DIG - `2`)
86	{
87	int neg = u.ieee.negative ^ v.ieee.negative;
88	double tiny = neg ? -`0x1p-1074` : `0x1p-1074`;
89	if (w.ieee.exponent >= `3`)
90	return tiny + z;
91	/ Scaling up, adding TINY and scaling down produces the*
92	correct result, because in round-to-nearest mode adding
93	TINY has no effect and in other modes double rounding is
94	harmless. But it may not produce required underflow
95	exceptions. /*
96	v.d = z * `0x1p54` + tiny;
97	if (TININESS_AFTER_ROUNDING
98	? v.ieee.exponent < `55`
99	: (w.ieee.exponent == `0`
100	\|\| (w.ieee.exponent == `1`
101	&& w.ieee.negative != neg
102	&& w.ieee.mantissa1 == `0`
103	&& w.ieee.mantissa0 == `0`)))
104	{
105	double force_underflow = x * y;
106	math_force_eval (force_underflow);
107	}
108	return v.d * `0x1p-54`;
109	}
110	if (u.ieee.exponent + v.ieee.exponent
111	>= `0x7ff` + IEEE754_DOUBLE_BIAS - DBL_MANT_DIG)
112	{
113	/ Compute 1p-53 times smaller result and multiply*
114	at the end. /*
115	if (u.ieee.exponent > v.ieee.exponent)
116	u.ieee.exponent -= DBL_MANT_DIG;
117	else
118	v.ieee.exponent -= DBL_MANT_DIG;
119	/ If x + y exponent is very large and z exponent is very small,*
120	it doesn't matter if we don't adjust it. /*
121	if (w.ieee.exponent > DBL_MANT_DIG)
122	w.ieee.exponent -= DBL_MANT_DIG;
123	adjust = `1`;
124	}
125	else if (w.ieee.exponent >= `0x7ff` - DBL_MANT_DIG)
126	{
127	/ Similarly.*
128	If z exponent is very large and x and y exponents are
129	very small, adjust them up to avoid spurious underflows,
130	rather than down. /*
131	if (u.ieee.exponent + v.ieee.exponent
132	<= IEEE754_DOUBLE_BIAS + `2` * DBL_MANT_DIG)
133	{
134	if (u.ieee.exponent > v.ieee.exponent)
135	u.ieee.exponent += `2` * DBL_MANT_DIG + `2`;
136	else
137	v.ieee.exponent += `2` * DBL_MANT_DIG + `2`;
138	}
139	else if (u.ieee.exponent > v.ieee.exponent)
140	{
141	if (u.ieee.exponent > DBL_MANT_DIG)
142	u.ieee.exponent -= DBL_MANT_DIG;
143	}
144	else if (v.ieee.exponent > DBL_MANT_DIG)
145	v.ieee.exponent -= DBL_MANT_DIG;
146	w.ieee.exponent -= DBL_MANT_DIG;
147	adjust = `1`;
148	}
149	else if (u.ieee.exponent >= `0x7ff` - DBL_MANT_DIG)
150	{
151	u.ieee.exponent -= DBL_MANT_DIG;
152	if (v.ieee.exponent)
153	v.ieee.exponent += DBL_MANT_DIG;
154	else
155	v.d *= `0x1p53`;
156	}
157	else if (v.ieee.exponent >= `0x7ff` - DBL_MANT_DIG)
158	{
159	v.ieee.exponent -= DBL_MANT_DIG;
160	if (u.ieee.exponent)
161	u.ieee.exponent += DBL_MANT_DIG;
162	else
163	u.d *= `0x1p53`;
164	}
165	else / if (u.ieee.exponent + v.ieee.exponent*
166	<= IEEE754_DOUBLE_BIAS + DBL_MANT_DIG) /*
167	{
168	if (u.ieee.exponent > v.ieee.exponent)
169	u.ieee.exponent += `2` * DBL_MANT_DIG + `2`;
170	else
171	v.ieee.exponent += `2` * DBL_MANT_DIG + `2`;
172	if (w.ieee.exponent <= `4` * DBL_MANT_DIG + `6`)
173	{
174	if (w.ieee.exponent)
175	w.ieee.exponent += `2` * DBL_MANT_DIG + `2`;
176	else
177	w.d *= `0x1p108`;
178	adjust = -`1`;
179	}
180	/ Otherwise x * y should just affect inexact*
181	and nothing else. /*
182	}
183	x = u.d;
184	y = v.d;
185	z = w.d;
186	}
187
188	/ Ensure correct sign of exact 0 + 0. /
189	if (__glibc_unlikely ((x == `0` \|\| y == `0`) && z == `0`))
190	{
191	x = math_opt_barrier (x);
192	return x * y + z;
193	}
194
195	fenv_t env;
196	libc_feholdexcept_setround (e: &env, FE_TONEAREST);
197
198	/ Multiplication m1 + m2 = x * y using Dekker's algorithm. /
199	#define C ((1 << (DBL_MANT_DIG + 1) / 2) + 1)
200	double x1 = x * C;
201	double y1 = y * C;
202	double m1 = x * y;
203	x1 = (x - x1) + x1;
204	y1 = (y - y1) + y1;
205	double x2 = x - x1;
206	double y2 = y - y1;
207	double m2 = (((x1 * y1 - m1) + x1 * y2) + x2 * y1) + x2 * y2;
208
209	/ Addition a1 + a2 = z + m1 using Knuth's algorithm. /
210	double a1 = z + m1;
211	double t1 = a1 - z;
212	double t2 = a1 - t1;
213	t1 = m1 - t1;
214	t2 = z - t2;
215	double a2 = t1 + t2;
216	/ Ensure the arithmetic is not scheduled after feclearexcept call. /
217	math_force_eval (m2);
218	math_force_eval (a2);
219	feclearexcept (FE_INEXACT);
220
221	/ If the result is an exact zero, ensure it has the correct sign. /
222	if (a1 == `0` && m2 == `0`)
223	{
224	libc_feupdateenv (e: &env);
225	/ Ensure that round-to-nearest value of z + m1 is not reused. /
226	z = math_opt_barrier (z);
227	return z + m1;
228	}
229
230	libc_fesetround (FE_TOWARDZERO);
231
232	/ Perform m2 + a2 addition with round to odd. /
233	u.d = a2 + m2;
234
235	if (__glibc_unlikely (adjust < `0`))
236	{
237	if ((u.ieee.mantissa1 & `1`) == `0`)
238	u.ieee.mantissa1 \|= libc_fetestexcept (FE_INEXACT) != `0`;
239	v.d = a1 + u.d;
240	/ Ensure the addition is not scheduled after fetestexcept call. /
241	math_force_eval (v.d);
242	}
243
244	/ Reset rounding mode and test for inexact simultaneously. /
245	int j = libc_feupdateenv_test (e: &env, FE_INEXACT) != `0`;
246
247	if (__glibc_likely (adjust == `0`))
248	{
249	if ((u.ieee.mantissa1 & `1`) == `0` && u.ieee.exponent != `0x7ff`)
250	u.ieee.mantissa1 \|= j;
251	/ Result is a1 + u.d. /
252	return a1 + u.d;
253	}
254	else if (__glibc_likely (adjust > `0`))
255	{
256	if ((u.ieee.mantissa1 & `1`) == `0` && u.ieee.exponent != `0x7ff`)
257	u.ieee.mantissa1 \|= j;
258	/ Result is a1 + u.d, scaled up. /
259	return (a1 + u.d) * `0x1p53`;
260	}
261	else
262	{
263	/ If a1 + u.d is exact, the only rounding happens during*
264	scaling down. /*
265	if (j == `0`)
266	return v.d * `0x1p-108`;
267	/ If result rounded to zero is not subnormal, no double*
268	rounding will occur. /*
269	if (v.ieee.exponent > `108`)
270	return (a1 + u.d) * `0x1p-108`;
271	/ If v.d * 0x1p-108 with round to zero is a subnormal above*
272	or equal to DBL_MIN / 2, then v.d 0x1p-108 shifts mantissa*
273	down just by 1 bit, which means v.ieee.mantissa1 \|= j would
274	change the round bit, not sticky or guard bit.
275	v.d 0x1p-108 never normalizes by shifting up,*
276	so round bit plus sticky bit should be already enough
277	for proper rounding. /*
278	if (v.ieee.exponent == `108`)
279	{
280	/ If the exponent would be in the normal range when*
281	rounding to normal precision with unbounded exponent
282	range, the exact result is known and spurious underflows
283	must be avoided on systems detecting tininess after
284	rounding. /*
285	if (TININESS_AFTER_ROUNDING)
286	{
287	w.d = a1 + u.d;
288	if (w.ieee.exponent == `109`)
289	return w.d * `0x1p-108`;
290	}
291	/ v.ieee.mantissa1 & 2 is LSB bit of the result before rounding,*
292	v.ieee.mantissa1 & 1 is the round bit and j is our sticky
293	bit. /*
294	w.d = `0.0`;
295	w.ieee.mantissa1 = ((v.ieee.mantissa1 & `3`) << `1`) \| j;
296	w.ieee.negative = v.ieee.negative;
297	v.ieee.mantissa1 &= ~`3U`;
298	v.d *= `0x1p-108`;
299	w.d *= `0x1p-2`;
300	return v.d + w.d;
301	}
302	v.ieee.mantissa1 \|= j;
303	return v.d * `0x1p-108`;
304	}
305	#endif /* ! USE_FMA_BUILTIN */
306	}
307	#ifndef __fma
308	libm_alias_double (__fma, fma)
309	libm_alias_double_narrow (__fma, fma)
310	#endif
311

source code of glibc/sysdeps/ieee754/dbl-64/s_fma.c