1/* Compute x * y + z as ternary operation.
2 Copyright (C) 2011-2022 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
18
19#define NO_MATH_REDIRECT
20#include <fenv.h>
21#include <float.h>
22#include <math.h>
23#include <math-barriers.h>
24#include <math_private.h>
25#include <fenv_private.h>
26#include <math-underflow.h>
27#include <math_ldbl_opt.h>
28#include <mul_split.h>
29#include <stdlib.h>
30
31/* Calculate X + Y exactly and store the result in *HI + *LO. It is
32 given that |X| >= |Y| and the values are small enough that no
33 overflow occurs. */
34
35static void
36add_split (double *hi, double *lo, double x, double y)
37{
38 /* Apply Dekker's algorithm. */
39 *hi = x + y;
40 *lo = (x - *hi) + y;
41}
42
43/* Value with extended range, used in intermediate computations. */
44typedef struct
45{
46 /* Value in [0.5, 1), as from frexp, or 0. */
47 double val;
48 /* Exponent of power of 2 it is multiplied by, or 0 for zero. */
49 int exp;
50} ext_val;
51
52/* Store D as an ext_val value. */
53
54static void
55store_ext_val (ext_val *v, double d)
56{
57 v->val = __frexp (x: d, exponent: &v->exp);
58}
59
60/* Store X * Y as ext_val values *V0 and *V1. */
61
62static void
63mul_ext_val (ext_val *v0, ext_val *v1, double x, double y)
64{
65 int xexp, yexp;
66 x = __frexp (x: x, exponent: &xexp);
67 y = __frexp (x: y, exponent: &yexp);
68 double hi, lo;
69 mul_split (hi: &hi, lo: &lo, x, y);
70 store_ext_val (v: v0, d: hi);
71 if (hi != 0)
72 v0->exp += xexp + yexp;
73 store_ext_val (v: v1, d: lo);
74 if (lo != 0)
75 v1->exp += xexp + yexp;
76}
77
78/* Compare absolute values of ext_val values pointed to by P and Q for
79 qsort. */
80
81static int
82compare (const void *p, const void *q)
83{
84 const ext_val *pe = p;
85 const ext_val *qe = q;
86 if (pe->val == 0)
87 return qe->val == 0 ? 0 : -1;
88 else if (qe->val == 0)
89 return 1;
90 else if (pe->exp < qe->exp)
91 return -1;
92 else if (pe->exp > qe->exp)
93 return 1;
94 else
95 {
96 double pd = fabs (x: pe->val);
97 double qd = fabs (x: qe->val);
98 if (pd < qd)
99 return -1;
100 else if (pd == qd)
101 return 0;
102 else
103 return 1;
104 }
105}
106
107/* Calculate *X + *Y exactly, storing the high part in *X (rounded to
108 nearest) and the low part in *Y. It is given that |X| >= |Y|. */
109
110static void
111add_split_ext (ext_val *x, ext_val *y)
112{
113 int xexp = x->exp, yexp = y->exp;
114 if (y->val == 0 || xexp - yexp > 53)
115 return;
116 double hi = x->val;
117 double lo = __scalbn (x: y->val, n: yexp - xexp);
118 add_split (hi: &hi, lo: &lo, x: hi, y: lo);
119 store_ext_val (v: x, d: hi);
120 if (hi != 0)
121 x->exp += xexp;
122 store_ext_val (v: y, d: lo);
123 if (lo != 0)
124 y->exp += xexp;
125}
126
127long double
128__fmal (long double x, long double y, long double z)
129{
130 double xhi, xlo, yhi, ylo, zhi, zlo;
131 int64_t hx, hy, hz;
132 int xexp, yexp, zexp;
133 double scale_val;
134 int scale_exp;
135 ldbl_unpack (x, &xhi, &xlo);
136 EXTRACT_WORDS64 (hx, xhi);
137 xexp = (hx & 0x7ff0000000000000LL) >> 52;
138 ldbl_unpack (y, &yhi, &ylo);
139 EXTRACT_WORDS64 (hy, yhi);
140 yexp = (hy & 0x7ff0000000000000LL) >> 52;
141 ldbl_unpack (z, &zhi, &zlo);
142 EXTRACT_WORDS64 (hz, zhi);
143 zexp = (hz & 0x7ff0000000000000LL) >> 52;
144
145 /* If z is Inf or NaN, but x and y are finite, avoid any exceptions
146 from computing x * y. */
147 if (zexp == 0x7ff && xexp != 0x7ff && yexp != 0x7ff)
148 return (z + x) + y;
149
150 /* If z is zero and x are y are nonzero, compute the result as x * y
151 to avoid the wrong sign of a zero result if x * y underflows to
152 0. */
153 if (z == 0 && x != 0 && y != 0)
154 return x * y;
155
156 /* If x or y or z is Inf/NaN, or if x * y is zero, compute as x * y
157 + z. */
158 if (xexp == 0x7ff || yexp == 0x7ff || zexp == 0x7ff
159 || x == 0 || y == 0)
160 return (x * y) + z;
161
162 {
163 SET_RESTORE_ROUND (FE_TONEAREST);
164
165 ext_val vals[10];
166 store_ext_val (v: &vals[0], d: zhi);
167 store_ext_val (v: &vals[1], d: zlo);
168 mul_ext_val (v0: &vals[2], v1: &vals[3], x: xhi, y: yhi);
169 mul_ext_val (v0: &vals[4], v1: &vals[5], x: xhi, y: ylo);
170 mul_ext_val (v0: &vals[6], v1: &vals[7], x: xlo, y: yhi);
171 mul_ext_val (v0: &vals[8], v1: &vals[9], x: xlo, y: ylo);
172 qsort (base: vals, nmemb: 10, size: sizeof (ext_val), compar: compare);
173 /* Add up the values so that each element of VALS has absolute
174 value at most equal to the last set bit of the next nonzero
175 element. */
176 for (size_t i = 0; i <= 8; i++)
177 {
178 add_split_ext (x: &vals[i + 1], y: &vals[i]);
179 qsort (base: vals + i + 1, nmemb: 9 - i, size: sizeof (ext_val), compar: compare);
180 }
181 /* Add up the values in the other direction, so that each element
182 of VALS has absolute value less than 5ulp of the next
183 value. */
184 size_t dstpos = 9;
185 for (size_t i = 1; i <= 9; i++)
186 {
187 if (vals[dstpos].val == 0)
188 {
189 vals[dstpos] = vals[9 - i];
190 vals[9 - i].val = 0;
191 vals[9 - i].exp = 0;
192 }
193 else
194 {
195 add_split_ext (x: &vals[dstpos], y: &vals[9 - i]);
196 if (vals[9 - i].val != 0)
197 {
198 if (9 - i < dstpos - 1)
199 {
200 vals[dstpos - 1] = vals[9 - i];
201 vals[9 - i].val = 0;
202 vals[9 - i].exp = 0;
203 }
204 dstpos--;
205 }
206 }
207 }
208 /* If the result is an exact zero, it results from adding two
209 values with opposite signs; recompute in the original rounding
210 mode. */
211 if (vals[9].val == 0)
212 goto zero_out;
213 /* Adding the top three values will now give a result as accurate
214 as the underlying long double arithmetic. */
215 add_split_ext (x: &vals[9], y: &vals[8]);
216 if (compare (p: &vals[8], q: &vals[7]) < 0)
217 {
218 ext_val tmp = vals[7];
219 vals[7] = vals[8];
220 vals[8] = tmp;
221 }
222 add_split_ext (x: &vals[8], y: &vals[7]);
223 add_split_ext (x: &vals[9], y: &vals[8]);
224 if (vals[9].exp > DBL_MAX_EXP || vals[9].exp < DBL_MIN_EXP)
225 {
226 /* Overflow or underflow, with the result depending on the
227 original rounding mode, but not on the low part computed
228 here. */
229 scale_val = vals[9].val;
230 scale_exp = vals[9].exp;
231 goto scale_out;
232 }
233 double hi = __scalbn (x: vals[9].val, n: vals[9].exp);
234 double lo = __scalbn (x: vals[8].val, n: vals[8].exp);
235 /* It is possible that the low part became subnormal and was
236 rounded so that the result is no longer canonical. */
237 ldbl_canonicalize (&hi, &lo);
238 long double ret = ldbl_pack (hi, lo);
239 math_check_force_underflow (ret);
240 return ret;
241 }
242
243 scale_out:
244 scale_val = math_opt_barrier (scale_val);
245 scale_val = __scalbn (x: scale_val, n: scale_exp);
246 if (fabs (x: scale_val) == DBL_MAX)
247 return copysignl (LDBL_MAX, y: scale_val);
248 math_check_force_underflow (scale_val);
249 return scale_val;
250
251 zero_out:;
252 double zero = 0.0;
253 zero = math_opt_barrier (zero);
254 return zero - zero;
255}
256#if IS_IN (libm)
257long_double_symbol (libm, __fmal, fmal);
258#else
259long_double_symbol (libc, __fmal, fmal);
260#endif
261

source code of glibc/sysdeps/ieee754/ldbl-128ibm/s_fmal.c