1 | /* Internal libc stuff for floating point environment routines. |
2 | Copyright (C) 1997-2024 Free Software Foundation, Inc. |
3 | This file is part of the GNU C Library. |
4 | |
5 | The GNU C Library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either |
8 | version 2.1 of the License, or (at your option) any later version. |
9 | |
10 | The GNU C Library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | Lesser General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU Lesser General Public |
16 | License along with the GNU C Library; if not, see |
17 | <https://www.gnu.org/licenses/>. */ |
18 | |
19 | #ifndef _FENV_LIBC_H |
20 | #define _FENV_LIBC_H 1 |
21 | |
22 | #include <fenv.h> |
23 | #include <ldsodefs.h> |
24 | #include <sysdep.h> |
25 | |
/* Unmask SIGFPE delivery (set the MSR FE0/FE1 bits) and return a pointer to
   the resulting environment.  Private (non-exported) counterpart used by the
   __TEST_AND_EXIT_NON_STOP macro below.  */
extern const fenv_t *__fe_nomask_env_priv (void);

/* Mask SIGFPE delivery (clear the MSR FE0/FE1 bits) and return a pointer to
   the resulting environment.  Used by __TEST_AND_ENTER_NON_STOP below.  */
extern const fenv_t *__fe_mask_env (void) attribute_hidden;
29 | |
/* If the old env had any enabled exceptions and the new env has no enabled
   exceptions, then mask SIGFPE in the MSR FE0/FE1 bits.  This may allow the
   FPU to run faster because it always takes the default action and can not
   generate SIGFPE.
   NOTE: OLD and NEW are each evaluated more than once; pass side-effect-free
   expressions.  */
#define __TEST_AND_ENTER_NON_STOP(old, new) \
  do { \
    if (((old) & FPSCR_ENABLES_MASK) != 0 && ((new) & FPSCR_ENABLES_MASK) == 0) \
      (void) __fe_mask_env (); \
  } while (0)
39 | |
/* If the old env has no enabled exceptions and the new env has any enabled
   exceptions, then unmask SIGFPE in the MSR FE0/FE1 bits.  This will put the
   hardware into "precise mode" and may cause the FPU to run slower on some
   hardware.
   NOTE: OLD and NEW are each evaluated more than once; pass side-effect-free
   expressions.  */
#define __TEST_AND_EXIT_NON_STOP(old, new) \
  do { \
    if (((old) & FPSCR_ENABLES_MASK) == 0 && ((new) & FPSCR_ENABLES_MASK) != 0) \
      (void) __fe_nomask_env_priv (); \
  } while (0)
49 | |
/* The sticky bits in the FPSCR indicating exceptions have occurred.
   FE_INVALID is excluded because it is a (non-sticky) summary of the
   individual FE_ALL_INVALID sub-exception bits, which are included.  */
#define FPSCR_STICKY_BITS ((FE_ALL_EXCEPT | FE_ALL_INVALID) & ~FE_INVALID)
52 | |
/* Equivalent to fegetenv, but returns a fenv_t instead of taking a
   pointer.  Expands to the 'mffs' instruction (Move From FPSCR).  */
#define fegetenv_register() __builtin_mffs()
56 | |
/* Equivalent to fegetenv_register, but only returns bits for
   status, exception enables, and mode.
   Nicely, it turns out that the 'mffsl' instruction will decode to
   'mffs' on architectures older than "power9" because the additional
   bits set for 'mffsl' are "don't care" for 'mffs'.  'mffs' is a superset
   of 'mffsl'.
   The .machine push/pop lets the assembler accept the POWER9-only
   mnemonic regardless of the -mcpu= level this file is built with.  */
#define fegetenv_control() \
  ({register double __fr; \
    __asm__ __volatile__ ( \
      ".machine push; .machine \"power9\"; mffsl %0; .machine pop" \
      : "=f" (__fr)); \
    __fr; \
  })
70 | |
/* Starting with GCC 14 __builtin_set_fpscr_rn can be used to return the
   FPSCR fields as a double.  This support is available
   on Power9 when the __SET_FPSCR_RN_RETURNS_FPSCR__ macro is defined.
   To retain backward compatibility with older GCC, we still retain the
   old inline assembly implementation.  */
#ifdef __SET_FPSCR_RN_RETURNS_FPSCR__
#define __fe_mffscrn(rn) __builtin_set_fpscr_rn (rn)
#else
/* Set the rounding mode to RN and return the previous FPSCR control bits.
   A compile-time-constant RN uses the immediate form 'mffscrni'; otherwise
   RN is moved into an FP register for the register form 'mffscrn'.  */
#define __fe_mffscrn(rn) \
  ({register fenv_union_t __fr; \
    if (__builtin_constant_p (rn)) \
      __asm__ __volatile__ ( \
        ".machine push; .machine \"power9\"; mffscrni %0,%1; .machine pop" \
        : "=f" (__fr.fenv) : "n" (rn)); \
    else \
    { \
      __fr.l = (rn); \
      __asm__ __volatile__ ( \
        ".machine push; .machine \"power9\"; mffscrn %0,%1; .machine pop" \
        : "=f" (__fr.fenv) : "f" (__fr.fenv)); \
    } \
    __fr.fenv; \
  })
#endif
95 | |
/* Like fegetenv_control, but also sets the rounding mode.  */
#ifdef _ARCH_PWR9
#define fegetenv_and_set_rn(rn) __fe_mffscrn (rn)
#else
/* 'mffscrn' will decode to 'mffs' on ARCH < 3_00, which is still necessary
   but not sufficient, because it does not set the rounding mode.
   Explicitly set the rounding mode when 'mffscrn' actually doesn't.
   The hwcap2 ARCH_3_00 runtime check distinguishes real POWER9 hardware
   (where the mffscrn already did the job) from older CPUs.  */
#define fegetenv_and_set_rn(rn) \
  ({register fenv_union_t __fr; \
    __fr.fenv = __fe_mffscrn (rn); \
    if (__glibc_unlikely (!(GLRO(dl_hwcap2) & PPC_FEATURE2_ARCH_3_00))) \
      __fesetround_inline (rn); \
    __fr.fenv; \
  })
#endif
111 | |
/* Equivalent to fesetenv, but takes a fenv_t instead of a pointer.
   When the CPU supports DFP, use the extended 'mtfsf' form (L=1, W=0,
   guarded by .machine power6) so the DFP rounding-mode bits are written
   too; otherwise fall back to the classic 8-field __builtin_mtfsf.  */
#define fesetenv_register(env) \
  do { \
    double d = (env); \
    if(GLRO(dl_hwcap) & PPC_FEATURE_HAS_DFP) \
      asm volatile (".machine push; " \
                    ".machine \"power6\"; " \
                    "mtfsf 0xff,%0,1,0; " \
                    ".machine pop" : : "f" (d)); \
    else \
      __builtin_mtfsf (0xff, d); \
  } while(0)
124 | |
/* Set the last 2 nibbles of the FPSCR, which contain the
   exception enables and the rounding mode.
   'fegetenv_control' retrieves these bits by reading the FPSCR.
   Note: no trailing semicolon in the expansion — the caller supplies it,
   as with any function-like macro; a semicolon here would create an empty
   statement at every use and break 'if (c) fesetenv_control (e); else'.  */
#define fesetenv_control(env) __builtin_mtfsf (0b00000011, (env))
129 | |
/* This very handy macro:
   - Sets the rounding mode to 'round to nearest';
   - Sets the processor into IEEE mode; and
   - Prevents exceptions from being raised for inexact results.
   These things happen to be exactly what you need for typical elementary
   functions.
   'mtfsfi 7,0' zeroes FPSCR field 7 (enables + rounding mode); on DFP
   hardware the W=1 form is issued first to clear the DFP rounding bits
   as well.  */
#define relax_fenv_state() \
  do { \
    if (GLRO(dl_hwcap) & PPC_FEATURE_HAS_DFP) \
      asm volatile (".machine push; .machine \"power6\"; " \
                    "mtfsfi 7,0,1; .machine pop"); \
    asm volatile ("mtfsfi 7,0"); \
  } while(0)
143 | |
/* Set/clear a particular FPSCR bit (for instance,
   reset_fpscr_bit(FPSCR_VE);
   prevents INVALID exceptions from being raised).
   X must be a compile-time constant FPSCR bit number ("n" constraint),
   e.g. one of the FPSCR_* enum values below.  */
#define set_fpscr_bit(x) asm volatile ("mtfsb1 %0" : : "n"(x))
#define reset_fpscr_bit(x) asm volatile ("mtfsb0 %0" : : "n"(x))
149 | |
/* View a floating-point environment either as the opaque fenv_t (a double
   holding the FPSCR image, as returned by mffs) or as its raw 64-bit
   integer representation for bit manipulation.  */
typedef union
{
  fenv_t fenv;           /* FPSCR image as returned/consumed by mffs/mtfsf.  */
  unsigned long long l;  /* Same bits, as an integer for masking/shifting.  */
} fenv_union_t;
155 | |
156 | |
/* Set the FPSCR rounding-mode field (bits RN_hi/RN_lo, i.e. FPSCR bits
   30-31) to ROUND (0..3), checking hwcap2 at runtime so POWER9 CPUs use
   the single 'mffscrn' instruction while older CPUs fall back to two
   mtfsb0/mtfsb1 bit writes.  ROUND values >= 4 are not rejected here;
   only the low two bits end up mattering via the else branch.
   Always returns 0 (success).  */
static inline int
__fesetround_inline (int round)
{
#ifdef _ARCH_PWR9
  __fe_mffscrn (round);
#else
  if (__glibc_likely (GLRO(dl_hwcap2) & PPC_FEATURE2_ARCH_3_00))
    __fe_mffscrn (round);
  else if ((unsigned int) round < 2)
    {
      /* round is 0 or 1: clear RN_hi (bit 30), then set RN_lo from bit 0.  */
       asm volatile ("mtfsb0 30");
       if ((unsigned int) round == 0)
         asm volatile ("mtfsb0 31");
       else
         asm volatile ("mtfsb1 31");
    }
  else
    {
      /* round is 2 or 3: set RN_hi (bit 30), then set RN_lo from bit 0.  */
       asm volatile ("mtfsb1 30");
       if ((unsigned int) round == 2)
         asm volatile ("mtfsb0 31");
       else
         asm volatile ("mtfsb1 31");
    }
#endif
  return 0;
}
184 | |
/* Same as __fesetround_inline, however without runtime check to use DFP
   mtfsfi syntax (as relax_fenv_state) or if round value is valid.
   The pre-3.00 fallback writes ROUND directly into FPSCR field 7 with
   'mtfsfi', which clobbers the exception-enable bits of that field as
   well — callers must only use this when those enables are zero/about to
   be rewritten.  ROUND must be a compile-time constant ("n" constraint).  */
static inline void
__fesetround_inline_nocheck (const int round)
{
#ifdef _ARCH_PWR9
  __fe_mffscrn (round);
#else
  if (__glibc_likely (GLRO(dl_hwcap2) & PPC_FEATURE2_ARCH_3_00))
    __fe_mffscrn (round);
  else
    asm volatile ("mtfsfi 7,%0" : : "n" (round));
#endif
}
199 | |
/* Convert an FPSCR bit number (0 = most significant, IBM convention) to a
   32-bit mask with that single bit set.
   NOTE(review): for bit 0 this is 1 << 31, which overflows signed int —
   well-defined only as a GCC extension this file already relies on.  Kept
   as 'int' deliberately: making it unsigned would change how ~MASK widens
   to 64 bits at existing call sites.  */
#define FPSCR_MASK(bit) (1 << (31 - (bit)))

/* Definitions of all the FPSCR bit numbers */
enum {
  FPSCR_FX = 0,    /* exception summary */
#define FPSCR_FX_MASK (FPSCR_MASK (FPSCR_FX))
  FPSCR_FEX,       /* enabled exception summary */
#define FPSCR_FEX_MASK (FPSCR_MASK (FPSCR_FEX))
  FPSCR_VX,        /* invalid operation summary */
#define FPSCR_VX_MASK (FPSCR_MASK (FPSCR_VX))
  FPSCR_OX,        /* overflow */
#define FPSCR_OX_MASK (FPSCR_MASK (FPSCR_OX))
  FPSCR_UX,        /* underflow */
#define FPSCR_UX_MASK (FPSCR_MASK (FPSCR_UX))
  FPSCR_ZX,        /* zero divide */
#define FPSCR_ZX_MASK (FPSCR_MASK (FPSCR_ZX))
  FPSCR_XX,        /* inexact */
#define FPSCR_XX_MASK (FPSCR_MASK (FPSCR_XX))
  FPSCR_VXSNAN,    /* invalid operation for sNaN */
#define FPSCR_VXSNAN_MASK (FPSCR_MASK (FPSCR_VXSNAN))
  FPSCR_VXISI,     /* invalid operation for Inf-Inf */
#define FPSCR_VXISI_MASK (FPSCR_MASK (FPSCR_VXISI))
  FPSCR_VXIDI,     /* invalid operation for Inf/Inf */
#define FPSCR_VXIDI_MASK (FPSCR_MASK (FPSCR_VXIDI))
  FPSCR_VXZDZ,     /* invalid operation for 0/0 */
#define FPSCR_VXZDZ_MASK (FPSCR_MASK (FPSCR_VXZDZ))
  FPSCR_VXIMZ,     /* invalid operation for Inf*0 */
#define FPSCR_VXIMZ_MASK (FPSCR_MASK (FPSCR_VXIMZ))
  FPSCR_VXVC,      /* invalid operation for invalid compare */
#define FPSCR_VXVC_MASK (FPSCR_MASK (FPSCR_VXVC))
  FPSCR_FR,        /* fraction rounded [fraction was incremented by round] */
#define FPSCR_FR_MASK (FPSCR_MASK (FPSCR_FR))
  FPSCR_FI,        /* fraction inexact */
#define FPSCR_FI_MASK (FPSCR_MASK (FPSCR_FI))
  FPSCR_FPRF_C,    /* result class descriptor */
#define FPSCR_FPRF_C_MASK (FPSCR_MASK (FPSCR_FPRF_C))
  FPSCR_FPRF_FL,   /* result less than (usually, less than 0) */
#define FPSCR_FPRF_FL_MASK (FPSCR_MASK (FPSCR_FPRF_FL))
  FPSCR_FPRF_FG,   /* result greater than */
#define FPSCR_FPRF_FG_MASK (FPSCR_MASK (FPSCR_FPRF_FG))
  FPSCR_FPRF_FE,   /* result equal to */
#define FPSCR_FPRF_FE_MASK (FPSCR_MASK (FPSCR_FPRF_FE))
  FPSCR_FPRF_FU,   /* result unordered */
#define FPSCR_FPRF_FU_MASK (FPSCR_MASK (FPSCR_FPRF_FU))
  FPSCR_20,        /* reserved */
  FPSCR_VXSOFT,    /* invalid operation set by software */
#define FPSCR_VXSOFT_MASK (FPSCR_MASK (FPSCR_VXSOFT))
  FPSCR_VXSQRT,    /* invalid operation for square root */
#define FPSCR_VXSQRT_MASK (FPSCR_MASK (FPSCR_VXSQRT))
  FPSCR_VXCVI,     /* invalid operation for invalid integer convert */
#define FPSCR_VXCVI_MASK (FPSCR_MASK (FPSCR_VXCVI))
  FPSCR_VE,        /* invalid operation exception enable */
#define FPSCR_VE_MASK (FPSCR_MASK (FPSCR_VE))
  FPSCR_OE,        /* overflow exception enable */
#define FPSCR_OE_MASK (FPSCR_MASK (FPSCR_OE))
  FPSCR_UE,        /* underflow exception enable */
#define FPSCR_UE_MASK (FPSCR_MASK (FPSCR_UE))
  FPSCR_ZE,        /* zero divide exception enable */
#define FPSCR_ZE_MASK (FPSCR_MASK (FPSCR_ZE))
  FPSCR_XE,        /* inexact exception enable */
#define FPSCR_XE_MASK (FPSCR_MASK (FPSCR_XE))
#ifdef _ARCH_PWR6
  FPSCR_29,        /* Reserved in ISA 2.05 */
#define FPSCR_NI_MASK (FPSCR_MASK (FPSCR_29))
#else
  FPSCR_NI,        /* non-IEEE mode (typically, no denormalised numbers) */
#define FPSCR_NI_MASK (FPSCR_MASK (FPSCR_NI))
#endif /* _ARCH_PWR6 */
  /* the remaining two least-significant bits keep the rounding mode */
  FPSCR_RN_hi,
#define FPSCR_RN_hi_MASK (FPSCR_MASK (FPSCR_RN_hi))
  FPSCR_RN_lo
#define FPSCR_RN_lo_MASK (FPSCR_MASK (FPSCR_RN_lo))
};
274 | |
/* Composite masks built from the individual FPSCR bit masks above.  */

/* Rounding-mode field (FPSCR bits 30-31).  */
#define FPSCR_RN_MASK (FPSCR_RN_hi_MASK|FPSCR_RN_lo_MASK)
/* The five exception-enable bits (FPSCR bits 24-28).  */
#define FPSCR_ENABLES_MASK \
  (FPSCR_VE_MASK|FPSCR_OE_MASK|FPSCR_UE_MASK|FPSCR_ZE_MASK|FPSCR_XE_MASK)
/* The five top-level sticky exception bits (FPSCR bits 2-6).  */
#define FPSCR_BASIC_EXCEPTIONS_MASK \
  (FPSCR_VX_MASK|FPSCR_OX_MASK|FPSCR_UX_MASK|FPSCR_ZX_MASK|FPSCR_XX_MASK)
/* All exception bits, including the individual VX sub-reasons.  */
#define FPSCR_EXCEPTIONS_MASK (FPSCR_BASIC_EXCEPTIONS_MASK| \
  FPSCR_VXSNAN_MASK|FPSCR_VXISI_MASK|FPSCR_VXIDI_MASK|FPSCR_VXZDZ_MASK| \
  FPSCR_VXIMZ_MASK|FPSCR_VXVC_MASK|FPSCR_VXSOFT_MASK|FPSCR_VXSQRT_MASK| \
  FPSCR_VXCVI_MASK)
/* Result-flags field (class descriptor + condition bits).  */
#define FPSCR_FPRF_MASK \
  (FPSCR_FPRF_C_MASK|FPSCR_FPRF_FL_MASK|FPSCR_FPRF_FG_MASK| \
   FPSCR_FPRF_FE_MASK|FPSCR_FPRF_FU_MASK)
/* Bits a program may set (control): enables, non-IEEE mode, rounding.  */
#define FPSCR_CONTROL_MASK (FPSCR_ENABLES_MASK|FPSCR_NI_MASK|FPSCR_RN_MASK)
/* Bits the hardware sets to report results (status).  */
#define FPSCR_STATUS_MASK (FPSCR_FR_MASK|FPSCR_FI_MASK|FPSCR_FPRF_MASK)
289 | |
/* The bits in the FENV(1) ABI for exceptions correspond one-to-one with bits
   in the FPSCR, albeit shifted to different but corresponding locations.
   Similarly, the exception indicator bits in the FPSCR correspond one-to-one
   with the exception enable bits.  It is thus possible to map the FENV(1)
   exceptions directly to the FPSCR enables with a simple mask and shift,
   and vice versa.
   Concretely: the enables occupy FPSCR bits 24-28 (mask values 1<<3..1<<7)
   and the FE_* exception macros occupy bits 1<<25..1<<29 — exactly 22
   positions higher.  */
#define FPSCR_EXCEPT_TO_ENABLE_SHIFT 22

/* Extract the exception-enable bits from an FPSCR image L and return them
   repositioned as FE_* exception flags.  */
static inline int
fenv_reg_to_exceptions (unsigned long long l)
{
  /* Truncate to the low 32 FPSCR bits, keep the enables, shift up to the
     FE_* positions.  */
  return (((int)l) & FPSCR_ENABLES_MASK) << FPSCR_EXCEPT_TO_ENABLE_SHIFT;
}

/* Inverse of fenv_reg_to_exceptions: turn FE_* exception flags in EXCEPTS
   into the corresponding FPSCR exception-enable bits.  */
static inline unsigned long long
fenv_exceptions_to_reg (int excepts)
{
  return (unsigned long long)
    (excepts & FE_ALL_EXCEPT) >> FPSCR_EXCEPT_TO_ENABLE_SHIFT;
}
310 | |
#ifdef _ARCH_PWR6
/* Not supported in ISA 2.05.  Provided for source compat only.
   (When building for POWER6+, the enum above names bit 29 FPSCR_29, so
   supply FPSCR_NI as a plain macro with the same bit number.)  */
# define FPSCR_NI 29
#endif /* _ARCH_PWR6 */
315 | |
/* This operation (i) sets the appropriate FPSCR bits for its
   parameter, (ii) converts sNaN to the corresponding qNaN, and (iii)
   otherwise passes its parameter through unchanged (in particular, -0
   and +0 stay as they were).  The `obvious' way to do this is optimised
   out by gcc.
   Implemented as a multiply by 1.0 in a volatile asm so the compiler
   cannot fold it away.  f_wash is the double variant, f_washf the float
   variant (fmuls).  */
#define f_wash(x) \
   ({ double d; asm volatile ("fmul %0,%1,%2" \
                              : "=f"(d) \
                              : "f" (x), "f"((float)1.0)); d; })
#define f_washf(x) \
   ({ float f; asm volatile ("fmuls %0,%1,%2" \
                             : "=f"(f) \
                             : "f" (x), "f"((float)1.0)); f; })
329 | |
330 | #endif /* fenv_libc.h */ |
331 | |