1 | /* Internal libc stuff for floating point environment routines. |
2 | Copyright (C) 1997-2024 Free Software Foundation, Inc. |
3 | This file is part of the GNU C Library. |
4 | |
5 | The GNU C Library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either |
8 | version 2.1 of the License, or (at your option) any later version. |
9 | |
10 | The GNU C Library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | Lesser General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU Lesser General Public |
16 | License along with the GNU C Library; if not, see |
17 | <https://www.gnu.org/licenses/>. */ |
18 | |
19 | #ifndef _FENV_LIBC_H |
20 | #define _FENV_LIBC_H 1 |
21 | |
22 | #include <fenv.h> |
23 | #include <ldsodefs.h> |
24 | #include <sysdep.h> |
25 | |
/* Unmask SIGFPE delivery (set the MSR FE0/FE1 bits) and return a pointer to
   the resulting environment.  Private (non-exported) counterpart used by the
   __TEST_AND_EXIT_NON_STOP macro below.  */
extern const fenv_t *__fe_nomask_env_priv (void);

/* Mask SIGFPE delivery (clear the MSR FE0/FE1 bits) and return a pointer to
   the resulting environment.  Used by __TEST_AND_ENTER_NON_STOP below.  */
extern const fenv_t *__fe_mask_env (void) attribute_hidden;
29 | |
/* If the old env had any enabled exceptions and the new env has no enabled
   exceptions, then mask SIGFPE in the MSR FE0/FE1 bits.  This may allow the
   FPU to run faster because it always takes the default action and can not
   generate SIGFPE.
   NOTE: OLD and NEW are each evaluated more than once; pass side-effect-free
   expressions.  */
#define __TEST_AND_ENTER_NON_STOP(old, new) \
  do { \
    if (((old) & FPSCR_ENABLES_MASK) != 0 && ((new) & FPSCR_ENABLES_MASK) == 0) \
      (void) __fe_mask_env (); \
  } while (0)
39 | |
/* If the old env has no enabled exceptions and the new env has any enabled
   exceptions, then unmask SIGFPE in the MSR FE0/FE1 bits.  This will put the
   hardware into "precise mode" and may cause the FPU to run slower on some
   hardware.
   NOTE: OLD and NEW are each evaluated more than once; pass side-effect-free
   expressions.  */
#define __TEST_AND_EXIT_NON_STOP(old, new) \
  do { \
    if (((old) & FPSCR_ENABLES_MASK) == 0 && ((new) & FPSCR_ENABLES_MASK) != 0) \
      (void) __fe_nomask_env_priv (); \
  } while (0)
49 | |
/* The sticky bits in the FPSCR indicating exceptions have occurred.
   FE_INVALID is excluded because it is a (non-sticky) summary of the
   individual FE_ALL_INVALID sub-exception bits, which are included.  */
#define FPSCR_STICKY_BITS ((FE_ALL_EXCEPT | FE_ALL_INVALID) & ~FE_INVALID)
52 | |
/* Equivalent to fegetenv, but returns a fenv_t instead of taking a
   pointer.  Expands to the 'mffs' instruction (Move From FPSCR).  */
#define fegetenv_register() __builtin_mffs()
56 | |
/* Equivalent to fegetenv_register, but only returns bits for
   status, exception enables, and mode.
   Nicely, it turns out that the 'mffsl' instruction will decode to
   'mffs' on architectures older than "power9" because the additional
   bits set for 'mffsl' are "don't care" for 'mffs'.  'mffs' is a superset
   of 'mffsl'.
   The .machine push/pop lets the assembler accept the POWER9-only
   mnemonic regardless of the -mcpu= level this file is built with.  */
#define fegetenv_control() \
  ({register double __fr; \
    __asm__ __volatile__ ( \
      ".machine push; .machine \"power9\"; mffsl %0; .machine pop" \
      : "=f" (__fr)); \
    __fr; \
  })
70 | |
/* Starting with GCC 14 __builtin_set_fpscr_rn can be used to return the
   FPSCR fields as a double.  This support is available
   on Power9 when the __SET_FPSCR_RN_RETURNS_FPSCR__ macro is defined.
   To retain backward compatibility with older GCC, we still retain the
   old inline assembly implementation.  */
#ifdef __SET_FPSCR_RN_RETURNS_FPSCR__
#define __fe_mffscrn(rn) __builtin_set_fpscr_rn (rn)
#else
/* Set the rounding mode to RN and return the previous FPSCR control bits.
   A compile-time-constant RN uses the immediate form 'mffscrni'; otherwise
   RN is moved into an FP register for the register form 'mffscrn'.  */
#define __fe_mffscrn(rn) \
  ({register fenv_union_t __fr; \
    if (__builtin_constant_p (rn)) \
      __asm__ __volatile__ ( \
        ".machine push; .machine \"power9\"; mffscrni %0,%1; .machine pop" \
        : "=f" (__fr.fenv) : "n" (rn)); \
    else \
    { \
      __fr.l = (rn); \
      __asm__ __volatile__ ( \
        ".machine push; .machine \"power9\"; mffscrn %0,%1; .machine pop" \
        : "=f" (__fr.fenv) : "f" (__fr.fenv)); \
    } \
    __fr.fenv; \
  })
#endif
95 | |
/* Like fegetenv_control, but also sets the rounding mode.  */
#ifdef _ARCH_PWR9
#define fegetenv_and_set_rn(rn) __fe_mffscrn (rn)
#else
/* 'mffscrn' will decode to 'mffs' on ARCH < 3_00, which is still necessary
   but not sufficient, because it does not set the rounding mode.
   Explicitly set the rounding mode when 'mffscrn' actually doesn't.
   The hwcap2 ARCH_3_00 runtime check distinguishes real POWER9 hardware
   (where the mffscrn already did the job) from older CPUs.  */
#define fegetenv_and_set_rn(rn) \
  ({register fenv_union_t __fr; \
    __fr.fenv = __fe_mffscrn (rn); \
    if (__glibc_unlikely (!(GLRO(dl_hwcap2) & PPC_FEATURE2_ARCH_3_00))) \
      __fesetround_inline (rn); \
    __fr.fenv; \
  })
#endif
111 | |
/* Equivalent to fesetenv, but takes a fenv_t instead of a pointer.
   When the CPU supports DFP, use the extended 'mtfsf' form (L=1, W=0,
   guarded by .machine power6) so the DFP rounding-mode bits are written
   too; otherwise fall back to the classic 8-field __builtin_mtfsf.  */
#define fesetenv_register(env) \
  do { \
    double d = (env); \
    if(GLRO(dl_hwcap) & PPC_FEATURE_HAS_DFP) \
      asm volatile (".machine push; " \
                    ".machine \"power6\"; " \
                    "mtfsf 0xff,%0,1,0; " \
                    ".machine pop" : : "f" (d)); \
    else \
      __builtin_mtfsf (0xff, d); \
  } while(0)
124 | |
/* Set the last 2 nibbles of the FPSCR, which contain the
   exception enables and the rounding mode.
   'fegetenv_control' retrieves these bits by reading the FPSCR.
   Note: no trailing semicolon in the expansion — the caller supplies it,
   as with any function-like macro; a semicolon here would create an empty
   statement at every use and break 'if (c) fesetenv_control (e); else'.  */
#define fesetenv_control(env) __builtin_mtfsf (0b00000011, (env))
129 | |
/* This very handy macro:
   - Sets the rounding mode to 'round to nearest';
   - Sets the processor into IEEE mode; and
   - Prevents exceptions from being raised for inexact results.
   These things happen to be exactly what you need for typical elementary
   functions.
   'mtfsfi 7,0' zeroes FPSCR field 7 (enables + rounding mode); on DFP
   hardware the W=1 form is issued first to clear the DFP rounding bits
   as well.  */
#define relax_fenv_state() \
  do { \
    if (GLRO(dl_hwcap) & PPC_FEATURE_HAS_DFP) \
      asm volatile (".machine push; .machine \"power6\"; " \
                    "mtfsfi 7,0,1; .machine pop"); \
    asm volatile ("mtfsfi 7,0"); \
  } while(0)
143 | |
/* Set/clear a particular FPSCR bit (for instance,
   reset_fpscr_bit(FPSCR_VE);
   prevents INVALID exceptions from being raised).
   X must be a compile-time constant FPSCR bit number ("n" constraint),
   e.g. one of the FPSCR_* enum values below.  */
#define set_fpscr_bit(x) asm volatile ("mtfsb1 %0" : : "n"(x))
#define reset_fpscr_bit(x) asm volatile ("mtfsb0 %0" : : "n"(x))
149 | |
/* View a floating-point environment either as the opaque fenv_t (a double
   holding the FPSCR image, as returned by mffs) or as its raw 64-bit
   integer representation for bit manipulation.  */
typedef union
{
  fenv_t fenv;           /* FPSCR image as returned/consumed by mffs/mtfsf.  */
  unsigned long long l;  /* Same bits, as an integer for masking/shifting.  */
} fenv_union_t;
155 | |
156 | |
/* Set the FPSCR rounding-mode field (bits RN_hi/RN_lo, i.e. FPSCR bits
   30-31) to ROUND (0..3), checking hwcap2 at runtime so POWER9 CPUs use
   the single 'mffscrn' instruction while older CPUs fall back to two
   mtfsb0/mtfsb1 bit writes.  ROUND values >= 4 are not rejected here;
   only the low two bits end up mattering via the else branch.
   Always returns 0 (success).  */
static inline int
__fesetround_inline (int round)
{
#ifdef _ARCH_PWR9
  __fe_mffscrn (round);
#else
  if (__glibc_likely (GLRO(dl_hwcap2) & PPC_FEATURE2_ARCH_3_00))
    __fe_mffscrn (round);
  else if ((unsigned int) round < 2)
    {
      /* round is 0 or 1: clear RN_hi (bit 30), then set RN_lo from bit 0.  */
       asm volatile ("mtfsb0 30");
       if ((unsigned int) round == 0)
         asm volatile ("mtfsb0 31");
       else
         asm volatile ("mtfsb1 31");
    }
  else
    {
      /* round is 2 or 3: set RN_hi (bit 30), then set RN_lo from bit 0.  */
       asm volatile ("mtfsb1 30");
       if ((unsigned int) round == 2)
         asm volatile ("mtfsb0 31");
       else
         asm volatile ("mtfsb1 31");
    }
#endif
  return 0;
}
184 | |
/* Same as __fesetround_inline, however without runtime check to use DFP
   mtfsfi syntax (as relax_fenv_state) or if round value is valid.
   The pre-3.00 fallback writes ROUND directly into FPSCR field 7 with
   'mtfsfi', which clobbers the exception-enable bits of that field as
   well — callers must only use this when those enables are zero/about to
   be rewritten.  ROUND must be a compile-time constant ("n" constraint).  */
static inline void
__fesetround_inline_nocheck (const int round)
{
#ifdef _ARCH_PWR9
  __fe_mffscrn (round);
#else
  if (__glibc_likely (GLRO(dl_hwcap2) & PPC_FEATURE2_ARCH_3_00))
    __fe_mffscrn (round);
  else
    asm volatile ("mtfsfi 7,%0" : : "n" (round));
#endif
}
199 | |
/* Convert an FPSCR bit number (0 = most significant, IBM convention) to a
   32-bit mask with that single bit set.
   NOTE(review): for bit 0 this is 1 << 31, which overflows signed int —
   well-defined only as a GCC extension this file already relies on.  Kept
   as 'int' deliberately: making it unsigned would change how ~MASK widens
   to 64 bits at existing call sites.  */
#define FPSCR_MASK(bit) (1 << (31 - (bit)))

/* Definitions of all the FPSCR bit numbers */
enum {
  FPSCR_FX = 0,    /* exception summary */
#define FPSCR_FX_MASK (FPSCR_MASK (FPSCR_FX))
  FPSCR_FEX,       /* enabled exception summary */
#define FPSCR_FEX_MASK (FPSCR_MASK (FPSCR_FEX))
  FPSCR_VX,        /* invalid operation summary */
#define FPSCR_VX_MASK (FPSCR_MASK (FPSCR_VX))
  FPSCR_OX,        /* overflow */
#define FPSCR_OX_MASK (FPSCR_MASK (FPSCR_OX))
  FPSCR_UX,        /* underflow */
#define FPSCR_UX_MASK (FPSCR_MASK (FPSCR_UX))
  FPSCR_ZX,        /* zero divide */
#define FPSCR_ZX_MASK (FPSCR_MASK (FPSCR_ZX))
  FPSCR_XX,        /* inexact */
#define FPSCR_XX_MASK (FPSCR_MASK (FPSCR_XX))
  FPSCR_VXSNAN,    /* invalid operation for sNaN */
#define FPSCR_VXSNAN_MASK (FPSCR_MASK (FPSCR_VXSNAN))
  FPSCR_VXISI,     /* invalid operation for Inf-Inf */
#define FPSCR_VXISI_MASK (FPSCR_MASK (FPSCR_VXISI))
  FPSCR_VXIDI,     /* invalid operation for Inf/Inf */
#define FPSCR_VXIDI_MASK (FPSCR_MASK (FPSCR_VXIDI))
  FPSCR_VXZDZ,     /* invalid operation for 0/0 */
#define FPSCR_VXZDZ_MASK (FPSCR_MASK (FPSCR_VXZDZ))
  FPSCR_VXIMZ,     /* invalid operation for Inf*0 */
#define FPSCR_VXIMZ_MASK (FPSCR_MASK (FPSCR_VXIMZ))
  FPSCR_VXVC,      /* invalid operation for invalid compare */
#define FPSCR_VXVC_MASK (FPSCR_MASK (FPSCR_VXVC))
  FPSCR_FR,        /* fraction rounded [fraction was incremented by round] */
#define FPSCR_FR_MASK (FPSCR_MASK (FPSCR_FR))
  FPSCR_FI,        /* fraction inexact */
#define FPSCR_FI_MASK (FPSCR_MASK (FPSCR_FI))
  FPSCR_FPRF_C,    /* result class descriptor */
#define FPSCR_FPRF_C_MASK (FPSCR_MASK (FPSCR_FPRF_C))
  FPSCR_FPRF_FL,   /* result less than (usually, less than 0) */
#define FPSCR_FPRF_FL_MASK (FPSCR_MASK (FPSCR_FPRF_FL))
  FPSCR_FPRF_FG,   /* result greater than */
#define FPSCR_FPRF_FG_MASK (FPSCR_MASK (FPSCR_FPRF_FG))
  FPSCR_FPRF_FE,   /* result equal to */
#define FPSCR_FPRF_FE_MASK (FPSCR_MASK (FPSCR_FPRF_FE))
  FPSCR_FPRF_FU,   /* result unordered */
#define FPSCR_FPRF_FU_MASK (FPSCR_MASK (FPSCR_FPRF_FU))
  FPSCR_20,        /* reserved */
  FPSCR_VXSOFT,    /* invalid operation set by software */
#define FPSCR_VXSOFT_MASK (FPSCR_MASK (FPSCR_VXSOFT))
  FPSCR_VXSQRT,    /* invalid operation for square root */
#define FPSCR_VXSQRT_MASK (FPSCR_MASK (FPSCR_VXSQRT))
  FPSCR_VXCVI,     /* invalid operation for invalid integer convert */
#define FPSCR_VXCVI_MASK (FPSCR_MASK (FPSCR_VXCVI))
  FPSCR_VE,        /* invalid operation exception enable */
#define FPSCR_VE_MASK (FPSCR_MASK (FPSCR_VE))
  FPSCR_OE,        /* overflow exception enable */
#define FPSCR_OE_MASK (FPSCR_MASK (FPSCR_OE))
  FPSCR_UE,        /* underflow exception enable */
#define FPSCR_UE_MASK (FPSCR_MASK (FPSCR_UE))
  FPSCR_ZE,        /* zero divide exception enable */
#define FPSCR_ZE_MASK (FPSCR_MASK (FPSCR_ZE))
  FPSCR_XE,        /* inexact exception enable */
#define FPSCR_XE_MASK (FPSCR_MASK (FPSCR_XE))
#ifdef _ARCH_PWR6
  FPSCR_29,        /* Reserved in ISA 2.05 */
#define FPSCR_NI_MASK (FPSCR_MASK (FPSCR_29))
#else
  FPSCR_NI,        /* non-IEEE mode (typically, no denormalised numbers) */
#define FPSCR_NI_MASK (FPSCR_MASK (FPSCR_NI))
#endif /* _ARCH_PWR6 */
  /* the remaining two least-significant bits keep the rounding mode */
  FPSCR_RN_hi,
#define FPSCR_RN_hi_MASK (FPSCR_MASK (FPSCR_RN_hi))
  FPSCR_RN_lo
#define FPSCR_RN_lo_MASK (FPSCR_MASK (FPSCR_RN_lo))
};
274 | |
/* Composite masks built from the individual FPSCR bit masks above.  */

/* Rounding-mode field (FPSCR bits 30-31).  */
#define FPSCR_RN_MASK (FPSCR_RN_hi_MASK|FPSCR_RN_lo_MASK)
/* The five exception-enable bits (FPSCR bits 24-28).  */
#define FPSCR_ENABLES_MASK \
  (FPSCR_VE_MASK|FPSCR_OE_MASK|FPSCR_UE_MASK|FPSCR_ZE_MASK|FPSCR_XE_MASK)
/* The five top-level sticky exception bits (FPSCR bits 2-6).  */
#define FPSCR_BASIC_EXCEPTIONS_MASK \
  (FPSCR_VX_MASK|FPSCR_OX_MASK|FPSCR_UX_MASK|FPSCR_ZX_MASK|FPSCR_XX_MASK)
/* All exception bits, including the individual VX sub-reasons.  */
#define FPSCR_EXCEPTIONS_MASK (FPSCR_BASIC_EXCEPTIONS_MASK| \
  FPSCR_VXSNAN_MASK|FPSCR_VXISI_MASK|FPSCR_VXIDI_MASK|FPSCR_VXZDZ_MASK| \
  FPSCR_VXIMZ_MASK|FPSCR_VXVC_MASK|FPSCR_VXSOFT_MASK|FPSCR_VXSQRT_MASK| \
  FPSCR_VXCVI_MASK)
/* Result-flags field (class descriptor + condition bits).  */
#define FPSCR_FPRF_MASK \
  (FPSCR_FPRF_C_MASK|FPSCR_FPRF_FL_MASK|FPSCR_FPRF_FG_MASK| \
   FPSCR_FPRF_FE_MASK|FPSCR_FPRF_FU_MASK)
/* Bits a program may set (control): enables, non-IEEE mode, rounding.  */
#define FPSCR_CONTROL_MASK (FPSCR_ENABLES_MASK|FPSCR_NI_MASK|FPSCR_RN_MASK)
/* Bits the hardware sets to report results (status).  */
#define FPSCR_STATUS_MASK (FPSCR_FR_MASK|FPSCR_FI_MASK|FPSCR_FPRF_MASK)
289 | |
/* The bits in the FENV(1) ABI for exceptions correspond one-to-one with bits
   in the FPSCR, albeit shifted to different but corresponding locations.
   Similarly, the exception indicator bits in the FPSCR correspond one-to-one
   with the exception enable bits.  It is thus possible to map the FENV(1)
   exceptions directly to the FPSCR enables with a simple mask and shift,
   and vice versa.
   Concretely: the enables occupy FPSCR bits 24-28 (mask values 1<<3..1<<7)
   and the FE_* exception macros occupy bits 1<<25..1<<29 — exactly 22
   positions higher.  */
#define FPSCR_EXCEPT_TO_ENABLE_SHIFT 22

/* Extract the exception-enable bits from an FPSCR image L and return them
   repositioned as FE_* exception flags.  */
static inline int
fenv_reg_to_exceptions (unsigned long long l)
{
  /* Truncate to the low 32 FPSCR bits, keep the enables, shift up to the
     FE_* positions.  */
  return (((int)l) & FPSCR_ENABLES_MASK) << FPSCR_EXCEPT_TO_ENABLE_SHIFT;
}

/* Inverse of fenv_reg_to_exceptions: turn FE_* exception flags in EXCEPTS
   into the corresponding FPSCR exception-enable bits.  */
static inline unsigned long long
fenv_exceptions_to_reg (int excepts)
{
  return (unsigned long long)
    (excepts & FE_ALL_EXCEPT) >> FPSCR_EXCEPT_TO_ENABLE_SHIFT;
}
310 | |
#ifdef _ARCH_PWR6
/* Not supported in ISA 2.05.  Provided for source compat only.
   (When building for POWER6+, the enum above names bit 29 FPSCR_29, so
   supply FPSCR_NI as a plain macro with the same bit number.)  */
# define FPSCR_NI 29
#endif /* _ARCH_PWR6 */
315 | |
/* This operation (i) sets the appropriate FPSCR bits for its
   parameter, (ii) converts sNaN to the corresponding qNaN, and (iii)
   otherwise passes its parameter through unchanged (in particular, -0
   and +0 stay as they were).  The `obvious' way to do this is optimised
   out by gcc.
   Implemented as a multiply by 1.0 in a volatile asm so the compiler
   cannot fold it away.  f_wash is the double variant, f_washf the float
   variant (fmuls).  */
#define f_wash(x) \
   ({ double d; asm volatile ("fmul %0,%1,%2" \
                              : "=f"(d) \
                              : "f" (x), "f"((float)1.0)); d; })
#define f_washf(x) \
   ({ float f; asm volatile ("fmuls %0,%1,%2" \
                             : "=f"(f) \
                             : "f" (x), "f"((float)1.0)); f; })
329 | |
330 | #endif /* fenv_libc.h */ |
331 | |