e_sqrtf.c source code [glibc/sysdeps/powerpc/fpu/e_sqrtf.c]

/* Single-precision floating point square root.
   Copyright (C) 1997-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */
18
19	#include <math.h>
20	#include <math_private.h>
21	#include <fenv_libc.h>
22	#include <libm-alias-finite.h>
23	#include <math-use-builtins.h>
24
25	float
26	__ieee754_sqrtf (float x)
27	{
28	#if USE_SQRTF_BUILTIN
29	return __builtin_sqrtf (x);
30	#else
31	/ The method is based on a description in*
32	Computation of elementary functions on the IBM RISC System/6000 processor,
33	P. W. Markstein, IBM J. Res. Develop, 34(1) 1990.
34	Basically, it consists of two interleaved Newton-Raphson approximations,
35	one to find the actual square root, and one to find its reciprocal
36	without the expense of a division operation. The tricky bit here
37	is the use of the POWER/PowerPC multiply-add operation to get the
38	required accuracy with high speed.
39
40	The argument reduction works by a combination of table lookup to
41	obtain the initial guesses, and some careful modification of the
42	generated guesses (which mostly runs on the integer unit, while the
43	Newton-Raphson is running on the FPU). /*
44
45	extern const float __t_sqrt[`1024`];
46
47	if (x > `0`)
48	{
49	if (x != INFINITY)
50	{
51	/ Variables named starting with 's' exist in the*
52	argument-reduced space, so that 2 > sx >= 0.5,
53	1.41... > sg >= 0.70.., 0.70.. >= sy > 0.35... .
54	Variables named ending with 'i' are integer versions of
55	floating-point values. /*
56	float sx; / The value of which we're trying to find the square*
57	root. /*
58	float sg, g; / Guess of the square root of x. /
59	float sd, d; / Difference between the square of the guess and x. /
60	float sy; / Estimate of 1/2g (overestimated by 1ulp). /
61	float sy2; / 2sy /*
62	float e; / Difference between yg and 1/2 (note that e==se). /*
63	float shx; / == sx * fsg /
64	float fsg; / sgfsg == g. /*
65	fenv_t fe; / Saved floating-point environment (stores rounding*
66	mode and whether the inexact exception is
67	enabled). /*
68	uint32_t xi, sxi, fsgi;
69	const float *t_sqrt;
70
71	GET_FLOAT_WORD (xi, x);
72	fe = fegetenv_register ();
73	relax_fenv_state ();
74	sxi = (xi & `0x3fffffff`) \| `0x3f000000`;
75	SET_FLOAT_WORD (sx, sxi);
76	t_sqrt = __t_sqrt + (xi >> (`23` - `8` - `1`) & `0x3fe`);
77	sg = t_sqrt[`0`];
78	sy = t_sqrt[`1`];
79
80	/ Here we have three Newton-Raphson iterations each of a*
81	division and a square root and the remainder of the
82	argument reduction, all interleaved. /*
83	sd = -__builtin_fmaf (sg, sg, -sx);
84	fsgi = (xi + `0x40000000`) >> `1` & `0x7f800000`;
85	sy2 = sy + sy;
86	sg = __builtin_fmaf (sy, sd, sg); / 16-bit approximation to*
87	sqrt(sx). /*
88	e = -__builtin_fmaf (sy, sg, -`0x1.0000020365653p-1`);
89	SET_FLOAT_WORD (fsg, fsgi);
90	sd = -__builtin_fmaf (sg, sg, -sx);
91	sy = __builtin_fmaf (e, sy2, sy);
92	if ((xi & `0x7f800000`) == `0`)
93	goto denorm;
94	shx = sx * fsg;
95	sg = __builtin_fmaf (sy, sd, sg); / 32-bit approximation to*
96	sqrt(sx), but perhaps
97	rounded incorrectly. /*
98	sy2 = sy + sy;
99	g = sg * fsg;
100	e = -__builtin_fmaf (sy, sg, -`0x1.0000020365653p-1`);
101	d = -__builtin_fmaf (g, sg, -shx);
102	sy = __builtin_fmaf (e, sy2, sy);
103	fesetenv_register (fe);
104	return __builtin_fmaf (sy, d, g);
105	denorm:
106	/ For denormalised numbers, we normalise, calculate the*
107	square root, and return an adjusted result. /*
108	fesetenv_register (fe);
109	return __ieee754_sqrtf (x * `0x1p+48`) * `0x1p-24`;
110	}
111	}
112	else if (x < `0`)
113	{
114	/ For some reason, some PowerPC32 processors don't implement*
115	FE_INVALID_SQRT. /*
116	# ifdef FE_INVALID_SQRT
117	feraiseexcept (FE_INVALID_SQRT);
118
119	fenv_union_t u = { .fenv = fegetenv_register () };
120	if ((u.l & FE_INVALID) == `0`)
121	# endif
122	feraiseexcept (FE_INVALID);
123	x = NAN;
124	}
125	return f_washf (x);
126	#endif /* USE_SQRTF_BUILTIN */
127	}
128	libm_alias_finite (__ieee754_sqrtf, __sqrtf)
129

source code of glibc/sysdeps/powerpc/fpu/e_sqrtf.c