e_sqrt.c source code [glibc/sysdeps/powerpc/fpu/e_sqrt.c]

/* Double-precision floating point square root.
   Copyright (C) 1997-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */
18
19	#include <math.h>
20	#include <math_private.h>
21	#include <fenv_libc.h>
22	#include <libm-alias-finite.h>
23	#include <math-use-builtins.h>
24
25	double
26	__ieee754_sqrt (double x)
27	{
28	#if USE_SQRT_BUILTIN
29	return __builtin_sqrt (x);
30	#else
31	/ The method is based on a description in*
32	Computation of elementary functions on the IBM RISC System/6000 processor,
33	P. W. Markstein, IBM J. Res. Develop, 34(1) 1990.
34	Basically, it consists of two interleaved Newton-Raphson approximations,
35	one to find the actual square root, and one to find its reciprocal
36	without the expense of a division operation. The tricky bit here
37	is the use of the POWER/PowerPC multiply-add operation to get the
38	required accuracy with high speed.
39
40	The argument reduction works by a combination of table lookup to
41	obtain the initial guesses, and some careful modification of the
42	generated guesses (which mostly runs on the integer unit, while the
43	Newton-Raphson is running on the FPU). /*
44
45	extern const float __t_sqrt[`1024`];
46
47	if (x > `0`)
48	{
49	/ schedule the EXTRACT_WORDS to get separation between the store*
50	and the load. /*
51	ieee_double_shape_type ew_u;
52	ieee_double_shape_type iw_u;
53	ew_u.value = (x);
54	if (x != INFINITY)
55	{
56	/ Variables named starting with 's' exist in the*
57	argument-reduced space, so that 2 > sx >= 0.5,
58	1.41... > sg >= 0.70.., 0.70.. >= sy > 0.35... .
59	Variables named ending with 'i' are integer versions of
60	floating-point values. /*
61	double sx; / The value of which we're trying to find the*
62	square root. /*
63	double sg, g; / Guess of the square root of x. /
64	double sd, d; / Difference between the square of the guess and x. /
65	double sy; / Estimate of 1/2g (overestimated by 1ulp). /
66	double sy2; / 2sy /*
67	double e; / Difference between yg and 1/2 (se = e fsy). /
68	double shx; / == sx * fsg /
69	double fsg; / sgfsg == g. /*
70	fenv_t fe; / Saved floating-point environment (stores rounding*
71	mode and whether the inexact exception is
72	enabled). /*
73	uint32_t xi0, xi1, sxi, fsgi;
74	const float *t_sqrt;
75
76	fe = fegetenv_register ();
77	/ complete the EXTRACT_WORDS (xi0,xi1,x) operation. /
78	xi0 = ew_u.parts.msw;
79	xi1 = ew_u.parts.lsw;
80	relax_fenv_state ();
81	sxi = (xi0 & `0x3fffffff`) \| `0x3fe00000`;
82	/ schedule the INSERT_WORDS (sx, sxi, xi1) to get separation*
83	between the store and the load. /*
84	iw_u.parts.msw = sxi;
85	iw_u.parts.lsw = xi1;
86	t_sqrt = __t_sqrt + (xi0 >> (`52` - `32` - `8` - `1`) & `0x3fe`);
87	sg = t_sqrt[`0`];
88	sy = t_sqrt[`1`];
89	/ complete the INSERT_WORDS (sx, sxi, xi1) operation. /
90	sx = iw_u.value;
91
92	/ Here we have three Newton-Raphson iterations each of a*
93	division and a square root and the remainder of the
94	argument reduction, all interleaved. /*
95	sd = -__builtin_fma (sg, sg, -sx);
96	fsgi = (xi0 + `0x40000000`) >> `1` & `0x7ff00000`;
97	sy2 = sy + sy;
98	sg = __builtin_fma (sy, sd, sg); / 16-bit approximation to*
99	sqrt(sx). /*
100
101	/ schedule the INSERT_WORDS (fsg, fsgi, 0) to get separation*
102	between the store and the load. /*
103	INSERT_WORDS (fsg, fsgi, `0`);
104	iw_u.parts.msw = fsgi;
105	iw_u.parts.lsw = (`0`);
106	e = -__builtin_fma (sy, sg, -`0x1.0000000000001p-1`);
107	sd = -__builtin_fma (sg, sg, -sx);
108	if ((xi0 & `0x7ff00000`) == `0`)
109	goto denorm;
110	sy = __builtin_fma (e, sy2, sy);
111	sg = __builtin_fma (sy, sd, sg); / 32-bit approximation to*
112	sqrt(sx). /*
113	sy2 = sy + sy;
114	/ complete the INSERT_WORDS (fsg, fsgi, 0) operation. /
115	fsg = iw_u.value;
116	e = -__builtin_fma (sy, sg, -`0x1.0000000000001p-1`);
117	sd = -__builtin_fma (sg, sg, -sx);
118	sy = __builtin_fma (e, sy2, sy);
119	shx = sx * fsg;
120	sg = __builtin_fma (sy, sd, sg); / 64-bit approximation to*
121	sqrt(sx), but perhaps
122	rounded incorrectly. /*
123	sy2 = sy + sy;
124	g = sg * fsg;
125	e = -__builtin_fma (sy, sg, -`0x1.0000000000001p-1`);
126	d = -__builtin_fma (g, sg, -shx);
127	sy = __builtin_fma (e, sy2, sy);
128	fesetenv_register (fe);
129	return __builtin_fma (sy, d, g);
130	denorm:
131	/ For denormalised numbers, we normalise, calculate the*
132	square root, and return an adjusted result. /*
133	fesetenv_register (fe);
134	return __ieee754_sqrt (x * `0x1p+108f`) * `0x1p-54f`;
135	}
136	}
137	else if (x < `0`)
138	{
139	/ For some reason, some PowerPC32 processors don't implement*
140	FE_INVALID_SQRT. /*
141	# ifdef FE_INVALID_SQRT
142	__feraiseexcept (FE_INVALID_SQRT);
143
144	fenv_union_t u = { .fenv = fegetenv_register () };
145	if ((u.l & FE_INVALID) == `0`)
146	# endif
147	__feraiseexcept (FE_INVALID);
148	x = NAN;
149	}
150	return f_wash (x);
151	#endif /* USE_SQRT_BUILTIN */
152	}
153
154	libm_alias_finite (__ieee754_sqrt, __sqrt)
155

source code of glibc/sysdeps/powerpc/fpu/e_sqrt.c