dp_sqrt.c source code [linux/arch/mips/math-emu/dp_sqrt.c]

1	// SPDX-License-Identifier: GPL-2.0-only
2	/ IEEE754 floating point arithmetic*
3	* double precision square root
4	*/
5	/*
6	* MIPS floating point support
7	* Copyright (C) 1994-2000 Algorithmics Ltd.
8	*/
9
10	#include "ieee754dp.h"
11
12	static const unsigned int table[] = {
13	`0`, `1204`, `3062`, `5746`, `9193`, `13348`, `18162`, `23592`,
14	`29598`, `36145`, `43202`, `50740`, `58733`, `67158`, `75992`,
15	`85215`, `83599`, `71378`, `60428`, `50647`, `41945`, `34246`,
16	`27478`, `21581`, `16499`, `12183`, `8588`, `5674`, `3403`,
17	`1742`, `661`, `130`
18	};
19
20	union ieee754dp ieee754dp_sqrt(union ieee754dp x)
21	{
22	struct _ieee754_csr oldcsr;
23	union ieee754dp y, z, t;
24	unsigned int scalx, yh;
25	COMPXDP;
26
27	EXPLODEXDP;
28	ieee754_clearcx();
29	FLUSHXDP;
30
31	/ x == INF or NAN? /
32	switch (xc) {
33	case IEEE754_CLASS_SNAN:
34	return ieee754dp_nanxcpt(x);
35
36	case IEEE754_CLASS_QNAN:
37	/ sqrt(Nan) = Nan /
38	return x;
39
40	case IEEE754_CLASS_ZERO:
41	/ sqrt(0) = 0 /
42	return x;
43
44	case IEEE754_CLASS_INF:
45	if (xs) {
46	/ sqrt(-Inf) = Nan /
47	ieee754_setcx(IEEE754_INVALID_OPERATION);
48	return ieee754dp_indef();
49	}
50	/ sqrt(+Inf) = Inf /
51	return x;
52
53	case IEEE754_CLASS_DNORM:
54	DPDNORMX;
55	fallthrough;
56	case IEEE754_CLASS_NORM:
57	if (xs) {
58	/ sqrt(-x) = Nan /
59	ieee754_setcx(IEEE754_INVALID_OPERATION);
60	return ieee754dp_indef();
61	}
62	break;
63	}
64
65	/ save old csr; switch off INX enable & flag; set RN rounding /
66	oldcsr = ieee754_csr;
67	ieee754_csr.mx &= ~IEEE754_INEXACT;
68	ieee754_csr.sx &= ~IEEE754_INEXACT;
69	ieee754_csr.rm = FPU_CSR_RN;
70
71	/ adjust exponent to prevent overflow /
72	scalx = `0`;
73	if (xe > `512`) { / x > 2*-512? /*
74	xe -= `512`; / x = x / 2*512 /*
75	scalx += `256`;
76	} else if (xe < -`512`) { / x < 2*-512? /*
77	xe += `512`; / x = x * 2*512 /*
78	scalx -= `256`;
79	}
80
81	x = builddp(s: `0`, bx: xe + DP_EBIAS, m: xm & ~DP_HIDDEN_BIT);
82	y = x;
83
84	/ magic initial approximation to almost 8 sig. bits /
85	yh = y.bits >> `32`;
86	yh = (yh >> `1`) + `0x1ff80000`;
87	yh = yh - table[(yh >> `15`) & `31`];
88	y.bits = ((u64) yh << `32`) \| (y.bits & `0xffffffff`);
89
90	/ Heron's rule once with correction to improve to ~18 sig. bits /
91	/ t=x/y; y=y+t; py[n0]=py[n0]-0x00100006; py[n1]=0; /
92	t = ieee754dp_div(x, y);
93	y = ieee754dp_add(x: y, y: t);
94	y.bits -= `0x0010000600000000LL`;
95	y.bits &= `0xffffffff00000000LL`;
96
97	/ triple to almost 56 sig. bits: y ~= sqrt(x) to within 1 ulp /
98	/ t=yy; z=t; pt[n0]+=0x00100000; t+=z; z=(x-z)y; /
99	t = ieee754dp_mul(x: y, y);
100	z = t;
101	t.bexp += `0x001`;
102	t = ieee754dp_add(x: t, y: z);
103	z = ieee754dp_mul(x: ieee754dp_sub(x, y: z), y);
104
105	/ t=z/(t+x) ; pt[n0]+=0x00100000; y+=t; /
106	t = ieee754dp_div(x: z, y: ieee754dp_add(x: t, y: x));
107	t.bexp += `0x001`;
108	y = ieee754dp_add(x: y, y: t);
109
110	/ twiddle last bit to force y correctly rounded /
111
112	/ set RZ, clear INEX flag /
113	ieee754_csr.rm = FPU_CSR_RZ;
114	ieee754_csr.sx &= ~IEEE754_INEXACT;
115
116	/ t=x/y; ...chopped quotient, possibly inexact /
117	t = ieee754dp_div(x, y);
118
119	if (ieee754_csr.sx & IEEE754_INEXACT \|\| t.bits != y.bits) {
120
121	if (!(ieee754_csr.sx & IEEE754_INEXACT))
122	/ t = t-ulp /
123	t.bits -= `1`;
124
125	/ add inexact to result status /
126	oldcsr.cx \|= IEEE754_INEXACT;
127	oldcsr.sx \|= IEEE754_INEXACT;
128
129	switch (oldcsr.rm) {
130	case FPU_CSR_RU:
131	y.bits += `1`;
132	fallthrough;
133	case FPU_CSR_RN:
134	t.bits += `1`;
135	break;
136	}
137
138	/ y=y+t; ...chopped sum /
139	y = ieee754dp_add(x: y, y: t);
140
141	/ adjust scalx for correctly rounded sqrt(x) /
142	scalx -= `1`;
143	}
144
145	/ py[n0]=py[n0]+scalx; ...scale back y /
146	y.bexp += scalx;
147
148	/ restore rounding mode, possibly set inexact /
149	ieee754_csr = oldcsr;
150
151	return y;
152	}
153

source code of linux/arch/mips/math-emu/dp_sqrt.c