wm_sqrt.S source code [linux/arch/x86/math-emu/wm_sqrt.S]

/* SPDX-License-Identifier: GPL-2.0 */
2	.file "wm_sqrt.S"
/*---------------------------------------------------------------------------+
 |  wm_sqrt.S                                                                |
 |                                                                           |
 |  Fixed point arithmetic square root evaluation.                           |
 |                                                                           |
 |  Copyright (C) 1992,1993,1995,1997                                        |
 |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
 |                       Australia.  E-mail billm@suburbia.net               |
 |                                                                           |
 |  Call from C as:                                                          |
 |    int wm_sqrt(FPU_REG *n, unsigned int control_word)                     |
 |                                                                           |
 +---------------------------------------------------------------------------*/
16
/*---------------------------------------------------------------------------+
 |  wm_sqrt(FPU_REG *n, unsigned int control_word)                           |
 |    returns the square root of n in n.                                     |
 |                                                                           |
 |  Use Newton's method to compute the square root of a number, which must   |
 |  be in the range  [1.0 .. 4.0),  to 64 bits accuracy.                     |
 |  Does not check the sign or tag of the argument.                          |
 |  Sets the exponent, but not the sign or tag of the result.                |
 |                                                                           |
 |  The guess is kept in %esi:%edi                                           |
 +---------------------------------------------------------------------------*/
28
29	#include "exception.h"
30	#include "fpu_emu.h"
31
32
/*
 * Working storage used by wm_sqrt.
 *
 * Two interchangeable layouts provide the same seven 32-bit names:
 *   - default: slots in the %ebp frame (wm_sqrt reserves 28 bytes below %ebp);
 *   - NON_REENTRANT_FPU: seven words in .data, shared by every call.
 *
 * FPU_accum_3..FPU_accum_1 hold the upper 96 bits of guess^2 (and then the
 * difference guess^2 - n); FPU_accum_0 is reserved but is not referenced by
 * wm_sqrt itself.
 */
#ifndef NON_REENTRANT_FPU
/*	Local storage on the stack: */
#define FPU_accum_3	-4(%ebp)	/* ms word */
#define FPU_accum_2	-8(%ebp)
#define FPU_accum_1	-12(%ebp)
#define FPU_accum_0	-16(%ebp)

/*
 * The de-normalised argument:
 *                  sq_2                  sq_1              sq_0
 *        b b b b b b b ... b b b   b b b .... b b b   b 0 0 0 ... 0
 *           ^ binary point here
 */
#define FPU_fsqrt_arg_2	-20(%ebp)	/* ms word */
#define FPU_fsqrt_arg_1	-24(%ebp)
#define FPU_fsqrt_arg_0	-28(%ebp)	/* ls word, at most the ms bit is set */

#else
/*	Local storage in a static area: */
.data
	.align 4,0
FPU_accum_3:
	.long	0		/* ms word */
FPU_accum_2:
	.long	0
FPU_accum_1:
	.long	0
FPU_accum_0:
	.long	0

/*
 * The de-normalised argument:
 *                  sq_2                  sq_1              sq_0
 *        b b b b b b b ... b b b   b b b .... b b b   b 0 0 0 ... 0
 *           ^ binary point here
 */
FPU_fsqrt_arg_2:
	.long	0		/* ms word */
FPU_fsqrt_arg_1:
	.long	0
FPU_fsqrt_arg_0:
	.long	0		/* ls word, at most the ms bit is set */
#endif /* NON_REENTRANT_FPU */
75
76
77	.text
/*
 * int wm_sqrt(FPU_REG *n, unsigned int control_word)
 *
 * Replaces the significand of *n by its square root and stores EXP_BIAS in
 * the exponent field (result in [1.0 .. 2.0)).  Sign and tag are untouched.
 *
 * In:    PARAM1 = pointer to n.  Only SIGH, SIGL and EXP of *n are read.
 *        EXP is compared with EXP_BIAS only: equal selects the
 *        [1.0 .. 2.0) path, anything else is treated as [2.0 .. 4.0).
 *        control_word is not read in this routine.
 * Out:   never returns from here; every path ends in a jump to
 *        fpu_reg_round with
 *          %eax = ms word of the root,  %ebx = ls word of the root,
 *          %edx = 32 bits of rounding extension,  %edi = n.
 * Stack: %ebp frame, 28 bytes of locals unless NON_REENTRANT_FPU, then
 *        %esi, %edi, %ebx pushed.  They are still on the stack at the jump;
 *        presumably fpu_reg_round (defined elsewhere) unwinds the frame and
 *        returns to the C caller -- confirm against that routine.
 * Regs:  %esi:%edi = current guess (ms:ls); %eax, %ecx, %edx scratch;
 *        %ebx scratch in the final exactness tests.
 *
 * Rounding extension convention used below (value handed over in %edx):
 *   0x00000000 exact, 0x000000ff just above, 0xffffff00 just below the next
 *   value, 0x80000000 exactly half way, 0x7fffff00 / 0x800000ff just
 *   below / above half way.
 */
SYM_FUNC_START(wm_sqrt)
	pushl	%ebp
	movl	%esp,%ebp
#ifndef NON_REENTRANT_FPU
	subl	$28,%esp		/* FPU_accum_* and FPU_fsqrt_arg_* */
#endif /* NON_REENTRANT_FPU */
	pushl	%esi
	pushl	%edi
	pushl	%ebx

	movl	PARAM1,%esi		/* %esi = n */

	movl	SIGH(%esi),%eax		/* %eax:%ecx:%edx = significand:0 */
	movl	SIGL(%esi),%ecx
	xorl	%edx,%edx

/* We use a rough linear estimate for the first guess.. */

	cmpw	EXP_BIAS,EXP(%esi)
	jnz	sqrt_arg_ge_2

	/* De-normalise so that both ranges share one binary point. */
	shrl	$1,%eax			/* arg is in the range  [1.0 .. 2.0) */
	rcrl	$1,%ecx
	rcrl	$1,%edx			/* bit shifted out lands in the ms bit */

sqrt_arg_ge_2:
/* From here on, n is never accessed directly again until it is
   replaced by the answer. */

	movl	%eax,FPU_fsqrt_arg_2		/* ms word of n */
	movl	%ecx,FPU_fsqrt_arg_1
	movl	%edx,FPU_fsqrt_arg_0

/* Make a linear first estimate */
	shrl	$1,%eax
	addl	$0x40000000,%eax
	movl	$0xaaaaaaaa,%ecx	/* approx (2/3) * 2^32 */
	mull	%ecx
	shll	%edx			/* max result was 7fff... */
	testl	$0x80000000,%edx	/* but min was 3fff... */
	jnz	sqrt_prelim_no_adjust

	movl	$0x80000000,%edx	/* round up */

sqrt_prelim_no_adjust:
	movl	%edx,%esi	/* Our first guess */

/* We have now computed (approx)   (2 + x) / 3, which forms the basis
   for a few iterations of Newton's method */

	movl	FPU_fsqrt_arg_2,%ecx	/* ms word */

/*
 * From our initial estimate, three iterations are enough to get us
 * to 30 bits or so. This will then allow two iterations at better
 * precision to complete the process.
 */

/* Compute  (g + n/g)/2  at each iteration (g is the guess). */
	shrl	%ecx		/* Doing this first will prevent a divide */
				/* overflow later. */

	/*
	 * Only %edx (the high dividend word) is loaded before each divl;
	 * %eax keeps whatever the previous instruction left there.
	 */
	movl	%ecx,%edx	/* msw of the arg / 2 */
	divl	%esi		/* current estimate */
	shrl	%esi		/* divide by 2 */
	addl	%eax,%esi	/* the new estimate */

	movl	%ecx,%edx
	divl	%esi
	shrl	%esi
	addl	%eax,%esi

	movl	%ecx,%edx
	divl	%esi
	shrl	%esi
	addl	%eax,%esi

/*
 * Now that an estimate accurate to about 30 bits has been obtained (in %esi),
 * we improve it to 60 bits or so.
 *
 * The strategy from now on is to compute new estimates from
 *      guess := guess + (n - guess^2) / (2 * guess)
 */

/* First, find the square of the guess */
	movl	%esi,%eax
	mull	%esi
/* guess^2 now in %edx:%eax */

	movl	FPU_fsqrt_arg_1,%ecx
	subl	%ecx,%eax
	movl	FPU_fsqrt_arg_2,%ecx	/* ms word of normalized n */
	sbbl	%ecx,%edx
	jnc	sqrt_stage_2_positive

/* Subtraction gives a negative result,
   negate the result before division. */
	notl	%edx
	notl	%eax
	addl	$1,%eax
	adcl	$0,%edx

	divl	%esi
	movl	%eax,%ecx	/* ms word of the correction */

	movl	%edx,%eax	/* carry on with the remainder */
	divl	%esi		/* %eax = ls word of the correction */
	jmp	sqrt_stage_2_finish

sqrt_stage_2_positive:
	divl	%esi
	movl	%eax,%ecx	/* ms word of the correction */

	movl	%edx,%eax	/* carry on with the remainder */
	divl	%esi		/* %eax = ls word of the correction */

	/* guess^2 > n here, so the correction must be subtracted: negate it */
	notl	%ecx
	notl	%eax
	addl	$1,%eax
	adcl	$0,%ecx

sqrt_stage_2_finish:
	sarl	$1,%ecx		/* divide by 2 */
	rcrl	$1,%eax

	/* Form the new estimate in %esi:%edi */
	movl	%eax,%edi
	addl	%ecx,%esi	/* the jnz below tests this sum */

	jnz	sqrt_stage_2_done	/* result should be [1..2) */

#ifdef PARANOID
/* It should be possible to get here only if the arg is ffff....ffff */
	cmpl	$0xffffffff,FPU_fsqrt_arg_1
	jnz	sqrt_stage_2_error
#endif /* PARANOID */

/* The best rounded result. */
	xorl	%eax,%eax
	decl	%eax			/* %eax = 0xffffffff */
	movl	%eax,%edi
	movl	%eax,%esi
	movl	$0x7fffffff,%eax	/* extension: just below half way */
	jmp	sqrt_round_result

#ifdef PARANOID
sqrt_stage_2_error:
	pushl	EX_INTERNAL|0x213
	call	EXCEPTION
	/* execution continues at sqrt_stage_2_done after the call */
#endif /* PARANOID */

sqrt_stage_2_done:

/* Now the square root has been computed to better than 60 bits. */

/* Find the square of the guess. */
	movl	%edi,%eax		/* ls word of guess */
	mull	%edi
	movl	%edx,FPU_accum_1	/* ls word of ls*ls is dropped */

	movl	%esi,%eax
	mull	%esi
	movl	%edx,FPU_accum_3
	movl	%eax,FPU_accum_2

	movl	%edi,%eax
	mull	%esi
	addl	%eax,FPU_accum_1
	adcl	%edx,FPU_accum_2
	adcl	$0,FPU_accum_3

	/*
	 * The cross product is needed twice; %edx:%eax still hold it, so
	 * it is simply added a second time instead of being recomputed.
	 */
/*	movl	%esi,%eax */
/*	mull	%edi */
	addl	%eax,FPU_accum_1
	adcl	%edx,FPU_accum_2
	adcl	$0,FPU_accum_3

/* guess^2 now in FPU_accum_3:FPU_accum_2:FPU_accum_1 */

	movl	FPU_fsqrt_arg_0,%eax		/* get normalized n */
	subl	%eax,FPU_accum_1
	movl	FPU_fsqrt_arg_1,%eax
	sbbl	%eax,FPU_accum_2
	movl	FPU_fsqrt_arg_2,%eax		/* ms word of normalized n */
	sbbl	%eax,FPU_accum_3
	jnc	sqrt_stage_3_positive

/* Subtraction gives a negative result,
   negate the result before division */
	notl	FPU_accum_1
	notl	FPU_accum_2
	notl	FPU_accum_3
	addl	$1,FPU_accum_1
	adcl	$0,FPU_accum_2

#ifdef PARANOID
	adcl	$0,FPU_accum_3	/* This must be zero */
	jz	sqrt_stage_3_no_error

sqrt_stage_3_error:
	pushl	EX_INTERNAL|0x207
	call	EXCEPTION

sqrt_stage_3_no_error:
#endif /* PARANOID */

	movl	FPU_accum_2,%edx
	movl	FPU_accum_1,%eax
	divl	%esi
	movl	%eax,%ecx	/* ms word of the correction */

	movl	%edx,%eax	/* carry on with the remainder */
	divl	%esi		/* %eax = ls word of the correction */

	sarl	$1,%ecx		/* divide by 2 */
	rcrl	$1,%eax

	/* prepare to round the result */

	addl	%ecx,%edi	/* guess was too small: add correction */
	adcl	$0,%esi

	jmp	sqrt_stage_3_finished

sqrt_stage_3_positive:
	movl	FPU_accum_2,%edx
	movl	FPU_accum_1,%eax
	divl	%esi
	movl	%eax,%ecx	/* ms word of the correction */

	movl	%edx,%eax	/* carry on with the remainder */
	divl	%esi		/* %eax = ls word of the correction */

	sarl	$1,%ecx		/* divide by 2 */
	rcrl	$1,%eax

	/* prepare to round the result */

	notl	%eax		/* Negate the correction term */
	notl	%ecx
	addl	$1,%eax
	adcl	$0,%ecx		/* carry here ==> correction == 0 */
	adcl	$0xffffffff,%esi	/* sign-extend the negative term */

	addl	%ecx,%edi
	adcl	$0,%esi

sqrt_stage_3_finished:

/*
 * The result in %esi:%edi:%eax should be good to about 90 bits here,
 * and the rounding information here does not have sufficient accuracy
 * in a few rare cases.
 */
	/* Extension within 0x20 of 0 (from either side): exactness unclear */
	cmpl	$0xffffffe0,%eax
	ja	sqrt_near_exact_x

	cmpl	$0x00000020,%eax
	jb	sqrt_near_exact

	cmpl	$0x7fffffe0,%eax
	jb	sqrt_round_result

	/* Extension within 0x20 of half way: tie unclear */
	cmpl	$0x80000020,%eax
	jb	sqrt_get_more_precision

sqrt_round_result:
/* Set up for rounding operations */
	movl	%eax,%edx		/* extension */
	movl	%esi,%eax		/* ms word of the root */
	movl	%edi,%ebx		/* ls word of the root */
	movl	PARAM1,%edi
	movw	EXP_BIAS,EXP(%edi)	/* Result is in  [1.0 .. 2.0) */
	jmp	fpu_reg_round


sqrt_near_exact_x:
/* First, the estimate must be rounded up. */
	addl	$1,%edi
	adcl	$0,%esi

sqrt_near_exact:
/*
 * This is an easy case because x^1/2 is monotonic.
 * We need just find the square of our estimate, compare it
 * with the argument, and deduce whether our estimate is
 * above, below, or exact. We use the fact that the estimate
 * is known to be accurate to about 90 bits.
 */
	movl	%edi,%eax		/* ls word of guess */
	mull	%edi
	movl	%edx,%ebx		/* 2nd ls word of square */
	movl	%eax,%ecx		/* ls word of square */

	movl	%edi,%eax
	mull	%esi
	addl	%eax,%ebx		/* add the cross product twice */
	addl	%eax,%ebx

#ifdef PARANOID
	/*
	 * NOTE(review): as written, one of the two branches below is always
	 * taken (a value that is not below 0xffffffb0 is above 0x50), so
	 * EXCEPTION is never reached from here.  Left unchanged; confirm the
	 * intended bounds before enabling the check.
	 */
	cmp	$0xffffffb0,%ebx
	jb	sqrt_near_exact_ok

	cmp	$0x00000050,%ebx
	ja	sqrt_near_exact_ok

	pushl	EX_INTERNAL|0x214
	call	EXCEPTION

sqrt_near_exact_ok:
#endif /* PARANOID */

	or	%ebx,%ebx
	js	sqrt_near_exact_small

	jnz	sqrt_near_exact_large

	/*
	 * %ebx is zero here, so the decision rests on the ls word of the
	 * square, which was saved in %ecx.  (%edx holds the high word of the
	 * cross product and says nothing about exactness: testing it
	 * reported the estimate 0x80000000:00000001 as exact although its
	 * square has a non-zero ls word.)  Same test as the
	 * sqrt_get_more_precision path.
	 */
	or	%ebx,%ecx
	jnz	sqrt_near_exact_large

/* Our estimate is exactly the right answer */
	xorl	%eax,%eax
	jmp	sqrt_round_result

sqrt_near_exact_small:
/* Our estimate is too small */
	movl	$0x000000ff,%eax
	jmp	sqrt_round_result

sqrt_near_exact_large:
/* Our estimate is too large, we need to decrement it */
	subl	$1,%edi
	sbbl	$0,%esi
	movl	$0xffffff00,%eax
	jmp	sqrt_round_result


sqrt_get_more_precision:
/* This case is almost the same as the above, except we start
   with an extra bit of precision in the estimate. */
	stc			/* The extra bit. */
	rcll	$1,%edi		/* Shift the estimate left one bit */
	rcll	$1,%esi		/* (the ms bit is shifted out) */

	movl	%edi,%eax		/* ls word of guess */
	mull	%edi
	movl	%edx,%ebx		/* 2nd ls word of square */
	movl	%eax,%ecx		/* ls word of square */

	movl	%edi,%eax
	mull	%esi
	addl	%eax,%ebx		/* add the cross product twice */
	addl	%eax,%ebx

/* Put our estimate back to its original value */
	stc			/* The ms bit. */
	rcrl	$1,%esi		/* Shift the estimate right one bit */
	rcrl	$1,%edi		/* (the extra bit drops out again) */

#ifdef PARANOID
	/*
	 * NOTE(review): same shape as the 0x214 check above -- one of the
	 * two branches is always taken, so EXCEPTION is never reached.
	 * Left unchanged; confirm the intended bounds.
	 */
	cmp	$0xffffff60,%ebx
	jb	sqrt_more_prec_ok

	cmp	$0x000000a0,%ebx
	ja	sqrt_more_prec_ok

	pushl	EX_INTERNAL|0x215
	call	EXCEPTION

sqrt_more_prec_ok:
#endif /* PARANOID */

	or	%ebx,%ebx
	js	sqrt_more_prec_small

	jnz	sqrt_more_prec_large

	or	%ebx,%ecx		/* %ebx == 0: test ls word of square */
	jnz	sqrt_more_prec_large

/* Our estimate is exactly the right answer */
	movl	$0x80000000,%eax
	jmp	sqrt_round_result

sqrt_more_prec_small:
/* Our estimate is too small */
	movl	$0x800000ff,%eax
	jmp	sqrt_round_result

sqrt_more_prec_large:
/* Our estimate is too large */
	movl	$0x7fffff00,%eax
	jmp	sqrt_round_result
SYM_FUNC_END(wm_sqrt)
473

source code of linux/arch/x86/math-emu/wm_sqrt.S