1 | /* SPDX-License-Identifier: GPL-2.0 */ |
2 | .file "wm_sqrt.S" |
3 | /*---------------------------------------------------------------------------+ |
4 | | wm_sqrt.S | |
5 | | | |
6 | | Fixed point arithmetic square root evaluation. | |
7 | | | |
8 | | Copyright (C) 1992,1993,1995,1997 | |
9 | | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | |
10 | | Australia. E-mail billm@suburbia.net | |
11 | | | |
12 | | Call from C as: | |
13 | | int wm_sqrt(FPU_REG *n, unsigned int control_word) | |
14 | | | |
15 | +---------------------------------------------------------------------------*/ |
16 | |
17 | /*---------------------------------------------------------------------------+ |
18 | | wm_sqrt(FPU_REG *n, unsigned int control_word) | |
19 | | returns the square root of n in n. | |
20 | | | |
21 | | Use Newton's method to compute the square root of a number, which must | |
22 | | be in the range [1.0 .. 4.0), to 64 bits accuracy. | |
23 | | Does not check the sign or tag of the argument. | |
24 | | Sets the exponent, but not the sign or tag of the result. | |
25 | | | |
26 | | The guess is kept in %esi:%edi | |
27 | +---------------------------------------------------------------------------*/ |
28 | |
29 | #include "exception.h" |
30 | #include "fpu_emu.h" |
31 | |
32 | |
33 | #ifndef NON_REENTRANT_FPU |
34 | /* Local storage on the stack: */ |
35 | #define FPU_accum_3 -4(%ebp) /* ms word */ |
36 | #define FPU_accum_2 -8(%ebp) |
37 | #define FPU_accum_1 -12(%ebp) |
38 | #define FPU_accum_0 -16(%ebp) |
39 | |
40 | /* |
41 | * The de-normalised argument: |
42 | * sq_2 sq_1 sq_0 |
43 | * b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0 |
44 | * ^ binary point here |
45 | */ |
46 | #define FPU_fsqrt_arg_2 -20(%ebp) /* ms word */ |
47 | #define FPU_fsqrt_arg_1 -24(%ebp) |
48 | #define FPU_fsqrt_arg_0 -28(%ebp) /* ls word, at most the ms bit is set */ |
49 | |
50 | #else |
51 | /* Local storage in a static area: */ |
52 | .data |
53 | .align 4,0 |
54 | FPU_accum_3: |
55 | .long 0 /* ms word */ |
56 | FPU_accum_2: |
57 | .long 0 |
58 | FPU_accum_1: |
59 | .long 0 |
60 | FPU_accum_0: |
61 | .long 0 |
62 | |
63 | /* The de-normalised argument: |
64 | sq_2 sq_1 sq_0 |
65 | b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0 |
66 | ^ binary point here |
67 | */ |
68 | FPU_fsqrt_arg_2: |
69 | .long 0 /* ms word */ |
70 | FPU_fsqrt_arg_1: |
71 | .long 0 |
72 | FPU_fsqrt_arg_0: |
73 | .long 0 /* ls word, at most the ms bit is set */ |
74 | #endif /* NON_REENTRANT_FPU */ |
75 | |
76 | |
77 | .text |
78 | SYM_FUNC_START(wm_sqrt) |
79 | pushl %ebp |
80 | movl %esp,%ebp |
81 | #ifndef NON_REENTRANT_FPU |
82 | subl $28,%esp |
83 | #endif /* NON_REENTRANT_FPU */ |
84 | pushl %esi |
85 | pushl %edi |
86 | pushl %ebx |
87 | |
88 | movl PARAM1,%esi |
89 | |
90 | movl SIGH(%esi),%eax |
91 | movl SIGL(%esi),%ecx |
92 | xorl %edx,%edx |
93 | |
94 | /* We use a rough linear estimate for the first guess.. */ |
95 | |
96 | cmpw EXP_BIAS,EXP(%esi) |
97 | jnz sqrt_arg_ge_2 |
98 | |
99 | shrl $1,%eax /* arg is in the range [1.0 .. 2.0) */ |
100 | rcrl $1,%ecx |
101 | rcrl $1,%edx |
102 | |
103 | sqrt_arg_ge_2: |
104 | /* From here on, n is never accessed directly again until it is |
105 | replaced by the answer. */ |
106 | |
107 | movl %eax,FPU_fsqrt_arg_2 /* ms word of n */ |
108 | movl %ecx,FPU_fsqrt_arg_1 |
109 | movl %edx,FPU_fsqrt_arg_0 |
110 | |
111 | /* Make a linear first estimate */ |
112 | shrl $1,%eax |
113 | addl $0x40000000,%eax |
114 | movl $0xaaaaaaaa,%ecx |
115 | mull %ecx |
116 | shll %edx /* max result was 7fff... */ |
117 | testl $0x80000000,%edx /* but min was 3fff... */ |
118 | jnz sqrt_prelim_no_adjust |
119 | |
120 | movl $0x80000000,%edx /* round up */ |
121 | |
122 | sqrt_prelim_no_adjust: |
123 | movl %edx,%esi /* Our first guess */ |
124 | |
125 | /* We have now computed (approx) (2 + x) / 3, which forms the basis |
126 | for a few iterations of Newton's method */ |
127 | |
128 | movl FPU_fsqrt_arg_2,%ecx /* ms word */ |
129 | |
130 | /* |
131 | * From our initial estimate, three iterations are enough to get us |
132 | * to 30 bits or so. This will then allow two iterations at better |
133 | * precision to complete the process. |
134 | */ |
135 | |
136 | /* Compute (g + n/g)/2 at each iteration (g is the guess). */ |
137 | shrl %ecx /* Doing this first will prevent a divide */ |
138 | /* overflow later. */ |
139 | |
140 | movl %ecx,%edx /* msw of the arg / 2 */ |
141 | divl %esi /* current estimate */ |
142 | shrl %esi /* divide by 2 */ |
143 | addl %eax,%esi /* the new estimate */ |
144 | |
145 | movl %ecx,%edx |
146 | divl %esi |
147 | shrl %esi |
148 | addl %eax,%esi |
149 | |
150 | movl %ecx,%edx |
151 | divl %esi |
152 | shrl %esi |
153 | addl %eax,%esi |
154 | |
155 | /* |
156 | * Now that an estimate accurate to about 30 bits has been obtained (in %esi), |
157 | * we improve it to 60 bits or so. |
158 | * |
159 | * The strategy from now on is to compute new estimates from |
160 | * guess := guess + (n - guess^2) / (2 * guess) |
161 | */ |
162 | |
163 | /* First, find the square of the guess */ |
164 | movl %esi,%eax |
165 | mull %esi |
166 | /* guess^2 now in %edx:%eax */ |
167 | |
168 | movl FPU_fsqrt_arg_1,%ecx |
169 | subl %ecx,%eax |
170 | movl FPU_fsqrt_arg_2,%ecx /* ms word of normalized n */ |
171 | sbbl %ecx,%edx |
172 | jnc sqrt_stage_2_positive |
173 | |
174 | /* Subtraction gives a negative result, |
175 | negate the result before division. */ |
176 | notl %edx |
177 | notl %eax |
178 | addl $1,%eax |
179 | adcl $0,%edx |
180 | |
181 | divl %esi |
182 | movl %eax,%ecx |
183 | |
184 | movl %edx,%eax |
185 | divl %esi |
186 | jmp sqrt_stage_2_finish |
187 | |
188 | sqrt_stage_2_positive: |
189 | divl %esi |
190 | movl %eax,%ecx |
191 | |
192 | movl %edx,%eax |
193 | divl %esi |
194 | |
195 | notl %ecx |
196 | notl %eax |
197 | addl $1,%eax |
198 | adcl $0,%ecx |
199 | |
200 | sqrt_stage_2_finish: |
201 | sarl $1,%ecx /* divide by 2 */ |
202 | rcrl $1,%eax |
203 | |
204 | /* Form the new estimate in %esi:%edi */ |
205 | movl %eax,%edi |
206 | addl %ecx,%esi |
207 | |
208 | jnz sqrt_stage_2_done /* result should be [1..2) */ |
209 | |
210 | #ifdef PARANOID |
211 | /* It should be possible to get here only if the arg is ffff....ffff */ |
212 | cmpl $0xffffffff,FPU_fsqrt_arg_1 |
213 | jnz sqrt_stage_2_error |
214 | #endif /* PARANOID */ |
215 | |
216 | /* The best rounded result. */ |
217 | xorl %eax,%eax |
218 | decl %eax |
219 | movl %eax,%edi |
220 | movl %eax,%esi |
221 | movl $0x7fffffff,%eax |
222 | jmp sqrt_round_result |
223 | |
224 | #ifdef PARANOID |
225 | sqrt_stage_2_error: |
226 | pushl EX_INTERNAL|0x213 |
227 | call EXCEPTION |
228 | #endif /* PARANOID */ |
229 | |
230 | sqrt_stage_2_done: |
231 | |
232 | /* Now the square root has been computed to better than 60 bits. */ |
233 | |
234 | /* Find the square of the guess. */ |
235 | movl %edi,%eax /* ls word of guess */ |
236 | mull %edi |
237 | movl %edx,FPU_accum_1 |
238 | |
239 | movl %esi,%eax |
240 | mull %esi |
241 | movl %edx,FPU_accum_3 |
242 | movl %eax,FPU_accum_2 |
243 | |
244 | movl %edi,%eax |
245 | mull %esi |
246 | addl %eax,FPU_accum_1 |
247 | adcl %edx,FPU_accum_2 |
248 | adcl $0,FPU_accum_3 |
249 | |
250 | /* movl %esi,%eax */ |
251 | /* mull %edi */ |
252 | addl %eax,FPU_accum_1 |
253 | adcl %edx,FPU_accum_2 |
254 | adcl $0,FPU_accum_3 |
255 | |
256 | /* guess^2 now in FPU_accum_3:FPU_accum_2:FPU_accum_1 */ |
257 | |
258 | movl FPU_fsqrt_arg_0,%eax /* get normalized n */ |
259 | subl %eax,FPU_accum_1 |
260 | movl FPU_fsqrt_arg_1,%eax |
261 | sbbl %eax,FPU_accum_2 |
262 | movl FPU_fsqrt_arg_2,%eax /* ms word of normalized n */ |
263 | sbbl %eax,FPU_accum_3 |
264 | jnc sqrt_stage_3_positive |
265 | |
266 | /* Subtraction gives a negative result, |
267 | negate the result before division */ |
268 | notl FPU_accum_1 |
269 | notl FPU_accum_2 |
270 | notl FPU_accum_3 |
271 | addl $1,FPU_accum_1 |
272 | adcl $0,FPU_accum_2 |
273 | |
274 | #ifdef PARANOID |
275 | adcl $0,FPU_accum_3 /* This must be zero */ |
276 | jz sqrt_stage_3_no_error |
277 | |
278 | sqrt_stage_3_error: |
279 | pushl EX_INTERNAL|0x207 |
280 | call EXCEPTION |
281 | |
282 | sqrt_stage_3_no_error: |
283 | #endif /* PARANOID */ |
284 | |
285 | movl FPU_accum_2,%edx |
286 | movl FPU_accum_1,%eax |
287 | divl %esi |
288 | movl %eax,%ecx |
289 | |
290 | movl %edx,%eax |
291 | divl %esi |
292 | |
293 | sarl $1,%ecx /* divide by 2 */ |
294 | rcrl $1,%eax |
295 | |
296 | /* prepare to round the result */ |
297 | |
298 | addl %ecx,%edi |
299 | adcl $0,%esi |
300 | |
301 | jmp sqrt_stage_3_finished |
302 | |
303 | sqrt_stage_3_positive: |
304 | movl FPU_accum_2,%edx |
305 | movl FPU_accum_1,%eax |
306 | divl %esi |
307 | movl %eax,%ecx |
308 | |
309 | movl %edx,%eax |
310 | divl %esi |
311 | |
312 | sarl $1,%ecx /* divide by 2 */ |
313 | rcrl $1,%eax |
314 | |
315 | /* prepare to round the result */ |
316 | |
317 | notl %eax /* Negate the correction term */ |
318 | notl %ecx |
319 | addl $1,%eax |
320 | adcl $0,%ecx /* carry here ==> correction == 0 */ |
321 | adcl $0xffffffff,%esi |
322 | |
323 | addl %ecx,%edi |
324 | adcl $0,%esi |
325 | |
326 | sqrt_stage_3_finished: |
327 | |
328 | /* |
329 | * The result in %esi:%edi:%esi should be good to about 90 bits here, |
330 | * and the rounding information here does not have sufficient accuracy |
331 | * in a few rare cases. |
332 | */ |
333 | cmpl $0xffffffe0,%eax |
334 | ja sqrt_near_exact_x |
335 | |
336 | cmpl $0x00000020,%eax |
337 | jb sqrt_near_exact |
338 | |
339 | cmpl $0x7fffffe0,%eax |
340 | jb sqrt_round_result |
341 | |
342 | cmpl $0x80000020,%eax |
343 | jb sqrt_get_more_precision |
344 | |
345 | sqrt_round_result: |
346 | /* Set up for rounding operations */ |
347 | movl %eax,%edx |
348 | movl %esi,%eax |
349 | movl %edi,%ebx |
350 | movl PARAM1,%edi |
351 | movw EXP_BIAS,EXP(%edi) /* Result is in [1.0 .. 2.0) */ |
352 | jmp fpu_reg_round |
353 | |
354 | |
355 | sqrt_near_exact_x: |
356 | /* First, the estimate must be rounded up. */ |
357 | addl $1,%edi |
358 | adcl $0,%esi |
359 | |
360 | sqrt_near_exact: |
361 | /* |
362 | * This is an easy case because x^1/2 is monotonic. |
363 | * We need just find the square of our estimate, compare it |
364 | * with the argument, and deduce whether our estimate is |
365 | * above, below, or exact. We use the fact that the estimate |
366 | * is known to be accurate to about 90 bits. |
367 | */ |
368 | movl %edi,%eax /* ls word of guess */ |
369 | mull %edi |
370 | movl %edx,%ebx /* 2nd ls word of square */ |
371 | movl %eax,%ecx /* ls word of square */ |
372 | |
373 | movl %edi,%eax |
374 | mull %esi |
375 | addl %eax,%ebx |
376 | addl %eax,%ebx |
377 | |
378 | #ifdef PARANOID |
379 | cmp $0xffffffb0,%ebx |
380 | jb sqrt_near_exact_ok |
381 | |
382 | cmp $0x00000050,%ebx |
383 | ja sqrt_near_exact_ok |
384 | |
385 | pushl EX_INTERNAL|0x214 |
386 | call EXCEPTION |
387 | |
388 | sqrt_near_exact_ok: |
389 | #endif /* PARANOID */ |
390 | |
391 | or %ebx,%ebx |
392 | js sqrt_near_exact_small |
393 | |
394 | jnz sqrt_near_exact_large |
395 | |
396 | or %ebx,%edx |
397 | jnz sqrt_near_exact_large |
398 | |
399 | /* Our estimate is exactly the right answer */ |
400 | xorl %eax,%eax |
401 | jmp sqrt_round_result |
402 | |
403 | sqrt_near_exact_small: |
404 | /* Our estimate is too small */ |
405 | movl $0x000000ff,%eax |
406 | jmp sqrt_round_result |
407 | |
408 | sqrt_near_exact_large: |
409 | /* Our estimate is too large, we need to decrement it */ |
410 | subl $1,%edi |
411 | sbbl $0,%esi |
412 | movl $0xffffff00,%eax |
413 | jmp sqrt_round_result |
414 | |
415 | |
416 | sqrt_get_more_precision: |
417 | /* This case is almost the same as the above, except we start |
418 | with an extra bit of precision in the estimate. */ |
419 | stc /* The extra bit. */ |
420 | rcll $1,%edi /* Shift the estimate left one bit */ |
421 | rcll $1,%esi |
422 | |
423 | movl %edi,%eax /* ls word of guess */ |
424 | mull %edi |
425 | movl %edx,%ebx /* 2nd ls word of square */ |
426 | movl %eax,%ecx /* ls word of square */ |
427 | |
428 | movl %edi,%eax |
429 | mull %esi |
430 | addl %eax,%ebx |
431 | addl %eax,%ebx |
432 | |
433 | /* Put our estimate back to its original value */ |
434 | stc /* The ms bit. */ |
435 | rcrl $1,%esi /* Shift the estimate left one bit */ |
436 | rcrl $1,%edi |
437 | |
438 | #ifdef PARANOID |
439 | cmp $0xffffff60,%ebx |
440 | jb sqrt_more_prec_ok |
441 | |
442 | cmp $0x000000a0,%ebx |
443 | ja sqrt_more_prec_ok |
444 | |
445 | pushl EX_INTERNAL|0x215 |
446 | call EXCEPTION |
447 | |
448 | sqrt_more_prec_ok: |
449 | #endif /* PARANOID */ |
450 | |
451 | or %ebx,%ebx |
452 | js sqrt_more_prec_small |
453 | |
454 | jnz sqrt_more_prec_large |
455 | |
456 | or %ebx,%ecx |
457 | jnz sqrt_more_prec_large |
458 | |
459 | /* Our estimate is exactly the right answer */ |
460 | movl $0x80000000,%eax |
461 | jmp sqrt_round_result |
462 | |
463 | sqrt_more_prec_small: |
464 | /* Our estimate is too small */ |
465 | movl $0x800000ff,%eax |
466 | jmp sqrt_round_result |
467 | |
468 | sqrt_more_prec_large: |
469 | /* Our estimate is too large */ |
470 | movl $0x7fffff00,%eax |
471 | jmp sqrt_round_result |
472 | SYM_FUNC_END(wm_sqrt) |
473 | |