/* SPDX-License-Identifier: GPL-2.0 */
	.file	"reg_round.S"
/*---------------------------------------------------------------------------+
 | reg_round.S                                                               |
 |                                                                           |
 | Rounding/truncation/etc for FPU basic arithmetic functions.               |
 |                                                                           |
 | Copyright (C) 1993,1995,1997                                              |
 |                   W. Metzenthen, 22 Parker St, Ormond, Vic 3163,          |
 |                   Australia. E-mail billm@suburbia.net                    |
 |                                                                           |
 | This code has four possible entry points.                                 |
 | The following must be entered by a jmp instruction:                       |
 | fpu_reg_round, fpu_reg_round_sqrt, and fpu_Arith_exit.                    |
 | NOTE(review): this file defines FPU_round, fpu_reg_round and              |
 | fpu_Arith_exit only; there is no fpu_reg_round_sqrt label here.           |
 |                                                                           |
 | The FPU_round entry point is intended to be used by C code.               |
 | From C, call as:                                                          |
 | int FPU_round(FPU_REG *arg, unsigned int extent, unsigned int control_w)  |
 | NOTE(review): the code reads the control word from PARAM4 and the         |
 | sign from PARAM5, so the real prototype presumably takes five             |
 | arguments; confirm against the C declaration.                             |
 |                                                                           |
 | Return value is the tag of the answer, or-ed with FPU_Exception if        |
 | one was raised, or -1 on internal error.                                  |
 |                                                                           |
 | For correct "up" and "down" rounding, the argument must have the correct  |
 | sign.                                                                     |
 |                                                                           |
 +---------------------------------------------------------------------------*/
27
/*---------------------------------------------------------------------------+
 | Four entry points.                                                        |
 |                                                                           |
 | Needed by both the fpu_reg_round and fpu_reg_round_sqrt entry points:     |
 |   %eax:%ebx  64 bit significand                                           |
 |   %edx       32 bit extension of the significand                          |
 |   %edi       pointer to an FPU_REG for the result to be stored            |
 |   stack      calling function must have set up a C stack frame and        |
 |              pushed %esi, %edi, and %ebx                                  |
 |                                                                           |
 | Needed just for the fpu_reg_round_sqrt entry point:                       |
 |   %cx  A control word in the same format as the FPU control word.         |
 | Otherwise, PARAM4 must give such a value.                                 |
 |                                                                           |
 |                                                                           |
 | The significand and its extension are assumed to be exact in the          |
 | following sense:                                                          |
 |   If the significand by itself is the exact result then the significand   |
 |   extension (%edx) must contain 0, otherwise the significand extension    |
 |   must be non-zero.                                                       |
 |   If the significand extension is non-zero then the significand is        |
 |   smaller than the magnitude of the correct exact result by an amount     |
 |   greater than zero and less than one ls bit of the significand.          |
 |   The significand extension is only required to have three possible       |
 |   non-zero values:                                                        |
 |     less than 0x80000000    <=> the significand is less than 1/2 an ls    |
 |                                 bit smaller than the magnitude of the     |
 |                                 true exact result.                        |
 |     exactly 0x80000000      <=> the significand is exactly 1/2 an ls bit  |
 |                                 smaller than the magnitude of the true    |
 |                                 exact result.                             |
 |     greater than 0x80000000 <=> the significand is more than 1/2 an ls    |
 |                                 bit smaller than the magnitude of the     |
 |                                 true exact result.                        |
 |                                                                           |
 +---------------------------------------------------------------------------*/
64
/*---------------------------------------------------------------------------+
 | The code in this module has become quite complex, but it should handle    |
 | all of the FPU flags which are set at this stage of the basic arithmetic  |
 | computations.                                                             |
 | There are a few rare cases where the results are not set identically to   |
 | a real FPU. These require a bit more thought because at this stage the    |
 | results of the code here appear to be more consistent...                  |
 | This may be changed in a future version.                                  |
 +---------------------------------------------------------------------------*/
74
75
#include "fpu_emu.h"
#include "exception.h"
#include "control_w.h"

/*
 * Flags for FPU_bits_lost.
 * The byte is cleared at Denorm_done and then set to one of these values
 * by the rounding code: LOST_DOWN when discarded bits were truncated,
 * LOST_UP when the significand was incremented.
 * The leading '$' makes each macro an AT&T immediate operand.
 */
#define LOST_DOWN		$1
#define LOST_UP			$2

/*
 * Flags for FPU_denormal.
 * 0 means the operand was not below the underflow threshold on entry;
 * DENORMAL means it was shifted right by L_Make_denorm (masked underflow);
 * UNMASKED_UNDERFLOW means it was left unshifted because the underflow
 * mask bit in the control word was clear.
 */
#define DENORMAL		$1
#define UNMASKED_UNDERFLOW	$2
87
88
#ifndef NON_REENTRANT_FPU
/*
 * Make the code re-entrant by putting local storage on the stack.
 *
 * The two byte-sized locals occupy the dword that fpu_reg_round reserves
 * with "pushl %ebx" and releases at fpu_reg_round_special_exit.
 * Because they are addressed relative to %esp, they may only be referenced
 * while %esp is at that level, i.e. never between a push and its
 * matching pop.
 */
#define FPU_bits_lost	(%esp)
#define FPU_denormal	1(%esp)

#else
/*
 * Not re-entrant, so we can gain speed by putting local storage in a
 * static area.
 */
.data
	.align 4,0
FPU_bits_lost:
	.byte	0
FPU_denormal:
	.byte	0
#endif /* NON_REENTRANT_FPU */


.text
/* Entry points that other code reaches with a jmp (see the file header). */
.globl fpu_reg_round
.globl fpu_Arith_exit
110
/*
 * FPU_round: entry point when called from C.
 *
 * Builds a frame, saves %esi, %edi and %ebx, then loads the register
 * state that fpu_reg_round expects and falls through into it:
 *   %edi       = PARAM1, pointer to the register being rounded
 *   %eax:%ebx  = SIGH:SIGL of that register (64 bit significand)
 *   %edx       = PARAM2, the significand extension
 */
SYM_FUNC_START(FPU_round)
	pushl	%ebp
	movl	%esp,%ebp
	pushl	%esi
	pushl	%edi
	pushl	%ebx

	movl	PARAM1,%edi
	movl	SIGH(%edi),%eax
	movl	SIGL(%edi),%ebx
	movl	PARAM2,%edx

/*
 * fpu_reg_round: normal entry point, reached by jmp with the registers
 * and stack frame described in the file header already set up.
 *
 * Register use from here on:
 *   %ecx  control word taken from PARAM4, later used as scratch
 *   %esi  untouched copy of the control word (set at Denorm_done)
 */
fpu_reg_round:			/* Normal entry point */
	movl	PARAM4,%ecx

#ifndef NON_REENTRANT_FPU
	/* Reserve one dword for FPU_bits_lost/FPU_denormal; the value
	   pushed is irrelevant. */
	pushl	%ebx		/* adjust the stack pointer */
#endif /* NON_REENTRANT_FPU */

#ifdef PARANOID
/* Cannot use this here yet */
/*	orl	%eax,%eax */
/*	jns	L_entry_bugged */
#endif /* PARANOID */

	/* Signed compare: exponents at or below EXP_UNDER need the
	   de-normalising shift (or the unmasked underflow response). */
	cmpw	EXP_UNDER,EXP(%edi)
	jle	L_Make_denorm			/* The number is a de-normal */

	movb	$0,FPU_denormal			/* 0 -> not a de-normal */

/* L_Make_denorm and Unmasked_underflow rejoin here with %ecx holding the
   control word again. */
Denorm_done:
	movb	$0,FPU_bits_lost		/* No bits yet lost in rounding */

	/* Keep the full control word in %esi; %ecx is reduced to the
	   precision-control field for the dispatch below. */
	movl	%ecx,%esi
	andl	CW_PC,%ecx
	cmpl	PR_64_BITS,%ecx
	je	LRound_To_64

	cmpl	PR_53_BITS,%ecx
	je	LRound_To_53

	cmpl	PR_24_BITS,%ecx
	je	LRound_To_24

	/*
	 * No match so far.  Unless PARANOID is defined (which reports an
	 * internal error), execution falls through into the 24 bit
	 * rounding code that follows.
	 */
#ifdef PECULIAR_486
/* With the precision control bits set to 01 "(reserved)", a real 80486
   behaves as if the precision control bits were set to 11 "64 bits" */
	cmpl	PR_RESERVED_BITS,%ecx
	je	LRound_To_64
#ifdef PARANOID
	jmp	L_bugged_denorm_486
#endif /* PARANOID */
#else
#ifdef PARANOID
	jmp	L_bugged_denorm	/* There is no bug, just a bad control word */
#endif /* PARANOID */
#endif /* PECULIAR_486 */
170
/*
 * Round etc to 24 bit precision.
 *
 * The result keeps bits 31..8 of %eax.  The discarded part is the low
 * 8 bits of %eax together with all of %ebx and the extension %edx.
 *
 * In:   %eax:%ebx significand, %edx extension, %esi control word,
 *       PARAM5 sign of the result
 * Out:  %eax:%ebx rounded significand, FPU_bits_lost updated
 * Uses: %ecx as scratch
 * Exits to L_Re_normalise, or to LCheck_Round_Overflow after an
 * increment so that a carry out of %eax can be handled.
 */
LRound_To_24:
	/* Dispatch on the rounding-control field of the control word */
	movl	%esi,%ecx
	andl	CW_RC,%ecx
	cmpl	RC_RND,%ecx
	je	LRound_nearest_24

	cmpl	RC_CHOP,%ecx
	je	LCheck_truncate_24

	cmpl	RC_UP,%ecx		/* Towards +infinity */
	je	LUp_24

	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
	je	LDown_24

#ifdef PARANOID
	jmp	L_bugged_round24
#endif /* PARANOID */

LUp_24:
	cmpb	SIGN_POS,PARAM5
	jne	LCheck_truncate_24	/* If negative then  up==truncate */

	jmp	LCheck_24_round_up

LDown_24:
	cmpb	SIGN_POS,PARAM5
	je	LCheck_truncate_24	/* If positive then  down==truncate */

/* Directed rounding away from zero: increment if any discarded bit is set */
LCheck_24_round_up:
	movl	%eax,%ecx
	andl	$0x000000ff,%ecx
	orl	%ebx,%ecx
	orl	%edx,%ecx
	jnz	LDo_24_round_up
	jmp	L_Re_normalise		/* Exact: nothing to do */

LRound_nearest_24:
	/* Do rounding of the 24th bit if needed (nearest or even) */
	movl	%eax,%ecx
	andl	$0x000000ff,%ecx
	cmpl	$0x00000080,%ecx
	jc	LCheck_truncate_24	/* less than half, no increment needed */

	/* Still using the flags of the cmpl above */
	jne	LGreater_Half_24	/* greater than half, increment needed */

	/* Possibly half, we need to check the ls bits */
	orl	%ebx,%ebx
	jnz	LGreater_Half_24	/* greater than half, increment needed */

	orl	%edx,%edx
	jnz	LGreater_Half_24	/* greater than half, increment needed */

	/* Exactly half, increment only if 24th bit is 1 (round to even) */
	testl	$0x00000100,%eax
	jz	LDo_truncate_24

LGreater_Half_24:	/* Rounding: increment at the 24th bit */
LDo_24_round_up:
	andl	$0xffffff00,%eax	/* Truncate to 24 bits */
	xorl	%ebx,%ebx
	movb	LOST_UP,FPU_bits_lost
	/* The carry flag from this add is tested at LCheck_Round_Overflow */
	addl	$0x00000100,%eax
	jmp	LCheck_Round_Overflow

/* Truncating modes: only record a loss if a discarded bit is set */
LCheck_truncate_24:
	movl	%eax,%ecx
	andl	$0x000000ff,%ecx
	orl	%ebx,%ecx
	orl	%edx,%ecx
	jz	L_Re_normalise		/* No truncation needed */

LDo_truncate_24:
	andl	$0xffffff00,%eax	/* Truncate to 24 bits */
	xorl	%ebx,%ebx
	movb	LOST_DOWN,FPU_bits_lost
	jmp	L_Re_normalise
250
/*
 * Round etc to 53 bit precision.
 *
 * The result keeps all of %eax and bits 31..11 of %ebx.  The discarded
 * part is the low 11 bits of %ebx together with the extension %edx.
 *
 * In:   %eax:%ebx significand, %edx extension, %esi control word,
 *       PARAM5 sign of the result
 * Out:  %eax:%ebx rounded significand, FPU_bits_lost updated
 * Uses: %ecx as scratch
 * Exits to L_Re_normalise, or to LCheck_Round_Overflow after an
 * increment so that a carry out of %eax can be handled.
 */
LRound_To_53:
	/* Dispatch on the rounding-control field of the control word */
	movl	%esi,%ecx
	andl	CW_RC,%ecx
	cmpl	RC_RND,%ecx
	je	LRound_nearest_53

	cmpl	RC_CHOP,%ecx
	je	LCheck_truncate_53

	cmpl	RC_UP,%ecx		/* Towards +infinity */
	je	LUp_53

	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
	je	LDown_53

#ifdef PARANOID
	jmp	L_bugged_round53
#endif /* PARANOID */

LUp_53:
	cmpb	SIGN_POS,PARAM5
	jne	LCheck_truncate_53	/* If negative then  up==truncate */

	jmp	LCheck_53_round_up

LDown_53:
	cmpb	SIGN_POS,PARAM5
	je	LCheck_truncate_53	/* If positive then  down==truncate */

/* Directed rounding away from zero: increment if any discarded bit is set */
LCheck_53_round_up:
	movl	%ebx,%ecx
	andl	$0x000007ff,%ecx
	orl	%edx,%ecx
	jnz	LDo_53_round_up
	jmp	L_Re_normalise		/* Exact: nothing to do */

LRound_nearest_53:
	/* Do rounding of the 53rd bit if needed (nearest or even) */
	movl	%ebx,%ecx
	andl	$0x000007ff,%ecx
	cmpl	$0x00000400,%ecx
	jc	LCheck_truncate_53	/* less than half, no increment needed */

	/* Still using the flags of the cmpl above */
	jnz	LGreater_Half_53	/* greater than half, increment needed */

	/* Possibly half, we need to check the ls bits */
	orl	%edx,%edx
	jnz	LGreater_Half_53	/* greater than half, increment needed */

	/* Exactly half, increment only if 53rd bit is 1 (round to even) */
	testl	$0x00000800,%ebx
	jz	LTruncate_53

LGreater_Half_53:	/* Rounding: increment at the 53rd bit */
LDo_53_round_up:
	movb	LOST_UP,FPU_bits_lost
	andl	$0xfffff800,%ebx	/* Truncate to 53 bits */
	addl	$0x00000800,%ebx
	/* The carry flag from this adc is tested at LCheck_Round_Overflow */
	adcl	$0,%eax
	jmp	LCheck_Round_Overflow

/* Truncating modes: only record a loss if a discarded bit is set */
LCheck_truncate_53:
	movl	%ebx,%ecx
	andl	$0x000007ff,%ecx
	orl	%edx,%ecx
	jz	L_Re_normalise

LTruncate_53:
	movb	LOST_DOWN,FPU_bits_lost
	andl	$0xfffff800,%ebx	/* Truncate to 53 bits */
	jmp	L_Re_normalise
324
/*
 * Round etc to 64 bit precision.
 *
 * All of %eax:%ebx is kept; only the extension %edx is discarded.
 *
 * In:   %eax:%ebx significand, %edx extension, %esi control word,
 *       PARAM5 sign of the result
 * Out:  %eax:%ebx rounded significand, FPU_bits_lost updated,
 *       EXP(%edi) incremented if the increment carried out of %eax
 * Uses: %ecx as scratch
 * Every path ends at L_Re_normalise; LTruncate_64 reaches it by falling
 * off the end of this section.
 */
LRound_To_64:
	/* Dispatch on the rounding-control field of the control word */
	movl	%esi,%ecx
	andl	CW_RC,%ecx
	cmpl	RC_RND,%ecx
	je	LRound_nearest_64

	cmpl	RC_CHOP,%ecx
	je	LCheck_truncate_64

	cmpl	RC_UP,%ecx		/* Towards +infinity */
	je	LUp_64

	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
	je	LDown_64

#ifdef PARANOID
	jmp	L_bugged_round64
#endif /* PARANOID */

LUp_64:
	cmpb	SIGN_POS,PARAM5
	jne	LCheck_truncate_64	/* If negative then  up==truncate */

	/* Positive: increment if anything at all would be discarded */
	orl	%edx,%edx
	jnz	LDo_64_round_up
	jmp	L_Re_normalise

LDown_64:
	cmpb	SIGN_POS,PARAM5
	je	LCheck_truncate_64	/* If positive then  down==truncate */

	/* Negative: increment if anything at all would be discarded */
	orl	%edx,%edx
	jnz	LDo_64_round_up
	jmp	L_Re_normalise

LRound_nearest_64:
	/* Unsigned compare of the extension against exactly one half */
	cmpl	$0x80000000,%edx
	jc	LCheck_truncate_64

	/* Still using the flags of the cmpl above */
	jne	LDo_64_round_up

	/* Now test for round-to-even */
	testb	$1,%bl
	jz	LCheck_truncate_64

LDo_64_round_up:
	movb	LOST_UP,FPU_bits_lost
	addl	$1,%ebx
	adcl	$0,%eax

/*
 * Shared by all three precisions.  Entered with the carry flag holding
 * the carry out of the significand increment.
 */
LCheck_Round_Overflow:
	jnc	L_Re_normalise

	/* Overflow, adjust the result (significand to 1.0) */
	/* The rotates shift the carry back in as the new top bit */
	rcrl	$1,%eax
	rcrl	$1,%ebx
	incw	EXP(%edi)
	jmp	L_Re_normalise

LCheck_truncate_64:
	orl	%edx,%edx
	jz	L_Re_normalise

LTruncate_64:
	movb	LOST_DOWN,FPU_bits_lost
391
/*
 * Common tail of all rounding paths.
 *
 * In:   %eax:%ebx rounded significand, %edi result pointer,
 *       FPU_bits_lost / FPU_denormal as set by the code above
 * Out:  significand and exponent stored through %edi,
 *       %eax = tag of the result (the function's return value)
 */
L_Re_normalise:
	/* Any non-zero FPU_denormal needs the underflow handling first */
	testb	$0xff,FPU_denormal
	jnz	Normalise_result

L_Normalised:
	movl	TAG_Valid,%edx

/* Entered with %edx already holding the tag to return */
L_deNormalised:
	cmpb	LOST_UP,FPU_bits_lost
	je	L_precision_lost_up

	cmpb	LOST_DOWN,FPU_bits_lost
	je	L_precision_lost_down

/* The two precision handlers jump back to this label when done */
L_no_precision_loss:
	/* store the result */

L_Store_significand:
	movl	%eax,SIGH(%edi)
	movl	%ebx,SIGL(%edi)

	/* Signed compare of the exponent against the overflow threshold */
	cmpw	EXP_OVER,EXP(%edi)
	jge	L_overflow

	movl	%edx,%eax		/* The tag becomes the return value */

	/* Convert the exponent to 80x87 form. */
	addw	EXTENDED_Ebias,EXP(%edi)
	andw	$0x7fff,EXP(%edi)

/* L_overflow rejoins here, with %eax left as arith_overflow returned it */
fpu_reg_round_signed_special_exit:

	cmpb	SIGN_POS,PARAM5
	je	fpu_reg_round_special_exit

	orw	$0x8000,EXP(%edi)	/* Negative sign for the result. */

/* The PARANOID error stubs rejoin here with %eax = -1 */
fpu_reg_round_special_exit:

#ifndef NON_REENTRANT_FPU
	/* Release the dword reserved for the locals at fpu_reg_round;
	   the value popped is overwritten by the next pop. */
	popl	%ebx		/* adjust the stack pointer */
#endif /* NON_REENTRANT_FPU */

/*
 * Global exit point.  Expects the frame described in the file header:
 * %ebx, %edi and %esi on top of the stack, above a saved %ebp.
 */
fpu_Arith_exit:
	popl	%ebx
	popl	%edi
	popl	%esi
	leave
	RET
441
442
/*
 * Set the FPU status flags to represent precision loss due to
 * round-up.
 *
 * %edx (result tag) and %eax (ms significand word) are saved around the
 * call and restored before returning to L_no_precision_loss.
 */
L_precision_lost_up:
	push	%edx
	push	%eax
	call	set_precision_flag_up
	popl	%eax
	popl	%edx
	jmp	L_no_precision_loss

/*
 * Set the FPU status flags to represent precision loss due to
 * truncation.
 *
 * %edx (result tag) and %eax (ms significand word) are saved around the
 * call and restored before returning to L_no_precision_loss.
 */
L_precision_lost_down:
	push	%edx
	push	%eax
	call	set_precision_flag_down
	popl	%eax
	popl	%edx
	jmp	L_no_precision_loss
466
467
/*
 * The number is a denormal (which might get rounded up to a normal)
 * Shift the number right the required number of bits, which will
 * have to be undone later...
 *
 * In:   %eax:%ebx significand, %edx extension, %ecx control word,
 *       EXP(%edi) <= EXP_UNDER
 * Out:  significand and extension shifted right, EXP(%edi) raised to
 *       EXP_UNDER+1, FPU_denormal set, %ecx restored to the control
 *       word; continues at Denorm_done.
 *
 * Any non-zero bits that fall off the bottom of the extension are
 * folded into its low bit, so %edx stays non-zero whenever the value
 * is inexact.
 */
L_Make_denorm:
	/* The action to be taken depends upon whether the underflow
	   exception is masked */
	testb	CW_Underflow,%cl		/* Underflow mask. */
	jz	Unmasked_underflow		/* Do not make a denormal. */

	/* Must be written before the push below: FPU_denormal is
	   %esp-relative in the re-entrant build. */
	movb	DENORMAL,FPU_denormal

	pushl	%ecx		/* Save */
	/* %cx = (EXP_UNDER+1) - exponent = number of bits to shift */
	movw	EXP_UNDER+1,%cx
	subw	EXP(%edi),%cx

	cmpw	$64,%cx	/* 64 or more: the whole significand is shifted out */
	jnc	Denorm_shift_more_than_63

	cmpw	$32,%cx	/* shrd only works for 0..31 bits */
	jnc	Denorm_shift_more_than_32

/*
 * We got here without jumps by assuming that the most common requirement
 *   is for a small de-normalising shift.
 * Shift by [1..31] bits
 */
	addw	%cx,EXP(%edi)	/* Exponent becomes EXP_UNDER+1 */
	orl	%edx,%edx	/* extension */
	setne	%ch		/* Save whether %edx is non-zero */
	xorl	%edx,%edx
	/* Shift the 96 bit value %eax:%ebx:%edx right by %cl; the old
	   extension is dropped apart from the sticky bit saved in %ch */
	shrd	%cl,%ebx,%edx
	shrd	%cl,%eax,%ebx
	shr	%cl,%eax
	orb	%ch,%dl
	popl	%ecx
	jmp	Denorm_done

/* Shift by [32..63] bits */
Denorm_shift_more_than_32:
	addw	%cx,EXP(%edi)	/* Exponent becomes EXP_UNDER+1 */
	subb	$32,%cl		/* Shift by the remainder, then move words */
	orl	%edx,%edx
	setne	%ch		/* Sticky bit for the old extension */
	orb	%ch,%bl
	xorl	%edx,%edx
	shrd	%cl,%ebx,%edx
	shrd	%cl,%eax,%ebx
	shr	%cl,%eax
	orl	%edx,%edx		/* test these 32 bits */
	setne	%cl		/* Sticky bit for what left %ebx */
	orb	%ch,%bl
	orb	%cl,%bl
	/* Move everything down one word: the extra 32 bits of shift */
	movl	%ebx,%edx
	movl	%eax,%ebx
	xorl	%eax,%eax
	popl	%ecx
	jmp	Denorm_done

/* Shift by [64..) bits */
Denorm_shift_more_than_63:
	cmpw	$64,%cx
	jne	Denorm_shift_more_than_64

/* Exactly 64 bit shift */
	addw	%cx,EXP(%edi)	/* Exponent becomes EXP_UNDER+1 */
	xorl	%ecx,%ecx
	/* %cl = 1 if either the old extension or %ebx is non-zero */
	orl	%edx,%edx
	setne	%cl
	orl	%ebx,%ebx
	setne	%ch
	orb	%ch,%cl
	orb	%cl,%al
	/* The old ms word becomes the extension */
	movl	%eax,%edx
	xorl	%eax,%eax
	xorl	%ebx,%ebx
	popl	%ecx
	jmp	Denorm_done

Denorm_shift_more_than_64:
	movw	EXP_UNDER+1,EXP(%edi)
/* This is easy, %eax must be non-zero, so.. */
	movl	$1,%edx		/* Only the sticky bit survives */
	xorl	%eax,%eax
	xorl	%ebx,%ebx
	popl	%ecx
	jmp	Denorm_done


/* Underflow is unmasked: leave the number alone and only record the
   fact, so that Normalise_result can pass it to Signal_underflow. */
Unmasked_underflow:
	movb	UNMASKED_UNDERFLOW,FPU_denormal
	jmp	Denorm_done
561
562
/*
 * Undo the de-normalisation.
 *
 * Reached from L_Re_normalise when FPU_denormal is non-zero.
 * In:   %eax:%ebx rounded significand, %edi result pointer
 * Out:  continues at L_Normalised, at L_deNormalised with
 *       %edx = TAG_Special, at L_underflow_to_zero or at
 *       Signal_underflow.
 */
Normalise_result:
	cmpb	UNMASKED_UNDERFLOW,FPU_denormal
	je	Signal_underflow

/* The number must be a denormal if we got here. */
#ifdef PARANOID
	/* But check it... just in case. */
	cmpw	EXP_UNDER+1,EXP(%edi)
	jne	L_norm_bugged
#endif /* PARANOID */

#ifdef PECULIAR_486
	/*
	 * This implements a special feature of 80486 behaviour.
	 * Underflow will be signaled even if the number is
	 * not a denormal after rounding.
	 * This difference occurs only for masked underflow, and not
	 * in the unmasked case.
	 * Actual 80486 behaviour differs from this in some circumstances.
	 */
	orl	%eax,%eax		/* ms bits */
	js	LPseudoDenormal		/* Will be masked underflow */
#else
	orl	%eax,%eax		/* ms bits */
	js	L_Normalised		/* No longer a denormal */
#endif /* PECULIAR_486 */

	/* Still using the flags of the orl above: ms word non-zero */
	jnz	LDenormal_adj_exponent

	orl	%ebx,%ebx
	jz	L_underflow_to_zero	/* The contents are zero */

LDenormal_adj_exponent:
	decw	EXP(%edi)

LPseudoDenormal:
	testb	$0xff,FPU_bits_lost	/* bits lost == underflow */
	/* movl leaves the flags alone, so the jz tests the testb result */
	movl	TAG_Special,%edx
	jz	L_deNormalised

	/* There must be a masked underflow */
	push	%eax			/* Save the ms significand word */
	pushl	EX_Underflow
	call	EXCEPTION
	popl	%eax			/* Discard the argument */
	popl	%eax			/* Restore the ms significand word */
	/* Reloaded after the call; presumably because EXCEPTION may
	   clobber %edx. */
	movl	TAG_Special,%edx
	jmp	L_deNormalised
612
613
/*
 * The operations resulted in a number too small to represent.
 * Masked response.
 *
 * Reached from Normalise_result once both %eax and %ebx have tested
 * zero.  Flags precision loss and underflow, then stores the zero
 * significand with exponent EXP_UNDER and tag TAG_Zero.
 * %eax is saved around each call.
 */
L_underflow_to_zero:
	push	%eax
	call	set_precision_flag_down
	popl	%eax

	push	%eax			/* Save the ms significand word */
	pushl	EX_Underflow
	call	EXCEPTION
	popl	%eax			/* Discard the argument */
	popl	%eax			/* Restore the ms significand word */

/* Reduce the exponent to EXP_UNDER */
	movw	EXP_UNDER,EXP(%edi)
	movl	TAG_Zero,%edx
	/* Enter below the FPU_bits_lost checks: the precision flag has
	   already been set above. */
	jmp	L_Store_significand
633
634
/*
 * The operations resulted in a number too large to represent.
 *
 * Reached from L_Store_significand after the significand has been
 * stored.  arith_overflow is called with the result pointer as its only
 * stack argument.  %eax is not written again before the function
 * returns, so whatever arith_overflow leaves in it becomes the return
 * value.
 */
L_overflow:
	addw	EXTENDED_Ebias,EXP(%edi)	/* Set for unmasked response. */
	push	%edi
	call	arith_overflow
	pop	%edi			/* Remove the argument, restore %edi */
	jmp	fpu_reg_round_signed_special_exit
642
643
/*
 * Unmasked underflow response.
 *
 * Reached from Normalise_result when FPU_denormal is UNMASKED_UNDERFLOW,
 * i.e. the number was not shifted by L_Make_denorm.
 */
Signal_underflow:
	/* The number may have been changed to a non-denormal */
	/* by the rounding operations. */
	cmpw	EXP_UNDER,EXP(%edi)
	jle	Do_unmasked_underflow

	jmp	L_Normalised

Do_unmasked_underflow:
	/* Increase the exponent by the magic number */
	addw	$(3*(1<<13)),EXP(%edi)
	push	%eax			/* Save the ms significand word */
	pushl	EX_Underflow
	call	EXCEPTION
	popl	%eax			/* Discard the argument */
	popl	%eax			/* Restore the ms significand word */
	jmp	L_Normalised
661
662
#ifdef PARANOID
/*
 * Internal-error stubs, only built with PARANOID.
 *
 * Each one calls EXCEPTION with EX_INTERNAL or-ed with a code that
 * identifies the failing check, removes the argument from the stack
 * (the value popped into %ebx is discarded) and leaves through
 * L_exception_exit, which makes the function return -1.
 */
#ifdef PECULIAR_486
/* Precision-control field matched none of the accepted values */
L_bugged_denorm_486:
	pushl	EX_INTERNAL|0x236
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit
#else
/* Precision-control field matched none of the accepted values */
L_bugged_denorm:
	pushl	EX_INTERNAL|0x230
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit
#endif /* PECULIAR_486 */

/* Rounding-control field not recognised in LRound_To_24 */
L_bugged_round24:
	pushl	EX_INTERNAL|0x231
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit

/* Rounding-control field not recognised in LRound_To_53 */
L_bugged_round53:
	pushl	EX_INTERNAL|0x232
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit

/* Rounding-control field not recognised in LRound_To_64 */
L_bugged_round64:
	pushl	EX_INTERNAL|0x233
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit

/* Normalise_result found an exponent other than EXP_UNDER+1 */
L_norm_bugged:
	pushl	EX_INTERNAL|0x234
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit

/* Only referenced by the commented-out check at fpu_reg_round */
L_entry_bugged:
	pushl	EX_INTERNAL|0x235
	call	EXCEPTION
	popl	%ebx
L_exception_exit:
	mov	$-1,%eax
	jmp	fpu_reg_round_special_exit
#endif /* PARANOID */

SYM_FUNC_END(FPU_round)
712

/* source code of linux/arch/x86/math-emu/reg_round.S */