reg_round.S source code [linux/arch/x86/math-emu/reg_round.S]

/* SPDX-License-Identifier: GPL-2.0 */
	.file	"reg_round.S"
/*---------------------------------------------------------------------------+
 | reg_round.S                                                               |
 |                                                                           |
 | Rounding/truncation/etc for FPU basic arithmetic functions.               |
 |                                                                           |
 | Copyright (C) 1993,1995,1997                                              |
 |                  W. Metzenthen, 22 Parker St, Ormond, Vic 3163,           |
 |                  Australia.  E-mail billm@suburbia.net                    |
 |                                                                           |
 | This code has four possible entry points.                                 |
 | The following must be entered by a jmp instruction:                       |
 |   fpu_reg_round, fpu_reg_round_sqrt, and fpu_Arith_exit.                  |
 |                                                                           |
 | The FPU_round entry point is intended to be used by C code.               |
 | From C, call as:                                                          |
 |  int FPU_round(FPU_REG *arg, unsigned int extent, unsigned int control_w) |
 |                                                                           |
 |    Return value is the tag of the answer, or-ed with FPU_Exception if     |
 |    one was raised, or -1 on internal error.                               |
 |                                                                           |
 | For correct "up" and "down" rounding, the argument must have the correct  |
 |    sign.                                                                  |
 |                                                                           |
 +---------------------------------------------------------------------------*/
27
/*---------------------------------------------------------------------------+
 | Four entry points.                                                        |
 |                                                                           |
 | Needed by both the fpu_reg_round and fpu_reg_round_sqrt entry points:     |
 |   %eax:%ebx  64 bit significand                                           |
 |   %edx       32 bit extension of the significand                          |
 |   %edi       pointer to an FPU_REG for the result to be stored            |
 |   stack      calling function must have set up a C stack frame and        |
 |              pushed %esi, %edi, and %ebx                                  |
 |                                                                           |
 | Needed just for the fpu_reg_round_sqrt entry point:                       |
 |   %cx  A control word in the same format as the FPU control word.         |
 | Otherwise, PARAM4 must give such a value.                                 |
 |                                                                           |
 |                                                                           |
 | The significand and its extension are assumed to be exact in the          |
 | following sense:                                                          |
 |   If the significand by itself is the exact result then the significand   |
 |   extension (%edx) must contain 0, otherwise the significand extension    |
 |   must be non-zero.                                                       |
 |   If the significand extension is non-zero then the significand is        |
 |   smaller than the magnitude of the correct exact result by an amount     |
 |   greater than zero and less than one ls bit of the significand.          |
 |   The significand extension is only required to have three possible       |
 |   non-zero values:                                                        |
 |       less than 0x80000000  <=>  the significand is less than 1/2 an ls   |
 |                                  bit smaller than the magnitude of the    |
 |                                  true exact result.                       |
 |         exactly 0x80000000  <=>  the significand is exactly 1/2 an ls bit |
 |                                  smaller than the magnitude of the true   |
 |                                  exact result.                            |
 |    greater than 0x80000000  <=>  the significand is more than 1/2 an ls   |
 |                                  bit smaller than the magnitude of the    |
 |                                  true exact result.                       |
 |                                                                           |
 +---------------------------------------------------------------------------*/
64
/*---------------------------------------------------------------------------+
 | The code in this module has become quite complex, but it should handle    |
 | all of the FPU flags which are set at this stage of the basic arithmetic  |
 | computations.                                                             |
 | There are a few rare cases where the results are not set identically to   |
 | a real FPU. These require a bit more thought because at this stage the    |
 | results of the code here appear to be more consistent...                  |
 | This may be changed in a future version.                                  |
 +---------------------------------------------------------------------------*/
74
75
#include "fpu_emu.h"
#include "exception.h"
#include "control_w.h"

/*
 * Flags for FPU_bits_lost.
 * Records whether rounding discarded bits and in which direction the
 * stored significand moved; 0 (written at Denorm_done) means nothing lost.
 */
#define	LOST_DOWN	$1
#define	LOST_UP		$2

/*
 * Flags for FPU_denormal.
 * 0 (written on the normal path) means the operand was not de-normalised.
 */
#define	DENORMAL	$1
#define	UNMASKED_UNDERFLOW $2


#ifndef NON_REENTRANT_FPU
/*	Make the code re-entrant by putting
	local storage on the stack: */
/*
 * Both bytes live in the extra word that fpu_reg_round pushes ("adjust the
 * stack pointer").  They are addressed relative to %esp, so they may only
 * be accessed while %esp points at that word, i.e. not between a
 * temporary push and its matching pop.
 */
#define FPU_bits_lost	(%esp)
#define FPU_denormal	1(%esp)

#else
/*	Not re-entrant, so we can gain speed by putting
	local storage in a static area: */
.data
	.align 4,0
FPU_bits_lost:
	.byte	0
FPU_denormal:
	.byte	0
#endif /* NON_REENTRANT_FPU */
105
106
.text
.globl fpu_reg_round
.globl fpu_Arith_exit

/*
 * FPU_round -- entry point when called from C.
 *
 * Builds a C stack frame, saves %esi/%edi/%ebx, then loads the register
 * interface expected at fpu_reg_round:
 *	%edi      = PARAM1, pointer to the FPU_REG to round in place
 *	%eax:%ebx = its 64 bit significand (SIGH:SIGL)
 *	%edx      = PARAM2, the 32 bit extension of the significand
 *
 * fpu_reg_round is the entry used (by jmp) from code that has already
 * set up the same frame and registers.  From there on:
 *	%ecx = PARAM4, the control word (precision and rounding control)
 *	PARAM5 is compared against SIGN_POS to obtain the sign of the result
 * NOTE(review): PARAM4 and PARAM5 are read even though the prototype in
 * the file header lists only three arguments; the PARAMn macros are
 * presumably defined in an included header -- confirm the real C
 * prototype there.
 *
 * This block ends by dispatching on the precision-control field; if no
 * comparison matches (and PARANOID is not defined) execution falls
 * through into the 24 bit rounding code that follows.
 */
SYM_FUNC_START(FPU_round)
	pushl	%ebp
	movl	%esp,%ebp
	pushl	%esi
	pushl	%edi
	pushl	%ebx

	movl	PARAM1,%edi
	movl	SIGH(%edi),%eax
	movl	SIGL(%edi),%ebx
	movl	PARAM2,%edx

fpu_reg_round:			/* Normal entry point */
	movl	PARAM4,%ecx

#ifndef NON_REENTRANT_FPU
	/* Reserve one stack word; it holds FPU_bits_lost / FPU_denormal. */
	pushl	%ebx		/* adjust the stack pointer */
#endif /* NON_REENTRANT_FPU */

#ifdef PARANOID
/* Cannot use this here yet */
/*	orl	%eax,%eax */
/*	jns	L_entry_bugged */
#endif /* PARANOID */

	cmpw	EXP_UNDER,EXP(%edi)
	jle	L_Make_denorm			/* The number is a de-normal */

	movb	$0,FPU_denormal			/* 0 -> not a de-normal */

	/* L_Make_denorm and Unmasked_underflow rejoin here. */
Denorm_done:
	movb	$0,FPU_bits_lost		/* No bits yet lost in rounding */

	/* Keep the whole control word in %esi; %ecx is reused as scratch. */
	movl	%ecx,%esi
	andl	CW_PC,%ecx
	cmpl	PR_64_BITS,%ecx
	je	LRound_To_64

	cmpl	PR_53_BITS,%ecx
	je	LRound_To_53

	cmpl	PR_24_BITS,%ecx
	je	LRound_To_24

#ifdef PECULIAR_486
/* With the precision control bits set to 01 "(reserved)", a real 80486
   behaves as if the precision control bits were set to 11 "64 bits" */
	cmpl	PR_RESERVED_BITS,%ecx
	je	LRound_To_64
#ifdef PARANOID
	jmp	L_bugged_denorm_486
#endif /* PARANOID */
#else
#ifdef PARANOID
	jmp	L_bugged_denorm	/* There is no bug, just a bad control word */
#endif /* PARANOID */
#endif /* PECULIAR_486 */
169
170
/*
 * Round etc to 24 bit precision.
 *
 * In:  %eax:%ebx = significand, %edx = extension, %esi = control word,
 *      PARAM5 = sign of the result.  %ecx is scratch.
 * Only the upper 24 bits of %eax are kept: the low byte of %eax together
 * with all of %ebx and %edx are the bits being rounded away.
 * Out: jumps to L_Re_normalise, or to LCheck_Round_Overflow (with the
 *      carry from the increment still in CF) after rounding up.
 *      FPU_bits_lost is set to LOST_UP / LOST_DOWN when bits were dropped.
 */
LRound_To_24:
	movl	%esi,%ecx
	andl	CW_RC,%ecx
	cmpl	RC_RND,%ecx
	je	LRound_nearest_24

	cmpl	RC_CHOP,%ecx
	je	LCheck_truncate_24

	cmpl	RC_UP,%ecx		/* Towards +infinity */
	je	LUp_24

	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
	je	LDown_24

#ifdef PARANOID
	jmp	L_bugged_round24
#endif /* PARANOID */

LUp_24:
	cmpb	SIGN_POS,PARAM5
	jne	LCheck_truncate_24	/* If negative then  up==truncate */

	jmp	LCheck_24_round_up

LDown_24:
	cmpb	SIGN_POS,PARAM5
	je	LCheck_truncate_24	/* If positive then  down==truncate */

	/* Directed rounding away from zero: increment if any low bit is set. */
LCheck_24_round_up:
	movl	%eax,%ecx
	andl	$0x000000ff,%ecx
	orl	%ebx,%ecx
	orl	%edx,%ecx
	jnz	LDo_24_round_up
	jmp	L_Re_normalise

LRound_nearest_24:
	/* Do rounding of the 24th bit if needed (nearest or even) */
	movl	%eax,%ecx
	andl	$0x000000ff,%ecx
	cmpl	$0x00000080,%ecx
	jc	LCheck_truncate_24	/* less than half, no increment needed */

	jne	LGreater_Half_24	/* greater than half, increment needed */

	/* Possibly half, we need to check the ls bits */
	orl	%ebx,%ebx
	jnz	LGreater_Half_24	/* greater than half, increment needed */

	orl	%edx,%edx
	jnz	LGreater_Half_24	/* greater than half, increment needed */

	/* Exactly half, increment only if 24th bit is 1 (round to even) */
	testl	$0x00000100,%eax
	jz	LDo_truncate_24

LGreater_Half_24:			/* Rounding: increment at the 24th bit */
LDo_24_round_up:
	andl	$0xffffff00,%eax	/* Truncate to 24 bits */
	xorl	%ebx,%ebx
	movb	LOST_UP,FPU_bits_lost
	/* addl must stay last: LCheck_Round_Overflow tests its carry. */
	addl	$0x00000100,%eax
	jmp	LCheck_Round_Overflow

LCheck_truncate_24:
	movl	%eax,%ecx
	andl	$0x000000ff,%ecx
	orl	%ebx,%ecx
	orl	%edx,%ecx
	jz	L_Re_normalise		/* No truncation needed */

LDo_truncate_24:
	andl	$0xffffff00,%eax	/* Truncate to 24 bits */
	xorl	%ebx,%ebx
	movb	LOST_DOWN,FPU_bits_lost
	jmp	L_Re_normalise
249
250
/*
 * Round etc to 53 bit precision.
 *
 * In:  %eax:%ebx = significand, %edx = extension, %esi = control word,
 *      PARAM5 = sign of the result.  %ecx is scratch.
 * All of %eax and the upper 21 bits of %ebx are kept: the low 11 bits of
 * %ebx (mask 0x7ff) together with %edx are the bits being rounded away.
 * Out: jumps to L_Re_normalise, or to LCheck_Round_Overflow (with the
 *      carry from the increment still in CF) after rounding up.
 *      FPU_bits_lost is set to LOST_UP / LOST_DOWN when bits were dropped.
 */
LRound_To_53:
	movl	%esi,%ecx
	andl	CW_RC,%ecx
	cmpl	RC_RND,%ecx
	je	LRound_nearest_53

	cmpl	RC_CHOP,%ecx
	je	LCheck_truncate_53

	cmpl	RC_UP,%ecx		/* Towards +infinity */
	je	LUp_53

	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
	je	LDown_53

#ifdef PARANOID
	jmp	L_bugged_round53
#endif /* PARANOID */

LUp_53:
	cmpb	SIGN_POS,PARAM5
	jne	LCheck_truncate_53	/* If negative then  up==truncate */

	jmp	LCheck_53_round_up

LDown_53:
	cmpb	SIGN_POS,PARAM5
	je	LCheck_truncate_53	/* If positive then  down==truncate */

	/* Directed rounding away from zero: increment if any low bit is set. */
LCheck_53_round_up:
	movl	%ebx,%ecx
	andl	$0x000007ff,%ecx
	orl	%edx,%ecx
	jnz	LDo_53_round_up
	jmp	L_Re_normalise

LRound_nearest_53:
	/* Do rounding of the 53rd bit if needed (nearest or even) */
	movl	%ebx,%ecx
	andl	$0x000007ff,%ecx
	cmpl	$0x00000400,%ecx
	jc	LCheck_truncate_53	/* less than half, no increment needed */

	jnz	LGreater_Half_53	/* greater than half, increment needed */

	/* Possibly half, we need to check the ls bits */
	orl	%edx,%edx
	jnz	LGreater_Half_53	/* greater than half, increment needed */

	/* Exactly half, increment only if 53rd bit is 1 (round to even) */
	testl	$0x00000800,%ebx
	jz	LTruncate_53

LGreater_Half_53:			/* Rounding: increment at the 53rd bit */
LDo_53_round_up:
	movb	LOST_UP,FPU_bits_lost
	andl	$0xfffff800,%ebx	/* Truncate to 53 bits */
	addl	$0x00000800,%ebx
	/* adcl must stay last: LCheck_Round_Overflow tests its carry. */
	adcl	$0,%eax
	jmp	LCheck_Round_Overflow

LCheck_truncate_53:
	movl	%ebx,%ecx
	andl	$0x000007ff,%ecx
	orl	%edx,%ecx
	jz	L_Re_normalise

LTruncate_53:
	movb	LOST_DOWN,FPU_bits_lost
	andl	$0xfffff800,%ebx	/* Truncate to 53 bits */
	jmp	L_Re_normalise
323
324
/*
 * Round etc to 64 bit precision.
 *
 * In:  %eax:%ebx = significand, %edx = extension, %esi = control word,
 *      PARAM5 = sign of the result.  %ecx is scratch.
 * The whole of %eax:%ebx is kept; only the extension %edx is rounded
 * away, so "any bits lost" is simply %edx != 0.
 * Out: every path ends at L_Re_normalise, which directly follows this
 *      block (LTruncate_64 falls through into it).
 *
 * LCheck_Round_Overflow is shared with the 24 and 53 bit rounding code,
 * which jump to it with CF holding the carry out of their increment.
 */
LRound_To_64:
	movl	%esi,%ecx
	andl	CW_RC,%ecx
	cmpl	RC_RND,%ecx
	je	LRound_nearest_64

	cmpl	RC_CHOP,%ecx
	je	LCheck_truncate_64

	cmpl	RC_UP,%ecx		/* Towards +infinity */
	je	LUp_64

	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
	je	LDown_64

#ifdef PARANOID
	jmp	L_bugged_round64
#endif /* PARANOID */

LUp_64:
	cmpb	SIGN_POS,PARAM5
	jne	LCheck_truncate_64	/* If negative then  up==truncate */

	orl	%edx,%edx
	jnz	LDo_64_round_up
	jmp	L_Re_normalise

LDown_64:
	cmpb	SIGN_POS,PARAM5
	je	LCheck_truncate_64	/* If positive then  down==truncate */

	orl	%edx,%edx
	jnz	LDo_64_round_up
	jmp	L_Re_normalise

LRound_nearest_64:
	/* Unsigned compare of the extension against exactly one half. */
	cmpl	$0x80000000,%edx
	jc	LCheck_truncate_64

	jne	LDo_64_round_up

	/* Now test for round-to-even */
	testb	$1,%bl
	jz	LCheck_truncate_64

LDo_64_round_up:
	movb	LOST_UP,FPU_bits_lost
	addl	$1,%ebx
	adcl	$0,%eax

LCheck_Round_Overflow:
	jnc	L_Re_normalise

	/* Overflow, adjust the result (significand to 1.0) */
	/* The first rcrl rotates the carry back in as the top bit of %eax. */
	rcrl	$1,%eax
	rcrl	$1,%ebx
	incw	EXP(%edi)
	jmp	L_Re_normalise

LCheck_truncate_64:
	orl	%edx,%edx
	jz	L_Re_normalise

LTruncate_64:
	movb	LOST_DOWN,FPU_bits_lost
391
/*
 * Common tail of all rounding paths.
 *
 * In:  %eax:%ebx = rounded significand, %edi = result FPU_REG,
 *      FPU_denormal / FPU_bits_lost describe what happened so far.
 * Out: significand, exponent and sign are written to the FPU_REG and the
 *      tag of the result is returned in %eax.
 *
 * %edx carries the tag from L_Normalised onwards; code that enters at
 * L_deNormalised or L_Store_significand must have loaded %edx itself.
 * fpu_reg_round_signed_special_exit and fpu_reg_round_special_exit expect
 * the return value to be in %eax already.
 */
L_Re_normalise:
	testb	$0xff,FPU_denormal
	jnz	Normalise_result

L_Normalised:
	movl	TAG_Valid,%edx

L_deNormalised:
	cmpb	LOST_UP,FPU_bits_lost
	je	L_precision_lost_up

	cmpb	LOST_DOWN,FPU_bits_lost
	je	L_precision_lost_down

	/* The two precision-loss handlers jump back to this label. */
L_no_precision_loss:
	/* store the result */

L_Store_significand:
	movl	%eax,SIGH(%edi)
	movl	%ebx,SIGL(%edi)

	cmpw	EXP_OVER,EXP(%edi)
	jge	L_overflow

	movl	%edx,%eax		/* return value = tag */

	/* Convert the exponent to 80x87 form. */
	addw	EXTENDED_Ebias,EXP(%edi)
	andw	$0x7fff,EXP(%edi)	/* top bit is reserved for the sign */

fpu_reg_round_signed_special_exit:

	cmpb	SIGN_POS,PARAM5
	je	fpu_reg_round_special_exit

	orw	$0x8000,EXP(%edi)	/* Negative sign for the result. */

fpu_reg_round_special_exit:

#ifndef NON_REENTRANT_FPU
	/* Drop the word reserved at fpu_reg_round for the local flags. */
	popl	%ebx		/* adjust the stack pointer */
#endif /* NON_REENTRANT_FPU */

	/* Undo the frame set up by FPU_round (or by the jmp-ing caller). */
fpu_Arith_exit:
	popl	%ebx
	popl	%edi
	popl	%esi
	leave
	RET
441
442
/*
 * Set the FPU status flags to represent precision loss due to
 * round-up.
 *
 * Reached from L_deNormalised when FPU_bits_lost == LOST_UP.
 * %eax (high half of the significand) and %edx (tag) are saved around
 * the call, presumably because the callee follows the C calling
 * convention and may clobber them -- confirm.
 */
L_precision_lost_up:
	push	%edx
	push	%eax
	call	set_precision_flag_up
	popl	%eax
	popl	%edx
	jmp	L_no_precision_loss
454
/*
 * Set the FPU status flags to represent precision loss due to
 * truncation.
 *
 * Reached from L_deNormalised when FPU_bits_lost == LOST_DOWN.
 * %eax (high half of the significand) and %edx (tag) are saved around
 * the call, presumably because the callee follows the C calling
 * convention and may clobber them -- confirm.
 */
L_precision_lost_down:
	push	%edx
	push	%eax
	call	set_precision_flag_down
	popl	%eax
	popl	%edx
	jmp	L_no_precision_loss
466
467
/*
 * The number is a denormal (which might get rounded up to a normal)
 * Shift the number right the required number of bits, which will
 * have to be undone later...
 *
 * In:  %eax:%ebx:%edx = significand and extension, %edi = FPU_REG,
 *      %ecx = control word (only %cl is tested here).
 * Out: jumps back to Denorm_done with %ecx restored.
 *      Masked underflow:   FPU_denormal = DENORMAL, the 96 bit value is
 *      shifted right by (EXP_UNDER+1 - exponent) bits and the exponent is
 *      raised to EXP_UNDER+1.  Non-zero bits that fall off the bottom are
 *      folded into a low ("sticky") bit so that the extension stays
 *      non-zero whenever the result is inexact.
 *      Unmasked underflow: FPU_denormal = UNMASKED_UNDERFLOW, nothing is
 *      shifted.
 *
 * FPU_denormal is written before the pushl below: in the re-entrant
 * build it is addressed relative to %esp, so it must not be touched
 * while the saved %ecx is on the stack.
 */
L_Make_denorm:
	/* The action to be taken depends upon whether the underflow
	   exception is masked */
	testb	CW_Underflow,%cl		/* Underflow mask. */
	jz	Unmasked_underflow		/* Do not make a denormal. */

	movb	DENORMAL,FPU_denormal

	pushl	%ecx		/* Save */
	/* %cx = number of bits to shift right (at least 1 here). */
	movw	EXP_UNDER+1,%cx
	subw	EXP(%edi),%cx

	cmpw	$64,%cx	/* shrd only works for 0..31 bits */
	jnc	Denorm_shift_more_than_63

	cmpw	$32,%cx	/* shrd only works for 0..31 bits */
	jnc	Denorm_shift_more_than_32

/*
 * We got here without jumps by assuming that the most common requirement
 * is for a small de-normalising shift.
 * Shift by [1..31] bits
 */
	addw	%cx,EXP(%edi)
	orl	%edx,%edx	/* extension */
	setne	%ch		/* Save whether %edx is non-zero */
	xorl	%edx,%edx
	shrd	%cl,%ebx,%edx	/* new extension = bits leaving %ebx */
	shrd	%cl,%eax,%ebx
	shr	%cl,%eax
	orb	%ch,%dl		/* old extension survives as a sticky bit */
	popl	%ecx
	jmp	Denorm_done

/* Shift by [32..63] bits */
Denorm_shift_more_than_32:
	addw	%cx,EXP(%edi)
	subb	$32,%cl		/* a whole-word move is done at the end */
	orl	%edx,%edx
	setne	%ch
	orb	%ch,%bl
	xorl	%edx,%edx
	shrd	%cl,%ebx,%edx
	shrd	%cl,%eax,%ebx
	shr	%cl,%eax
	orl	%edx,%edx		/* test these 32 bits */
	setne	%cl
	orb	%ch,%bl
	orb	%cl,%bl
	/* Shift everything down by one more 32 bit word. */
	movl	%ebx,%edx
	movl	%eax,%ebx
	xorl	%eax,%eax
	popl	%ecx
	jmp	Denorm_done

/* Shift by [64..) bits */
Denorm_shift_more_than_63:
	cmpw	$64,%cx
	jne	Denorm_shift_more_than_64

/* Exactly 64 bit shift */
	addw	%cx,EXP(%edi)
	xorl	%ecx,%ecx
	orl	%edx,%edx
	setne	%cl
	orl	%ebx,%ebx
	setne	%ch
	orb	%ch,%cl
	orb	%cl,%al		/* sticky bit for the discarded 64 bits */
	movl	%eax,%edx	/* old high word becomes the extension */
	xorl	%eax,%eax
	xorl	%ebx,%ebx
	popl	%ecx
	jmp	Denorm_done

Denorm_shift_more_than_64:
	movw	EXP_UNDER+1,EXP(%edi)
/* This is easy, %eax must be non-zero, so.. */
	movl	$1,%edx
	xorl	%eax,%eax
	xorl	%ebx,%ebx
	popl	%ecx
	jmp	Denorm_done


Unmasked_underflow:
	movb	UNMASKED_UNDERFLOW,FPU_denormal
	jmp	Denorm_done
561
562
/*
 * Undo the de-normalisation.
 *
 * Reached from L_Re_normalise when FPU_denormal is non-zero.
 * In:  %eax:%ebx = rounded significand, %edi = FPU_REG.
 * Out: UNMASKED_UNDERFLOW         -> Signal_underflow
 *      significand all zero       -> L_underflow_to_zero
 *      top bit of %eax set        -> L_Normalised, or LPseudoDenormal
 *                                    when PECULIAR_486 is defined
 *      otherwise (true denormal)  -> exponent decremented, then
 *                                    L_deNormalised with
 *                                    %edx = TAG_Special; EX_Underflow is
 *                                    raised first if any bits were lost.
 */
Normalise_result:
	cmpb	UNMASKED_UNDERFLOW,FPU_denormal
	je	Signal_underflow

/* The number must be a denormal if we got here. */
#ifdef PARANOID
	/* But check it... just in case. */
	cmpw	EXP_UNDER+1,EXP(%edi)
	jne	L_norm_bugged
#endif /* PARANOID */

#ifdef PECULIAR_486
	/*
	 * This implements a special feature of 80486 behaviour.
	 * Underflow will be signaled even if the number is
	 * not a denormal after rounding.
	 * This difference occurs only for masked underflow, and not
	 * in the unmasked case.
	 * Actual 80486 behaviour differs from this in some circumstances.
	 */
	orl	%eax,%eax		/* ms bits */
	js	LPseudoDenormal		/* Will be masked underflow */
#else
	orl	%eax,%eax		/* ms bits */
	js	L_Normalised		/* No longer a denormal */
#endif /* PECULIAR_486 */

	/* Flags are still those of the orl above: ZF clear => %eax != 0. */
	jnz	LDenormal_adj_exponent

	orl	%ebx,%ebx
	jz	L_underflow_to_zero	/* The contents are zero */

LDenormal_adj_exponent:
	decw	EXP(%edi)

LPseudoDenormal:
	testb	$0xff,FPU_bits_lost	/* bits lost == underflow */
	movl	TAG_Special,%edx	/* movl leaves the flags for jz intact */
	jz	L_deNormalised

	/* There must be a masked underflow */
	push	%eax			/* preserve %eax across the call */
	pushl	EX_Underflow
	call	EXCEPTION
	popl	%eax			/* discard the EX_Underflow argument */
	popl	%eax			/* restore the saved %eax */
	movl	TAG_Special,%edx
	jmp	L_deNormalised
612
613
/*
 * The operations resulted in a number too small to represent.
 * Masked response.
 *
 * Reached from Normalise_result when %eax and %ebx are both zero.
 * Flags precision loss (down), raises EX_Underflow, sets the exponent to
 * EXP_UNDER and continues at L_Store_significand with %edx = TAG_Zero.
 * %eax is preserved around both calls.
 */
L_underflow_to_zero:
	push	%eax
	call	set_precision_flag_down
	popl	%eax

	push	%eax			/* preserve %eax across the call */
	pushl	EX_Underflow
	call	EXCEPTION
	popl	%eax			/* discard the EX_Underflow argument */
	popl	%eax			/* restore the saved %eax */

/* Reduce the exponent to EXP_UNDER */
	movw	EXP_UNDER,EXP(%edi)
	movl	TAG_Zero,%edx
	jmp	L_Store_significand
633
634
/*
 * The operations resulted in a number too large to represent.
 *
 * Reached from L_Store_significand when the exponent is >= EXP_OVER.
 * The exponent is biased, then arith_overflow is called with the FPU_REG
 * pointer (%edi) as its stack argument.  %eax is not written again
 * before the function returns, so whatever arith_overflow leaves in %eax
 * becomes the return value -- presumably the tag of the overflow result;
 * confirm against arith_overflow.
 */
L_overflow:
	addw	EXTENDED_Ebias,EXP(%edi)	/* Set for unmasked response. */
	push	%edi
	call	arith_overflow
	pop	%edi
	jmp	fpu_reg_round_signed_special_exit
642
643
/*
 * Unmasked underflow response.
 *
 * Reached from Normalise_result when FPU_denormal == UNMASKED_UNDERFLOW
 * (the significand was never shifted).  If the exponent is still
 * <= EXP_UNDER after rounding, the exponent is increased by
 * 3*(1<<13) = 0x6000 and EX_Underflow is raised.  Either way the result
 * is then stored as a normal number via L_Normalised.
 */
Signal_underflow:
	/* The number may have been changed to a non-denormal */
	/* by the rounding operations. */
	cmpw	EXP_UNDER,EXP(%edi)
	jle	Do_unmasked_underflow

	jmp	L_Normalised

Do_unmasked_underflow:
	/* Increase the exponent by the magic number */
	addw	$(3*(1<<13)),EXP(%edi)
	push	%eax			/* preserve %eax across the call */
	pushl	EX_Underflow
	call	EXCEPTION
	popl	%eax			/* discard the EX_Underflow argument */
	popl	%eax			/* restore the saved %eax */
	jmp	L_Normalised
661
662
/*
 * Internal-error exits, only built with PARANOID.
 *
 * Each handler raises EX_INTERNAL or-ed with a code identifying the
 * failing check, pops the argument again (into %ebx, which is reloaded
 * from the stack on the way out) and leaves through L_exception_exit,
 * which makes the function return -1.
 */
#ifdef PARANOID
#ifdef PECULIAR_486
L_bugged_denorm_486:
	pushl	EX_INTERNAL|0x236
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit
#else
L_bugged_denorm:
	pushl	EX_INTERNAL|0x230
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit
#endif /* PECULIAR_486 */

L_bugged_round24:
	pushl	EX_INTERNAL|0x231
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit

L_bugged_round53:
	pushl	EX_INTERNAL|0x232
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit

L_bugged_round64:
	pushl	EX_INTERNAL|0x233
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit

L_norm_bugged:
	pushl	EX_INTERNAL|0x234
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit

/* Only referenced from the check that is commented out at fpu_reg_round. */
L_entry_bugged:
	pushl	EX_INTERNAL|0x235
	call	EXCEPTION
	popl	%ebx
L_exception_exit:
	mov	$-1,%eax
	jmp	fpu_reg_round_special_exit
#endif /* PARANOID */

SYM_FUNC_END(FPU_round)
712

source code of linux/arch/x86/math-emu/reg_round.S