e_expl.S source code [glibc/sysdeps/i386/fpu/e_expl.S]

/*
 * Public domain.
 *
 */

/*
 * The 8087 method for the exponential function is to calculate
 *   exp(x) = 2^(x log2(e))
 * after separating integer and fractional parts
 *   x log2(e) = i + f,     |f| <= .5
 * 2^i is immediate but f needs to be precise for long double accuracy.
 * Suppress range reduction error in computing f by the following.
 * Separate x into integer and fractional parts
 *   x = xi + xf,     |xf| <= .5
 * Separate log2(e) into the sum of an exact number c0 and small part c1.
 *   c0 + c1 = log2(e) to extra precision
 * Then
 *   f = (c0 xi - i) + c0 xf + c1 x
 * where c0 xi is exact and so also is (c0 xi - i).
 * -- moshier@na-net.ornl.gov
 */

#include <libm-alias-ldouble.h>
#include <machine/asm.h>
#include <i386-math-asm.h>
#include <libm-alias-finite.h>

/* Select, at build time, the name of the entry point defined below and
   the x87 instruction used to load log2(base):

     USE_AS_EXP10L   IEEE754_EXPL = __ieee754_exp10l   FLDLOG = fldl2t
     USE_AS_EXPM1L   IEEE754_EXPL = __expm1l           FLDLOG = fldl2e
     (neither)       IEEE754_EXPL = __ieee754_expl     FLDLOG = fldl2e

   fldl2t pushes log2(10) and fldl2e pushes log2(e), so the same code
   body serves base 10 and base e.  */
#ifdef USE_AS_EXP10L
# define IEEE754_EXPL __ieee754_exp10l
# define FLDLOG fldl2t
#elif defined USE_AS_EXPM1L
# define IEEE754_EXPL __expm1l
# define FLDLOG fldl2e
#else
# define IEEE754_EXPL __ieee754_expl
# define FLDLOG fldl2e
#endif

/* Read-only constants.  Each one is a 10-byte x87 extended-precision
   value stored little-endian (8 significand bytes, then the 2-byte
   sign/exponent word) and followed by 6 zero bytes so that every
   object occupies one 16-byte slot of the mergeable .rodata.cst16
   section.

   c0 keeps only the top 16 significand bits (its low 6 significand
   bytes are zero); c1 is the small correction, so that c0 + c1 is
   log2(base) to extra precision.  The short c0 is what makes the
   product c0 * xi exact in the range reduction.  */
	.section .rodata.cst16,"aM",@progbits,16

	.p2align 4
#ifdef USE_AS_EXP10L
	/* Split of log2(10): c0 has significand 0xd49a..., exponent
	   word 0x4000.  */
	.type c0,@object
c0:	.byte 0, 0, 0, 0, 0, 0, 0x9a, 0xd4, 0x00, 0x40
	.byte 0, 0, 0, 0, 0, 0
	ASM_SIZE_DIRECTIVE(c0)
	.type c1,@object
c1:	.byte 0x58, 0x92, 0xfc, 0x15, 0x37, 0x9a, 0x97, 0xf0, 0xef, 0x3f
	.byte 0, 0, 0, 0, 0, 0
	ASM_SIZE_DIRECTIVE(c1)
#else
	/* Split of log2(e): c0 has significand 0xb8aa..., exponent
	   word 0x3fff.  */
	.type c0,@object
c0:	.byte 0, 0, 0, 0, 0, 0, 0xaa, 0xb8, 0xff, 0x3f
	.byte 0, 0, 0, 0, 0, 0
	ASM_SIZE_DIRECTIVE(c0)
	.type c1,@object
c1:	.byte 0x20, 0xfa, 0xee, 0xc2, 0x5f, 0x70, 0xa5, 0xec, 0xed, 0x3f
	.byte 0, 0, 0, 0, 0, 0
	ASM_SIZE_DIRECTIVE(c1)
#endif
#ifndef USE_AS_EXPM1L
	/* Saturation argument: significand 0x8000000000000000 with
	   exponent word 0x400e, i.e. 2^15 = 32768.0.  The function
	   substitutes +-csat for finite arguments whose exponent is
	   too large, so that the normal path produces the overflowed
	   or underflowed result.  */
	.type csat,@object
csat:	.byte 0, 0, 0, 0, 0, 0, 0, 0x80, 0x0e, 0x40
	.byte 0, 0, 0, 0, 0, 0
	ASM_SIZE_DIRECTIVE(csat)
	/* NOTE(review): DEFINE_LDBL_MIN is defined outside this file
	   (presumably in <i386-math-asm.h>); it is expected to emit the
	   constant used by LDBL_CHECK_FORCE_UFLOW_NONNEG -- confirm.  */
DEFINE_LDBL_MIN
#endif

/* MO(sym): memory operand naming one of the constants above.
   When building PIC the symbol is addressed as a GOT-relative offset
   from %ecx, so %ecx must hold the PIC base (the function loads it
   with LOAD_PIC_REG (cx)) before any MO() operand is used.
   Otherwise the symbol is addressed directly.  */
#ifdef PIC
# define MO(op) op##@GOTOFF(%ecx)
#else
# define MO(op) op
#endif

75	.text
76	ENTRY(IEEE754_EXPL)
77	#ifdef USE_AS_EXPM1L
78	movzwl `4`+`8`(%esp), %eax
79	xorb $`0x80`, %ah // invert sign bit (now 1 is "positive")
80	cmpl $`0xc006`, %eax // is num positive and exp >= 6 (number is >= 128.0)?
81	jae HIDDEN_JUMPTARGET (__expl) // (if num is denormal, it is at least >= 64.0)
82	#endif
83	fldt `4`(%esp)
84	/ I added the following ugly construct because expl(+-Inf) resulted*
85	in NaN. The ugliness results from the bright minds at Intel.
86	For the i686 the code can be written better.
87	-- drepper@cygnus.com. /*
88	fxam / Is NaN or +-Inf? /
89	#ifdef PIC
90	LOAD_PIC_REG (cx)
91	#endif
92	#ifdef USE_AS_EXPM1L
93	xorb $`0x80`, %ah
94	cmpl $`0xc006`, %eax
95	fstsw %ax
96	movb $`0x45`, %dh
97	jb `4f`
98
99	/ Below -64.0 (may be -NaN or -Inf). /
100	andb %ah, %dh
101	cmpb $`0x01`, %dh
102	je `6f` / Is +-NaN, jump. /
103	jmp `1f` / -large, possibly -Inf. /
104
105	`4`: / In range -64.0 to 64.0 (may be +-0 but not NaN or +-Inf). /
106	/ Test for +-0 as argument. /
107	andb %ah, %dh
108	cmpb $`0x40`, %dh
109	je `2f`
110
111	/ Test for arguments that are small but not subnormal. /
112	movzwl `4`+`8`(%esp), %eax
113	andl $`0x7fff`, %eax
114	cmpl $`0x3fbf`, %eax
115	jge `3f`
116	/ Argument's exponent below -64; avoid spurious underflow if*
117	normal. /*
118	cmpl $`0x0001`, %eax
119	jge `2f`
120	/ Force underflow and return the argument, to avoid wrong signs*
121	of zero results from the code below in some rounding modes. /*
122	fld %st
123	fmul %st
124	fstp %st
125	jmp `2f`
126	#else
127	movzwl `4`+`8`(%esp), %eax
128	andl $`0x7fff`, %eax
129	cmpl $`0x400d`, %eax
130	jg `5f`
131	cmpl $`0x3fbc`, %eax
132	jge `3f`
133	/ Argument's exponent below -67, result rounds to 1. /
134	fld1
135	faddp
136	jmp `2f`
137	`5`: / Overflow, underflow or infinity or NaN as argument. /
138	fstsw %ax
139	movb $`0x45`, %dh
140	andb %ah, %dh
141	cmpb $`0x05`, %dh
142	je `1f` / Is +-Inf, jump. /
143	cmpb $`0x01`, %dh
144	je `6f` / Is +-NaN, jump. /
145	/ Overflow or underflow; saturate. /
146	fstp %st
147	fldt MO(csat)
148	andb $`2`, %ah
149	jz `3f`
150	fchs
151	#endif
152	`3`: FLDLOG / 1 log2(base) /
153	fmul %st(`1`), %st / 1 x log2(base) /
154	/ Set round-to-nearest temporarily. /
155	subl $`8`, %esp
156	cfi_adjust_cfa_offset (`8`)
157	fstcw `4`(%esp)
158	movl $`0xf3ff`, %edx
159	andl `4`(%esp), %edx
160	movl %edx, (%esp)
161	fldcw (%esp)
162	frndint / 1 i /
163	fld %st(`1`) / 2 x /
164	frndint / 2 xi /
165	fldcw `4`(%esp)
166	addl $`8`, %esp
167	cfi_adjust_cfa_offset (-`8`)
168	fld %st(`1`) / 3 i /
169	fldt MO(c0) / 4 c0 /
170	fld %st(`2`) / 5 xi /
171	fmul %st(`1`), %st / 5 c0 xi /
172	fsubp %st, %st(`2`) / 4 f = c0 xi - i /
173	fld %st(`4`) / 5 x /
174	fsub %st(`3`), %st / 5 xf = x - xi /
175	fmulp %st, %st(`1`) / 4 c0 xf /
176	faddp %st, %st(`1`) / 3 f = f + c0 xf /
177	fldt MO(c1) / 4 /
178	fmul %st(`4`), %st / 4 c1 * x /
179	faddp %st, %st(`1`) / 3 f = f + c1 * x /
180	f2xm1 / 3 2^(fract(x * log2(base))) - 1 /
181	#ifdef USE_AS_EXPM1L
182	fstp %st(`1`) / 2 /
183	fscale / 2 scale factor is st(1); base^x - 2^i /
184	fxch / 2 i /
185	fld1 / 3 1.0 /
186	fscale / 3 2^i /
187	fld1 / 4 1.0 /
188	fsubrp %st, %st(`1`) / 3 2^i - 1.0 /
189	fstp %st(`1`) / 2 /
190	faddp %st, %st(`1`) / 1 base^x - 1.0 /
191	#else
192	fld1 / 4 1.0 /
193	faddp / 3 2^(fract(x * log2(base))) /
194	fstp %st(`1`) / 2 /
195	fscale / 2 scale factor is st(1); base^x /
196	fstp %st(`1`) / 1 /
197	LDBL_CHECK_FORCE_UFLOW_NONNEG
198	#endif
199	fstp %st(`1`) / 0 /
200	jmp `2f`
201	`1`:
202	#ifdef USE_AS_EXPM1L
203	/ For expm1l, only negative sign gets here. /
204	fstp %st
205	fld1
206	fchs
207	#else
208	testl $`0x200`, %eax / Test sign. /
209	jz `2f` / If positive, jump. /
210	fstp %st
211	fldz / Set result to 0. /
212	#endif
213	`2`: ret
214	`6`: / NaN argument. /
215	fadd %st
216	ret
217	END(IEEE754_EXPL)
218
/* Publish the symbol names for whichever variant was built.  The alias
   macros come from the libm-alias-*.h headers included at the top of
   the file.  */
#ifdef USE_AS_EXPM1L
libm_hidden_def (__expm1l)
libm_alias_ldouble (__expm1, expm1)
#elif defined USE_AS_EXP10L
libm_alias_finite (__ieee754_exp10l, __exp10l)
#else
libm_alias_finite (__ieee754_expl, __expl)
#endif


source code of glibc/sysdeps/i386/fpu/e_expl.S