e_pow.S source code [glibc/sysdeps/i386/fpu/e_pow.S]

1	/ ix87 specific implementation of pow function.*
2	Copyright (C) 1996-2022 Free Software Foundation, Inc.
3	This file is part of the GNU C Library.
4
5	The GNU C Library is free software; you can redistribute it and/or
6	modify it under the terms of the GNU Lesser General Public
7	License as published by the Free Software Foundation; either
8	version 2.1 of the License, or (at your option) any later version.
9
10	The GNU C Library is distributed in the hope that it will be useful,
11	but WITHOUT ANY WARRANTY; without even the implied warranty of
12	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13	Lesser General Public License for more details.
14
15	You should have received a copy of the GNU Lesser General Public
16	License along with the GNU C Library; if not, see
17	<https://www.gnu.org/licenses/>. /*
18
19	#include <machine/asm.h>
20	#include <i386-math-asm.h>
21	#include <libm-alias-finite.h>
22
23	.section .rodata.cst8,"aM",@progbits,`8`
24
25	.p2align `3`
26	.type one,@object
27	one: .double `1.0`
28	ASM_SIZE_DIRECTIVE(one)
29	.type limit,@object
30	limit: .double `0.29`
31	ASM_SIZE_DIRECTIVE(limit)
32	.type p63,@object
33	p63: .byte `0`, `0`, `0`, `0`, `0`, `0`, `0xe0`, `0x43`
34	ASM_SIZE_DIRECTIVE(p63)
35	.type p10,@object
36	p10: .byte `0`, `0`, `0`, `0`, `0`, `0`, `0x90`, `0x40`
37	ASM_SIZE_DIRECTIVE(p10)
38
39	.section .rodata.cst16,"aM",@progbits,`16`
40
41	.p2align `3`
42	.type infinity,@object
43	inf_zero:
44	infinity:
45	.byte `0`, `0`, `0`, `0`, `0`, `0`, `0xf0`, `0x7f`
46	ASM_SIZE_DIRECTIVE(infinity)
47	.type zero,@object
48	zero: .double `0.0`
49	ASM_SIZE_DIRECTIVE(zero)
50	.type minf_mzero,@object
51	minf_mzero:
52	minfinity:
53	.byte `0`, `0`, `0`, `0`, `0`, `0`, `0xf0`, `0xff`
54	mzero:
55	.byte `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0x80`
56	ASM_SIZE_DIRECTIVE(minf_mzero)
57	DEFINE_DBL_MIN
58
59	#ifdef PIC
60	# define MO(op) op##@GOTOFF(%ecx)
61	# define MOX(op,x,f) op##@GOTOFF(%ecx,x,f)
62	#else
63	# define MO(op) op
64	# define MOX(op,x,f) op(,x,f)
65	#endif
66
67	.text
68	ENTRY(__ieee754_pow)
69	fldl `12`(%esp) // y
70	fxam
71
72	#ifdef PIC
73	LOAD_PIC_REG (cx)
74	#endif
75
76	fnstsw
77	movb %ah, %dl
78	andb $`0x45`, %ah
79	cmpb $`0x40`, %ah // is y == 0 ?
80	je `11f`
81
82	cmpb $`0x05`, %ah // is y == ±inf ?
83	je `12f`
84
85	cmpb $`0x01`, %ah // is y == NaN ?
86	je `30f`
87
88	fldl `4`(%esp) // x : y
89
90	subl $`8`,%esp
91	cfi_adjust_cfa_offset (`8`)
92
93	fxam
94	fnstsw
95	movb %ah, %dh
96	andb $`0x45`, %ah
97	cmpb $`0x40`, %ah
98	je `20f` // x is ±0
99
100	cmpb $`0x05`, %ah
101	je `15f` // x is ±inf
102
103	cmpb $`0x01`, %ah
104	je `32f` // x is NaN
105
106	fxch // y : x
107
108	/ fistpll raises invalid exception for \|y\| >= 1L<<63. /
109	fld %st // y : y : x
110	fabs // \|y\| : y : x
111	fcompl MO(p63) // y : x
112	fnstsw
113	sahf
114	jnc `2f`
115
116	/ First see whether `y' is a natural number. In this case we*
117	can use a more precise algorithm. /*
118	fld %st // y : y : x
119	fistpll (%esp) // y : x
120	fildll (%esp) // int(y) : y : x
121	fucomp %st(`1`) // y : x
122	fnstsw
123	sahf
124	jne `3f`
125
126	/ OK, we have an integer value for y. If large enough that*
127	errors may propagate out of the 11 bits excess precision, use
128	the algorithm for real exponent instead. /*
129	fld %st // y : y : x
130	fabs // \|y\| : y : x
131	fcompl MO(p10) // y : x
132	fnstsw
133	sahf
134	jnc `2f`
135	popl %eax
136	cfi_adjust_cfa_offset (-`4`)
137	popl %edx
138	cfi_adjust_cfa_offset (-`4`)
139	orl $`0`, %edx
140	fstp %st(`0`) // x
141	jns `4f` // y >= 0, jump
142	fdivrl MO(one) // 1/x (now referred to as x)
143	negl %eax
144	adcl $`0`, %edx
145	negl %edx
146	`4`: fldl MO(one) // 1 : x
147	fxch
148
149	/ If y is even, take the absolute value of x. Otherwise,*
150	ensure all intermediate values that might overflow have the
151	sign of x. /*
152	testb $`1`, %al
153	jnz `6f`
154	fabs
155
156	`6`: shrdl $`1`, %edx, %eax
157	jnc `5f`
158	fxch
159	fabs
160	fmul %st(`1`) // x : STx*
161	fxch
162	`5`: fld %st // x : x : STx*
163	fabs // \|x\| : x : STx*
164	fmulp // \|x\|x : STx
165	shrl $`1`, %edx
166	movl %eax, %ecx
167	orl %edx, %ecx
168	jnz `6b`
169	fstp %st(`0`) // STx*
170	#ifdef PIC
171	LOAD_PIC_REG (cx)
172	#endif
173	DBL_NARROW_EVAL_UFLOW_NONNAN
174	ret
175
176	/ y is ±NAN /
177	`30`: fldl `4`(%esp) // x : y
178	fldl MO(one) // 1.0 : x : y
179	fucomp %st(`1`) // x : y
180	fnstsw
181	sahf
182	je `31f`
183	fxch // y : x
184	`31`: fstp %st(`1`)
185	ret
186
187	cfi_adjust_cfa_offset (`8`)
188	`32`: addl $`8`, %esp
189	cfi_adjust_cfa_offset (-`8`)
190	fstp %st(`1`)
191	ret
192
193	cfi_adjust_cfa_offset (`8`)
194	.align ALIGNARG(`4`)
195	`2`: // y is a large integer (absolute value at least 1L<<10), but
196	// may be odd unless at least 1L<<64. So it may be necessary
197	// to adjust the sign of a negative result afterwards.
198	fxch // x : y
199	fabs // \|x\| : y
200	fxch // y : x
201	.align ALIGNARG(`4`)
202	`3`: / y is a real number. /
203	fxch // x : y
204	fldl MO(one) // 1.0 : x : y
205	fldl MO(limit) // 0.29 : 1.0 : x : y
206	fld %st(`2`) // x : 0.29 : 1.0 : x : y
207	fsub %st(`2`) // x-1 : 0.29 : 1.0 : x : y
208	fabs // \|x-1\| : 0.29 : 1.0 : x : y
209	fucompp // 1.0 : x : y
210	fnstsw
211	fxch // x : 1.0 : y
212	sahf
213	ja `7f`
214	fsub %st(`1`) // x-1 : 1.0 : y
215	fyl2xp1 // log2(x) : y
216	jmp `8f`
217
218	`7`: fyl2x // log2(x) : y
219	`8`: fmul %st(`1`) // ylog2(x) : y*
220	fst %st(`1`) // ylog2(x) : ylog2(x)
221	frndint // int(ylog2(x)) : ylog2(x)
222	fsubr %st, %st(`1`) // int(ylog2(x)) : fract(ylog2(x))
223	fxch // fract(ylog2(x)) : int(ylog2(x))
224	f2xm1 // 2^fract(ylog2(x))-1 : int(ylog2(x))
225	faddl MO(one) // 2^fract(ylog2(x)) : int(ylog2(x))
226
227	// Before scaling, we must negate if x is negative and y is an
228	// odd integer.
229	testb $`2`, %dh
230	jz `291f`
231	// x is negative. If y is an odd integer, negate the result.
232	fldl `20`(%esp) // y : 2^fract(ylog2(x)) : int(ylog2(x))
233	fld %st // y : y : 2^fract(ylog2(x)) : int(ylog2(x))
234	fabs // \|y\| : y : 2^fract(ylog2(x)) : int(ylog2(x))
235	fcompl MO(p63) // y : 2^fract(ylog2(x)) : int(ylog2(x))
236	fnstsw
237	sahf
238	jnc `290f`
239
240	// We must find out whether y is an odd integer.
241	fld %st // y : y : 2^fract(ylog2(x)) : int(ylog2(x))
242	fistpll (%esp) // y : 2^fract(ylog2(x)) : int(ylog2(x))
243	fildll (%esp) // int(y) : y : 2^fract(ylog2(x)) : int(ylog2(x))
244	fucompp // 2^fract(ylog2(x)) : int(ylog2(x))
245	fnstsw
246	sahf
247	jne `291f`
248
249	// OK, the value is an integer, but is it odd?
250	popl %eax
251	cfi_adjust_cfa_offset (-`4`)
252	popl %edx
253	cfi_adjust_cfa_offset (-`4`)
254	andb $`1`, %al
255	jz `292f` // jump if not odd
256	// It's an odd integer.
257	fchs
258	jmp `292f`
259
260	cfi_adjust_cfa_offset (`8`)
261	`290`: fstp %st(`0`) // 2^fract(ylog2(x)) : int(ylog2(x))
262	`291`: addl $`8`, %esp
263	cfi_adjust_cfa_offset (-`8`)
264	`292`: fscale // +/- 2^fract(ylog2(x))2^int(ylog2(x)) : int(ylog2(x))
265	fstp %st(`1`) // +/- 2^fract(ylog2(x))2^int(ylog2(x))*
266	DBL_NARROW_EVAL_UFLOW_NONNAN
267	ret
268
269
270	// pow(x,±0) = 1
271	.align ALIGNARG(`4`)
272	`11`: fstp %st(`0`) // pop y
273	fldl MO(one)
274	ret
275
276	// y == ±inf
277	.align ALIGNARG(`4`)
278	`12`: fstp %st(`0`) // pop y
279	fldl MO(one) // 1
280	fldl `4`(%esp) // x : 1
281	fabs // abs(x) : 1
282	fucompp // < 1, == 1, or > 1
283	fnstsw
284	andb $`0x45`, %ah
285	cmpb $`0x45`, %ah
286	je `13f` // jump if x is NaN
287
288	cmpb $`0x40`, %ah
289	je `14f` // jump if \|x\| == 1
290
291	shlb $`1`, %ah
292	xorb %ah, %dl
293	andl $`2`, %edx
294	fldl MOX(inf_zero, %edx, `4`)
295	ret
296
297	.align ALIGNARG(`4`)
298	`14`: fldl MO(one)
299	ret
300
301	.align ALIGNARG(`4`)
302	`13`: fldl `4`(%esp) // load x == NaN
303	ret
304
305	cfi_adjust_cfa_offset (`8`)
306	.align ALIGNARG(`4`)
307	// x is ±inf
308	`15`: fstp %st(`0`) // y
309	testb $`2`, %dh
310	jz `16f` // jump if x == +inf
311
312	// fistpll raises invalid exception for \|y\| >= 1L<<63, so test
313	// that (in which case y is certainly even) before testing
314	// whether y is odd.
315	fld %st // y : y
316	fabs // \|y\| : y
317	fcompl MO(p63) // y
318	fnstsw
319	sahf
320	jnc `16f`
321
322	// We must find out whether y is an odd integer.
323	fld %st // y : y
324	fistpll (%esp) // y
325	fildll (%esp) // int(y) : y
326	fucompp // <empty>
327	fnstsw
328	sahf
329	jne `17f`
330
331	// OK, the value is an integer.
332	popl %eax
333	cfi_adjust_cfa_offset (-`4`)
334	popl %edx
335	cfi_adjust_cfa_offset (-`4`)
336	andb $`1`, %al
337	jz `18f` // jump if not odd
338	// It's an odd integer.
339	shrl $`31`, %edx
340	fldl MOX(minf_mzero, %edx, `8`)
341	ret
342
343	cfi_adjust_cfa_offset (`8`)
344	.align ALIGNARG(`4`)
345	`16`: fcompl MO(zero)
346	addl $`8`, %esp
347	cfi_adjust_cfa_offset (-`8`)
348	fnstsw
349	shrl $`5`, %eax
350	andl $`8`, %eax
351	fldl MOX(inf_zero, %eax, `1`)
352	ret
353
354	cfi_adjust_cfa_offset (`8`)
355	.align ALIGNARG(`4`)
356	`17`: shll $`30`, %edx // sign bit for y in right position
357	addl $`8`, %esp
358	cfi_adjust_cfa_offset (-`8`)
359	`18`: shrl $`31`, %edx
360	fldl MOX(inf_zero, %edx, `8`)
361	ret
362
363	cfi_adjust_cfa_offset (`8`)
364	.align ALIGNARG(`4`)
365	// x is ±0
366	`20`: fstp %st(`0`) // y
367	testb $`2`, %dl
368	jz `21f` // y > 0
369
370	// x is ±0 and y is < 0. We must find out whether y is an odd integer.
371	testb $`2`, %dh
372	jz `25f`
373
374	// fistpll raises invalid exception for \|y\| >= 1L<<63, so test
375	// that (in which case y is certainly even) before testing
376	// whether y is odd.
377	fld %st // y : y
378	fabs // \|y\| : y
379	fcompl MO(p63) // y
380	fnstsw
381	sahf
382	jnc `25f`
383
384	fld %st // y : y
385	fistpll (%esp) // y
386	fildll (%esp) // int(y) : y
387	fucompp // <empty>
388	fnstsw
389	sahf
390	jne `26f`
391
392	// OK, the value is an integer.
393	popl %eax
394	cfi_adjust_cfa_offset (-`4`)
395	popl %edx
396	cfi_adjust_cfa_offset (-`4`)
397	andb $`1`, %al
398	jz `27f` // jump if not odd
399	// It's an odd integer.
400	// Raise divide-by-zero exception and get minus infinity value.
401	fldl MO(one)
402	fdivl MO(zero)
403	fchs
404	ret
405
406	cfi_adjust_cfa_offset (`8`)
407	`25`: fstp %st(`0`)
408	`26`: addl $`8`, %esp
409	cfi_adjust_cfa_offset (-`8`)
410	`27`: // Raise divide-by-zero exception and get infinity value.
411	fldl MO(one)
412	fdivl MO(zero)
413	ret
414
415	cfi_adjust_cfa_offset (`8`)
416	.align ALIGNARG(`4`)
417	// x is ±0 and y is > 0. We must find out whether y is an odd integer.
418	`21`: testb $`2`, %dh
419	jz `22f`
420
421	// fistpll raises invalid exception for \|y\| >= 1L<<63, so test
422	// that (in which case y is certainly even) before testing
423	// whether y is odd.
424	fcoml MO(p63) // y
425	fnstsw
426	sahf
427	jnc `22f`
428
429	fld %st // y : y
430	fistpll (%esp) // y
431	fildll (%esp) // int(y) : y
432	fucompp // <empty>
433	fnstsw
434	sahf
435	jne `23f`
436
437	// OK, the value is an integer.
438	popl %eax
439	cfi_adjust_cfa_offset (-`4`)
440	popl %edx
441	cfi_adjust_cfa_offset (-`4`)
442	andb $`1`, %al
443	jz `24f` // jump if not odd
444	// It's an odd integer.
445	fldl MO(mzero)
446	ret
447
448	cfi_adjust_cfa_offset (`8`)
449	`22`: fstp %st(`0`)
450	`23`: addl $`8`, %esp // Don't use 2 x pop
451	cfi_adjust_cfa_offset (-`8`)
452	`24`: fldl MO(zero)
453	ret
454
455	END(__ieee754_pow)
456	libm_alias_finite (__ieee754_pow, __pow)
457

source code of glibc/sysdeps/i386/fpu/e_pow.S