1/* ix87 specific implementation of pow function.
2 Copyright (C) 1996-2022 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
18
19#include <machine/asm.h>
20#include <x86_64-math-asm.h>
21#include <libm-alias-finite.h>
22
23 .section .rodata.cst8,"aM",@progbits,8
24
25 .p2align 3
26 .type one,@object
27one: .double 1.0
28 ASM_SIZE_DIRECTIVE(one)
29 .type p2,@object
30p2: .byte 0, 0, 0, 0, 0, 0, 0x10, 0x40
31 ASM_SIZE_DIRECTIVE(p2)
32 .type p63,@object
33p63: .byte 0, 0, 0, 0, 0, 0, 0xe0, 0x43
34 ASM_SIZE_DIRECTIVE(p63)
35 .type p64,@object
36p64: .byte 0, 0, 0, 0, 0, 0, 0xf0, 0x43
37 ASM_SIZE_DIRECTIVE(p64)
38 .type p78,@object
39p78: .byte 0, 0, 0, 0, 0, 0, 0xd0, 0x44
40 ASM_SIZE_DIRECTIVE(p78)
41 .type pm79,@object
42pm79: .byte 0, 0, 0, 0, 0, 0, 0, 0x3b
43 ASM_SIZE_DIRECTIVE(pm79)
44
45 .section .rodata.cst16,"aM",@progbits,16
46
47 .p2align 3
48 .type infinity,@object
49inf_zero:
50infinity:
51 .byte 0, 0, 0, 0, 0, 0, 0xf0, 0x7f
52 ASM_SIZE_DIRECTIVE(infinity)
53 .type zero,@object
54zero: .double 0.0
55 ASM_SIZE_DIRECTIVE(zero)
56 .type minf_mzero,@object
57minf_mzero:
58minfinity:
59 .byte 0, 0, 0, 0, 0, 0, 0xf0, 0xff
60mzero:
61 .byte 0, 0, 0, 0, 0, 0, 0, 0x80
62 ASM_SIZE_DIRECTIVE(minf_mzero)
63DEFINE_LDBL_MIN
64
65#ifdef PIC
66# define MO(op) op##(%rip)
67#else
68# define MO(op) op
69#endif
70
71 .text
72ENTRY(__ieee754_powl)
73 fldt 24(%rsp) // y
74 fxam
75
76
77 fnstsw
78 movb %ah, %dl
79 andb $0x45, %ah
80 cmpb $0x40, %ah // is y == 0 ?
81 je 11f
82
83 cmpb $0x05, %ah // is y == ±inf ?
84 je 12f
85
86 cmpb $0x01, %ah // is y == NaN ?
87 je 30f
88
89 fldt 8(%rsp) // x : y
90
91 fxam
92 fnstsw
93 movb %ah, %dh
94 andb $0x45, %ah
95 cmpb $0x40, %ah
96 je 20f // x is ±0
97
98 cmpb $0x05, %ah
99 je 15f // x is ±inf
100
101 cmpb $0x01, %ah
102 je 31f // x is NaN
103
104 fxch // y : x
105
106 /* fistpll raises invalid exception for |y| >= 1L<<63. */
107 fldl MO(p63) // 1L<<63 : y : x
108 fld %st(1) // y : 1L<<63 : y : x
109 fabs // |y| : 1L<<63 : y : x
110 fcomip %st(1), %st // 1L<<63 : y : x
111 fstp %st(0) // y : x
112 jnc 2f
113
114 /* First see whether `y' is a natural number. In this case we
115 can use a more precise algorithm. */
116 fld %st // y : y : x
117 fistpll -8(%rsp) // y : x
118 fildll -8(%rsp) // int(y) : y : x
119 fucomip %st(1),%st // y : x
120 je 9f
121
122 // If y has absolute value at most 0x1p-79, then any finite
123 // nonzero x will result in 1. Saturate y to those bounds to
124 // avoid underflow in the calculation of y*log2(x).
125 fldl MO(pm79) // 0x1p-79 : y : x
126 fld %st(1) // y : 0x1p-79 : y : x
127 fabs // |y| : 0x1p-79 : y : x
128 fcomip %st(1), %st // 0x1p-79 : y : x
129 fstp %st(0) // y : x
130 jnc 3f
131 fstp %st(0) // pop y
132 fldl MO(pm79) // 0x1p-79 : x
133 testb $2, %dl
134 jnz 3f // y > 0
135 fchs // -0x1p-79 : x
136 jmp 3f
137
1389: /* OK, we have an integer value for y. Unless very small
139 (we use < 4), use the algorithm for real exponent to avoid
140 accumulation of errors. */
141 fldl MO(p2) // 4 : y : x
142 fld %st(1) // y : 4 : y : x
143 fabs // |y| : 4 : y : x
144 fcomip %st(1), %st // 4 : y : x
145 fstp %st(0) // y : x
146 jnc 3f
147 mov -8(%rsp),%eax
148 mov -4(%rsp),%edx
149 orl $0, %edx
150 fstp %st(0) // x
151 jns 4f // y >= 0, jump
152 fdivrl MO(one) // 1/x (now referred to as x)
153 negl %eax
154 adcl $0, %edx
155 negl %edx
1564: fldl MO(one) // 1 : x
157 fxch
158
159 /* If y is even, take the absolute value of x. Otherwise,
160 ensure all intermediate values that might overflow have the
161 sign of x. */
162 testb $1, %al
163 jnz 6f
164 fabs
165
1666: shrdl $1, %edx, %eax
167 jnc 5f
168 fxch
169 fabs
170 fmul %st(1) // x : ST*x
171 fxch
1725: fld %st // x : x : ST*x
173 fabs // |x| : x : ST*x
174 fmulp // |x|*x : ST*x
175 shrl $1, %edx
176 movl %eax, %ecx
177 orl %edx, %ecx
178 jnz 6b
179 fstp %st(0) // ST*x
180 LDBL_CHECK_FORCE_UFLOW_NONNAN
181 ret
182
183 /* y is ±NAN */
18430: fldt 8(%rsp) // x : y
185 fldl MO(one) // 1.0 : x : y
186 fucomip %st(1),%st // x : y
187 je 32f
18831: /* At least one argument NaN, and result should be NaN. */
189 faddp
190 ret
19132: jc 31b
192 /* pow (1, NaN); check if the NaN signaling. */
193 testb $0x40, 31(%rsp)
194 jz 31b
195 fstp %st(1)
196 ret
197
198 .align ALIGNARG(4)
1992: // y is a large integer (absolute value at least 1L<<63).
200 // If y has absolute value at least 1L<<78, then any finite
201 // nonzero x will result in 0 (underflow), 1 or infinity (overflow).
202 // Saturate y to those bounds to avoid overflow in the calculation
203 // of y*log2(x).
204 fldl MO(p78) // 1L<<78 : y : x
205 fld %st(1) // y : 1L<<78 : y : x
206 fabs // |y| : 1L<<78 : y : x
207 fcomip %st(1), %st // 1L<<78 : y : x
208 fstp %st(0) // y : x
209 jc 3f
210 fstp %st(0) // pop y
211 fldl MO(p78) // 1L<<78 : x
212 testb $2, %dl
213 jz 3f // y > 0
214 fchs // -(1L<<78) : x
215 .align ALIGNARG(4)
2163: /* y is a real number. */
217 subq $40, %rsp
218 cfi_adjust_cfa_offset (40)
219 fstpt 16(%rsp) // x
220 fstpt (%rsp) // <empty>
221 call HIDDEN_JUMPTARGET (__powl_helper) // <result>
222 addq $40, %rsp
223 cfi_adjust_cfa_offset (-40)
224 ret
225
226 // pow(x,±0) = 1, unless x is sNaN
227 .align ALIGNARG(4)
22811: fstp %st(0) // pop y
229 fldt 8(%rsp) // x
230 fxam
231 fnstsw
232 andb $0x45, %ah
233 cmpb $0x01, %ah
234 je 112f // x is NaN
235111: fstp %st(0)
236 fldl MO(one)
237 ret
238
239112: testb $0x40, 15(%rsp)
240 jnz 111b
241 fadd %st(0)
242 ret
243
244 // y == ±inf
245 .align ALIGNARG(4)
24612: fstp %st(0) // pop y
247 fldl MO(one) // 1
248 fldt 8(%rsp) // x : 1
249 fabs // abs(x) : 1
250 fucompp // < 1, == 1, or > 1
251 fnstsw
252 andb $0x45, %ah
253 cmpb $0x45, %ah
254 je 13f // jump if x is NaN
255
256 cmpb $0x40, %ah
257 je 14f // jump if |x| == 1
258
259 shlb $1, %ah
260 xorb %ah, %dl
261 andl $2, %edx
262#ifdef PIC
263 lea inf_zero(%rip),%rcx
264 fldl (%rcx, %rdx, 4)
265#else
266 fldl inf_zero(,%rdx, 4)
267#endif
268 ret
269
270 .align ALIGNARG(4)
27114: fldl MO(one)
272 ret
273
274 .align ALIGNARG(4)
27513: fldt 8(%rsp) // load x == NaN
276 fadd %st(0)
277 ret
278
279 .align ALIGNARG(4)
280 // x is ±inf
28115: fstp %st(0) // y
282 testb $2, %dh
283 jz 16f // jump if x == +inf
284
285 // fistpll raises invalid exception for |y| >= 1L<<63, but y
286 // may be odd unless we know |y| >= 1L<<64.
287 fldl MO(p64) // 1L<<64 : y
288 fld %st(1) // y : 1L<<64 : y
289 fabs // |y| : 1L<<64 : y
290 fcomip %st(1), %st // 1L<<64 : y
291 fstp %st(0) // y
292 jnc 16f
293 fldl MO(p63) // p63 : y
294 fxch // y : p63
295 fprem // y%p63 : p63
296 fstp %st(1) // y%p63
297
298 // We must find out whether y is an odd integer.
299 fld %st // y : y
300 fistpll -8(%rsp) // y
301 fildll -8(%rsp) // int(y) : y
302 fucomip %st(1),%st
303 ffreep %st // <empty>
304 jne 17f
305
306 // OK, the value is an integer, but is it odd?
307 mov -8(%rsp), %eax
308 mov -4(%rsp), %edx
309 andb $1, %al
310 jz 18f // jump if not odd
311 // It's an odd integer.
312 shrl $31, %edx
313#ifdef PIC
314 lea minf_mzero(%rip),%rcx
315 fldl (%rcx, %rdx, 8)
316#else
317 fldl minf_mzero(,%rdx, 8)
318#endif
319 ret
320
321 .align ALIGNARG(4)
32216: fcompl MO(zero)
323 fnstsw
324 shrl $5, %eax
325 andl $8, %eax
326#ifdef PIC
327 lea inf_zero(%rip),%rcx
328 fldl (%rcx, %rax, 1)
329#else
330 fldl inf_zero(,%rax, 1)
331#endif
332 ret
333
334 .align ALIGNARG(4)
33517: shll $30, %edx // sign bit for y in right position
33618: shrl $31, %edx
337#ifdef PIC
338 lea inf_zero(%rip),%rcx
339 fldl (%rcx, %rdx, 8)
340#else
341 fldl inf_zero(,%rdx, 8)
342#endif
343 ret
344
345 .align ALIGNARG(4)
346 // x is ±0
34720: fstp %st(0) // y
348 testb $2, %dl
349 jz 21f // y > 0
350
351 // x is ±0 and y is < 0. We must find out whether y is an odd integer.
352 testb $2, %dh
353 jz 25f
354
355 // fistpll raises invalid exception for |y| >= 1L<<63, but y
356 // may be odd unless we know |y| >= 1L<<64.
357 fldl MO(p64) // 1L<<64 : y
358 fld %st(1) // y : 1L<<64 : y
359 fabs // |y| : 1L<<64 : y
360 fcomip %st(1), %st // 1L<<64 : y
361 fstp %st(0) // y
362 jnc 25f
363 fldl MO(p63) // p63 : y
364 fxch // y : p63
365 fprem // y%p63 : p63
366 fstp %st(1) // y%p63
367
368 fld %st // y : y
369 fistpll -8(%rsp) // y
370 fildll -8(%rsp) // int(y) : y
371 fucomip %st(1),%st
372 ffreep %st // <empty>
373 jne 26f
374
375 // OK, the value is an integer, but is it odd?
376 mov -8(%rsp),%eax
377 mov -4(%rsp),%edx
378 andb $1, %al
379 jz 27f // jump if not odd
380 // It's an odd integer.
381 // Raise divide-by-zero exception and get minus infinity value.
382 fldl MO(one)
383 fdivl MO(zero)
384 fchs
385 ret
386
38725: fstp %st(0)
38826:
38927: // Raise divide-by-zero exception and get infinity value.
390 fldl MO(one)
391 fdivl MO(zero)
392 ret
393
394 .align ALIGNARG(4)
395 // x is ±0 and y is > 0. We must find out whether y is an odd integer.
39621: testb $2, %dh
397 jz 22f
398
399 // fistpll raises invalid exception for |y| >= 1L<<63, but y
400 // may be odd unless we know |y| >= 1L<<64.
401 fldl MO(p64) // 1L<<64 : y
402 fxch // y : 1L<<64
403 fcomi %st(1), %st // y : 1L<<64
404 fstp %st(1) // y
405 jnc 22f
406 fldl MO(p63) // p63 : y
407 fxch // y : p63
408 fprem // y%p63 : p63
409 fstp %st(1) // y%p63
410
411 fld %st // y : y
412 fistpll -8(%rsp) // y
413 fildll -8(%rsp) // int(y) : y
414 fucomip %st(1),%st
415 ffreep %st // <empty>
416 jne 23f
417
418 // OK, the value is an integer, but is it odd?
419 mov -8(%rsp),%eax
420 mov -4(%rsp),%edx
421 andb $1, %al
422 jz 24f // jump if not odd
423 // It's an odd integer.
424 fldl MO(mzero)
425 ret
426
42722: fstp %st(0)
42823:
42924: fldl MO(zero)
430 ret
431
432END(__ieee754_powl)
433libm_alias_finite (__ieee754_powl, __powl)
434

source code of glibc/sysdeps/x86_64/fpu/e_powl.S