1 | /* ix87 specific implementation of pow function. |
2 | Copyright (C) 1996-2022 Free Software Foundation, Inc. |
3 | This file is part of the GNU C Library. |
4 | |
5 | The GNU C Library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either |
8 | version 2.1 of the License, or (at your option) any later version. |
9 | |
10 | The GNU C Library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | Lesser General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU Lesser General Public |
16 | License along with the GNU C Library; if not, see |
17 | <https://www.gnu.org/licenses/>. */ |
18 | |
19 | #include <machine/asm.h> |
20 | #include <i386-math-asm.h> |
21 | #include <libm-alias-finite.h> |
22 | |
23 | .section .rodata.cst8,"aM" ,@progbits,8 |
24 | |
25 | .p2align 3 |
26 | .type one,@object |
27 | one: .double 1.0 |
28 | ASM_SIZE_DIRECTIVE(one) |
29 | .type p2,@object |
30 | p2: .byte 0, 0, 0, 0, 0, 0, 0x10, 0x40 |
31 | ASM_SIZE_DIRECTIVE(p2) |
32 | .type p63,@object |
33 | p63: .byte 0, 0, 0, 0, 0, 0, 0xe0, 0x43 |
34 | ASM_SIZE_DIRECTIVE(p63) |
35 | .type p64,@object |
36 | p64: .byte 0, 0, 0, 0, 0, 0, 0xf0, 0x43 |
37 | ASM_SIZE_DIRECTIVE(p64) |
38 | .type p78,@object |
39 | p78: .byte 0, 0, 0, 0, 0, 0, 0xd0, 0x44 |
40 | ASM_SIZE_DIRECTIVE(p78) |
41 | .type pm79,@object |
42 | pm79: .byte 0, 0, 0, 0, 0, 0, 0, 0x3b |
43 | ASM_SIZE_DIRECTIVE(pm79) |
44 | |
45 | .section .rodata.cst16,"aM" ,@progbits,16 |
46 | |
47 | .p2align 3 |
48 | .type infinity,@object |
49 | inf_zero: |
50 | infinity: |
51 | .byte 0, 0, 0, 0, 0, 0, 0xf0, 0x7f |
52 | ASM_SIZE_DIRECTIVE(infinity) |
53 | .type zero,@object |
54 | zero: .double 0.0 |
55 | ASM_SIZE_DIRECTIVE(zero) |
56 | .type minf_mzero,@object |
57 | minf_mzero: |
58 | minfinity: |
59 | .byte 0, 0, 0, 0, 0, 0, 0xf0, 0xff |
60 | mzero: |
61 | .byte 0, 0, 0, 0, 0, 0, 0, 0x80 |
62 | ASM_SIZE_DIRECTIVE(minf_mzero) |
63 | DEFINE_LDBL_MIN |
64 | |
65 | #ifdef PIC |
66 | # define MO(op) op##@GOTOFF(%ecx) |
67 | # define MOX(op,x,f) op##@GOTOFF(%ecx,x,f) |
68 | #else |
69 | # define MO(op) op |
70 | # define MOX(op,x,f) op(,x,f) |
71 | #endif |
72 | |
73 | .text |
74 | ENTRY(__ieee754_powl) |
75 | fldt 16(%esp) // y |
76 | fxam |
77 | |
78 | #ifdef PIC |
79 | LOAD_PIC_REG (cx) |
80 | #endif |
81 | |
82 | fnstsw |
83 | movb %ah, %dl |
84 | andb $0x45, %ah |
85 | cmpb $0x40, %ah // is y == 0 ? |
86 | je 11f |
87 | |
88 | cmpb $0x05, %ah // is y == ±inf ? |
89 | je 12f |
90 | |
91 | cmpb $0x01, %ah // is y == NaN ? |
92 | je 30f |
93 | |
94 | fldt 4(%esp) // x : y |
95 | |
96 | subl $8,%esp |
97 | cfi_adjust_cfa_offset (8) |
98 | |
99 | fxam |
100 | fnstsw |
101 | movb %ah, %dh |
102 | andb $0x45, %ah |
103 | cmpb $0x40, %ah |
104 | je 20f // x is ±0 |
105 | |
106 | cmpb $0x05, %ah |
107 | je 15f // x is ±inf |
108 | |
109 | cmpb $0x01, %ah |
110 | je 32f // x is NaN |
111 | |
112 | fxch // y : x |
113 | |
114 | /* fistpll raises invalid exception for |y| >= 1L<<63. */ |
115 | fld %st // y : y : x |
116 | fabs // |y| : y : x |
117 | fcompl MO(p63) // y : x |
118 | fnstsw |
119 | sahf |
120 | jnc 2f |
121 | |
122 | /* First see whether `y' is a natural number. In this case we |
123 | can use a more precise algorithm. */ |
124 | fld %st // y : y : x |
125 | fistpll (%esp) // y : x |
126 | fildll (%esp) // int(y) : y : x |
127 | fucomp %st(1) // y : x |
128 | fnstsw |
129 | sahf |
130 | je 9f |
131 | |
132 | // If y has absolute value at most 0x1p-79, then any finite |
133 | // nonzero x will result in 1. Saturate y to those bounds to |
134 | // avoid underflow in the calculation of y*log2(x). |
135 | fld %st // y : y : x |
136 | fabs // |y| : y : x |
137 | fcompl MO(pm79) // y : x |
138 | fnstsw |
139 | sahf |
140 | jnc 3f |
141 | fstp %st(0) // pop y |
142 | fldl MO(pm79) // 0x1p-79 : x |
143 | testb $2, %dl |
144 | jnz 3f // y > 0 |
145 | fchs // -0x1p-79 : x |
146 | jmp 3f |
147 | |
148 | 9: /* OK, we have an integer value for y. Unless very small |
149 | (we use < 4), use the algorithm for real exponent to avoid |
150 | accumulation of errors. */ |
151 | fld %st // y : y : x |
152 | fabs // |y| : y : x |
153 | fcompl MO(p2) // y : x |
154 | fnstsw |
155 | sahf |
156 | jnc 3f |
157 | popl %eax |
158 | cfi_adjust_cfa_offset (-4) |
159 | popl %edx |
160 | cfi_adjust_cfa_offset (-4) |
161 | orl $0, %edx |
162 | fstp %st(0) // x |
163 | jns 4f // y >= 0, jump |
164 | fdivrl MO(one) // 1/x (now referred to as x) |
165 | negl %eax |
166 | adcl $0, %edx |
167 | negl %edx |
168 | 4: fldl MO(one) // 1 : x |
169 | fxch |
170 | |
171 | /* If y is even, take the absolute value of x. Otherwise, |
172 | ensure all intermediate values that might overflow have the |
173 | sign of x. */ |
174 | testb $1, %al |
175 | jnz 6f |
176 | fabs |
177 | |
178 | 6: shrdl $1, %edx, %eax |
179 | jnc 5f |
180 | fxch |
181 | fabs |
182 | fmul %st(1) // x : ST*x |
183 | fxch |
184 | 5: fld %st // x : x : ST*x |
185 | fabs // |x| : x : ST*x |
186 | fmulp // |x|*x : ST*x |
187 | shrl $1, %edx |
188 | movl %eax, %ecx |
189 | orl %edx, %ecx |
190 | jnz 6b |
191 | fstp %st(0) // ST*x |
192 | #ifdef PIC |
193 | LOAD_PIC_REG (cx) |
194 | #endif |
195 | LDBL_CHECK_FORCE_UFLOW_NONNAN |
196 | ret |
197 | |
198 | /* y is ±NAN */ |
199 | 30: fldt 4(%esp) // x : y |
200 | fldl MO(one) // 1.0 : x : y |
201 | fucomp %st(1) // x : y |
202 | fnstsw |
203 | sahf |
204 | je 33f |
205 | 31: /* At least one argument NaN, and result should be NaN. */ |
206 | faddp |
207 | ret |
208 | 33: jp 31b |
209 | /* pow (1, NaN); check if the NaN signaling. */ |
210 | testb $0x40, 23(%esp) |
211 | jz 31b |
212 | fstp %st(1) |
213 | ret |
214 | |
215 | cfi_adjust_cfa_offset (8) |
216 | 32: addl $8, %esp |
217 | cfi_adjust_cfa_offset (-8) |
218 | faddp |
219 | ret |
220 | |
221 | cfi_adjust_cfa_offset (8) |
222 | .align ALIGNARG(4) |
223 | 2: // y is a large integer (absolute value at least 1L<<63). |
224 | // If y has absolute value at least 1L<<78, then any finite |
225 | // nonzero x will result in 0 (underflow), 1 or infinity (overflow). |
226 | // Saturate y to those bounds to avoid overflow in the calculation |
227 | // of y*log2(x). |
228 | fld %st // y : y : x |
229 | fabs // |y| : y : x |
230 | fcompl MO(p78) // y : x |
231 | fnstsw |
232 | sahf |
233 | jc 3f |
234 | fstp %st(0) // pop y |
235 | fldl MO(p78) // 1L<<78 : x |
236 | testb $2, %dl |
237 | jz 3f // y > 0 |
238 | fchs // -(1L<<78) : x |
239 | .align ALIGNARG(4) |
240 | 3: /* y is a real number. */ |
241 | subl $28, %esp |
242 | cfi_adjust_cfa_offset (28) |
243 | fstpt 12(%esp) // x |
244 | fstpt (%esp) // <empty> |
245 | call HIDDEN_JUMPTARGET (__powl_helper) // <result> |
246 | addl $36, %esp |
247 | cfi_adjust_cfa_offset (-36) |
248 | ret |
249 | |
250 | // pow(x,±0) = 1, unless x is sNaN |
251 | .align ALIGNARG(4) |
252 | 11: fstp %st(0) // pop y |
253 | fldt 4(%esp) // x |
254 | fxam |
255 | fnstsw |
256 | andb $0x45, %ah |
257 | cmpb $0x01, %ah |
258 | je 112f // x is NaN |
259 | 111: fstp %st(0) |
260 | fldl MO(one) |
261 | ret |
262 | |
263 | 112: testb $0x40, 11(%esp) |
264 | jnz 111b |
265 | fadd %st(0) |
266 | ret |
267 | |
268 | // y == ±inf |
269 | .align ALIGNARG(4) |
270 | 12: fstp %st(0) // pop y |
271 | fldl MO(one) // 1 |
272 | fldt 4(%esp) // x : 1 |
273 | fabs // abs(x) : 1 |
274 | fucompp // < 1, == 1, or > 1 |
275 | fnstsw |
276 | andb $0x45, %ah |
277 | cmpb $0x45, %ah |
278 | je 13f // jump if x is NaN |
279 | |
280 | cmpb $0x40, %ah |
281 | je 14f // jump if |x| == 1 |
282 | |
283 | shlb $1, %ah |
284 | xorb %ah, %dl |
285 | andl $2, %edx |
286 | fldl MOX(inf_zero, %edx, 4) |
287 | ret |
288 | |
289 | .align ALIGNARG(4) |
290 | 14: fldl MO(one) |
291 | ret |
292 | |
293 | .align ALIGNARG(4) |
294 | 13: fldt 4(%esp) // load x == NaN |
295 | fadd %st(0) |
296 | ret |
297 | |
298 | cfi_adjust_cfa_offset (8) |
299 | .align ALIGNARG(4) |
300 | // x is ±inf |
301 | 15: fstp %st(0) // y |
302 | testb $2, %dh |
303 | jz 16f // jump if x == +inf |
304 | |
305 | // fistpll raises invalid exception for |y| >= 1L<<63, but y |
306 | // may be odd unless we know |y| >= 1L<<64. |
307 | fld %st // y : y |
308 | fabs // |y| : y |
309 | fcompl MO(p64) // y |
310 | fnstsw |
311 | sahf |
312 | jnc 16f |
313 | fldl MO(p63) // p63 : y |
314 | fxch // y : p63 |
315 | fprem // y%p63 : p63 |
316 | fstp %st(1) // y%p63 |
317 | |
318 | // We must find out whether y is an odd integer. |
319 | fld %st // y : y |
320 | fistpll (%esp) // y |
321 | fildll (%esp) // int(y) : y |
322 | fucompp // <empty> |
323 | fnstsw |
324 | sahf |
325 | jne 17f |
326 | |
327 | // OK, the value is an integer, but is it odd? |
328 | popl %eax |
329 | cfi_adjust_cfa_offset (-4) |
330 | popl %edx |
331 | cfi_adjust_cfa_offset (-4) |
332 | andb $1, %al |
333 | jz 18f // jump if not odd |
334 | // It's an odd integer. |
335 | shrl $31, %edx |
336 | fldl MOX(minf_mzero, %edx, 8) |
337 | ret |
338 | |
339 | cfi_adjust_cfa_offset (8) |
340 | .align ALIGNARG(4) |
341 | 16: fcompl MO(zero) |
342 | addl $8, %esp |
343 | cfi_adjust_cfa_offset (-8) |
344 | fnstsw |
345 | shrl $5, %eax |
346 | andl $8, %eax |
347 | fldl MOX(inf_zero, %eax, 1) |
348 | ret |
349 | |
350 | cfi_adjust_cfa_offset (8) |
351 | .align ALIGNARG(4) |
352 | 17: shll $30, %edx // sign bit for y in right position |
353 | addl $8, %esp |
354 | cfi_adjust_cfa_offset (-8) |
355 | 18: shrl $31, %edx |
356 | fldl MOX(inf_zero, %edx, 8) |
357 | ret |
358 | |
359 | cfi_adjust_cfa_offset (8) |
360 | .align ALIGNARG(4) |
361 | // x is ±0 |
362 | 20: fstp %st(0) // y |
363 | testb $2, %dl |
364 | jz 21f // y > 0 |
365 | |
366 | // x is ±0 and y is < 0. We must find out whether y is an odd integer. |
367 | testb $2, %dh |
368 | jz 25f |
369 | |
370 | // fistpll raises invalid exception for |y| >= 1L<<63, but y |
371 | // may be odd unless we know |y| >= 1L<<64. |
372 | fld %st // y : y |
373 | fabs // |y| : y |
374 | fcompl MO(p64) // y |
375 | fnstsw |
376 | sahf |
377 | jnc 25f |
378 | fldl MO(p63) // p63 : y |
379 | fxch // y : p63 |
380 | fprem // y%p63 : p63 |
381 | fstp %st(1) // y%p63 |
382 | |
383 | fld %st // y : y |
384 | fistpll (%esp) // y |
385 | fildll (%esp) // int(y) : y |
386 | fucompp // <empty> |
387 | fnstsw |
388 | sahf |
389 | jne 26f |
390 | |
391 | // OK, the value is an integer, but is it odd? |
392 | popl %eax |
393 | cfi_adjust_cfa_offset (-4) |
394 | popl %edx |
395 | cfi_adjust_cfa_offset (-4) |
396 | andb $1, %al |
397 | jz 27f // jump if not odd |
398 | // It's an odd integer. |
399 | // Raise divide-by-zero exception and get minus infinity value. |
400 | fldl MO(one) |
401 | fdivl MO(zero) |
402 | fchs |
403 | ret |
404 | |
405 | cfi_adjust_cfa_offset (8) |
406 | 25: fstp %st(0) |
407 | 26: addl $8, %esp |
408 | cfi_adjust_cfa_offset (-8) |
409 | 27: // Raise divide-by-zero exception and get infinity value. |
410 | fldl MO(one) |
411 | fdivl MO(zero) |
412 | ret |
413 | |
414 | cfi_adjust_cfa_offset (8) |
415 | .align ALIGNARG(4) |
416 | // x is ±0 and y is > 0. We must find out whether y is an odd integer. |
417 | 21: testb $2, %dh |
418 | jz 22f |
419 | |
420 | // fistpll raises invalid exception for |y| >= 1L<<63, but y |
421 | // may be odd unless we know |y| >= 1L<<64. |
422 | fld %st // y : y |
423 | fcompl MO(p64) // y |
424 | fnstsw |
425 | sahf |
426 | jnc 22f |
427 | fldl MO(p63) // p63 : y |
428 | fxch // y : p63 |
429 | fprem // y%p63 : p63 |
430 | fstp %st(1) // y%p63 |
431 | |
432 | fld %st // y : y |
433 | fistpll (%esp) // y |
434 | fildll (%esp) // int(y) : y |
435 | fucompp // <empty> |
436 | fnstsw |
437 | sahf |
438 | jne 23f |
439 | |
440 | // OK, the value is an integer, but is it odd? |
441 | popl %eax |
442 | cfi_adjust_cfa_offset (-4) |
443 | popl %edx |
444 | cfi_adjust_cfa_offset (-4) |
445 | andb $1, %al |
446 | jz 24f // jump if not odd |
447 | // It's an odd integer. |
448 | fldl MO(mzero) |
449 | ret |
450 | |
451 | cfi_adjust_cfa_offset (8) |
452 | 22: fstp %st(0) |
453 | 23: addl $8, %esp // Don't use 2 x pop |
454 | cfi_adjust_cfa_offset (-8) |
455 | 24: fldl MO(zero) |
456 | ret |
457 | |
458 | END(__ieee754_powl) |
459 | libm_alias_finite (__ieee754_powl, __powl) |
460 | |