1 | /* ix87 specific implementation of pow function. |
2 | Copyright (C) 1996-2022 Free Software Foundation, Inc. |
3 | This file is part of the GNU C Library. |
4 | |
5 | The GNU C Library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either |
8 | version 2.1 of the License, or (at your option) any later version. |
9 | |
10 | The GNU C Library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | Lesser General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU Lesser General Public |
16 | License along with the GNU C Library; if not, see |
17 | <https://www.gnu.org/licenses/>. */ |
18 | |
19 | #include <machine/asm.h> |
20 | #include <i386-math-asm.h> |
21 | #include <libm-alias-finite.h> |
22 | |
23 | .section .rodata.cst8,"aM" ,@progbits,8 |
24 | |
25 | .p2align 3 |
26 | .type one,@object |
27 | one: .double 1.0 |
28 | ASM_SIZE_DIRECTIVE(one) |
29 | .type limit,@object |
30 | limit: .double 0.29 |
31 | ASM_SIZE_DIRECTIVE(limit) |
32 | .type p63,@object |
33 | p63: .byte 0, 0, 0, 0, 0, 0, 0xe0, 0x43 |
34 | ASM_SIZE_DIRECTIVE(p63) |
35 | .type p10,@object |
36 | p10: .byte 0, 0, 0, 0, 0, 0, 0x90, 0x40 |
37 | ASM_SIZE_DIRECTIVE(p10) |
38 | |
39 | .section .rodata.cst16,"aM" ,@progbits,16 |
40 | |
41 | .p2align 3 |
42 | .type infinity,@object |
43 | inf_zero: |
44 | infinity: |
45 | .byte 0, 0, 0, 0, 0, 0, 0xf0, 0x7f |
46 | ASM_SIZE_DIRECTIVE(infinity) |
47 | .type zero,@object |
48 | zero: .double 0.0 |
49 | ASM_SIZE_DIRECTIVE(zero) |
50 | .type minf_mzero,@object |
51 | minf_mzero: |
52 | minfinity: |
53 | .byte 0, 0, 0, 0, 0, 0, 0xf0, 0xff |
54 | mzero: |
55 | .byte 0, 0, 0, 0, 0, 0, 0, 0x80 |
56 | ASM_SIZE_DIRECTIVE(minf_mzero) |
57 | DEFINE_DBL_MIN |
58 | |
59 | #ifdef PIC |
60 | # define MO(op) op##@GOTOFF(%ecx) |
61 | # define MOX(op,x,f) op##@GOTOFF(%ecx,x,f) |
62 | #else |
63 | # define MO(op) op |
64 | # define MOX(op,x,f) op(,x,f) |
65 | #endif |
66 | |
67 | .text |
68 | ENTRY(__ieee754_pow) |
69 | fldl 12(%esp) // y |
70 | fxam |
71 | |
72 | #ifdef PIC |
73 | LOAD_PIC_REG (cx) |
74 | #endif |
75 | |
76 | fnstsw |
77 | movb %ah, %dl |
78 | andb $0x45, %ah |
79 | cmpb $0x40, %ah // is y == 0 ? |
80 | je 11f |
81 | |
82 | cmpb $0x05, %ah // is y == ±inf ? |
83 | je 12f |
84 | |
85 | cmpb $0x01, %ah // is y == NaN ? |
86 | je 30f |
87 | |
88 | fldl 4(%esp) // x : y |
89 | |
90 | subl $8,%esp |
91 | cfi_adjust_cfa_offset (8) |
92 | |
93 | fxam |
94 | fnstsw |
95 | movb %ah, %dh |
96 | andb $0x45, %ah |
97 | cmpb $0x40, %ah |
98 | je 20f // x is ±0 |
99 | |
100 | cmpb $0x05, %ah |
101 | je 15f // x is ±inf |
102 | |
103 | cmpb $0x01, %ah |
104 | je 32f // x is NaN |
105 | |
106 | fxch // y : x |
107 | |
108 | /* fistpll raises invalid exception for |y| >= 1L<<63. */ |
109 | fld %st // y : y : x |
110 | fabs // |y| : y : x |
111 | fcompl MO(p63) // y : x |
112 | fnstsw |
113 | sahf |
114 | jnc 2f |
115 | |
116 | /* First see whether `y' is a natural number. In this case we |
117 | can use a more precise algorithm. */ |
118 | fld %st // y : y : x |
119 | fistpll (%esp) // y : x |
120 | fildll (%esp) // int(y) : y : x |
121 | fucomp %st(1) // y : x |
122 | fnstsw |
123 | sahf |
124 | jne 3f |
125 | |
126 | /* OK, we have an integer value for y. If large enough that |
127 | errors may propagate out of the 11 bits excess precision, use |
128 | the algorithm for real exponent instead. */ |
129 | fld %st // y : y : x |
130 | fabs // |y| : y : x |
131 | fcompl MO(p10) // y : x |
132 | fnstsw |
133 | sahf |
134 | jnc 2f |
135 | popl %eax |
136 | cfi_adjust_cfa_offset (-4) |
137 | popl %edx |
138 | cfi_adjust_cfa_offset (-4) |
139 | orl $0, %edx |
140 | fstp %st(0) // x |
141 | jns 4f // y >= 0, jump |
142 | fdivrl MO(one) // 1/x (now referred to as x) |
143 | negl %eax |
144 | adcl $0, %edx |
145 | negl %edx |
146 | 4: fldl MO(one) // 1 : x |
147 | fxch |
148 | |
149 | /* If y is even, take the absolute value of x. Otherwise, |
150 | ensure all intermediate values that might overflow have the |
151 | sign of x. */ |
152 | testb $1, %al |
153 | jnz 6f |
154 | fabs |
155 | |
156 | 6: shrdl $1, %edx, %eax |
157 | jnc 5f |
158 | fxch |
159 | fabs |
160 | fmul %st(1) // x : ST*x |
161 | fxch |
162 | 5: fld %st // x : x : ST*x |
163 | fabs // |x| : x : ST*x |
164 | fmulp // |x|*x : ST*x |
165 | shrl $1, %edx |
166 | movl %eax, %ecx |
167 | orl %edx, %ecx |
168 | jnz 6b |
169 | fstp %st(0) // ST*x |
170 | #ifdef PIC |
171 | LOAD_PIC_REG (cx) |
172 | #endif |
173 | DBL_NARROW_EVAL_UFLOW_NONNAN |
174 | ret |
175 | |
176 | /* y is ±NAN */ |
177 | 30: fldl 4(%esp) // x : y |
178 | fldl MO(one) // 1.0 : x : y |
179 | fucomp %st(1) // x : y |
180 | fnstsw |
181 | sahf |
182 | je 31f |
183 | fxch // y : x |
184 | 31: fstp %st(1) |
185 | ret |
186 | |
187 | cfi_adjust_cfa_offset (8) |
188 | 32: addl $8, %esp |
189 | cfi_adjust_cfa_offset (-8) |
190 | fstp %st(1) |
191 | ret |
192 | |
193 | cfi_adjust_cfa_offset (8) |
194 | .align ALIGNARG(4) |
195 | 2: // y is a large integer (absolute value at least 1L<<10), but |
196 | // may be odd unless at least 1L<<64. So it may be necessary |
197 | // to adjust the sign of a negative result afterwards. |
198 | fxch // x : y |
199 | fabs // |x| : y |
200 | fxch // y : x |
201 | .align ALIGNARG(4) |
202 | 3: /* y is a real number. */ |
203 | fxch // x : y |
204 | fldl MO(one) // 1.0 : x : y |
205 | fldl MO(limit) // 0.29 : 1.0 : x : y |
206 | fld %st(2) // x : 0.29 : 1.0 : x : y |
207 | fsub %st(2) // x-1 : 0.29 : 1.0 : x : y |
208 | fabs // |x-1| : 0.29 : 1.0 : x : y |
209 | fucompp // 1.0 : x : y |
210 | fnstsw |
211 | fxch // x : 1.0 : y |
212 | sahf |
213 | ja 7f |
214 | fsub %st(1) // x-1 : 1.0 : y |
215 | fyl2xp1 // log2(x) : y |
216 | jmp 8f |
217 | |
218 | 7: fyl2x // log2(x) : y |
219 | 8: fmul %st(1) // y*log2(x) : y |
220 | fst %st(1) // y*log2(x) : y*log2(x) |
221 | frndint // int(y*log2(x)) : y*log2(x) |
222 | fsubr %st, %st(1) // int(y*log2(x)) : fract(y*log2(x)) |
223 | fxch // fract(y*log2(x)) : int(y*log2(x)) |
224 | f2xm1 // 2^fract(y*log2(x))-1 : int(y*log2(x)) |
225 | faddl MO(one) // 2^fract(y*log2(x)) : int(y*log2(x)) |
226 | |
227 | // Before scaling, we must negate if x is negative and y is an |
228 | // odd integer. |
229 | testb $2, %dh |
230 | jz 291f |
231 | // x is negative. If y is an odd integer, negate the result. |
232 | fldl 20(%esp) // y : 2^fract(y*log2(x)) : int(y*log2(x)) |
233 | fld %st // y : y : 2^fract(y*log2(x)) : int(y*log2(x)) |
234 | fabs // |y| : y : 2^fract(y*log2(x)) : int(y*log2(x)) |
235 | fcompl MO(p63) // y : 2^fract(y*log2(x)) : int(y*log2(x)) |
236 | fnstsw |
237 | sahf |
238 | jnc 290f |
239 | |
240 | // We must find out whether y is an odd integer. |
241 | fld %st // y : y : 2^fract(y*log2(x)) : int(y*log2(x)) |
242 | fistpll (%esp) // y : 2^fract(y*log2(x)) : int(y*log2(x)) |
243 | fildll (%esp) // int(y) : y : 2^fract(y*log2(x)) : int(y*log2(x)) |
244 | fucompp // 2^fract(y*log2(x)) : int(y*log2(x)) |
245 | fnstsw |
246 | sahf |
247 | jne 291f |
248 | |
249 | // OK, the value is an integer, but is it odd? |
250 | popl %eax |
251 | cfi_adjust_cfa_offset (-4) |
252 | popl %edx |
253 | cfi_adjust_cfa_offset (-4) |
254 | andb $1, %al |
255 | jz 292f // jump if not odd |
256 | // It's an odd integer. |
257 | fchs |
258 | jmp 292f |
259 | |
260 | cfi_adjust_cfa_offset (8) |
261 | 290: fstp %st(0) // 2^fract(y*log2(x)) : int(y*log2(x)) |
262 | 291: addl $8, %esp |
263 | cfi_adjust_cfa_offset (-8) |
264 | 292: fscale // +/- 2^fract(y*log2(x))*2^int(y*log2(x)) : int(y*log2(x)) |
265 | fstp %st(1) // +/- 2^fract(y*log2(x))*2^int(y*log2(x)) |
266 | DBL_NARROW_EVAL_UFLOW_NONNAN |
267 | ret |
268 | |
269 | |
270 | // pow(x,±0) = 1 |
271 | .align ALIGNARG(4) |
272 | 11: fstp %st(0) // pop y |
273 | fldl MO(one) |
274 | ret |
275 | |
276 | // y == ±inf |
277 | .align ALIGNARG(4) |
278 | 12: fstp %st(0) // pop y |
279 | fldl MO(one) // 1 |
280 | fldl 4(%esp) // x : 1 |
281 | fabs // abs(x) : 1 |
282 | fucompp // < 1, == 1, or > 1 |
283 | fnstsw |
284 | andb $0x45, %ah |
285 | cmpb $0x45, %ah |
286 | je 13f // jump if x is NaN |
287 | |
288 | cmpb $0x40, %ah |
289 | je 14f // jump if |x| == 1 |
290 | |
291 | shlb $1, %ah |
292 | xorb %ah, %dl |
293 | andl $2, %edx |
294 | fldl MOX(inf_zero, %edx, 4) |
295 | ret |
296 | |
297 | .align ALIGNARG(4) |
298 | 14: fldl MO(one) |
299 | ret |
300 | |
301 | .align ALIGNARG(4) |
302 | 13: fldl 4(%esp) // load x == NaN |
303 | ret |
304 | |
305 | cfi_adjust_cfa_offset (8) |
306 | .align ALIGNARG(4) |
307 | // x is ±inf |
308 | 15: fstp %st(0) // y |
309 | testb $2, %dh |
310 | jz 16f // jump if x == +inf |
311 | |
312 | // fistpll raises invalid exception for |y| >= 1L<<63, so test |
313 | // that (in which case y is certainly even) before testing |
314 | // whether y is odd. |
315 | fld %st // y : y |
316 | fabs // |y| : y |
317 | fcompl MO(p63) // y |
318 | fnstsw |
319 | sahf |
320 | jnc 16f |
321 | |
322 | // We must find out whether y is an odd integer. |
323 | fld %st // y : y |
324 | fistpll (%esp) // y |
325 | fildll (%esp) // int(y) : y |
326 | fucompp // <empty> |
327 | fnstsw |
328 | sahf |
329 | jne 17f |
330 | |
331 | // OK, the value is an integer. |
332 | popl %eax |
333 | cfi_adjust_cfa_offset (-4) |
334 | popl %edx |
335 | cfi_adjust_cfa_offset (-4) |
336 | andb $1, %al |
337 | jz 18f // jump if not odd |
338 | // It's an odd integer. |
339 | shrl $31, %edx |
340 | fldl MOX(minf_mzero, %edx, 8) |
341 | ret |
342 | |
343 | cfi_adjust_cfa_offset (8) |
344 | .align ALIGNARG(4) |
345 | 16: fcompl MO(zero) |
346 | addl $8, %esp |
347 | cfi_adjust_cfa_offset (-8) |
348 | fnstsw |
349 | shrl $5, %eax |
350 | andl $8, %eax |
351 | fldl MOX(inf_zero, %eax, 1) |
352 | ret |
353 | |
354 | cfi_adjust_cfa_offset (8) |
355 | .align ALIGNARG(4) |
356 | 17: shll $30, %edx // sign bit for y in right position |
357 | addl $8, %esp |
358 | cfi_adjust_cfa_offset (-8) |
359 | 18: shrl $31, %edx |
360 | fldl MOX(inf_zero, %edx, 8) |
361 | ret |
362 | |
363 | cfi_adjust_cfa_offset (8) |
364 | .align ALIGNARG(4) |
365 | // x is ±0 |
366 | 20: fstp %st(0) // y |
367 | testb $2, %dl |
368 | jz 21f // y > 0 |
369 | |
370 | // x is ±0 and y is < 0. We must find out whether y is an odd integer. |
371 | testb $2, %dh |
372 | jz 25f |
373 | |
374 | // fistpll raises invalid exception for |y| >= 1L<<63, so test |
375 | // that (in which case y is certainly even) before testing |
376 | // whether y is odd. |
377 | fld %st // y : y |
378 | fabs // |y| : y |
379 | fcompl MO(p63) // y |
380 | fnstsw |
381 | sahf |
382 | jnc 25f |
383 | |
384 | fld %st // y : y |
385 | fistpll (%esp) // y |
386 | fildll (%esp) // int(y) : y |
387 | fucompp // <empty> |
388 | fnstsw |
389 | sahf |
390 | jne 26f |
391 | |
392 | // OK, the value is an integer. |
393 | popl %eax |
394 | cfi_adjust_cfa_offset (-4) |
395 | popl %edx |
396 | cfi_adjust_cfa_offset (-4) |
397 | andb $1, %al |
398 | jz 27f // jump if not odd |
399 | // It's an odd integer. |
400 | // Raise divide-by-zero exception and get minus infinity value. |
401 | fldl MO(one) |
402 | fdivl MO(zero) |
403 | fchs |
404 | ret |
405 | |
406 | cfi_adjust_cfa_offset (8) |
407 | 25: fstp %st(0) |
408 | 26: addl $8, %esp |
409 | cfi_adjust_cfa_offset (-8) |
410 | 27: // Raise divide-by-zero exception and get infinity value. |
411 | fldl MO(one) |
412 | fdivl MO(zero) |
413 | ret |
414 | |
415 | cfi_adjust_cfa_offset (8) |
416 | .align ALIGNARG(4) |
417 | // x is ±0 and y is > 0. We must find out whether y is an odd integer. |
418 | 21: testb $2, %dh |
419 | jz 22f |
420 | |
421 | // fistpll raises invalid exception for |y| >= 1L<<63, so test |
422 | // that (in which case y is certainly even) before testing |
423 | // whether y is odd. |
424 | fcoml MO(p63) // y |
425 | fnstsw |
426 | sahf |
427 | jnc 22f |
428 | |
429 | fld %st // y : y |
430 | fistpll (%esp) // y |
431 | fildll (%esp) // int(y) : y |
432 | fucompp // <empty> |
433 | fnstsw |
434 | sahf |
435 | jne 23f |
436 | |
437 | // OK, the value is an integer. |
438 | popl %eax |
439 | cfi_adjust_cfa_offset (-4) |
440 | popl %edx |
441 | cfi_adjust_cfa_offset (-4) |
442 | andb $1, %al |
443 | jz 24f // jump if not odd |
444 | // It's an odd integer. |
445 | fldl MO(mzero) |
446 | ret |
447 | |
448 | cfi_adjust_cfa_offset (8) |
449 | 22: fstp %st(0) |
450 | 23: addl $8, %esp // Don't use 2 x pop |
451 | cfi_adjust_cfa_offset (-8) |
452 | 24: fldl MO(zero) |
453 | ret |
454 | |
455 | END(__ieee754_pow) |
456 | libm_alias_finite (__ieee754_pow, __pow) |
457 | |