1/* Compute cubic root of float value.
2 Copyright (C) 1997-2022 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
18
19#include <machine/asm.h>
20#include <libm-alias-float.h>
21
22 .section .rodata
23
24 .align ALIGNARG(4)
25 .type f3,@object
26f3: .double 0.191502161678719066
27 ASM_SIZE_DIRECTIVE(f3)
28 .type f2,@object
29f2: .double 0.697570460207922770
30 ASM_SIZE_DIRECTIVE(f2)
31 .type f1,@object
32f1: .double 0.492659620528969547
33 ASM_SIZE_DIRECTIVE(f1)
34
35#define CBRT2 1.2599210498948731648
36#define ONE_CBRT2 0.793700525984099737355196796584
37#define SQR_CBRT2 1.5874010519681994748
38#define ONE_SQR_CBRT2 0.629960524947436582364439673883
39
40 .type factor,@object
41 .align ALIGNARG(4)
42factor: .double ONE_SQR_CBRT2
43 .double ONE_CBRT2
44 .double 1.0
45 .double CBRT2
46 .double SQR_CBRT2
47 ASM_SIZE_DIRECTIVE(factor)
48
49 .type two25,@object
50two25: .byte 0, 0, 0, 0x4c
51 ASM_SIZE_DIRECTIVE(two25)
52
53#ifdef PIC
54#define MO(op) op##@GOTOFF(%ebx)
55#define MOX(op,x) op##@GOTOFF(%ebx,x,1)
56#else
57#define MO(op) op
58#define MOX(op,x) op(x)
59#endif
60
61 .text
62ENTRY(__cbrtf)
63 movl 4(%esp), %eax
64 xorl %ecx, %ecx
65 movl %eax, %edx
66 andl $0x7fffffff, %eax
67 jz 1f
68 cmpl $0x7f800000, %eax
69 jae 1f
70
71#ifdef PIC
72 pushl %ebx
73 cfi_adjust_cfa_offset (4)
74 cfi_rel_offset (ebx, 0)
75 LOAD_PIC_REG (bx)
76#endif
77
78 cmpl $0x00800000, %eax
79 jae 2f
80
81#ifdef PIC
82 flds 8(%esp)
83#else
84 flds 4(%esp)
85#endif
86 fmuls MO(two25)
87 movl $-25, %ecx
88#ifdef PIC
89 fstps 8(%esp)
90 movl 8(%esp), %eax
91#else
92 fstps 4(%esp)
93 movl 4(%esp), %eax
94#endif
95 movl %eax, %edx
96 andl $0x7fffffff, %eax
97
982: shrl $23, %eax
99 andl $0x807fffff, %edx
100 subl $126, %eax
101 orl $0x3f000000, %edx
102 addl %eax, %ecx
103#ifdef PIC
104 movl %edx, 8(%esp)
105
106 flds 8(%esp) /* xm */
107#else
108 movl %edx, 4(%esp)
109
110 flds 4(%esp) /* xm */
111#endif
112 fabs
113
114 /* The following code has two tracks:
115 a) compute the normalized cbrt value
116 b) compute xe/3 and xe%3
117 The right track computes the value for b) and this is done
118 in an optimized way by avoiding division.
119
120 But why two tracks at all? Very easy: efficiency. Some FP
121 instruction can overlap with a certain amount of integer (and
122 FP) instructions. So we get (except for the imull) all
123 instructions for free. */
124
125 fld %st(0) /* xm : xm */
126 fmull MO(f3) /* f3*xm : xm */
127 movl $1431655766, %eax
128 fsubrl MO(f2) /* f2-f3*xm : xm */
129 imull %ecx
130 fmul %st(1) /* (f2-f3*xm)*xm : xm */
131 movl %ecx, %eax
132 faddl MO(f1) /* u:=f1+(f2-f3*xm)*xm : xm */
133 sarl $31, %eax
134 fld %st /* u : u : xm */
135 subl %eax, %edx
136 fmul %st(1) /* u*u : u : xm */
137 fld %st(2) /* xm : u*u : u : xm */
138 fadd %st /* 2*xm : u*u : u : xm */
139 fxch %st(1) /* u*u : 2*xm : u : xm */
140 fmul %st(2) /* t2:=u*u*u : 2*xm : u : xm */
141 movl %edx, %eax
142 fadd %st, %st(1) /* t2 : t2+2*xm : u : xm */
143 leal (%edx,%edx,2),%edx
144 fadd %st(0) /* 2*t2 : t2+2*xm : u : xm */
145 subl %edx, %ecx
146 faddp %st, %st(3) /* t2+2*xm : u : 2*t2+xm */
147 shll $3, %ecx
148 fmulp /* u*(t2+2*xm) : 2*t2+xm */
149 fdivp %st, %st(1) /* u*(t2+2*xm)/(2*t2+xm) */
150 fmull MOX(16+factor,%ecx) /* u*(t2+2*xm)/(2*t2+xm)*FACT */
151 pushl %eax
152 cfi_adjust_cfa_offset (4)
153 fildl (%esp) /* xe/3 : u*(t2+2*xm)/(2*t2+xm)*FACT */
154 fxch /* u*(t2+2*xm)/(2*t2+xm)*FACT : xe/3 */
155 fscale /* u*(t2+2*xm)/(2*t2+xm)*FACT*2^xe/3 */
156 popl %edx
157 cfi_adjust_cfa_offset (-4)
158#ifdef PIC
159 movl 8(%esp), %eax
160 popl %ebx
161 cfi_adjust_cfa_offset (-4)
162 cfi_restore (ebx)
163#else
164 movl 4(%esp), %eax
165#endif
166 testl %eax, %eax
167 fstp %st(1)
168 jns 4f
169 fchs
1704: ret
171
172 /* Return the argument. */
1731: flds 4(%esp)
174 ret
175END(__cbrtf)
176libm_alias_float (__cbrt, cbrt)
177

source code of glibc/sysdeps/i386/fpu/s_cbrtf.S