1/* Compute cubic root of double value.
2 Copyright (C) 1997-2022 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
18
19#include <machine/asm.h>
20#include <libm-alias-double.h>
21
22 .section .rodata
23
24 .align ALIGNARG(4)
25 .type f7,@object
26f7: .double -0.145263899385486377
27 ASM_SIZE_DIRECTIVE(f7)
28 .type f6,@object
29f6: .double 0.784932344976639262
30 ASM_SIZE_DIRECTIVE(f6)
31 .type f5,@object
32f5: .double -1.83469277483613086
33 ASM_SIZE_DIRECTIVE(f5)
34 .type f4,@object
35f4: .double 2.44693122563534430
36 ASM_SIZE_DIRECTIVE(f4)
37 .type f3,@object
38f3: .double -2.11499494167371287
39 ASM_SIZE_DIRECTIVE(f3)
40 .type f2,@object
41f2: .double 1.50819193781584896
42 ASM_SIZE_DIRECTIVE(f2)
43 .type f1,@object
44f1: .double 0.354895765043919860
45 ASM_SIZE_DIRECTIVE(f1)
46
47#define CBRT2 1.2599210498948731648
48#define ONE_CBRT2 0.793700525984099737355196796584
49#define SQR_CBRT2 1.5874010519681994748
50#define ONE_SQR_CBRT2 0.629960524947436582364439673883
51
52 .type factor,@object
53factor: .double ONE_SQR_CBRT2
54 .double ONE_CBRT2
55 .double 1.0
56 .double CBRT2
57 .double SQR_CBRT2
58 ASM_SIZE_DIRECTIVE(factor)
59
60 .type two54,@object
61two54: .byte 0, 0, 0, 0, 0, 0, 0x50, 0x43
62 ASM_SIZE_DIRECTIVE(two54)
63
64#ifdef PIC
65#define MO(op) op##@GOTOFF(%ebx)
66#define MOX(op,x) op##@GOTOFF(%ebx,x,1)
67#else
68#define MO(op) op
69#define MOX(op,x) op(x)
70#endif
71
72 .text
73ENTRY(__cbrt)
74 movl 4(%esp), %ecx
75 movl 8(%esp), %eax
76 movl %eax, %edx
77 andl $0x7fffffff, %eax
78 orl %eax, %ecx
79 jz 1f
80 xorl %ecx, %ecx
81 cmpl $0x7ff00000, %eax
82 jae 1f
83
84#ifdef PIC
85 pushl %ebx
86 cfi_adjust_cfa_offset (4)
87 cfi_rel_offset (ebx, 0)
88 LOAD_PIC_REG (bx)
89#endif
90
91 cmpl $0x00100000, %eax
92 jae 2f
93
94#ifdef PIC
95 fldl 8(%esp)
96#else
97 fldl 4(%esp)
98#endif
99 fmull MO(two54)
100 movl $-54, %ecx
101#ifdef PIC
102 fstpl 8(%esp)
103 movl 12(%esp), %eax
104#else
105 fstpl 4(%esp)
106 movl 8(%esp), %eax
107#endif
108 movl %eax, %edx
109 andl $0x7fffffff, %eax
110
1112: shrl $20, %eax
112 andl $0x800fffff, %edx
113 subl $1022, %eax
114 orl $0x3fe00000, %edx
115 addl %eax, %ecx
116#ifdef PIC
117 movl %edx, 12(%esp)
118
119 fldl 8(%esp) /* xm */
120#else
121 movl %edx, 8(%esp)
122
123 fldl 4(%esp) /* xm */
124#endif
125 fabs
126
127 /* The following code has two tracks:
128 a) compute the normalized cbrt value
129 b) compute xe/3 and xe%3
130 The right track computes the value for b) and this is done
131 in an optimized way by avoiding division.
132
133 But why two tracks at all? Very easy: efficiency. Some FP
134 instruction can overlap with a certain amount of integer (and
135 FP) instructions. So we get (except for the imull) all
136 instructions for free. */
137
138 fld %st(0) /* xm : xm */
139
140 fmull MO(f7) /* f7*xm : xm */
141 movl $1431655766, %eax
142 faddl MO(f6) /* f6+f7*xm : xm */
143 imull %ecx
144 fmul %st(1) /* (f6+f7*xm)*xm : xm */
145 movl %ecx, %eax
146 faddl MO(f5) /* f5+(f6+f7*xm)*xm : xm */
147 sarl $31, %eax
148 fmul %st(1) /* (f5+(f6+f7*xm)*xm)*xm : xm */
149 subl %eax, %edx
150 faddl MO(f4) /* f4+(f5+(f6+f7*xm)*xm)*xm : xm */
151 fmul %st(1) /* (f4+(f5+(f6+f7*xm)*xm)*xm)*xm : xm */
152 faddl MO(f3) /* f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm : xm */
153 fmul %st(1) /* (f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm)*xm : xm */
154 faddl MO(f2) /* f2+(f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm)*xm : xm */
155 fmul %st(1) /* (f2+(f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm)*xm)*xm : xm */
156 faddl MO(f1) /* u:=f1+(f2+(f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm)*xm)*xm : xm */
157
158 fld %st /* u : u : xm */
159 fmul %st(1) /* u*u : u : xm */
160 fld %st(2) /* xm : u*u : u : xm */
161 fadd %st /* 2*xm : u*u : u : xm */
162 fxch %st(1) /* u*u : 2*xm : u : xm */
163 fmul %st(2) /* t2:=u*u*u : 2*xm : u : xm */
164 movl %edx, %eax
165 fadd %st, %st(1) /* t2 : t2+2*xm : u : xm */
166 leal (%edx,%edx,2),%edx
167 fadd %st(0) /* 2*t2 : t2+2*xm : u : xm */
168 subl %edx, %ecx
169 faddp %st, %st(3) /* t2+2*xm : u : 2*t2+xm */
170 shll $3, %ecx
171 fmulp /* u*(t2+2*xm) : 2*t2+xm */
172 fdivp %st, %st(1) /* u*(t2+2*xm)/(2*t2+xm) */
173 fmull MOX(16+factor,%ecx) /* u*(t2+2*xm)/(2*t2+xm)*FACT */
174 pushl %eax
175 cfi_adjust_cfa_offset (4)
176 fildl (%esp) /* xe/3 : u*(t2+2*xm)/(2*t2+xm)*FACT */
177 fxch /* u*(t2+2*xm)/(2*t2+xm)*FACT : xe/3 */
178 fscale /* u*(t2+2*xm)/(2*t2+xm)*FACT*2^xe/3 */
179 popl %edx
180 cfi_adjust_cfa_offset (-4)
181#ifdef PIC
182 movl 12(%esp), %eax
183 popl %ebx
184 cfi_adjust_cfa_offset (-4)
185 cfi_restore (ebx)
186#else
187 movl 8(%esp), %eax
188#endif
189 testl %eax, %eax
190 fstp %st(1)
191 jns 4f
192 fchs
1934: ret
194
195 /* Return the argument. */
1961: fldl 4(%esp)
197 ret
198END(__cbrt)
199libm_alias_double (__cbrt, cbrt)
200

source code of glibc/sysdeps/i386/fpu/s_cbrt.S