1/* Compute cubic root of long double value.
2 Copyright (C) 1997-2022 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
18
19#include <libm-alias-ldouble.h>
20#include <machine/asm.h>
21
22 .section .rodata
23
24 .align ALIGNARG(4)
25 .type f8,@object
26f8: .tfloat 0.161617097923756032
27 ASM_SIZE_DIRECTIVE(f8)
28 .align ALIGNARG(4)
29 .type f7,@object
30f7: .tfloat -0.988553671195413709
31 ASM_SIZE_DIRECTIVE(f7)
32 .align ALIGNARG(4)
33 .type f6,@object
34f6: .tfloat 2.65298938441952296
35 ASM_SIZE_DIRECTIVE(f6)
36 .align ALIGNARG(4)
37 .type f5,@object
38f5: .tfloat -4.11151425200350531
39 ASM_SIZE_DIRECTIVE(f5)
40 .align ALIGNARG(4)
41 .type f4,@object
42f4: .tfloat 4.09559907378707839
43 ASM_SIZE_DIRECTIVE(f4)
44 .align ALIGNARG(4)
45 .type f3,@object
46f3: .tfloat -2.82414939754975962
47 ASM_SIZE_DIRECTIVE(f3)
48 .align ALIGNARG(4)
49 .type f2,@object
50f2: .tfloat 1.67595307700780102
51 ASM_SIZE_DIRECTIVE(f2)
52 .align ALIGNARG(4)
53 .type f1,@object
54f1: .tfloat 0.338058687610520237
55 ASM_SIZE_DIRECTIVE(f1)
56
57#define CBRT2 1.2599210498948731648
58#define ONE_CBRT2 0.793700525984099737355196796584
59#define SQR_CBRT2 1.5874010519681994748
60#define ONE_SQR_CBRT2 0.629960524947436582364439673883
61
62 /* We make the entries in the following table all 16 bytes
63 wide to avoid having to implement a multiplication by 10. */
64 .type factor,@object
65 .align ALIGNARG(4)
66factor: .tfloat ONE_SQR_CBRT2
67 .byte 0, 0, 0, 0, 0, 0
68 .tfloat ONE_CBRT2
69 .byte 0, 0, 0, 0, 0, 0
70 .tfloat 1.0
71 .byte 0, 0, 0, 0, 0, 0
72 .tfloat CBRT2
73 .byte 0, 0, 0, 0, 0, 0
74 .tfloat SQR_CBRT2
75 ASM_SIZE_DIRECTIVE(factor)
76
77 .type two64,@object
78 .align ALIGNARG(4)
79two64: .byte 0, 0, 0, 0, 0, 0, 0xf0, 0x43
80 ASM_SIZE_DIRECTIVE(two64)
81
82#ifdef PIC
83#define MO(op) op##@GOTOFF(%ebx)
84#define MOX(op,x) op##@GOTOFF(%ebx,x,1)
85#else
86#define MO(op) op
87#define MOX(op,x) op(x)
88#endif
89
90 .text
91ENTRY(__cbrtl)
92 movl 4(%esp), %ecx
93 movl 12(%esp), %eax
94 orl 8(%esp), %ecx
95 movl %eax, %edx
96 andl $0x7fff, %eax
97 orl %eax, %ecx
98 jz 1f
99 xorl %ecx, %ecx
100 cmpl $0x7fff, %eax
101 je 1f
102
103#ifdef PIC
104 pushl %ebx
105 cfi_adjust_cfa_offset (4)
106 cfi_rel_offset (ebx, 0)
107 LOAD_PIC_REG (bx)
108#endif
109
110 cmpl $0, %eax
111 jne 2f
112
113#ifdef PIC
114 fldt 8(%esp)
115#else
116 fldt 4(%esp)
117#endif
118 fmull MO(two64)
119 movl $-64, %ecx
120#ifdef PIC
121 fstpt 8(%esp)
122 movl 16(%esp), %eax
123#else
124 fstpt 4(%esp)
125 movl 12(%esp), %eax
126#endif
127 movl %eax, %edx
128 andl $0x7fff, %eax
129
1302: andl $0x8000, %edx
131 subl $16382, %eax
132 orl $0x3ffe, %edx
133 addl %eax, %ecx
134#ifdef PIC
135 movl %edx, 16(%esp)
136
137 fldt 8(%esp) /* xm */
138#else
139 movl %edx, 12(%esp)
140
141 fldt 4(%esp) /* xm */
142#endif
143 fabs
144
145 /* The following code has two tracks:
146 a) compute the normalized cbrt value
147 b) compute xe/3 and xe%3
148 The right track computes the value for b) and this is done
149 in an optimized way by avoiding division.
150
151 But why two tracks at all? Very easy: efficiency. Some FP
152 instruction can overlap with a certain amount of integer (and
153 FP) instructions. So we get (except for the imull) all
154 instructions for free. */
155
156 fldt MO(f8) /* f8 : xm */
157 fmul %st(1) /* f8*xm : xm */
158
159 fldt MO(f7)
160 faddp /* f7+f8*xm : xm */
161 fmul %st(1) /* (f7+f8*xm)*xm : xm */
162 movl $1431655766, %eax
163 fldt MO(f6)
164 faddp /* f6+(f7+f8*xm)*xm : xm */
165 imull %ecx
166 fmul %st(1) /* (f6+(f7+f8*xm)*xm)*xm : xm */
167 movl %ecx, %eax
168 fldt MO(f5)
169 faddp /* f5+(f6+(f7+f8*xm)*xm)*xm : xm */
170 sarl $31, %eax
171 fmul %st(1) /* (f5+(f6+(f7+f8*xm)*xm)*xm)*xm : xm */
172 subl %eax, %edx
173 fldt MO(f4)
174 faddp /* f4+(f5+(f6+(f7+f8*xm)*xm)*xm)*xm : xm */
175 fmul %st(1) /* (f4+(f5+(f6+(f7+f8*xm)*xm)*xm)*xm)*xm : xm */
176 fldt MO(f3)
177 faddp /* f3+(f4+(f5+(f6+(f7+f8*xm)*xm)*xm)*xm)*xm : xm */
178 fmul %st(1) /* (f3+(f4+(f5+(f6+(f7+f8*xm)*xm)*xm)*xm)*xm)*xm : xm */
179 fldt MO(f2)
180 faddp /* f2+(f3+(f4+(f5+(f6+(f7+f8*xm)*xm)*xm)*xm)*xm)*xm : xm */
181 fmul %st(1) /* (f2+(f3+(f4+(f5+(f6+(f7+f8*xm)*xm)*xm)*xm)*xm)*xm)*xm : xm */
182 fldt MO(f1)
183 faddp /* u:=f1+(f2+(f3+(f4+(f5+(f6+(f7+f8*xm)*xm)*xm)*xm)*xm)*xm)*xm : xm */
184
185 fld %st /* u : u : xm */
186 fmul %st(1) /* u*u : u : xm */
187 fld %st(2) /* xm : u*u : u : xm */
188 fadd %st /* 2*xm : u*u : u : xm */
189 fxch %st(1) /* u*u : 2*xm : u : xm */
190 fmul %st(2) /* t2:=u*u*u : 2*xm : u : xm */
191 movl %edx, %eax
192 fadd %st, %st(1) /* t2 : t2+2*xm : u : xm */
193 leal (%edx,%edx,2),%edx
194 fadd %st(0) /* 2*t2 : t2+2*xm : u : xm */
195 subl %edx, %ecx
196 faddp %st, %st(3) /* t2+2*xm : u : 2*t2+xm */
197 shll $4, %ecx
198 fmulp /* u*(t2+2*xm) : 2*t2+xm */
199 fdivp %st, %st(1) /* u*(t2+2*xm)/(2*t2+xm) */
200 fldt MOX(32+factor,%ecx)
201 fmulp /* u*(t2+2*xm)/(2*t2+xm)*FACT */
202 pushl %eax
203 cfi_adjust_cfa_offset (4)
204 fildl (%esp) /* xe/3 : u*(t2+2*xm)/(2*t2+xm)*FACT */
205 fxch /* u*(t2+2*xm)/(2*t2+xm)*FACT : xe/3 */
206 fscale /* u*(t2+2*xm)/(2*t2+xm)*FACT*2^xe/3 */
207 popl %edx
208 cfi_adjust_cfa_offset (-4)
209#ifdef PIC
210 movl 16(%esp), %eax
211 popl %ebx
212 cfi_adjust_cfa_offset (-4)
213 cfi_restore (ebx)
214#else
215 movl 12(%esp), %eax
216#endif
217 testl $0x8000, %eax
218 fstp %st(1)
219 jz 4f
220 fchs
2214: ret
222
223 /* Return the argument. */
2241: fldt 4(%esp)
225 fadd %st
226 ret
227END(__cbrtl)
228libm_alias_ldouble (__cbrt, cbrt)
229

source code of glibc/sysdeps/i386/fpu/s_cbrtl.S