1 | /* Function atan2 vectorized with AVX2. |
2 | Copyright (C) 2021-2024 Free Software Foundation, Inc. |
3 | This file is part of the GNU C Library. |
4 | |
5 | The GNU C Library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either |
8 | version 2.1 of the License, or (at your option) any later version. |
9 | |
10 | The GNU C Library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | Lesser General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU Lesser General Public |
16 | License along with the GNU C Library; if not, see |
17 | https://www.gnu.org/licenses/. */ |
18 | |
19 | /* |
20 | * ALGORITHM DESCRIPTION: |
21 | * For 0.0 <= x <= 7.0/16.0: atan(x) = atan(0.0) + atan(s), where s=(x-0.0)/(1.0+0.0*x) |
22 | * For 7.0/16.0 <= x <= 11.0/16.0: atan(x) = atan(0.5) + atan(s), where s=(x-0.5)/(1.0+0.5*x) |
23 | * For 11.0/16.0 <= x <= 19.0/16.0: atan(x) = atan(1.0) + atan(s), where s=(x-1.0)/(1.0+1.0*x) |
24 | * For 19.0/16.0 <= x <= 39.0/16.0: atan(x) = atan(1.5) + atan(s), where s=(x-1.5)/(1.0+1.5*x) |
25 | * For 39.0/16.0 <= x <= inf : atan(x) = atan(inf) + atan(s), where s=-1.0/x |
 *      Where atan(s) ~= s+s^3*Poly11(s^2) on interval |s|<7.0/16.0.
27 | * |
28 | * |
29 | */ |
30 | |
/* Offsets for data table __svml_datan2_data_internal.
   Each entry is one 32-byte YMM vector: 4 broadcast doubles for the d*
   entries, 8 broadcast 32-bit words for the iCHK_* entries.  The order
   and 32-byte stride must match the .rodata layout at the end of this
   file exactly.  */
#define dPI				0
#define dPIO2				32
#define dA19				64
#define dA18				96
#define dA17				128
#define dA16				160
#define dA15				192
#define dA14				224
#define dA13				256
#define dA12				288
#define dA11				320
#define dA10				352
#define dA09				384
#define dA08				416
#define dA07				448
#define dA06				480
#define dA05				512
#define dA04				544
#define dA03				576
#define dA02				608
#define dA01				640
#define dA00				672
#define dSIGN_MASK			704
#define iCHK_WORK_SUB			736
#define iCHK_WORK_CMP			768
#define dABS_MASK			800
#define dZERO				832
60 | |
61 | #include <sysdep.h> |
62 | |
	.section .text.avx2, "ax" , @progbits
/* __m256d _ZGVdN4vv_atan2_avx2 (__m256d y, __m256d x)
   SysV AMD64 vector ABI: ymm0 = y (4 doubles), ymm1 = x (4 doubles);
   returns atan2 (y, x) lane-wise in ymm0.
   Main path computes atan(a/b) via a degree-19 odd polynomial plus
   quadrant fixups; lanes with huge/tiny/special encodings fall back to
   scalar atan2 via the callout below.  */
ENTRY(_ZGVdN4vv_atan2_avx2)
	pushq	%rbp
	cfi_def_cfa_offset(16)
	movq	%rsp, %rbp
	cfi_def_cfa(6, 16)
	cfi_offset(6, -16)
	/* Align stack to 32 bytes and reserve 128 bytes of scratch:
	   (%rsp) = saved y, 32(%rsp) = denominator b (reused as y copy in
	   the callout), 64(%rsp) = saved x, 96(%rsp) = result.  */
	andq	$-32, %rsp
	subq	$128, %rsp
	xorl	%edx, %edx		/* edx = special-input lane mask (0 = none).  */

	/*
	 * #define NO_VECTOR_ZERO_ATAN2_ARGS
	 *  Declarations
	 * Variables
	 * Constants
	 *  The end of declarations
	 *  Implementation
	 * Get r0~=1/B
	 * Cannot be replaced by VQRCP(D, dR0, dB);
	 * Argument Absolute values
	 */
	vmovupd	dABS_MASK+__svml_datan2_data_internal(%rip), %ymm5

	/* Argument signs */
	vmovupd	dSIGN_MASK+__svml_datan2_data_internal(%rip), %ymm4
	vmovups	iCHK_WORK_SUB+__svml_datan2_data_internal(%rip), %xmm13
	vmovupd	%ymm0, (%rsp)		/* Save y for the callout path.  */
	vmovapd	%ymm1, %ymm8		/* ymm8 = x, live through the whole function.  */
	vandpd	%ymm5, %ymm8, %ymm2	/* ymm2 = |x|.  */
	vandpd	%ymm5, %ymm0, %ymm1	/* ymm1 = |y|.  */
	vcmpnlt_uqpd %ymm2, %ymm1, %ymm15 /* Mask: |y| >= |x| (or unordered).  */

	/*
	 * 1) If y<x then a= y, b=x, PIO2=0
	 * 2) If y>x then a=-x, b=y, PIO2=Pi/2
	 */
	vorpd	%ymm4, %ymm2, %ymm6	/* ymm6 = -|x| (set sign bit).  */
	vblendvpd %ymm15, %ymm6, %ymm1, %ymm3 /* a = |y|>=|x| ? -|x| : |y|.  */
	vblendvpd %ymm15, %ymm1, %ymm2, %ymm6 /* b = |y|>=|x| ? |y| : |x|.  */
	vdivpd	%ymm6, %ymm3, %ymm14	/* R = a / b, reduced argument.  */
	vmovups	iCHK_WORK_CMP+__svml_datan2_data_internal(%rip), %xmm3
	vmovupd	%ymm6, 32(%rsp)		/* Save b for the zero-denominator test.  */
	vandpd	%ymm4, %ymm0, %ymm7	/* ymm7 = sign(y).  */
	vandpd	%ymm4, %ymm8, %ymm5	/* ymm5 = sign(x).  */
	vandpd	dPIO2+__svml_datan2_data_internal(%rip), %ymm15, %ymm4 /* Pi/2 where |y|>=|x|, else 0.  */

	/* Check if y and x are on main path: compare the biased high
	   32-bit words of |x| and |y| against the working range.  */
	vextractf128 $1, %ymm2, %xmm9
	vextractf128 $1, %ymm1, %xmm10
	vshufps	$221, %xmm9, %xmm2, %xmm11  /* Pack high dwords of |x| lanes.  */
	vshufps	$221, %xmm10, %xmm1, %xmm12 /* Pack high dwords of |y| lanes.  */
	vpsubd	%xmm13, %xmm11, %xmm0
	vpsubd	%xmm13, %xmm12, %xmm9
	vpcmpgtd %xmm3, %xmm0, %xmm15
	vpcmpeqd %xmm3, %xmm0, %xmm6
	vpcmpgtd %xmm3, %xmm9, %xmm10
	vpcmpeqd %xmm3, %xmm9, %xmm3
	vpor	%xmm6, %xmm15, %xmm11	/* |x| out of range.  */
	vpor	%xmm3, %xmm10, %xmm12	/* |y| out of range.  */

	/* Polynomial: evaluated in four interleaved chains stepping by
	   R^8, recombined below with R^2 and R^4 (Estrin-style).  */
	vmulpd	%ymm14, %ymm14, %ymm10	/* ymm10 = R^2.  */
	vpor	%xmm12, %xmm11, %xmm3	/* Combined out-of-range lane mask.  */
	vmovupd	dA18+__svml_datan2_data_internal(%rip), %ymm9
	vmovupd	dA17+__svml_datan2_data_internal(%rip), %ymm12
	vmovupd	dA16+__svml_datan2_data_internal(%rip), %ymm15
	vmulpd	%ymm10, %ymm10, %ymm11	/* ymm11 = R^4.  */

	/* if x<0, dPI = Pi, else dPI =0 */
	vcmple_oqpd dZERO+__svml_datan2_data_internal(%rip), %ymm8, %ymm13 /* Mask: x <= 0.  */
	vmovmskps %xmm3, %eax		/* eax = out-of-range lane bits.  */
	vmulpd	%ymm11, %ymm11, %ymm0	/* ymm0 = R^8.  */
	vandpd	__svml_datan2_data_internal(%rip), %ymm13, %ymm6 /* ymm6 = Pi where x<=0 (dPI is at offset 0).  */
	vmovupd	dA19+__svml_datan2_data_internal(%rip), %ymm13
	vfmadd213pd dA14+__svml_datan2_data_internal(%rip), %ymm0, %ymm9  /* Chain A18,A14,A10,A06,A02.  */
	vfmadd213pd dA13+__svml_datan2_data_internal(%rip), %ymm0, %ymm12 /* Chain A17,A13,A09,A05,A01.  */
	vfmadd213pd dA12+__svml_datan2_data_internal(%rip), %ymm0, %ymm15 /* Chain A16,A12,A08,A04.  */
	vfmadd213pd dA15+__svml_datan2_data_internal(%rip), %ymm0, %ymm13 /* Chain A19,A15,A11,A07,A03.  */
	vfmadd213pd dA10+__svml_datan2_data_internal(%rip), %ymm0, %ymm9
	vfmadd213pd dA09+__svml_datan2_data_internal(%rip), %ymm0, %ymm12
	vfmadd213pd dA08+__svml_datan2_data_internal(%rip), %ymm0, %ymm15
	vfmadd213pd dA11+__svml_datan2_data_internal(%rip), %ymm0, %ymm13
	vfmadd213pd dA06+__svml_datan2_data_internal(%rip), %ymm0, %ymm9
	vfmadd213pd dA05+__svml_datan2_data_internal(%rip), %ymm0, %ymm12
	vfmadd213pd dA04+__svml_datan2_data_internal(%rip), %ymm0, %ymm15
	vfmadd213pd dA07+__svml_datan2_data_internal(%rip), %ymm0, %ymm13
	vfmadd213pd dA02+__svml_datan2_data_internal(%rip), %ymm0, %ymm9
	vfmadd213pd dA01+__svml_datan2_data_internal(%rip), %ymm0, %ymm12
	vfmadd213pd dA03+__svml_datan2_data_internal(%rip), %ymm0, %ymm13

	/* A00=1.0, account for it later VQFMA(D, dP4, dP4, dR8, dA00); */
	vmulpd	%ymm15, %ymm0, %ymm0	/* ymm0 = R^8 * chain(A16..A04).  */
	vfmadd213pd %ymm9, %ymm10, %ymm13  /* Merge chains with R^2...  */
	vfmadd213pd %ymm0, %ymm10, %ymm12
	vfmadd213pd %ymm12, %ymm11, %ymm13 /* ...and R^4: ymm13 = P(R^2).  */

	/*
	 * Reconstruction.
	 * dP=(R+R*dP) + dPIO2
	 */
	vfmadd213pd %ymm14, %ymm14, %ymm13 /* atan(R) ~= R + R*P.  */
	vaddpd	%ymm13, %ymm4, %ymm14	/* + Pi/2 term for swapped lanes.  */
	vorpd	%ymm5, %ymm14, %ymm0	/* Apply sign(x).  */
	vaddpd	%ymm0, %ymm6, %ymm9	/* + Pi where x<=0.  */
	vorpd	%ymm7, %ymm9, %ymm0	/* Apply sign(y): main-path result.  */

	/* Special branch for fast (vector) processing of zero arguments */
	testl	%eax, %eax

	/* Go to auxiliary branch */
	jne	L(AUX_BRANCH)
	# LOE rbx r12 r13 r14 r15 edx xmm3 ymm0 ymm1 ymm2 ymm4 ymm5 ymm6 ymm7 ymm8

	/* Return from auxiliary branch
	 * for out of main path inputs
	 */

L(AUX_BRANCH_RETURN):
	/*
	 * Special branch for fast (vector) processing of zero arguments
	 *  The end of implementation
	 */
	testl	%edx, %edx

	/* Go to special inputs processing branch */
	jne	L(SPECIAL_VALUES_BRANCH)
	# LOE rbx r12 r13 r14 r15 edx ymm0 ymm8

	/* Restore registers
	 * and exit the function
	 */

L(EXIT):
	movq	%rbp, %rsp
	popq	%rbp
	cfi_def_cfa(7, 8)
	cfi_restore(6)
	ret
	/* Re-establish frame CFI for the out-of-line code below.  */
	cfi_def_cfa(6, 16)
	cfi_offset(6, -16)

	/* Branch to process
	 * special inputs
	 */

L(SPECIAL_VALUES_BRANCH):
	/* Spill y/x/result so lanes can be reprocessed by scalar atan2.  */
	vmovupd	(%rsp), %ymm1
	vmovupd	%ymm8, 64(%rsp)
	vmovupd	%ymm0, 96(%rsp)
	vmovupd	%ymm1, 32(%rsp)
	# LOE rbx r12 r13 r14 r15 edx ymm0

	xorl	%eax, %eax		/* eax = lane index, starting at 0.  */
	# LOE rbx r12 r13 r14 r15 eax edx

	vzeroupper			/* Clean AVX upper state before the PLT call.  */
	movq	%r12, 16(%rsp)
	/* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -112; DW_OP_plus)  */
	.cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x90, 0xff, 0xff, 0xff, 0x22
	movl	%eax, %r12d		/* r12d = current lane.  */
	movq	%r13, 8(%rsp)
	/* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -120; DW_OP_plus)  */
	.cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x88, 0xff, 0xff, 0xff, 0x22
	movl	%edx, %r13d		/* r13d = special-lane bit mask.  */
	movq	%r14, (%rsp)
	/* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -128; DW_OP_plus)  */
	.cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x80, 0xff, 0xff, 0xff, 0x22
	# LOE rbx r15 r12d r13d

	/* Range mask
	 * bits check
	 */

L(RANGEMASK_CHECK):
	btl	%r12d, %r13d		/* CF = 1 if this lane needs scalar handling.  */

	/* Call scalar math function */
	jc	L(SCALAR_MATH_CALL)
	# LOE rbx r15 r12d r13d

	/* Special inputs
	 * processing loop
	 */

L(SPECIAL_VALUES_LOOP):
	incl	%r12d
	cmpl	$4, %r12d		/* 4 double lanes in a YMM vector.  */

	/* Check bits in range mask */
	jl	L(RANGEMASK_CHECK)
	# LOE rbx r15 r12d r13d

	movq	16(%rsp), %r12
	cfi_restore(12)
	movq	8(%rsp), %r13
	cfi_restore(13)
	movq	(%rsp), %r14
	cfi_restore(14)
	vmovupd	96(%rsp), %ymm0		/* Reload merged result vector.  */

	/* Go to exit */
	jmp	L(EXIT)
	/* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -112; DW_OP_plus)  */
	.cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x90, 0xff, 0xff, 0xff, 0x22
	/* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -120; DW_OP_plus)  */
	.cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x88, 0xff, 0xff, 0xff, 0x22
	/* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -128; DW_OP_plus)  */
	.cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x80, 0xff, 0xff, 0xff, 0x22
	# LOE rbx r12 r13 r14 r15 ymm0

	/* Scalar math function call
	 * to process special input
	 */

L(SCALAR_MATH_CALL):
	movl	%r12d, %r14d
	vmovsd	32(%rsp, %r14, 8), %xmm0 /* xmm0 = y[lane].  */
	vmovsd	64(%rsp, %r14, 8), %xmm1 /* xmm1 = x[lane].  */
	call	atan2@PLT		/* Scalar libm atan2 (y, x).  */
	# LOE rbx r14 r15 r12d r13d xmm0

	vmovsd	%xmm0, 96(%rsp, %r14, 8) /* Patch the lane in the result.  */

	/* Process special inputs in loop */
	jmp	L(SPECIAL_VALUES_LOOP)
	cfi_restore(12)
	cfi_restore(13)
	cfi_restore(14)
	# LOE rbx r15 r12d r13d

	/* Auxiliary branch
	 * for out of main path inputs
	 */

L(AUX_BRANCH):
	vmovupd	(%rsp), %ymm11		/* Reload original y.  */

	/* Check if at least one of Y or X is zero: iAXAYZERO */
	vmovupd	dZERO+__svml_datan2_data_internal(%rip), %ymm10

	/* Check if both X & Y are not NaNs: iXYnotNAN */
	vcmpordpd %ymm8, %ymm8, %ymm12	/* x == x (not NaN).  */
	vcmpordpd %ymm11, %ymm11, %ymm13 /* y == y (not NaN).  */
	vcmpeqpd %ymm10, %ymm2, %ymm2	/* |x| == 0.  */
	vcmpeqpd %ymm10, %ymm1, %ymm1	/* |y| == 0.  */
	vandpd	%ymm13, %ymm12, %ymm14	/* Both not NaN.  */
	vorpd	%ymm1, %ymm2, %ymm2	/* At least one arg is zero.  */
	vextractf128 $1, %ymm14, %xmm15
	vextractf128 $1, %ymm2, %xmm11
	vshufps	$221, %xmm15, %xmm14, %xmm9  /* Pack notNAN to 32-bit lanes.  */
	vshufps	$221, %xmm11, %xmm2, %xmm12  /* Pack zero-mask to 32-bit lanes.  */

	/*
	 * Path for zero arguments (at least one of both)
	 * Check if both args are zeros (den. is zero)
	 */
	vcmpeqpd 32(%rsp), %ymm10, %ymm2 /* Saved denominator b == 0.  */

	/* Check if at least one of Y or X is zero and not NaN: iAXAYZEROnotNAN */
	vpand	%xmm9, %xmm12, %xmm1

	/* Exclude from previous callout mask zero (and not NaN) arguments */
	vpandn	%xmm3, %xmm1, %xmm3

	/* Go to callout */
	vmovmskps %xmm3, %edx		/* edx = lanes still needing scalar atan2.  */

	/* Set sPIO2 to zero if den. is zero */
	vblendvpd %ymm2, %ymm10, %ymm4, %ymm4
	vorpd	%ymm5, %ymm4, %ymm5	/* Apply sign(x) to the Pi/2 term.  */

	/* Res = sign(Y)*(X<0)?(PIO2+PI):PIO2 */
	vextractf128 $1, %ymm10, %xmm2
	vextractf128 $1, %ymm8, %xmm3
	vshufps	$221, %xmm2, %xmm10, %xmm4
	vshufps	$221, %xmm3, %xmm8, %xmm9
	vpcmpgtd %xmm9, %xmm4, %xmm12	/* 0 > high word of x: x < 0.  */
	vpshufd	$80, %xmm12, %xmm11	/* Widen 32-bit mask back to ...  */
	vpshufd	$250, %xmm12, %xmm13	/* ... 64-bit lanes.  */
	vinsertf128 $1, %xmm13, %ymm11, %ymm14
	vandpd	%ymm6, %ymm14, %ymm6	/* Keep Pi term only where x < 0.  */
	vaddpd	%ymm6, %ymm5, %ymm2
	vorpd	%ymm7, %ymm2, %ymm2	/* Apply sign(y): zero-arg result.  */

	/* Merge results from main and spec path */
	vpshufd	$80, %xmm1, %xmm7	/* Widen zero-arg mask to 64-bit lanes.  */
	vpshufd	$250, %xmm1, %xmm1
	vinsertf128 $1, %xmm1, %ymm7, %ymm3
	vblendvpd %ymm3, %ymm2, %ymm0, %ymm0

	/* Return to main vector processing path */
	jmp	L(AUX_BRANCH_RETURN)
	# LOE rbx r12 r13 r14 r15 edx ymm0 ymm8
END(_ZGVdN4vv_atan2_avx2)
358 | |
	/* Constant table for the AVX2 atan2 kernel above.  Each row is one
	   32-byte YMM vector of broadcast values; row order and 32-byte
	   spacing must match the #define offsets at the top of the file.  */
	.section .rodata, "a"
	.align	32

#ifdef __svml_datan2_data_internal_typedef
typedef unsigned int VUINT32;
typedef struct {
	__declspec(align(32)) VUINT32 dPI[4][2];
	__declspec(align(32)) VUINT32 dPIO2[4][2];
	__declspec(align(32)) VUINT32 dA19[4][2];
	__declspec(align(32)) VUINT32 dA18[4][2];
	__declspec(align(32)) VUINT32 dA17[4][2];
	__declspec(align(32)) VUINT32 dA16[4][2];
	__declspec(align(32)) VUINT32 dA15[4][2];
	__declspec(align(32)) VUINT32 dA14[4][2];
	__declspec(align(32)) VUINT32 dA13[4][2];
	__declspec(align(32)) VUINT32 dA12[4][2];
	__declspec(align(32)) VUINT32 dA11[4][2];
	__declspec(align(32)) VUINT32 dA10[4][2];
	__declspec(align(32)) VUINT32 dA09[4][2];
	__declspec(align(32)) VUINT32 dA08[4][2];
	__declspec(align(32)) VUINT32 dA07[4][2];
	__declspec(align(32)) VUINT32 dA06[4][2];
	__declspec(align(32)) VUINT32 dA05[4][2];
	__declspec(align(32)) VUINT32 dA04[4][2];
	__declspec(align(32)) VUINT32 dA03[4][2];
	__declspec(align(32)) VUINT32 dA02[4][2];
	__declspec(align(32)) VUINT32 dA01[4][2];
	__declspec(align(32)) VUINT32 dA00[4][2];
	__declspec(align(32)) VUINT32 dSIGN_MASK[4][2];
	__declspec(align(32)) VUINT32 iCHK_WORK_SUB[8][1];
	__declspec(align(32)) VUINT32 iCHK_WORK_CMP[8][1];
	__declspec(align(32)) VUINT32 dABS_MASK[4][2];
	__declspec(align(32)) VUINT32 dZERO[4][2];
} __svml_datan2_data_internal;
#endif
__svml_datan2_data_internal:
	.quad	0x400921FB54442D18, 0x400921FB54442D18, 0x400921FB54442D18, 0x400921FB54442D18 // dPI = Pi
	.align	32
	.quad	0x3FF921FB54442D18, 0x3FF921FB54442D18, 0x3FF921FB54442D18, 0x3FF921FB54442D18 // dPIO2 = Pi/2
	.align	32
	.quad	0xBEF4FDB537ABC7A3, 0xBEF4FDB537ABC7A3, 0xBEF4FDB537ABC7A3, 0xBEF4FDB537ABC7A3 // dA19
	.align	32
	.quad	0x3F2CED0A36665209, 0x3F2CED0A36665209, 0x3F2CED0A36665209, 0x3F2CED0A36665209 // dA18
	.align	32
	.quad	0xBF52E67C93954C23, 0xBF52E67C93954C23, 0xBF52E67C93954C23, 0xBF52E67C93954C23 // dA17
	.align	32
	.quad	0x3F6F5A1DAE82AFB3, 0x3F6F5A1DAE82AFB3, 0x3F6F5A1DAE82AFB3, 0x3F6F5A1DAE82AFB3 // dA16
	.align	32
	.quad	0xBF82B2EC618E4BAD, 0xBF82B2EC618E4BAD, 0xBF82B2EC618E4BAD, 0xBF82B2EC618E4BAD // dA15
	.align	32
	.quad	0x3F914F4C661116A5, 0x3F914F4C661116A5, 0x3F914F4C661116A5, 0x3F914F4C661116A5 // dA14
	.align	32
	.quad	0xBF9A5E83B081F69C, 0xBF9A5E83B081F69C, 0xBF9A5E83B081F69C, 0xBF9A5E83B081F69C // dA13
	.align	32
	.quad	0x3FA169980CB6AD4F, 0x3FA169980CB6AD4F, 0x3FA169980CB6AD4F, 0x3FA169980CB6AD4F // dA12
	.align	32
	.quad	0xBFA4EFA2E563C1BC, 0xBFA4EFA2E563C1BC, 0xBFA4EFA2E563C1BC, 0xBFA4EFA2E563C1BC // dA11
	.align	32
	.quad	0x3FA7EC0FBC50683B, 0x3FA7EC0FBC50683B, 0x3FA7EC0FBC50683B, 0x3FA7EC0FBC50683B // dA10
	.align	32
	.quad	0xBFAAD261EAA09954, 0xBFAAD261EAA09954, 0xBFAAD261EAA09954, 0xBFAAD261EAA09954 // dA09
	.align	32
	.quad	0x3FAE1749BD612DCF, 0x3FAE1749BD612DCF, 0x3FAE1749BD612DCF, 0x3FAE1749BD612DCF // dA08
	.align	32
	.quad	0xBFB11084009435E0, 0xBFB11084009435E0, 0xBFB11084009435E0, 0xBFB11084009435E0 // dA07
	.align	32
	.quad	0x3FB3B12A49295651, 0x3FB3B12A49295651, 0x3FB3B12A49295651, 0x3FB3B12A49295651 // dA06
	.align	32
	.quad	0xBFB745D009BADA94, 0xBFB745D009BADA94, 0xBFB745D009BADA94, 0xBFB745D009BADA94 // dA05
	.align	32
	.quad	0x3FBC71C707F7D5B5, 0x3FBC71C707F7D5B5, 0x3FBC71C707F7D5B5, 0x3FBC71C707F7D5B5 // dA04
	.align	32
	.quad	0xBFC2492491EE55C7, 0xBFC2492491EE55C7, 0xBFC2492491EE55C7, 0xBFC2492491EE55C7 // dA03
	.align	32
	.quad	0x3FC999999997EE34, 0x3FC999999997EE34, 0x3FC999999997EE34, 0x3FC999999997EE34 // dA02
	.align	32
	.quad	0xBFD55555555553C5, 0xBFD55555555553C5, 0xBFD55555555553C5, 0xBFD55555555553C5 // dA01
	.align	32
	.quad	0x3FF0000000000000, 0x3FF0000000000000, 0x3FF0000000000000, 0x3FF0000000000000 // dA00 = 1.0 (folded into the reconstruction, not loaded)
	.align	32
	.quad	0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000 // dSIGN_MASK (sign bit only)
	.align	32
	.long	0x80300000, 0x80300000, 0x80300000, 0x80300000, 0x80300000, 0x80300000, 0x80300000, 0x80300000 // iCHK_WORK_SUB (high-word bias for range check)
	.align	32
	.long	0xfdd00000, 0xfdd00000, 0xfdd00000, 0xfdd00000, 0xfdd00000, 0xfdd00000, 0xfdd00000, 0xfdd00000 // iCHK_WORK_CMP (range-check threshold)
	.align	32
	.quad	0x7fffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff // dABS_MASK (clears sign bit)
	.align	32
	.quad	0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 // dZERO
	.align	32
	.type	__svml_datan2_data_internal, @object
	.size	__svml_datan2_data_internal, .-__svml_datan2_data_internal
451 | |