/* Function atan2 vectorized with AVX-512.
   Copyright (C) 2021-2024 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

/*
 * ALGORITHM DESCRIPTION:
 *      For    0.0    <= x <=  7.0/16.0: atan(x) = atan(0.0) + atan(s), where s=(x-0.0)/(1.0+0.0*x)
 *      For  7.0/16.0 <= x <= 11.0/16.0: atan(x) = atan(0.5) + atan(s), where s=(x-0.5)/(1.0+0.5*x)
 *      For 11.0/16.0 <= x <= 19.0/16.0: atan(x) = atan(1.0) + atan(s), where s=(x-1.0)/(1.0+1.0*x)
 *      For 19.0/16.0 <= x <= 39.0/16.0: atan(x) = atan(1.5) + atan(s), where s=(x-1.5)/(1.0+1.5*x)
 *      For 39.0/16.0 <= x <=    inf   : atan(x) = atan(inf) + atan(s), where s=-1.0/x
 *      Where atan(s) ~= s+s^3*Poly(s^2) on the interval |s|<7.0/16.0.
 *
 */
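
/* A scalar C sketch (an illustration, not part of the build) of what the
   main path below computes for finite, non-zero arguments.  atan_poly19 ()
   is a hypothetical stand-in for the dA19..dA01 polynomial stored in
   __svml_datan2_data_internal; the zero/NaN/out-of-range cases handled by
   the special branches are ignored here:

       #include <math.h>

       static double
       atan2_main_path (double y, double x)
       {
         double ay = fabs (y), ax = fabs (x);
         double a, b, pio2;
         if (ay < ax)
           {
             // 1) |y| < |x|: a = |y|, b = |x|, PIO2 = 0.
             a = ay;
             b = ax;
             pio2 = 0.0;
           }
         else
           {
             // 2) |y| >= |x|: a = -|x|, b = |y|, PIO2 = Pi/2.
             a = -ax;
             b = ay;
             pio2 = M_PI_2;
           }
         double r = a / b;                  // |r| <= 1.
         double p = r + r * r * r * atan_poly19 (r * r);
         double res = p + pio2;             // atan2 (|y|, |x|) in [0, Pi/2].
         if (x < 0.0)
           res = M_PI - res;                // Reflect into (Pi/2, Pi].
         return copysign (res, y);          // Restore the sign of y.
       }  */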

/* Offsets for data table __svml_datan2_data_internal
 */
#define dPI 0
#define dPIO2 64
#define dA19 128
#define dA18 192
#define dA17 256
#define dA16 320
#define dA15 384
#define dA14 448
#define dA13 512
#define dA12 576
#define dA11 640
#define dA10 704
#define dA09 768
#define dA08 832
#define dA07 896
#define dA06 960
#define dA05 1024
#define dA04 1088
#define dA03 1152
#define dA02 1216
#define dA01 1280
#define dA00 1344
#define dSIGN_MASK 1408
#define iCHK_WORK_SUB 1472
#define iCHK_WORK_CMP 1536
#define dABS_MASK 1600
#define dZERO 1664

#include <sysdep.h>

        .section .text.evex512, "ax", @progbits
ENTRY(_ZGVeN8vv_atan2_skx)
        pushq %rbp
        cfi_def_cfa_offset(16)
        movq %rsp, %rbp
        cfi_def_cfa(6, 16)
        cfi_offset(6, -16)
        andq $-64, %rsp
        subq $256, %rsp
        xorl %edx, %edx

        /*
         * #define NO_VECTOR_ZERO_ATAN2_ARGS
         * Declarations
         * Variables
         * Constants
         * The end of declarations
         * Implementation
         * Get r0~=1/B
         * Cannot be replaced by VQRCP(D, dR0, dB);
         * Argument Absolute values
         */
        vmovups dABS_MASK+__svml_datan2_data_internal(%rip), %zmm4

        /* Argument signs */
        vmovups dSIGN_MASK+__svml_datan2_data_internal(%rip), %zmm6

        /*
         * 1) If |y| <  |x| then a = |y|, b = |x|, PIO2 = 0
         * 2) If |y| >= |x| then a = -|x|, b = |y|, PIO2 = Pi/2
         */
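
        /* An intrinsics illustration (an assumption, not part of the build,
         * using <immintrin.h>) of the masked selection performed by the
         * vcmppd/vblendmpd/vxorpd sequence below; ay/ax/neg_ax/pio2_c stand
         * for vectors of |y|, |x|, -|x| and Pi/2:
         *
         *   __mmask8 k1 = _mm512_cmp_pd_mask (ay, ax, _CMP_LT_OQ); // |y| < |x|
         *   __m512d a = _mm512_mask_blend_pd (k1, neg_ax, ay);     // k1 ? |y| : -|x|
         *   __m512d b = _mm512_mask_blend_pd (k1, ay, ax);         // k1 ? |x| : |y|
         *   __m512d pio2
         *     = _mm512_mask_blend_pd (k1, pio2_c, _mm512_setzero_pd ());
         */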
        vmovups dPIO2+__svml_datan2_data_internal(%rip), %zmm3
        vandpd %zmm4, %zmm0, %zmm11
        vmovaps %zmm1, %zmm7
        vandpd %zmm4, %zmm7, %zmm2
        vandpd %zmm6, %zmm7, %zmm5
        vandpd %zmm6, %zmm0, %zmm4
        vorpd %zmm6, %zmm2, %zmm12
        vcmppd $17, {sae}, %zmm2, %zmm11, %k1
        vmovdqu iCHK_WORK_CMP+__svml_datan2_data_internal(%rip), %ymm6
        vmovups %zmm11, 64(%rsp)

        /* Check if y and x are on main path. */
        vpsrlq $32, %zmm2, %zmm9
        vblendmpd %zmm11, %zmm12, %zmm13{%k1}
        vblendmpd %zmm2, %zmm11, %zmm15{%k1}
        vpsrlq $32, %zmm11, %zmm8
        vmovdqu iCHK_WORK_SUB+__svml_datan2_data_internal(%rip), %ymm12
        vdivpd {rn-sae}, %zmm15, %zmm13, %zmm1
        vmovups %zmm15, (%rsp)
        vpmovqd %zmm9, %ymm14
        vpmovqd %zmm8, %ymm10
        vxorpd %zmm3, %zmm3, %zmm3{%k1}
        vpsubd %ymm12, %ymm14, %ymm13
        vpsubd %ymm12, %ymm10, %ymm9

        /* Polynomial. */
        vmulpd {rn-sae}, %zmm1, %zmm1, %zmm12
        vpcmpgtd %ymm6, %ymm13, %ymm15
        vpcmpeqd %ymm6, %ymm13, %ymm11
        vmulpd {rn-sae}, %zmm12, %zmm12, %zmm13
        vpor %ymm11, %ymm15, %ymm8
        vmovups dA19+__svml_datan2_data_internal(%rip), %zmm11
        vmovups dA15+__svml_datan2_data_internal(%rip), %zmm15
        vpcmpgtd %ymm6, %ymm9, %ymm14
        vpcmpeqd %ymm6, %ymm9, %ymm6
        vpor %ymm6, %ymm14, %ymm10
        vmulpd {rn-sae}, %zmm13, %zmm13, %zmm14
        vmovups dA18+__svml_datan2_data_internal(%rip), %zmm9
        vpor %ymm10, %ymm8, %ymm6
        vmovups dA17+__svml_datan2_data_internal(%rip), %zmm10
        vfmadd231pd {rn-sae}, %zmm14, %zmm11, %zmm15
        vmovups dA14+__svml_datan2_data_internal(%rip), %zmm11
        vmovups dA12+__svml_datan2_data_internal(%rip), %zmm8
        vfmadd231pd {rn-sae}, %zmm14, %zmm9, %zmm11
        vmovups dA13+__svml_datan2_data_internal(%rip), %zmm9
        vfmadd231pd {rn-sae}, %zmm14, %zmm10, %zmm9
        vmovups dA16+__svml_datan2_data_internal(%rip), %zmm10
        vfmadd231pd {rn-sae}, %zmm14, %zmm10, %zmm8
        vmovups dA11+__svml_datan2_data_internal(%rip), %zmm10
        vfmadd213pd {rn-sae}, %zmm10, %zmm14, %zmm15
        vmovups dA10+__svml_datan2_data_internal(%rip), %zmm10
        vfmadd213pd {rn-sae}, %zmm10, %zmm14, %zmm11
        vmovups dA09+__svml_datan2_data_internal(%rip), %zmm10
        vfmadd213pd {rn-sae}, %zmm10, %zmm14, %zmm9
        vmovups dA08+__svml_datan2_data_internal(%rip), %zmm10
        vfmadd213pd {rn-sae}, %zmm10, %zmm14, %zmm8
        vmovups dA07+__svml_datan2_data_internal(%rip), %zmm10
        vfmadd213pd {rn-sae}, %zmm10, %zmm14, %zmm15
        vmovups dA06+__svml_datan2_data_internal(%rip), %zmm10
        vfmadd213pd {rn-sae}, %zmm10, %zmm14, %zmm11
        vmovups dA05+__svml_datan2_data_internal(%rip), %zmm10
        vfmadd213pd {rn-sae}, %zmm10, %zmm14, %zmm9
        vmovups dA04+__svml_datan2_data_internal(%rip), %zmm10
        vfmadd213pd {rn-sae}, %zmm10, %zmm14, %zmm8
        vmovups dA03+__svml_datan2_data_internal(%rip), %zmm10

        /* A00=1.0, account for it later VQFMA(D, dP4, dP4, dR8, dA00); */
        vmulpd {rn-sae}, %zmm14, %zmm8, %zmm8
        vfmadd213pd {rn-sae}, %zmm10, %zmm14, %zmm15
        vmovups dA02+__svml_datan2_data_internal(%rip), %zmm10
        vfmadd213pd {rn-sae}, %zmm10, %zmm14, %zmm11
        vmovups dA01+__svml_datan2_data_internal(%rip), %zmm10
        vfmadd213pd {rn-sae}, %zmm11, %zmm12, %zmm15
        vfmadd213pd {rn-sae}, %zmm10, %zmm14, %zmm9
        vfmadd213pd {rn-sae}, %zmm8, %zmm12, %zmm9
        vmovups __svml_datan2_data_internal(%rip), %zmm8
        vfmadd213pd {rn-sae}, %zmm9, %zmm13, %zmm15

        /*
         * Reconstruction.
         * dP=(R+R*dP) + dPIO2
         */
        vfmadd213pd {rn-sae}, %zmm1, %zmm1, %zmm15
        vaddpd {rn-sae}, %zmm3, %zmm15, %zmm1
        vorpd %zmm5, %zmm1, %zmm9

        /* if x<0, dPI = Pi, else dPI = 0 */
        vmovups dZERO+__svml_datan2_data_internal(%rip), %zmm1
        vcmppd $18, {sae}, %zmm1, %zmm7, %k2
        vaddpd {rn-sae}, %zmm8, %zmm9, %zmm9{%k2}
        vmovmskps %ymm6, %eax
        vorpd %zmm4, %zmm9, %zmm11

        /* Special branch for fast (vector) processing of zero arguments */
        vmovups 64(%rsp), %zmm9
        testl %eax, %eax

        /* Go to auxiliary branch */
        jne L(AUX_BRANCH)
        # LOE rbx r12 r13 r14 r15 edx ymm6 zmm0 zmm2 zmm3 zmm4 zmm5 zmm7 zmm9 zmm11

        /* Return from auxiliary branch
         * for out of main path inputs
         */

L(AUX_BRANCH_RETURN):
        /*
         * Special branch for fast (vector) processing of zero arguments
         * The end of implementation
         */
        testl %edx, %edx

        /* Go to special inputs processing branch */
        jne L(SPECIAL_VALUES_BRANCH)
        # LOE rbx r12 r13 r14 r15 edx zmm0 zmm7 zmm11

        /* Restore registers
         * and exit the function
         */

L(EXIT):
        vmovaps %zmm11, %zmm0
        movq %rbp, %rsp
        popq %rbp
        cfi_def_cfa(7, 8)
        cfi_restore(6)
        ret
        cfi_def_cfa(6, 16)
        cfi_offset(6, -16)

        /* Branch to process
         * special inputs
         */

L(SPECIAL_VALUES_BRANCH):
        vmovups %zmm0, 64(%rsp)
        vmovups %zmm7, 128(%rsp)
        vmovups %zmm11, 192(%rsp)
        # LOE rbx r12 r13 r14 r15 edx zmm11

        xorl %eax, %eax
        # LOE rbx r12 r13 r14 r15 eax edx

        vzeroupper
        movq %r12, 16(%rsp)
        /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -240; DW_OP_plus) */
        .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x10, 0xff, 0xff, 0xff, 0x22
        movl %eax, %r12d
        movq %r13, 8(%rsp)
        /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -248; DW_OP_plus) */
        .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x08, 0xff, 0xff, 0xff, 0x22
        movl %edx, %r13d
        movq %r14, (%rsp)
        /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -256; DW_OP_plus) */
        .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x00, 0xff, 0xff, 0xff, 0x22
        # LOE rbx r15 r12d r13d

        /* Range mask
         * bits check
         */

L(RANGEMASK_CHECK):
        btl %r12d, %r13d

        /* Call scalar math function */
        jc L(SCALAR_MATH_CALL)
        # LOE rbx r15 r12d r13d

        /* Special inputs
         * processing loop
         */

L(SPECIAL_VALUES_LOOP):
        incl %r12d
        cmpl $8, %r12d

        /* Check bits in range mask */
        jl L(RANGEMASK_CHECK)
        # LOE rbx r15 r12d r13d

        movq 16(%rsp), %r12
        cfi_restore(12)
        movq 8(%rsp), %r13
        cfi_restore(13)
        movq (%rsp), %r14
        cfi_restore(14)
        vmovups 192(%rsp), %zmm11

        /* Go to exit */
        jmp L(EXIT)
        /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -240; DW_OP_plus) */
        .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x10, 0xff, 0xff, 0xff, 0x22
        /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -248; DW_OP_plus) */
        .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x08, 0xff, 0xff, 0xff, 0x22
        /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -256; DW_OP_plus) */
        .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x00, 0xff, 0xff, 0xff, 0x22
        # LOE rbx r12 r13 r14 r15 zmm11

        /* Scalar math function call
         * to process special input
         */

L(SCALAR_MATH_CALL):
        movl %r12d, %r14d
        vmovsd 64(%rsp, %r14, 8), %xmm0
        vmovsd 128(%rsp, %r14, 8), %xmm1
        call atan2@PLT
        # LOE rbx r14 r15 r12d r13d xmm0

        vmovsd %xmm0, 192(%rsp, %r14, 8)

        /* Process special inputs in loop */
        jmp L(SPECIAL_VALUES_LOOP)
        cfi_restore(12)
        cfi_restore(13)
        cfi_restore(14)
        # LOE rbx r15 r12d r13d

        /* Auxiliary branch
         * for out of main path inputs
         */

L(AUX_BRANCH):
        /* Check if at least one of X or Y is zero: iAXAYZERO */
        vmovups dZERO+__svml_datan2_data_internal(%rip), %zmm8

        /* Check if both X & Y are not NaNs: iXYnotNAN */
        vcmppd $3, {sae}, %zmm7, %zmm7, %k1
        vcmppd $3, {sae}, %zmm0, %zmm0, %k2
        vcmppd $4, {sae}, %zmm8, %zmm2, %k3
        vcmppd $4, {sae}, %zmm8, %zmm9, %k4

        /* Res = sign(Y)*(X<0)?(PIO2+PI):PIO2 */
        vpcmpgtq %zmm7, %zmm8, %k6
        vpternlogd $0xff, %zmm1, %zmm1, %zmm10
        vmovaps %zmm10, %zmm15
        vmovaps %zmm10, %zmm12
        vmovaps %zmm10, %zmm13
        vpandnq %zmm2, %zmm2, %zmm15{%k3}
        vmovaps %zmm10, %zmm2
        vpandnq %zmm7, %zmm7, %zmm12{%k1}
        vpandnq %zmm0, %zmm0, %zmm13{%k2}
        vpandnq %zmm9, %zmm9, %zmm2{%k4}
        vandpd %zmm13, %zmm12, %zmm14
        vorpd %zmm2, %zmm15, %zmm9
        vpsrlq $32, %zmm14, %zmm1
        vpsrlq $32, %zmm9, %zmm2
        vpmovqd %zmm1, %ymm1
        vpmovqd %zmm2, %ymm9

        /* Check if at least one of X or Y is zero and not NaN: iAXAYZEROnotNAN */
        vpand %ymm1, %ymm9, %ymm2

        /*
         * Path for zero arguments (at least one of the two)
         * Check if both args are zeros (den. is zero)
         */
        vmovups (%rsp), %zmm1

        /* Exclude from previous callout mask zero (and not NaN) arguments */
        vpandn %ymm6, %ymm2, %ymm6
        vcmppd $4, {sae}, %zmm8, %zmm1, %k5

        /* Go to callout */
        vmovmskps %ymm6, %edx
        vpandnq %zmm1, %zmm1, %zmm10{%k5}

        /* Set sPIO2 to zero if den. is zero */
        vpandnq %zmm3, %zmm10, %zmm3
        vpandq %zmm10, %zmm8, %zmm1
        vporq %zmm1, %zmm3, %zmm3
        vorpd %zmm5, %zmm3, %zmm1
        vmovups __svml_datan2_data_internal(%rip), %zmm5
        vaddpd {rn-sae}, %zmm5, %zmm1, %zmm1{%k6}
        vorpd %zmm4, %zmm1, %zmm1

        /* Merge results from main and spec path */
        vpmovzxdq %ymm2, %zmm4
        vpsllq $32, %zmm4, %zmm2
        vpord %zmm2, %zmm4, %zmm3
        vpandnq %zmm11, %zmm3, %zmm11
        vpandq %zmm3, %zmm1, %zmm1
        vporq %zmm1, %zmm11, %zmm11

        /* Return to main vector processing path */
        jmp L(AUX_BRANCH_RETURN)
        # LOE rbx r12 r13 r14 r15 edx zmm0 zmm7 zmm11
END(_ZGVeN8vv_atan2_skx)

        .section .rodata, "a"
        .align 64

#ifdef __svml_datan2_data_internal_typedef
typedef unsigned int VUINT32;
typedef struct {
        __declspec(align(64)) VUINT32 dPI[8][2];
        __declspec(align(64)) VUINT32 dPIO2[8][2];
        __declspec(align(64)) VUINT32 dA19[8][2];
        __declspec(align(64)) VUINT32 dA18[8][2];
        __declspec(align(64)) VUINT32 dA17[8][2];
        __declspec(align(64)) VUINT32 dA16[8][2];
        __declspec(align(64)) VUINT32 dA15[8][2];
        __declspec(align(64)) VUINT32 dA14[8][2];
        __declspec(align(64)) VUINT32 dA13[8][2];
        __declspec(align(64)) VUINT32 dA12[8][2];
        __declspec(align(64)) VUINT32 dA11[8][2];
        __declspec(align(64)) VUINT32 dA10[8][2];
        __declspec(align(64)) VUINT32 dA09[8][2];
        __declspec(align(64)) VUINT32 dA08[8][2];
        __declspec(align(64)) VUINT32 dA07[8][2];
        __declspec(align(64)) VUINT32 dA06[8][2];
        __declspec(align(64)) VUINT32 dA05[8][2];
        __declspec(align(64)) VUINT32 dA04[8][2];
        __declspec(align(64)) VUINT32 dA03[8][2];
        __declspec(align(64)) VUINT32 dA02[8][2];
        __declspec(align(64)) VUINT32 dA01[8][2];
        __declspec(align(64)) VUINT32 dA00[8][2];
        __declspec(align(64)) VUINT32 dSIGN_MASK[8][2];
        __declspec(align(64)) VUINT32 iCHK_WORK_SUB[16][1];
        __declspec(align(64)) VUINT32 iCHK_WORK_CMP[16][1];
        __declspec(align(64)) VUINT32 dABS_MASK[8][2];
        __declspec(align(64)) VUINT32 dZERO[8][2];
} __svml_datan2_data_internal;
#endif
__svml_datan2_data_internal:
        .quad 0x400921FB54442D18, 0x400921FB54442D18, 0x400921FB54442D18, 0x400921FB54442D18, 0x400921FB54442D18, 0x400921FB54442D18, 0x400921FB54442D18, 0x400921FB54442D18 // dPI
        .align 64
        .quad 0x3FF921FB54442D18, 0x3FF921FB54442D18, 0x3FF921FB54442D18, 0x3FF921FB54442D18, 0x3FF921FB54442D18, 0x3FF921FB54442D18, 0x3FF921FB54442D18, 0x3FF921FB54442D18 // dPIO2
        .align 64
        .quad 0xBEF4FDB537ABC7A3, 0xBEF4FDB537ABC7A3, 0xBEF4FDB537ABC7A3, 0xBEF4FDB537ABC7A3, 0xBEF4FDB537ABC7A3, 0xBEF4FDB537ABC7A3, 0xBEF4FDB537ABC7A3, 0xBEF4FDB537ABC7A3 // dA19
        .align 64
        .quad 0x3F2CED0A36665209, 0x3F2CED0A36665209, 0x3F2CED0A36665209, 0x3F2CED0A36665209, 0x3F2CED0A36665209, 0x3F2CED0A36665209, 0x3F2CED0A36665209, 0x3F2CED0A36665209 // dA18
        .align 64
        .quad 0xBF52E67C93954C23, 0xBF52E67C93954C23, 0xBF52E67C93954C23, 0xBF52E67C93954C23, 0xBF52E67C93954C23, 0xBF52E67C93954C23, 0xBF52E67C93954C23, 0xBF52E67C93954C23 // dA17
        .align 64
        .quad 0x3F6F5A1DAE82AFB3, 0x3F6F5A1DAE82AFB3, 0x3F6F5A1DAE82AFB3, 0x3F6F5A1DAE82AFB3, 0x3F6F5A1DAE82AFB3, 0x3F6F5A1DAE82AFB3, 0x3F6F5A1DAE82AFB3, 0x3F6F5A1DAE82AFB3 // dA16
        .align 64
        .quad 0xBF82B2EC618E4BAD, 0xBF82B2EC618E4BAD, 0xBF82B2EC618E4BAD, 0xBF82B2EC618E4BAD, 0xBF82B2EC618E4BAD, 0xBF82B2EC618E4BAD, 0xBF82B2EC618E4BAD, 0xBF82B2EC618E4BAD // dA15
        .align 64
        .quad 0x3F914F4C661116A5, 0x3F914F4C661116A5, 0x3F914F4C661116A5, 0x3F914F4C661116A5, 0x3F914F4C661116A5, 0x3F914F4C661116A5, 0x3F914F4C661116A5, 0x3F914F4C661116A5 // dA14
        .align 64
        .quad 0xBF9A5E83B081F69C, 0xBF9A5E83B081F69C, 0xBF9A5E83B081F69C, 0xBF9A5E83B081F69C, 0xBF9A5E83B081F69C, 0xBF9A5E83B081F69C, 0xBF9A5E83B081F69C, 0xBF9A5E83B081F69C // dA13
        .align 64
        .quad 0x3FA169980CB6AD4F, 0x3FA169980CB6AD4F, 0x3FA169980CB6AD4F, 0x3FA169980CB6AD4F, 0x3FA169980CB6AD4F, 0x3FA169980CB6AD4F, 0x3FA169980CB6AD4F, 0x3FA169980CB6AD4F // dA12
        .align 64
        .quad 0xBFA4EFA2E563C1BC, 0xBFA4EFA2E563C1BC, 0xBFA4EFA2E563C1BC, 0xBFA4EFA2E563C1BC, 0xBFA4EFA2E563C1BC, 0xBFA4EFA2E563C1BC, 0xBFA4EFA2E563C1BC, 0xBFA4EFA2E563C1BC // dA11
        .align 64
        .quad 0x3FA7EC0FBC50683B, 0x3FA7EC0FBC50683B, 0x3FA7EC0FBC50683B, 0x3FA7EC0FBC50683B, 0x3FA7EC0FBC50683B, 0x3FA7EC0FBC50683B, 0x3FA7EC0FBC50683B, 0x3FA7EC0FBC50683B // dA10
        .align 64
        .quad 0xBFAAD261EAA09954, 0xBFAAD261EAA09954, 0xBFAAD261EAA09954, 0xBFAAD261EAA09954, 0xBFAAD261EAA09954, 0xBFAAD261EAA09954, 0xBFAAD261EAA09954, 0xBFAAD261EAA09954 // dA09
        .align 64
        .quad 0x3FAE1749BD612DCF, 0x3FAE1749BD612DCF, 0x3FAE1749BD612DCF, 0x3FAE1749BD612DCF, 0x3FAE1749BD612DCF, 0x3FAE1749BD612DCF, 0x3FAE1749BD612DCF, 0x3FAE1749BD612DCF // dA08
        .align 64
        .quad 0xBFB11084009435E0, 0xBFB11084009435E0, 0xBFB11084009435E0, 0xBFB11084009435E0, 0xBFB11084009435E0, 0xBFB11084009435E0, 0xBFB11084009435E0, 0xBFB11084009435E0 // dA07
        .align 64
        .quad 0x3FB3B12A49295651, 0x3FB3B12A49295651, 0x3FB3B12A49295651, 0x3FB3B12A49295651, 0x3FB3B12A49295651, 0x3FB3B12A49295651, 0x3FB3B12A49295651, 0x3FB3B12A49295651 // dA06
        .align 64
        .quad 0xBFB745D009BADA94, 0xBFB745D009BADA94, 0xBFB745D009BADA94, 0xBFB745D009BADA94, 0xBFB745D009BADA94, 0xBFB745D009BADA94, 0xBFB745D009BADA94, 0xBFB745D009BADA94 // dA05
        .align 64
        .quad 0x3FBC71C707F7D5B5, 0x3FBC71C707F7D5B5, 0x3FBC71C707F7D5B5, 0x3FBC71C707F7D5B5, 0x3FBC71C707F7D5B5, 0x3FBC71C707F7D5B5, 0x3FBC71C707F7D5B5, 0x3FBC71C707F7D5B5 // dA04
        .align 64
        .quad 0xBFC2492491EE55C7, 0xBFC2492491EE55C7, 0xBFC2492491EE55C7, 0xBFC2492491EE55C7, 0xBFC2492491EE55C7, 0xBFC2492491EE55C7, 0xBFC2492491EE55C7, 0xBFC2492491EE55C7 // dA03
        .align 64
        .quad 0x3FC999999997EE34, 0x3FC999999997EE34, 0x3FC999999997EE34, 0x3FC999999997EE34, 0x3FC999999997EE34, 0x3FC999999997EE34, 0x3FC999999997EE34, 0x3FC999999997EE34 // dA02
        .align 64
        .quad 0xBFD55555555553C5, 0xBFD55555555553C5, 0xBFD55555555553C5, 0xBFD55555555553C5, 0xBFD55555555553C5, 0xBFD55555555553C5, 0xBFD55555555553C5, 0xBFD55555555553C5 // dA01
        .align 64
        .quad 0x3FF0000000000000, 0x3FF0000000000000, 0x3FF0000000000000, 0x3FF0000000000000, 0x3FF0000000000000, 0x3FF0000000000000, 0x3FF0000000000000, 0x3FF0000000000000 // dA00
        .align 64
        .quad 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000 // dSIGN_MASK
        .align 64
        .long 0x80300000, 0x80300000, 0x80300000, 0x80300000, 0x80300000, 0x80300000, 0x80300000, 0x80300000, 0x80300000, 0x80300000, 0x80300000, 0x80300000, 0x80300000, 0x80300000, 0x80300000, 0x80300000 // iCHK_WORK_SUB
        .align 64
        .long 0xfdd00000, 0xfdd00000, 0xfdd00000, 0xfdd00000, 0xfdd00000, 0xfdd00000, 0xfdd00000, 0xfdd00000, 0xfdd00000, 0xfdd00000, 0xfdd00000, 0xfdd00000, 0xfdd00000, 0xfdd00000, 0xfdd00000, 0xfdd00000 // iCHK_WORK_CMP
        .align 64
        .quad 0x7fffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff // dABS_MASK
        .align 64
        .quad 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 // dZERO
        .align 64
        .type __svml_datan2_data_internal, @object
        .size __svml_datan2_data_internal, .-__svml_datan2_data_internal
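
/* Usage sketch (an assumption for illustration, not part of glibc): with a
   libmvec-aware compiler, a scalar loop such as

       #include <math.h>

       void
       polar_angle (double *out, const double *y, const double *x, int n)
       {
         for (int i = 0; i < n; i++)
           out[i] = atan2 (y[i], x[i]);
       }

   built with, e.g., -O2 -ffast-math -march=skylake-avx512
   -mprefer-vector-width=512 may be auto-vectorized into calls to the
   libmvec symbol _ZGVeN8vv_atan2, for which _ZGVeN8vv_atan2_skx above is
   the AVX-512 implementation.  */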