/* Function atan2f vectorized with AVX-512.
   Copyright (C) 2021-2024 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   https://www.gnu.org/licenses/.  */

/*
 * ALGORITHM DESCRIPTION:
 *   For 0.0       <= x <= 7.0/16.0:  atan(x) = atan(0.0) + atan(s), where s = (x-0.0)/(1.0+0.0*x)
 *   For 7.0/16.0  <= x <= 11.0/16.0: atan(x) = atan(0.5) + atan(s), where s = (x-0.5)/(1.0+0.5*x)
 *   For 11.0/16.0 <= x <= 19.0/16.0: atan(x) = atan(1.0) + atan(s), where s = (x-1.0)/(1.0+1.0*x)
 *   For 19.0/16.0 <= x <= 39.0/16.0: atan(x) = atan(1.5) + atan(s), where s = (x-1.5)/(1.0+1.5*x)
 *   For 39.0/16.0 <= x <= inf:       atan(x) = atan(inf) + atan(s), where s = -1.0/x
 *   where atan(s) ~= s + s^3*Poly(s^2) on the interval |s| < 7.0/16.0.
 */
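
/*
 * Illustrative scalar sketch of the main path below (a reading aid only,
 * not the scalar fallback glibc actually calls; atan2f_sketch and PC[] are
 * names invented here, PC[] standing for the sPC0..sPC8 constants in the
 * data table at the end of this file; zero, infinite and NaN inputs take a
 * separate path):
 *
 *   #include <math.h>
 *
 *   static float
 *   atan2f_sketch (float y, float x)
 *   {
 *     float ay = fabsf (y), ax = fabsf (x);
 *     float a = (ay < ax) ? ay : -ax;           // 1) |y| <  |x|
 *     float b = (ay < ax) ? ax : ay;            // 2) |y| >= |x|
 *     float add = (ay < ax) ? 0.0f : (float) M_PI_2;
 *     float s = a / b, s2 = s * s;
 *     float p = PC[8];
 *     for (int i = 7; i >= 0; i--)              // PC[0] + PC[1]*s2 + ... + PC[8]*s2^8
 *       p = fmaf (p, s2, PC[i]);
 *     float r = fmaf (p, s, add);               // atan(|y/x|) or pi/2 - atan(|x/y|)
 *     if (x < 0.0f)
 *       r = (float) M_PI - r;
 *     return copysignf (r, y);
 *   }
 */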

/* Offsets for data table __svml_satan2_data_internal.  */
#define sZERO		0
#define sONE		64
#define sSIGN_MASK	128
#define sABS_MASK	192
#define sPIO2		256
#define sPI		320
#define sPC8		384
#define sPC7		448
#define sPC6		512
#define sPC5		576
#define sPC4		640
#define sPC3		704
#define sPC2		768
#define sPC1		832
#define sPC0		896
#define iCHK_WORK_SUB	960
#define iCHK_WORK_CMP	1024
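
/* Each table entry below is one full 64-byte ZMM vector (16 identical
   32-bit lanes), so consecutive offsets advance by 64 bytes; e.g. the
   sPC1 lane value is loaded from __svml_satan2_data_internal+832.  */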

#include <sysdep.h>

	.section .text.evex512, "ax", @progbits
ENTRY(_ZGVeN16vv_atan2f_skx)
	pushq	%rbp
	cfi_def_cfa_offset(16)
	movq	%rsp, %rbp
	cfi_def_cfa(6, 16)
	cfi_offset(6, -16)
	andq	$-64, %rsp
	subq	$256, %rsp
	xorl	%edx, %edx

	/* Arguments signs.  */
	vmovups	sABS_MASK+__svml_satan2_data_internal(%rip), %zmm6
	vmovups	sONE+__svml_satan2_data_internal(%rip), %zmm3

	/* Testing on working interval.  */
	vmovups	iCHK_WORK_SUB+__svml_satan2_data_internal(%rip), %zmm9
	vmovups	iCHK_WORK_CMP+__svml_satan2_data_internal(%rip), %zmm14

	/*
	 * 1) If y<x then a = y, b = x, PIO2 = 0
	 * 2) If y>x then a = -x, b = y, PIO2 = Pi/2
	 */
	vmovups	sPIO2+__svml_satan2_data_internal(%rip), %zmm4
	vpternlogd $255, %zmm13, %zmm13, %zmm13
	vmovaps	%zmm1, %zmm8
	vandps	%zmm6, %zmm8, %zmm2
	vandps	%zmm6, %zmm0, %zmm1
	vorps	sSIGN_MASK+__svml_satan2_data_internal(%rip), %zmm2, %zmm5
	vpsubd	%zmm9, %zmm2, %zmm10
	vpsubd	%zmm9, %zmm1, %zmm12
	vxorps	%zmm2, %zmm8, %zmm7
	vxorps	%zmm1, %zmm0, %zmm6
	vcmpps	$17, {sae}, %zmm2, %zmm1, %k1
	vpcmpgtd %zmm10, %zmm14, %k2
	vpcmpgtd %zmm12, %zmm14, %k3
	vmovups	sPC6+__svml_satan2_data_internal(%rip), %zmm14
	vblendmps %zmm1, %zmm5, %zmm11{%k1}
	vblendmps %zmm2, %zmm1, %zmm5{%k1}
	vxorps	%zmm4, %zmm4, %zmm4{%k1}

	/*
	 * Division a/b.
	 * Enabled when FMA is available and
	 * performance is better with NR iteration.
	 */
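	/*
	 * Rough scalar equivalent of the six instructions below (illustrative
	 * only; rcp14 () stands for the ~14-bit reciprocal estimate produced by
	 * VRCP14PS): refine the reciprocal and the quotient with one
	 * Newton-Raphson step each.
	 *
	 *   float r = rcp14 (b);               // ~2^-14 accurate 1/b estimate
	 *   r = fmaf (r, 1.0f - b * r, r);     // refine reciprocal
	 *   float q = a * r;                   // initial quotient a/b
	 *   q = fmaf (r, a - b * q, q);        // refine quotient
	 */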
	vrcp14ps %zmm5, %zmm15
	vfnmadd231ps {rn-sae}, %zmm5, %zmm15, %zmm3
	vfmadd213ps {rn-sae}, %zmm15, %zmm3, %zmm15
	vmulps	{rn-sae}, %zmm15, %zmm11, %zmm3
	vfnmadd231ps {rn-sae}, %zmm5, %zmm3, %zmm11
	vfmadd213ps {rn-sae}, %zmm3, %zmm11, %zmm15
	vmovups	sPC8+__svml_satan2_data_internal(%rip), %zmm11
	vpternlogd $255, %zmm3, %zmm3, %zmm3

	/* Polynomial.  */
	vmulps	{rn-sae}, %zmm15, %zmm15, %zmm9
	vpandnd	%zmm10, %zmm10, %zmm13{%k2}
	vmulps	{rn-sae}, %zmm9, %zmm9, %zmm10
	vfmadd231ps {rn-sae}, %zmm10, %zmm11, %zmm14
	vmovups	sPC5+__svml_satan2_data_internal(%rip), %zmm11
	vpandnd	%zmm12, %zmm12, %zmm3{%k3}
	vpord	%zmm3, %zmm13, %zmm3
	vmovups	sPC4+__svml_satan2_data_internal(%rip), %zmm13
	vmovups	sPC7+__svml_satan2_data_internal(%rip), %zmm12
	vptestmd %zmm3, %zmm3, %k0
	vfmadd213ps {rn-sae}, %zmm13, %zmm10, %zmm14
	vfmadd231ps {rn-sae}, %zmm10, %zmm12, %zmm11
	vmovups	sPC3+__svml_satan2_data_internal(%rip), %zmm12
	vmovups	sPC2+__svml_satan2_data_internal(%rip), %zmm13

	/* Special branch for fast (vector) processing of zero arguments.  */
	kortestw %k0, %k0
	vfmadd213ps {rn-sae}, %zmm12, %zmm10, %zmm11
	vmovups	sPC1+__svml_satan2_data_internal(%rip), %zmm12
	vfmadd213ps {rn-sae}, %zmm13, %zmm10, %zmm14
	vmovups	sPC0+__svml_satan2_data_internal(%rip), %zmm13
	vfmadd213ps {rn-sae}, %zmm12, %zmm10, %zmm11
	vfmadd213ps {rn-sae}, %zmm13, %zmm10, %zmm14
	vfmadd213ps {rn-sae}, %zmm14, %zmm9, %zmm11
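
	/*
	 * The FMA chains above evaluate p(s^2) = PC0 + PC1*s^2 + ... + PC8*s^16
	 * as two interleaved Horner chains in s^4 (even and odd coefficients),
	 * joined by the final FMA: p = p_odd(s^4)*s^2 + p_even(s^4).
	 */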

	/* Reconstruction.  */
	vfmadd213ps {rn-sae}, %zmm4, %zmm15, %zmm11

	/* if x<0, sPI = Pi, else sPI = 0 */
	vmovups	__svml_satan2_data_internal(%rip), %zmm15
	vorps	%zmm7, %zmm11, %zmm9
	vcmpps	$18, {sae}, %zmm15, %zmm8, %k4
	vmovups	sPI+__svml_satan2_data_internal(%rip), %zmm11
	vaddps	{rn-sae}, %zmm11, %zmm9, %zmm9{%k4}
	vorps	%zmm6, %zmm9, %zmm10

	/* Go to auxiliary branch.  */
	jne	L(AUX_BRANCH)
	# LOE rbx r12 r13 r14 r15 edx zmm0 zmm1 zmm2 zmm3 zmm4 zmm5 zmm6 zmm7 zmm8 zmm10 zmm11

	/* Return from auxiliary branch
	 * for out of main path inputs.
	 */

L(AUX_BRANCH_RETURN):
	/*
	 * Special branch for fast (vector) processing of zero arguments.
	 * The end of implementation.
	 */
	testl	%edx, %edx

	/* Go to special inputs processing branch.  */
	jne	L(SPECIAL_VALUES_BRANCH)
	# LOE rbx r12 r13 r14 r15 edx zmm0 zmm8 zmm10

	/* Restore registers
	 * and exit the function.
	 */

L(EXIT):
	vmovaps	%zmm10, %zmm0
	movq	%rbp, %rsp
	popq	%rbp
	cfi_def_cfa(7, 8)
	cfi_restore(6)
	ret
	cfi_def_cfa(6, 16)
	cfi_offset(6, -16)

	/* Branch to process
	 * special inputs.
	 */

L(SPECIAL_VALUES_BRANCH):
	vmovups	%zmm0, 64(%rsp)
	vmovups	%zmm8, 128(%rsp)
	vmovups	%zmm10, 192(%rsp)
	# LOE rbx r12 r13 r14 r15 edx zmm10

	xorl	%eax, %eax
	# LOE rbx r12 r13 r14 r15 eax edx

	vzeroupper
	movq	%r12, 16(%rsp)
	/* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -240; DW_OP_plus) */
	.cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x10, 0xff, 0xff, 0xff, 0x22
	movl	%eax, %r12d
	movq	%r13, 8(%rsp)
	/* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -248; DW_OP_plus) */
	.cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x08, 0xff, 0xff, 0xff, 0x22
	movl	%edx, %r13d
	movq	%r14, (%rsp)
	/* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -256; DW_OP_plus) */
	.cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x00, 0xff, 0xff, 0xff, 0x22
	# LOE rbx r15 r12d r13d

	/* Range mask
	 * bits check.
	 */

L(RANGEMASK_CHECK):
	btl	%r12d, %r13d

	/* Call scalar math function.  */
	jc	L(SCALAR_MATH_CALL)
	# LOE rbx r15 r12d r13d

	/* Special inputs
	 * processing loop.
	 */

L(SPECIAL_VALUES_LOOP):
	incl	%r12d
	cmpl	$16, %r12d

	/* Check bits in range mask.  */
	jl	L(RANGEMASK_CHECK)
	# LOE rbx r15 r12d r13d

	movq	16(%rsp), %r12
	cfi_restore(12)
	movq	8(%rsp), %r13
	cfi_restore(13)
	movq	(%rsp), %r14
	cfi_restore(14)
	vmovups	192(%rsp), %zmm10

	/* Go to exit.  */
	jmp	L(EXIT)
	/* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -240; DW_OP_plus) */
	.cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x10, 0xff, 0xff, 0xff, 0x22
	/* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -248; DW_OP_plus) */
	.cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x08, 0xff, 0xff, 0xff, 0x22
	/* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -256; DW_OP_plus) */
	.cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x00, 0xff, 0xff, 0xff, 0x22
	# LOE rbx r12 r13 r14 r15 zmm10

	/* Scalar math function call
	 * to process special input.
	 */

L(SCALAR_MATH_CALL):
	movl	%r12d, %r14d
	vmovss	64(%rsp, %r14, 4), %xmm0
	vmovss	128(%rsp, %r14, 4), %xmm1
	call	atan2f@PLT
	# LOE rbx r14 r15 r12d r13d xmm0

	vmovss	%xmm0, 192(%rsp, %r14, 4)

	/* Process special inputs in loop.  */
	jmp	L(SPECIAL_VALUES_LOOP)
	cfi_restore(12)
	cfi_restore(13)
	cfi_restore(14)
	# LOE rbx r15 r12d r13d
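
	/*
	 * Taken together, the blocks above behave roughly like the following C
	 * loop over the saved inputs (illustrative only; y, x and res name the
	 * spilled zmm0, zmm8 and zmm10 vectors, and edx holds the callout mask):
	 *
	 *   for (int i = 0; i < 16; i++)
	 *     if (edx & (1 << i))
	 *       res[i] = atan2f (y[i], x[i]);
	 */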

	/* Auxiliary branch
	 * for out of main path inputs.
	 */

L(AUX_BRANCH):
	/* Check if at least one of X or Y is zero: iAXAYZERO.  */
	vmovups	__svml_satan2_data_internal(%rip), %zmm9

	/* Check if both X & Y are not NaNs: iXYnotNAN.  */
	vcmpps	$3, {sae}, %zmm8, %zmm8, %k1
	vcmpps	$3, {sae}, %zmm0, %zmm0, %k2
	vpcmpd	$4, %zmm9, %zmm2, %k3
	vpcmpd	$4, %zmm9, %zmm1, %k4

	/*
	 * Path for zero arguments (at least one of the two).
	 * Check if both args are zeros (denominator is zero).
	 */
	vcmpps	$4, {sae}, %zmm9, %zmm5, %k5

	/* Res = sign(Y)*(X<0)?(PIO2+PI):PIO2 */
	vpcmpgtd %zmm8, %zmm9, %k6
	vpternlogd $255, %zmm14, %zmm14, %zmm14
	vpternlogd $255, %zmm12, %zmm12, %zmm12
	vpternlogd $255, %zmm13, %zmm13, %zmm13
	vpandnd	%zmm2, %zmm2, %zmm14{%k3}
	vpternlogd $255, %zmm2, %zmm2, %zmm2
	vpandnd	%zmm1, %zmm1, %zmm2{%k4}
	vpord	%zmm2, %zmm14, %zmm15
	vpternlogd $255, %zmm2, %zmm2, %zmm2
	vpandnd	%zmm5, %zmm5, %zmm2{%k5}

	/* Set sPIO2 to zero if denominator is zero.  */
	vpandnd	%zmm4, %zmm2, %zmm4
	vpandd	%zmm2, %zmm9, %zmm5
	vpord	%zmm5, %zmm4, %zmm2
	vorps	%zmm7, %zmm2, %zmm7
	vaddps	{rn-sae}, %zmm11, %zmm7, %zmm7{%k6}
	vorps	%zmm6, %zmm7, %zmm6
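
	/*
	 * For the zero-argument lanes the value built above follows the usual
	 * atan2f special cases (illustrative summary):
	 *   atan2f (+-0, x > 0)  = +-0
	 *   atan2f (+-0, x < 0)  = +-pi
	 *   atan2f (y != 0, +-0) = +-pi/2        (sign taken from y)
	 *   atan2f (+-0, +-0)    = +-0 or +-pi   (depending on the sign of x)
	 * NaN lanes are excluded below and left to the scalar callout.
	 */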
	vpandnd	%zmm8, %zmm8, %zmm12{%k1}
	vpandnd	%zmm0, %zmm0, %zmm13{%k2}
	vandps	%zmm13, %zmm12, %zmm12

	/* Check if at least one of X or Y is zero and not NaN: iAXAYZEROnotNAN.  */
	vpandd	%zmm12, %zmm15, %zmm1

	/* Exclude from previous callout mask zero (and not NaN) arguments.  */
	vpandnd	%zmm3, %zmm1, %zmm3

	/* Go to callout.  */
	vptestmd %zmm3, %zmm3, %k0
	kmovw	%k0, %edx

	/* Merge results from main and special path.  */
	vpandnd	%zmm10, %zmm1, %zmm10
	vpandd	%zmm1, %zmm6, %zmm11
	vpord	%zmm11, %zmm10, %zmm10

	/* Return to main vector processing path.  */
	jmp	L(AUX_BRANCH_RETURN)
	# LOE rbx r12 r13 r14 r15 edx zmm0 zmm8 zmm10
END(_ZGVeN16vv_atan2f_skx)

	.section .rodata, "a"
	.align	64

#ifdef __svml_satan2_data_internal_typedef
typedef unsigned int VUINT32;
typedef struct {
	__declspec(align(64)) VUINT32 sZERO[16][1];
	__declspec(align(64)) VUINT32 sONE[16][1];
	__declspec(align(64)) VUINT32 sSIGN_MASK[16][1];
	__declspec(align(64)) VUINT32 sABS_MASK[16][1];
	__declspec(align(64)) VUINT32 sPIO2[16][1];
	__declspec(align(64)) VUINT32 sPI[16][1];
	__declspec(align(64)) VUINT32 sPC8[16][1];
	__declspec(align(64)) VUINT32 sPC7[16][1];
	__declspec(align(64)) VUINT32 sPC6[16][1];
	__declspec(align(64)) VUINT32 sPC5[16][1];
	__declspec(align(64)) VUINT32 sPC4[16][1];
	__declspec(align(64)) VUINT32 sPC3[16][1];
	__declspec(align(64)) VUINT32 sPC2[16][1];
	__declspec(align(64)) VUINT32 sPC1[16][1];
	__declspec(align(64)) VUINT32 sPC0[16][1];
	__declspec(align(64)) VUINT32 iCHK_WORK_SUB[16][1];
	__declspec(align(64)) VUINT32 iCHK_WORK_CMP[16][1];
} __svml_satan2_data_internal;
#endif
__svml_satan2_data_internal:
	.long	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000 // sZERO
	.align	64
	.long	0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000 // sONE
	.align	64
	.long	0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000 // sSIGN_MASK
	.align	64
	.long	0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF // sABS_MASK
	.align	64
	.long	0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB // sPIO2
	.align	64
	.long	0x40490FDB, 0x40490FDB, 0x40490FDB, 0x40490FDB, 0x40490FDB, 0x40490FDB, 0x40490FDB, 0x40490FDB, 0x40490FDB, 0x40490FDB, 0x40490FDB, 0x40490FDB, 0x40490FDB, 0x40490FDB, 0x40490FDB, 0x40490FDB // sPI
	.align	64
	.long	0x3B322CC0, 0x3B322CC0, 0x3B322CC0, 0x3B322CC0, 0x3B322CC0, 0x3B322CC0, 0x3B322CC0, 0x3B322CC0, 0x3B322CC0, 0x3B322CC0, 0x3B322CC0, 0x3B322CC0, 0x3B322CC0, 0x3B322CC0, 0x3B322CC0, 0x3B322CC0 // sPC8
	.align	64
	.long	0xBC7F2631, 0xBC7F2631, 0xBC7F2631, 0xBC7F2631, 0xBC7F2631, 0xBC7F2631, 0xBC7F2631, 0xBC7F2631, 0xBC7F2631, 0xBC7F2631, 0xBC7F2631, 0xBC7F2631, 0xBC7F2631, 0xBC7F2631, 0xBC7F2631, 0xBC7F2631 // sPC7
	.align	64
	.long	0x3D2BC384, 0x3D2BC384, 0x3D2BC384, 0x3D2BC384, 0x3D2BC384, 0x3D2BC384, 0x3D2BC384, 0x3D2BC384, 0x3D2BC384, 0x3D2BC384, 0x3D2BC384, 0x3D2BC384, 0x3D2BC384, 0x3D2BC384, 0x3D2BC384, 0x3D2BC384 // sPC6
	.align	64
	.long	0xBD987629, 0xBD987629, 0xBD987629, 0xBD987629, 0xBD987629, 0xBD987629, 0xBD987629, 0xBD987629, 0xBD987629, 0xBD987629, 0xBD987629, 0xBD987629, 0xBD987629, 0xBD987629, 0xBD987629, 0xBD987629 // sPC5
	.align	64
	.long	0x3DD96474, 0x3DD96474, 0x3DD96474, 0x3DD96474, 0x3DD96474, 0x3DD96474, 0x3DD96474, 0x3DD96474, 0x3DD96474, 0x3DD96474, 0x3DD96474, 0x3DD96474, 0x3DD96474, 0x3DD96474, 0x3DD96474, 0x3DD96474 // sPC4
	.align	64
	.long	0xBE1161F8, 0xBE1161F8, 0xBE1161F8, 0xBE1161F8, 0xBE1161F8, 0xBE1161F8, 0xBE1161F8, 0xBE1161F8, 0xBE1161F8, 0xBE1161F8, 0xBE1161F8, 0xBE1161F8, 0xBE1161F8, 0xBE1161F8, 0xBE1161F8, 0xBE1161F8 // sPC3
	.align	64
	.long	0x3E4CB79F, 0x3E4CB79F, 0x3E4CB79F, 0x3E4CB79F, 0x3E4CB79F, 0x3E4CB79F, 0x3E4CB79F, 0x3E4CB79F, 0x3E4CB79F, 0x3E4CB79F, 0x3E4CB79F, 0x3E4CB79F, 0x3E4CB79F, 0x3E4CB79F, 0x3E4CB79F, 0x3E4CB79F // sPC2
	.align	64
	.long	0xBEAAAA49, 0xBEAAAA49, 0xBEAAAA49, 0xBEAAAA49, 0xBEAAAA49, 0xBEAAAA49, 0xBEAAAA49, 0xBEAAAA49, 0xBEAAAA49, 0xBEAAAA49, 0xBEAAAA49, 0xBEAAAA49, 0xBEAAAA49, 0xBEAAAA49, 0xBEAAAA49, 0xBEAAAA49 // sPC1
	.align	64
	.long	0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000 // sPC0
	.align	64
	.long	0x81000000, 0x81000000, 0x81000000, 0x81000000, 0x81000000, 0x81000000, 0x81000000, 0x81000000, 0x81000000, 0x81000000, 0x81000000, 0x81000000, 0x81000000, 0x81000000, 0x81000000, 0x81000000 // iCHK_WORK_SUB
	.align	64
	.long	0xFC000000, 0xFC000000, 0xFC000000, 0xFC000000, 0xFC000000, 0xFC000000, 0xFC000000, 0xFC000000, 0xFC000000, 0xFC000000, 0xFC000000, 0xFC000000, 0xFC000000, 0xFC000000, 0xFC000000, 0xFC000000 // iCHK_WORK_CMP
	.align	64
	.type	__svml_satan2_data_internal, @object
	.size	__svml_satan2_data_internal, .-__svml_satan2_data_internal