1/* Function asinhf vectorized with AVX-512.
2 Copyright (C) 2021-2024 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 https://www.gnu.org/licenses/. */
18
19/*
20 * ALGORITHM DESCRIPTION:
21 *
22 * Compute asinh(x) as log(x + sqrt(x*x + 1))
23 * using RSQRT instructions for starting the
24 * square root approximation, and small table lookups for log
25 * that map to AVX-512 permute instructions
26 *
27 * Special cases:
28 *
29 * asinh(NaN) = quiet NaN, and raise invalid exception
30 * asinh(INF) = that INF
31 * asinh(0) = that 0
32 *
33 */
34
/* Offsets for data table __svml_sasinh_data_internal_avx512.
   Each field is a 64-byte-aligned vector of 16 32-bit constants
   (the two log tables hold 32 entries each, hence 128-byte fields);
   the values below are byte offsets from the start of the table.  */
#define Log_tbl_H 0
#define Log_tbl_L 128
#define One 256
#define AbsMask 320
#define SmallThreshold 384
#define Threshold 448
#define LargeThreshold 512
#define ca1 576
#define c2s 640
#define c1s 704
#define AddB5 768
#define RcpBitMask 832
#define OneEighth 896
#define Four 960
#define poly_coeff3 1024
#define poly_coeff2 1088
#define poly_coeff1 1152
#define L2H 1216
#define L2L 1280
56
57#include <sysdep.h>
58
	.section .text.evex512, "ax", @progbits

/* _ZGVeN16v_asinhf_skx: 16-lane single-precision asinh.
   SysV AMD64 vector ABI:
     In:  %zmm0 = x[0..15]
     Out: %zmm0 = asinhf(x[0..15])
   Computes asinh(x) = sign(x) * log(|x| + sqrt(x*x + 1)).  Lanes whose
   |x| exceeds LargeThreshold (or are NaN) are flagged in %edx and
   recomputed one at a time via the scalar asinhf in the
   special-values branch.  */
ENTRY(_ZGVeN16v_asinhf_skx)
	/* Standard frame; realign the stack to 64 bytes and reserve a
	   spill area used only by the special-values path.  */
	pushq	%rbp
	cfi_def_cfa_offset(16)
	movq	%rsp, %rbp
	cfi_def_cfa(6, 16)
	cfi_offset(6, -16)
	andq	$-64, %rsp
	subq	$192, %rsp
	vmovaps	%zmm0, %zmm10

	/* x^2 */
	vmulps	{rn-sae}, %zmm10, %zmm10, %zmm0
	vmovups	One+__svml_sasinh_data_internal_avx512(%rip), %zmm2

	/* polynomial computation for small inputs */
	vmovups	ca1+__svml_sasinh_data_internal_avx512(%rip), %zmm1

	/* not a very small input ? */
	vmovups	SmallThreshold+__svml_sasinh_data_internal_avx512(%rip), %zmm11

	/* 1+x^2 */
	vaddps	{rn-sae}, %zmm2, %zmm0, %zmm7

	/* |input| */
	vandps	AbsMask+__svml_sasinh_data_internal_avx512(%rip), %zmm10, %zmm12

	/* A=max(x^2, 1); */
	vmaxps	{sae}, %zmm0, %zmm2, %zmm14
	/* ~14-bit-accurate rsqrt estimate of 1+x^2, refined below.  */
	vrsqrt14ps %zmm7, %zmm8

	/* B=min(x^2, 1); */
	vminps	{sae}, %zmm0, %zmm2, %zmm15
	/* k2: predicate 21 (_CMP_NLT_UQ) -> |x| >= SmallThreshold or NaN;
	   these lanes take the log-based result, the rest keep the
	   small-input polynomial (merge at the end).  */
	vcmpps	$21, {sae}, %zmm11, %zmm12, %k2

	/* B_high */
	vsubps	{rn-sae}, %zmm14, %zmm7, %zmm9

	/* sign bit */
	vxorps	%zmm10, %zmm12, %zmm13

	/* Sh ~sqrt(1+x^2) */
	vmulps	{rn-sae}, %zmm8, %zmm7, %zmm6
	vmovups	LargeThreshold+__svml_sasinh_data_internal_avx512(%rip), %zmm14

	/* B_low */
	vsubps	{rn-sae}, %zmm9, %zmm15, %zmm3

	/* Sh+x */
	vaddps	{rn-sae}, %zmm12, %zmm6, %zmm15

	/* (Yh*R0)_low */
	vfmsub213ps {rn-sae}, %zmm6, %zmm8, %zmm7
	vmulps	{rn-sae}, %zmm1, %zmm0, %zmm9
	/* k0: predicate 22 (_CMP_NLE_UQ) -> |x| > LargeThreshold or NaN;
	   these lanes go to the scalar fallback.  */
	vcmpps	$22, {sae}, %zmm14, %zmm12, %k0
	vmovups	c1s+__svml_sasinh_data_internal_avx512(%rip), %zmm1

	/* polynomial computation for small inputs */
	vfmadd213ps {rn-sae}, %zmm12, %zmm12, %zmm9
	/* Range mask of special lanes, tested after the main path.  */
	kmovw	%k0, %edx

	/* (x^2)_low */
	vmovaps	%zmm10, %zmm4
	vfmsub213ps {rn-sae}, %zmm0, %zmm10, %zmm4

	/* Yl = (x^2)_low + B_low */
	vaddps	{rn-sae}, %zmm4, %zmm3, %zmm5

	/* rel. error term: Eh=1-Sh*R0 */
	vmovaps	%zmm2, %zmm0
	vfnmadd231ps {rn-sae}, %zmm6, %zmm8, %zmm0

	/* Sl = (Yh*R0)_low+(R0*Yl) */
	vfmadd213ps {rn-sae}, %zmm7, %zmm8, %zmm5

	/* very large inputs ? */
	vmovups	Threshold+__svml_sasinh_data_internal_avx512(%rip), %zmm7

	/* rel. error term: Eh=(1-Sh*R0)-Sl*R0 */
	vfnmadd231ps {rn-sae}, %zmm5, %zmm8, %zmm0

	/* sqrt(1+x^2) ~ Sh + Sl + Sh*Eh*poly_s */
	vmovups	c2s+__svml_sasinh_data_internal_avx512(%rip), %zmm8
	/* k1: |x| >= Threshold or NaN -> lanes that use the scaled
	   (overflow-avoiding) large-input fixups below.  */
	vcmpps	$21, {sae}, %zmm7, %zmm12, %k1

	/* Sh*Eh */
	vmulps	{rn-sae}, %zmm0, %zmm6, %zmm4
	vfmadd231ps {rn-sae}, %zmm0, %zmm8, %zmm1

	/* Sl + Sh*Eh*poly_s */
	vfmadd213ps {rn-sae}, %zmm5, %zmm1, %zmm4

	/* Xh */
	vsubps	{rn-sae}, %zmm6, %zmm15, %zmm5

	/* fixup for very large inputs */
	vmovups	OneEighth+__svml_sasinh_data_internal_avx512(%rip), %zmm6

	/* Xin0+Sl+Sh*Eh*poly_s ~ x+sqrt(1+x^2) */
	vaddps	{rn-sae}, %zmm4, %zmm15, %zmm3

	/* Xl */
	vsubps	{rn-sae}, %zmm5, %zmm12, %zmm5

	/* Sl_high */
	vsubps	{rn-sae}, %zmm15, %zmm3, %zmm0
	/* Large lanes (k1): use |x|/8 as the log() argument instead of
	   x+sqrt(1+x^2); compensated via the exponent adjustment below.  */
	vmulps	{rn-sae}, %zmm6, %zmm12, %zmm3{%k1}

	/* -K*L2H + Th */
	vmovups	L2H+__svml_sasinh_data_internal_avx512(%rip), %zmm15

	/* Sl_l */
	vsubps	{rn-sae}, %zmm0, %zmm4, %zmm1
	/* ~14-bit reciprocal estimate of the log() argument.  */
	vrcp14ps %zmm3, %zmm6

	/* Table lookups */
	vmovups	__svml_sasinh_data_internal_avx512(%rip), %zmm0

	/* Xin_low */
	vaddps	{rn-sae}, %zmm5, %zmm1, %zmm7

	/* round reciprocal to 1+4b mantissas */
	vpaddd	AddB5+__svml_sasinh_data_internal_avx512(%rip), %zmm6, %zmm4
	vmovups	poly_coeff1+__svml_sasinh_data_internal_avx512(%rip), %zmm5
	vandps	RcpBitMask+__svml_sasinh_data_internal_avx512(%rip), %zmm4, %zmm8

	/* fixup for very large inputs */
	vxorps	%zmm7, %zmm7, %zmm7{%k1}

	/* polynomial */
	vmovups	poly_coeff3+__svml_sasinh_data_internal_avx512(%rip), %zmm4

	/* reduced argument for log(): (Rcp*Xin-1)+Rcp*Xin_low */
	vfmsub231ps {rn-sae}, %zmm8, %zmm3, %zmm2
	vmovups	Four+__svml_sasinh_data_internal_avx512(%rip), %zmm3

	/* exponents */
	vgetexpps {sae}, %zmm8, %zmm1

	/* Prepare table index: mantissa bits of the rounded reciprocal
	   select one of the 32 log-table entries.  */
	vpsrld	$18, %zmm8, %zmm14
	vfmadd231ps {rn-sae}, %zmm8, %zmm7, %zmm2
	vmovups	poly_coeff2+__svml_sasinh_data_internal_avx512(%rip), %zmm7
	/* Large lanes: exponent -= 4 compensates the |x|/8 scaling.  */
	vsubps	{rn-sae}, %zmm3, %zmm1, %zmm1{%k1}
	vpermt2ps Log_tbl_H+64+__svml_sasinh_data_internal_avx512(%rip), %zmm14, %zmm0
	vmovups	Log_tbl_L+__svml_sasinh_data_internal_avx512(%rip), %zmm3
	vfmadd231ps {rn-sae}, %zmm2, %zmm4, %zmm7
	vfnmadd231ps {rn-sae}, %zmm1, %zmm15, %zmm0

	/* R^2 */
	vmulps	{rn-sae}, %zmm2, %zmm2, %zmm6
	vfmadd213ps {rn-sae}, %zmm5, %zmm2, %zmm7
	vpermt2ps Log_tbl_L+64+__svml_sasinh_data_internal_avx512(%rip), %zmm14, %zmm3

	/* -K*L2L + Tl */
	vmovups	L2L+__svml_sasinh_data_internal_avx512(%rip), %zmm14
	vfnmadd213ps {rn-sae}, %zmm3, %zmm14, %zmm1

	/* Tl + R^2*Poly */
	vfmadd213ps {rn-sae}, %zmm1, %zmm6, %zmm7

	/* R+Tl + R^2*Poly */
	vaddps	{rn-sae}, %zmm2, %zmm7, %zmm2
	/* Merge: log path result only into lanes selected by k2;
	   other lanes keep the small-input polynomial in zmm9.  */
	vaddps	{rn-sae}, %zmm2, %zmm0, %zmm9{%k2}
	/* Reapply the sign of x.  */
	vxorps	%zmm13, %zmm9, %zmm0
	testl	%edx, %edx

	/* Go to special inputs processing branch */
	jne	L(SPECIAL_VALUES_BRANCH)
	# LOE rbx r12 r13 r14 r15 edx zmm0 zmm10

	/* Restore registers
	 * and exit the function
	 */

L(EXIT):
	movq	%rbp, %rsp
	popq	%rbp
	cfi_def_cfa(7, 8)
	cfi_restore(6)
	ret
	cfi_def_cfa(6, 16)
	cfi_offset(6, -16)

	/* Branch to process
	 * special inputs
	 */

L(SPECIAL_VALUES_BRANCH):
	/* Spill the original inputs and the partial results, then walk
	   the range-mask bit by bit, fixing flagged lanes via scalar
	   asinhf.  */
	vmovups	%zmm10, 64(%rsp)
	vmovups	%zmm0, 128(%rsp)
	# LOE rbx r12 r13 r14 r15 edx zmm0

	xorl	%eax, %eax
	# LOE rbx r12 r13 r14 r15 eax edx

	vzeroupper
	movq	%r12, 16(%rsp)
	/* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -176; DW_OP_plus) */
	.cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x50, 0xff, 0xff, 0xff, 0x22
	movl	%eax, %r12d
	movq	%r13, 8(%rsp)
	/* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -184; DW_OP_plus) */
	.cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x48, 0xff, 0xff, 0xff, 0x22
	movl	%edx, %r13d
	movq	%r14, (%rsp)
	/* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -192; DW_OP_plus) */
	.cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22
	# LOE rbx r15 r12d r13d

	/* Range mask
	 * bits check
	 */

L(RANGEMASK_CHECK):
	/* r12d = current lane index, r13d = range mask.  */
	btl	%r12d, %r13d

	/* Call scalar math function */
	jc	L(SCALAR_MATH_CALL)
	# LOE rbx r15 r12d r13d

	/* Special inputs
	 * processing loop
	 */

L(SPECIAL_VALUES_LOOP):
	incl	%r12d
	cmpl	$16, %r12d

	/* Check bits in range mask */
	jl	L(RANGEMASK_CHECK)
	# LOE rbx r15 r12d r13d

	movq	16(%rsp), %r12
	cfi_restore(12)
	movq	8(%rsp), %r13
	cfi_restore(13)
	movq	(%rsp), %r14
	cfi_restore(14)
	/* Reload the (now fully patched) result vector.  */
	vmovups	128(%rsp), %zmm0

	/* Go to exit */
	jmp	L(EXIT)
	/* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -176; DW_OP_plus) */
	.cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x50, 0xff, 0xff, 0xff, 0x22
	/* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -184; DW_OP_plus) */
	.cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x48, 0xff, 0xff, 0xff, 0x22
	/* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -192; DW_OP_plus) */
	.cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22
	# LOE rbx r12 r13 r14 r15 zmm0

	/* Scalar math function call
	 * to process special input
	 */

L(SCALAR_MATH_CALL):
	movl	%r12d, %r14d
	/* Load lane r14 of the spilled input, fix it up via libm, and
	   store the scalar result back over the spilled output.  */
	vmovss	64(%rsp, %r14, 4), %xmm0
	call	asinhf@PLT
	# LOE rbx r14 r15 r12d r13d xmm0

	vmovss	%xmm0, 128(%rsp, %r14, 4)

	/* Process special inputs in loop */
	jmp	L(SPECIAL_VALUES_LOOP)
	# LOE rbx r15 r12d r13d
END(_ZGVeN16v_asinhf_skx)
326
	.section .rodata, "a"
	.align 64

#ifdef __svml_sasinh_data_internal_avx512_typedef
/* C-layout description of the table below; this #ifdef is never
   defined, the typedef is reference documentation only.  */
typedef unsigned int VUINT32;
typedef struct {
	__declspec(align(64)) VUINT32 Log_tbl_H[32][1];
	__declspec(align(64)) VUINT32 Log_tbl_L[32][1];
	__declspec(align(64)) VUINT32 One[16][1];
	__declspec(align(64)) VUINT32 AbsMask[16][1];
	__declspec(align(64)) VUINT32 SmallThreshold[16][1];
	__declspec(align(64)) VUINT32 Threshold[16][1];
	__declspec(align(64)) VUINT32 LargeThreshold[16][1];
	__declspec(align(64)) VUINT32 ca1[16][1];
	__declspec(align(64)) VUINT32 c2s[16][1];
	__declspec(align(64)) VUINT32 c1s[16][1];
	__declspec(align(64)) VUINT32 AddB5[16][1];
	__declspec(align(64)) VUINT32 RcpBitMask[16][1];
	__declspec(align(64)) VUINT32 OneEighth[16][1];
	__declspec(align(64)) VUINT32 Four[16][1];
	__declspec(align(64)) VUINT32 poly_coeff3[16][1];
	__declspec(align(64)) VUINT32 poly_coeff2[16][1];
	__declspec(align(64)) VUINT32 poly_coeff1[16][1];
	__declspec(align(64)) VUINT32 L2H[16][1];
	__declspec(align(64)) VUINT32 L2L[16][1];
} __svml_sasinh_data_internal_avx512;
#endif
__svml_sasinh_data_internal_avx512:
	/* Log_tbl_H: 32-entry log table, high parts (binary32 bit
	   patterns), indexed by the rounded-reciprocal mantissa bits */
	.long 0x00000000
	.long 0xbcfc0000
	.long 0xbd788000
	.long 0xbdb78000
	.long 0xbdf14000
	.long 0xbe14a000
	.long 0xbe300000
	.long 0xbe4aa000
	.long 0xbe648000
	.long 0xbe7dc000
	.long 0xbe8b4000
	.long 0xbe974000
	.long 0xbea31000
	.long 0xbeae9000
	.long 0xbeb9d000
	.long 0xbec4d000
	.long 0xbecfa000
	.long 0xbeda2000
	.long 0xbee48000
	.long 0xbeeea000
	.long 0xbef89000
	.long 0xbf012800
	.long 0xbf05f000
	.long 0xbf0aa800
	.long 0xbf0f4000
	.long 0xbf13c800
	.long 0xbf184000
	.long 0xbf1ca000
	.long 0xbf20f000
	.long 0xbf252800
	.long 0xbf295000
	.long 0xbf2d6800
	/* Log_tbl_L: low (correction) parts of the 32 log table entries */
	.align 64
	.long 0x80000000
	.long 0xb726c39e
	.long 0x3839e7fe
	.long 0xb7528ae5
	.long 0x377891d5
	.long 0xb8297c10
	.long 0x37cf8f58
	.long 0x3852b186
	.long 0x35838656
	.long 0xb80c36af
	.long 0x38235454
	.long 0xb862bae1
	.long 0x37e87bc7
	.long 0x37848150
	.long 0x37202511
	.long 0xb74e1b05
	.long 0x385c1340
	.long 0xb8777bcd
	.long 0x36038656
	.long 0xb7d40984
	.long 0xb80f5faf
	.long 0xb8254b4c
	.long 0xb865c84a
	.long 0x37f0b42d
	.long 0xb83ebce1
	.long 0xb83c2513
	.long 0x37a332c4
	.long 0x3779654f
	.long 0x38602f73
	.long 0x367449f8
	.long 0xb7b4996f
	.long 0xb800986b
	/* One = 1.0f */
	.align 64
	.long 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000
	/* AbsMask: all bits but the sign bit */
	.align 64
	.long 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff
	/* SmallThreshold = 2^-6 (0.015625f) */
	.align 64
	.long 0x3c800000, 0x3c800000, 0x3c800000, 0x3c800000, 0x3c800000, 0x3c800000, 0x3c800000, 0x3c800000, 0x3c800000, 0x3c800000, 0x3c800000, 0x3c800000, 0x3c800000, 0x3c800000, 0x3c800000, 0x3c800000
	/* Threshold = 2^63 */
	.align 64
	.long 0x5f000000, 0x5f000000, 0x5f000000, 0x5f000000, 0x5f000000, 0x5f000000, 0x5f000000, 0x5f000000, 0x5f000000, 0x5f000000, 0x5f000000, 0x5f000000, 0x5f000000, 0x5f000000, 0x5f000000, 0x5f000000
	/* LargeThreshold = FLT_MAX */
	.align 64
	.long 0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff
	/* ca1 ~ -1/6: cubic coefficient of the small-input polynomial */
	.align 64
	.long 0xbe2AA5DE, 0xbe2AA5DE, 0xbe2AA5DE, 0xbe2AA5DE, 0xbe2AA5DE, 0xbe2AA5DE, 0xbe2AA5DE, 0xbe2AA5DE, 0xbe2AA5DE, 0xbe2AA5DE, 0xbe2AA5DE, 0xbe2AA5DE, 0xbe2AA5DE, 0xbe2AA5DE, 0xbe2AA5DE, 0xbe2AA5DE
	/* c2s = 0.375f: sqrt-refinement polynomial coefficient */
	.align 64
	.long 0x3ec00000, 0x3ec00000, 0x3ec00000, 0x3ec00000, 0x3ec00000, 0x3ec00000, 0x3ec00000, 0x3ec00000, 0x3ec00000, 0x3ec00000, 0x3ec00000, 0x3ec00000, 0x3ec00000, 0x3ec00000, 0x3ec00000, 0x3ec00000
	/* c1s = 0.5f: sqrt-refinement polynomial coefficient */
	.align 64
	.long 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000
	/* AddB5: rounding bias applied before truncating the reciprocal
	   mantissa to 1+4 bits */
	.align 64
	.long 0x00020000, 0x00020000, 0x00020000, 0x00020000, 0x00020000, 0x00020000, 0x00020000, 0x00020000, 0x00020000, 0x00020000, 0x00020000, 0x00020000, 0x00020000, 0x00020000, 0x00020000, 0x00020000
	/* RcpBitMask: keeps sign, exponent and top 4 mantissa bits */
	.align 64
	.long 0xfffc0000, 0xfffc0000, 0xfffc0000, 0xfffc0000, 0xfffc0000, 0xfffc0000, 0xfffc0000, 0xfffc0000, 0xfffc0000, 0xfffc0000, 0xfffc0000, 0xfffc0000, 0xfffc0000, 0xfffc0000, 0xfffc0000, 0xfffc0000
	/* OneEighth = 0.125f: scaling used for very large inputs */
	.align 64
	.long 0x3e000000, 0x3e000000, 0x3e000000, 0x3e000000, 0x3e000000, 0x3e000000, 0x3e000000, 0x3e000000, 0x3e000000, 0x3e000000, 0x3e000000, 0x3e000000, 0x3e000000, 0x3e000000, 0x3e000000, 0x3e000000
	/* Four = 4.0f: exponent adjustment for the large-input fixup */
	.align 64
	.long 0x40800000, 0x40800000, 0x40800000, 0x40800000, 0x40800000, 0x40800000, 0x40800000, 0x40800000, 0x40800000, 0x40800000, 0x40800000, 0x40800000, 0x40800000, 0x40800000, 0x40800000, 0x40800000
	/* poly_coeff3 ~ -0.25: log() polynomial coefficient */
	.align 64
	.long 0xbe800810, 0xbe800810, 0xbe800810, 0xbe800810, 0xbe800810, 0xbe800810, 0xbe800810, 0xbe800810, 0xbe800810, 0xbe800810, 0xbe800810, 0xbe800810, 0xbe800810, 0xbe800810, 0xbe800810, 0xbe800810
	/* poly_coeff2 ~ 1/3: log() polynomial coefficient */
	.align 64
	.long 0x3eaab11e, 0x3eaab11e, 0x3eaab11e, 0x3eaab11e, 0x3eaab11e, 0x3eaab11e, 0x3eaab11e, 0x3eaab11e, 0x3eaab11e, 0x3eaab11e, 0x3eaab11e, 0x3eaab11e, 0x3eaab11e, 0x3eaab11e, 0x3eaab11e, 0x3eaab11e
	/* poly_coeff1 = -0.5f: log() polynomial coefficient */
	.align 64
	.long 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000
	/* L2H = log(2)_high */
	.align 64
	.long 0x3f317000, 0x3f317000, 0x3f317000, 0x3f317000, 0x3f317000, 0x3f317000, 0x3f317000, 0x3f317000, 0x3f317000, 0x3f317000, 0x3f317000, 0x3f317000, 0x3f317000, 0x3f317000, 0x3f317000, 0x3f317000
	/* L2L = log(2)_low */
	.align 64
	.long 0x3805fdf4, 0x3805fdf4, 0x3805fdf4, 0x3805fdf4, 0x3805fdf4, 0x3805fdf4, 0x3805fdf4, 0x3805fdf4, 0x3805fdf4, 0x3805fdf4, 0x3805fdf4, 0x3805fdf4, 0x3805fdf4, 0x3805fdf4, 0x3805fdf4, 0x3805fdf4
	.align 64
	.type __svml_sasinh_data_internal_avx512, @object
	.size __svml_sasinh_data_internal_avx512, .-__svml_sasinh_data_internal_avx512

/* source code of glibc/sysdeps/x86_64/fpu/multiarch/svml_s_asinhf16_core_avx512.S */