/* Function asinh vectorized with AVX-512.
   Copyright (C) 2021-2024 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   https://www.gnu.org/licenses/.  */

/*
 * ALGORITHM DESCRIPTION:
 *
 *   Compute asinh(x) as log(x + sqrt(x*x + 1)),
 *   using RSQRT instructions to start the
 *   square root approximation, and small table lookups for log
 *   that map to AVX-512 permute instructions.
 *
 *   Special cases:
 *
 *   asinh(NaN) = quiet NaN, and raises the invalid exception
 *   asinh(INF) = that INF
 *   asinh(0) = that 0
 *
 */
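
/* For reference, a minimal scalar C sketch of the same evaluation
 * strategy (illustrative only; the thresholds are spelled out from the
 * constants used below, and the vector code additionally tracks
 * low-order halves of intermediates for extra precision):
 *
 *   double asinh_sketch (double x)
 *   {
 *     double ax = fabs (x);
 *     if (ax < 0x1p-8)                // SmallThreshold path
 *       return x + x * ax * ax * (-1.0 / 6.0 + (3.0 / 40.0) * ax * ax);
 *     if (ax > 0x1p511)               // Threshold path: avoid overflow
 *       return copysign (log (ax * 0.125) + 4.0 * M_LN2, x);
 *     return copysign (log (ax + sqrt (ax * ax + 1.0)), x);
 *   }
 */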

/* Offsets for data table __svml_dasinh_data_internal_avx512
 */
#define Log_tbl_H 0
#define Log_tbl_L 128
#define One 256
#define AbsMask 320
#define SmallThreshold 384
#define Threshold 448
#define LargeThreshold 512
#define ca2 576
#define ca1 640
#define c4s 704
#define c3s 768
#define c2s 832
#define c1s 896
#define AddB5 960
#define RcpBitMask 1024
#define OneEighth 1088
#define Four 1152
#define poly_coeff9 1216
#define poly_coeff8 1280
#define poly_coeff7 1344
#define poly_coeff6 1408
#define poly_coeff5 1472
#define poly_coeff4 1536
#define poly_coeff3 1600
#define poly_coeff2 1664
#define poly_coeff1 1728
#define L2H 1792
#define L2L 1856
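
/* Layout note: each scalar constant is replicated into a full 64-byte
   ZMM vector (8 identical doubles), so consecutive offsets step by 64
   bytes; the two log tables hold 16 doubles (128 bytes) each.  */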

#include <sysdep.h>

	.section .text.evex512, "ax", @progbits
ENTRY(_ZGVeN8v_asinh_skx)
	pushq	%rbp
	cfi_def_cfa_offset(16)
	movq	%rsp, %rbp
	cfi_def_cfa(6, 16)
	cfi_offset(6, -16)
	andq	$-64, %rsp
	subq	$192, %rsp
	vmovaps	%zmm0, %zmm3

	/* x^2 */
	vmulpd	{rn-sae}, %zmm3, %zmm3, %zmm14
	vmovups	One+__svml_dasinh_data_internal_avx512(%rip), %zmm9

	/* polynomial computation for small inputs */
	vmovups	ca2+__svml_dasinh_data_internal_avx512(%rip), %zmm10
	vmovups	ca1+__svml_dasinh_data_internal_avx512(%rip), %zmm11

	/* not a very small input? */
	vmovups	SmallThreshold+__svml_dasinh_data_internal_avx512(%rip), %zmm0

	/* A = max(x^2, 1) */
	vmaxpd	{sae}, %zmm14, %zmm9, %zmm4

	/* B = min(x^2, 1) */
	vminpd	{sae}, %zmm14, %zmm9, %zmm5
	vfmadd231pd {rn-sae}, %zmm14, %zmm10, %zmm11

	/* 1+x^2 */
	vaddpd	{rn-sae}, %zmm9, %zmm14, %zmm8

	/* |input| */
	vandpd	AbsMask+__svml_dasinh_data_internal_avx512(%rip), %zmm3, %zmm1
	vrsqrt14pd %zmm8, %zmm6
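
	/* R0 = vrsqrt14pd(Y) approximates 1/sqrt(1+x^2) to about 14
	   bits; the relative-error terms Eh computed below refine the
	   square root to full double precision.  */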
	vcmppd	$21, {sae}, %zmm0, %zmm1, %k2

	/* B_high */
	vsubpd	{rn-sae}, %zmm4, %zmm8, %zmm7

	/* sign bit */
	vxorpd	%zmm3, %zmm1, %zmm2
	vmulpd	{rn-sae}, %zmm14, %zmm11, %zmm4

	/* B_low */
	vsubpd	{rn-sae}, %zmm7, %zmm5, %zmm13
	vmovups	c2s+__svml_dasinh_data_internal_avx512(%rip), %zmm5
	vmovups	c1s+__svml_dasinh_data_internal_avx512(%rip), %zmm7

	/* polynomial computation for small inputs */
	vfmadd213pd {rn-sae}, %zmm1, %zmm1, %zmm4
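
	/* Small-|x| path: Taylor series asinh(x) = x - x^3/6 +
	   (3/40)*x^5 - ..., computed here as
	   |x| + |x|*x^2*(ca1 + ca2*x^2) with ca1 ~ -1/6 and ca2 ~ 3/40;
	   the sign of x is restored at the end.  */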

	/* (x^2)_low */
	vmovaps	%zmm3, %zmm15
	vfmsub213pd {rn-sae}, %zmm14, %zmm3, %zmm15

	/* Sh ~ sqrt(1+x^2) */
	vmulpd	{rn-sae}, %zmm6, %zmm8, %zmm14

	/* Yl = (x^2)_low + B_low */
	vaddpd	{rn-sae}, %zmm15, %zmm13, %zmm13
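
	/* Y = 1+x^2 is kept as a double-double Yh + Yl: with
	   A = max(x^2, 1) and B = min(x^2, 1), a Fast2Sum step gives
	   B_low = B - (Yh - A), the FMS above recovers
	   (x^2)_low = x*x - fl(x*x), and Yl is their sum.  */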

	/* very large inputs? */
	vmovups	Threshold+__svml_dasinh_data_internal_avx512(%rip), %zmm15

	/* (Yh*R0)_low */
	vfmsub213pd {rn-sae}, %zmm14, %zmm6, %zmm8
	vcmppd	$21, {sae}, %zmm15, %zmm1, %k1

	/* Sl = (Yh*R0)_low+(R0*Yl) */
	vfmadd213pd {rn-sae}, %zmm8, %zmm6, %zmm13
	vmovups	LargeThreshold+__svml_dasinh_data_internal_avx512(%rip), %zmm8

	/* rel. error term: Eh=1-Sh*R0 */
	vmovaps	%zmm9, %zmm12
	vfnmadd231pd {rn-sae}, %zmm14, %zmm6, %zmm12
	vcmppd	$22, {sae}, %zmm8, %zmm1, %k0

	/* rel. error term: Eh=(1-Sh*R0)-Sl*R0 */
	vfnmadd231pd {rn-sae}, %zmm13, %zmm6, %zmm12

	/*
	 * sqrt(1+x^2) ~ Sh + Sl + Sh*Eh*poly_s
	 * poly_s = c1+c2*Eh+c3*Eh^2+c4*Eh^3
	 */
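
	/* Derivation: Sh = Y*R0 ~ sqrt(Y) and Eh = 1 - (Sh+Sl)*R0, so
	   sqrt(Y) = Sh/sqrt(1-Eh) ~ Sh*(1 + Eh/2 + (3/8)*Eh^2 +
	   (5/16)*Eh^3 + (35/128)*Eh^4 + ...); c1s..c4s are (nearly) the
	   binomial-series coefficients 1/2, 3/8, 5/16, 35/128.  */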
	vmovups	c4s+__svml_dasinh_data_internal_avx512(%rip), %zmm6
	vmovups	c3s+__svml_dasinh_data_internal_avx512(%rip), %zmm8

	/* Sh*Eh */
	vmulpd	{rn-sae}, %zmm12, %zmm14, %zmm11
	vfmadd231pd {rn-sae}, %zmm12, %zmm6, %zmm8

	/* Sh+x */
	vaddpd	{rn-sae}, %zmm1, %zmm14, %zmm6
	kmovw	%k0, %edx
	vfmadd213pd {rn-sae}, %zmm5, %zmm12, %zmm8
	vfmadd213pd {rn-sae}, %zmm7, %zmm12, %zmm8

	/* Xh */
	vsubpd	{rn-sae}, %zmm14, %zmm6, %zmm12

	/* Sl + Sh*Eh*poly_s */
	vfmadd213pd {rn-sae}, %zmm13, %zmm8, %zmm11

	/* fixup for very large inputs */
	vmovups	OneEighth+__svml_dasinh_data_internal_avx512(%rip), %zmm8

	/* Xl */
	vsubpd	{rn-sae}, %zmm12, %zmm1, %zmm12

	/* Xin0+Sl+Sh*Eh*poly_s ~ x+sqrt(1+x^2) */
	vaddpd	{rn-sae}, %zmm11, %zmm6, %zmm10

	/* Sl_high */
	vsubpd	{rn-sae}, %zmm6, %zmm10, %zmm5
	vmulpd	{rn-sae}, %zmm8, %zmm1, %zmm10{%k1}
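
	/* Large-|x| fixup (mask k1, |x| > 2^511): x + sqrt(1+x^2) would
	   overflow, but asinh(x) ~ log(2*x) there, so the log argument
	   is replaced by x/8 and the missing 4*ln2 is folded back in
	   below via the exponent correction ("Four"), using
	   log(2*x) = log(x/8) + 4*ln2.  */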

	/* Table lookups */
	vmovups	__svml_dasinh_data_internal_avx512(%rip), %zmm6

	/* Sl_l */
	vsubpd	{rn-sae}, %zmm5, %zmm11, %zmm7
	vrcp14pd %zmm10, %zmm13

	/* Xin_low */
	vaddpd	{rn-sae}, %zmm12, %zmm7, %zmm14
	vmovups	Log_tbl_L+__svml_dasinh_data_internal_avx512(%rip), %zmm7
	vmovups	poly_coeff6+__svml_dasinh_data_internal_avx512(%rip), %zmm12

	/* round reciprocal to 1+4b mantissas */
	vpaddq	AddB5+__svml_dasinh_data_internal_avx512(%rip), %zmm13, %zmm11

	/* fixup for very large inputs */
	vxorpd	%zmm14, %zmm14, %zmm14{%k1}
	vmovups	poly_coeff5+__svml_dasinh_data_internal_avx512(%rip), %zmm13
	vandpd	RcpBitMask+__svml_dasinh_data_internal_avx512(%rip), %zmm11, %zmm15
	vmovups	poly_coeff7+__svml_dasinh_data_internal_avx512(%rip), %zmm11

	/* Prepare table index */
	vpsrlq	$48, %zmm15, %zmm5
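
	/* Rounding the reciprocal to a 1+4-bit mantissa: AddB5 adds half
	   an ulp at bit 47 and RcpBitMask keeps the top 16 bits, so after
	   the shift the low 4 bits of each lane select one of the 16
	   log-table entries (the permutes use only the bits they need).  */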

	/* reduced argument for log(): (Rcp*Xin-1)+Rcp*Xin_low */
	vfmsub231pd {rn-sae}, %zmm15, %zmm10, %zmm9

	/* exponents */
	vgetexppd {sae}, %zmm15, %zmm8
	vmovups	Four+__svml_dasinh_data_internal_avx512(%rip), %zmm10
	vpermt2pd Log_tbl_H+64+__svml_dasinh_data_internal_avx512(%rip), %zmm5, %zmm6
	vpermt2pd Log_tbl_L+64+__svml_dasinh_data_internal_avx512(%rip), %zmm5, %zmm7
	vsubpd	{rn-sae}, %zmm10, %zmm8, %zmm8{%k1}
	vfmadd231pd {rn-sae}, %zmm15, %zmm14, %zmm9
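
	/* The log is assembled from the identity
	   log(Xin) = -log(Rcp) + log1p(R), with R = Rcp*Xin - 1:
	   the tables supply Th/Tl ~ -log(mantissa(Rcp)), the exponent of
	   Rcp times ln2 (split as L2H+L2L) is subtracted below, and
	   log1p(R) is evaluated by the polynomial.  */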

	/* polynomials */
	vmovups	poly_coeff9+__svml_dasinh_data_internal_avx512(%rip), %zmm10
	vmovups	poly_coeff8+__svml_dasinh_data_internal_avx512(%rip), %zmm5
	vmovups	poly_coeff4+__svml_dasinh_data_internal_avx512(%rip), %zmm14

	/* -K*L2H + Th */
	vmovups	L2H+__svml_dasinh_data_internal_avx512(%rip), %zmm15
	vfmadd231pd {rn-sae}, %zmm9, %zmm10, %zmm5

	/* -K*L2L + Tl */
	vmovups	L2L+__svml_dasinh_data_internal_avx512(%rip), %zmm10
	vfnmadd231pd {rn-sae}, %zmm8, %zmm15, %zmm6
	vfmadd213pd {rn-sae}, %zmm11, %zmm9, %zmm5
	vfnmadd213pd {rn-sae}, %zmm7, %zmm10, %zmm8
	vmovups	poly_coeff3+__svml_dasinh_data_internal_avx512(%rip), %zmm7
	vmovups	poly_coeff1+__svml_dasinh_data_internal_avx512(%rip), %zmm10

	/* R^2 */
	vmulpd	{rn-sae}, %zmm9, %zmm9, %zmm11
	vfmadd213pd {rn-sae}, %zmm12, %zmm9, %zmm5
	vfmadd213pd {rn-sae}, %zmm13, %zmm9, %zmm5
	vfmadd213pd {rn-sae}, %zmm14, %zmm9, %zmm5
	vfmadd213pd {rn-sae}, %zmm7, %zmm9, %zmm5
	vmovups	poly_coeff2+__svml_dasinh_data_internal_avx512(%rip), %zmm7
	vfmadd213pd {rn-sae}, %zmm7, %zmm9, %zmm5
	vfmadd213pd {rn-sae}, %zmm10, %zmm9, %zmm5
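
	/* Horner evaluation of Poly(R) = poly_coeff1 + poly_coeff2*R +
	   ... + poly_coeff9*R^8, the tail of the log1p series with
	   coefficients ~ -1/2, 1/3, -1/4, ..., so that
	   log1p(R) ~ R + R^2*Poly(R).  */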

	/* Tl + R^2*Poly */
	vfmadd213pd {rn-sae}, %zmm8, %zmm11, %zmm5

	/* R+Tl + R^2*Poly */
	vaddpd	{rn-sae}, %zmm9, %zmm5, %zmm9
	vaddpd	{rn-sae}, %zmm9, %zmm6, %zmm4{%k2}
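
	/* asinh is odd, so merge the input sign bit (saved in zmm2) back
	   into the result computed from |x|.  */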
	vxorpd	%zmm2, %zmm4, %zmm0
	testl	%edx, %edx

	/* Go to special inputs processing branch */
	jne	L(SPECIAL_VALUES_BRANCH)
	# LOE rbx r12 r13 r14 r15 edx zmm0 zmm3

	/* Restore registers
	 * and exit the function
	 */

L(EXIT):
	movq	%rbp, %rsp
	popq	%rbp
	cfi_def_cfa(7, 8)
	cfi_restore(6)
	ret
	cfi_def_cfa(6, 16)
	cfi_offset(6, -16)

	/* Branch to process
	 * special inputs
	 */

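	/* Per lane, the code below implements the following (a minimal
	 * C sketch; 64(%rsp) holds the spilled input vector and
	 * 128(%rsp) the result vector):
	 *
	 *   // edx = mask of lanes flagged as special by the main path
	 *   for (int i = 0; i < 8; i++)
	 *     if (edx & (1 << i))
	 *       out[i] = asinh (in[i]);   // scalar libm call
	 */
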
L(SPECIAL_VALUES_BRANCH):
	vmovups	%zmm3, 64(%rsp)
	vmovups	%zmm0, 128(%rsp)
	# LOE rbx r12 r13 r14 r15 edx zmm0

	xorl	%eax, %eax
	# LOE rbx r12 r13 r14 r15 eax edx

	vzeroupper
	movq	%r12, 16(%rsp)
	/* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -176; DW_OP_plus) */
	.cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x50, 0xff, 0xff, 0xff, 0x22
	movl	%eax, %r12d
	movq	%r13, 8(%rsp)
	/* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -184; DW_OP_plus) */
	.cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x48, 0xff, 0xff, 0xff, 0x22
	movl	%edx, %r13d
	movq	%r14, (%rsp)
	/* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -192; DW_OP_plus) */
	.cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22
	# LOE rbx r15 r12d r13d

	/* Range mask
	 * bits check
	 */

L(RANGEMASK_CHECK):
	btl	%r12d, %r13d

	/* Call scalar math function */
	jc	L(SCALAR_MATH_CALL)
	# LOE rbx r15 r12d r13d

	/* Special inputs
	 * processing loop
	 */

L(SPECIAL_VALUES_LOOP):
	incl	%r12d
	cmpl	$8, %r12d

	/* Check bits in range mask */
	jl	L(RANGEMASK_CHECK)
	# LOE rbx r15 r12d r13d

	movq	16(%rsp), %r12
	cfi_restore(12)
	movq	8(%rsp), %r13
	cfi_restore(13)
	movq	(%rsp), %r14
	cfi_restore(14)
	vmovups	128(%rsp), %zmm0

	/* Go to exit */
	jmp	L(EXIT)
	/* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -176; DW_OP_plus) */
	.cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x50, 0xff, 0xff, 0xff, 0x22
	/* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -184; DW_OP_plus) */
	.cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x48, 0xff, 0xff, 0xff, 0x22
	/* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -192; DW_OP_plus) */
	.cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22
	# LOE rbx r12 r13 r14 r15 zmm0

	/* Scalar math function call
	 * to process special input
	 */

L(SCALAR_MATH_CALL):
	movl	%r12d, %r14d
	vmovsd	64(%rsp, %r14, 8), %xmm0
	call	asinh@PLT
	# LOE rbx r14 r15 r12d r13d xmm0

	vmovsd	%xmm0, 128(%rsp, %r14, 8)

	/* Process special inputs in loop */
	jmp	L(SPECIAL_VALUES_LOOP)
	# LOE rbx r15 r12d r13d
END(_ZGVeN8v_asinh_skx)

	.section .rodata, "a"
	.align 64

#ifdef __svml_dasinh_data_internal_avx512_typedef
typedef unsigned int VUINT32;
typedef struct {
	__declspec(align(64)) VUINT32 Log_tbl_H[16][2];
	__declspec(align(64)) VUINT32 Log_tbl_L[16][2];
	__declspec(align(64)) VUINT32 One[8][2];
	__declspec(align(64)) VUINT32 AbsMask[8][2];
	__declspec(align(64)) VUINT32 SmallThreshold[8][2];
	__declspec(align(64)) VUINT32 Threshold[8][2];
	__declspec(align(64)) VUINT32 LargeThreshold[8][2];
	__declspec(align(64)) VUINT32 ca2[8][2];
	__declspec(align(64)) VUINT32 ca1[8][2];
	__declspec(align(64)) VUINT32 c4s[8][2];
	__declspec(align(64)) VUINT32 c3s[8][2];
	__declspec(align(64)) VUINT32 c2s[8][2];
	__declspec(align(64)) VUINT32 c1s[8][2];
	__declspec(align(64)) VUINT32 AddB5[8][2];
	__declspec(align(64)) VUINT32 RcpBitMask[8][2];
	__declspec(align(64)) VUINT32 OneEighth[8][2];
	__declspec(align(64)) VUINT32 Four[8][2];
	__declspec(align(64)) VUINT32 poly_coeff9[8][2];
	__declspec(align(64)) VUINT32 poly_coeff8[8][2];
	__declspec(align(64)) VUINT32 poly_coeff7[8][2];
	__declspec(align(64)) VUINT32 poly_coeff6[8][2];
	__declspec(align(64)) VUINT32 poly_coeff5[8][2];
	__declspec(align(64)) VUINT32 poly_coeff4[8][2];
	__declspec(align(64)) VUINT32 poly_coeff3[8][2];
	__declspec(align(64)) VUINT32 poly_coeff2[8][2];
	__declspec(align(64)) VUINT32 poly_coeff1[8][2];
	__declspec(align(64)) VUINT32 L2H[8][2];
	__declspec(align(64)) VUINT32 L2L[8][2];
} __svml_dasinh_data_internal_avx512;
#endif
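
/* The typedef above appears to be documentation only (its guard macro
   is not defined in glibc); it mirrors the byte offsets #defined at
   the top of the file.  */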
__svml_dasinh_data_internal_avx512:
	/* Log_tbl_H */
	.quad	0x0000000000000000
	.quad	0xbfaf0a30c0120000
	.quad	0xbfbe27076e2b0000
	.quad	0xbfc5ff3070a78000
	.quad	0xbfcc8ff7c79a8000
	.quad	0xbfd1675cababc000
	.quad	0xbfd4618bc21c4000
	.quad	0xbfd739d7f6bbc000
	.quad	0xbfd9f323ecbf8000
	.quad	0xbfdc8ff7c79a8000
	.quad	0xbfdf128f5faf0000
	.quad	0xbfe0be72e4252000
	.quad	0xbfe1e85f5e704000
	.quad	0xbfe307d7334f2000
	.quad	0xbfe41d8fe8468000
	.quad	0xbfe52a2d265bc000
	/* Log_tbl_L */
	.align 64
	.quad	0x0000000000000000
	.quad	0x3d53ab33d066d1d2
	.quad	0x3d2a342c2af0003c
	.quad	0xbd43d3c873e20a07
	.quad	0xbd4a21ac25d81ef3
	.quad	0x3d59f1fc63382a8f
	.quad	0xbd5ec27d0b7b37b3
	.quad	0xbd50069ce24c53fb
	.quad	0xbd584bf2b68d766f
	.quad	0xbd5a21ac25d81ef3
	.quad	0xbd3bb2cd720ec44c
	.quad	0xbd55056d312f7668
	.quad	0xbd1a07bd8b34be7c
	.quad	0x3d5e83c094debc15
	.quad	0x3d5aa33736867a17
	.quad	0xbd46abb9df22bc57
	/* One */
	.align 64
	.quad	0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000
	/* AbsMask */
	.align 64
	.quad	0x7fffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff
	/* SmallThreshold */
	.align 64
	.quad	0x3f70000000000000, 0x3f70000000000000, 0x3f70000000000000, 0x3f70000000000000, 0x3f70000000000000, 0x3f70000000000000, 0x3f70000000000000, 0x3f70000000000000
	/* Threshold */
	.align 64
	.quad	0x5fe0000000000000, 0x5fe0000000000000, 0x5fe0000000000000, 0x5fe0000000000000, 0x5fe0000000000000, 0x5fe0000000000000, 0x5fe0000000000000, 0x5fe0000000000000
	/* LargeThreshold */
	.align 64
	.quad	0x7fefffffffffffff, 0x7fefffffffffffff, 0x7fefffffffffffff, 0x7fefffffffffffff, 0x7fefffffffffffff, 0x7fefffffffffffff, 0x7fefffffffffffff, 0x7fefffffffffffff
	/* ca2 */
	.align 64
	.quad	0x3fb333220eaf02e7, 0x3fb333220eaf02e7, 0x3fb333220eaf02e7, 0x3fb333220eaf02e7, 0x3fb333220eaf02e7, 0x3fb333220eaf02e7, 0x3fb333220eaf02e7, 0x3fb333220eaf02e7
	/* ca1 */
	.align 64
	.quad	0xbfc5555555521e7e, 0xbfc5555555521e7e, 0xbfc5555555521e7e, 0xbfc5555555521e7e, 0xbfc5555555521e7e, 0xbfc5555555521e7e, 0xbfc5555555521e7e, 0xbfc5555555521e7e
	/* c4s */
	.align 64
	.quad	0x3fd1800001943612, 0x3fd1800001943612, 0x3fd1800001943612, 0x3fd1800001943612, 0x3fd1800001943612, 0x3fd1800001943612, 0x3fd1800001943612, 0x3fd1800001943612
	/* c3s */
	.align 64
	.quad	0x3fd40000013b0000, 0x3fd40000013b0000, 0x3fd40000013b0000, 0x3fd40000013b0000, 0x3fd40000013b0000, 0x3fd40000013b0000, 0x3fd40000013b0000, 0x3fd40000013b0000
	/* c2s */
	.align 64
	.quad	0x3fd8000000000000, 0x3fd8000000000000, 0x3fd8000000000000, 0x3fd8000000000000, 0x3fd8000000000000, 0x3fd8000000000000, 0x3fd8000000000000, 0x3fd8000000000000
	/* c1s */
	.align 64
	.quad	0x3fe0000000000000, 0x3fe0000000000000, 0x3fe0000000000000, 0x3fe0000000000000, 0x3fe0000000000000, 0x3fe0000000000000, 0x3fe0000000000000, 0x3fe0000000000000
	/* AddB5 */
	.align 64
	.quad	0x0000800000000000, 0x0000800000000000, 0x0000800000000000, 0x0000800000000000, 0x0000800000000000, 0x0000800000000000, 0x0000800000000000, 0x0000800000000000
	/* RcpBitMask */
	.align 64
	.quad	0xffff000000000000, 0xffff000000000000, 0xffff000000000000, 0xffff000000000000, 0xffff000000000000, 0xffff000000000000, 0xffff000000000000, 0xffff000000000000
	/* OneEighth */
	.align 64
	.quad	0x3fc0000000000000, 0x3fc0000000000000, 0x3fc0000000000000, 0x3fc0000000000000, 0x3fc0000000000000, 0x3fc0000000000000, 0x3fc0000000000000, 0x3fc0000000000000
	/* Four */
	.align 64
	.quad	0x4010000000000000, 0x4010000000000000, 0x4010000000000000, 0x4010000000000000, 0x4010000000000000, 0x4010000000000000, 0x4010000000000000, 0x4010000000000000
	/* poly_coeff9 */
	.align 64
	.quad	0xbfb9a9b040214368, 0xbfb9a9b040214368, 0xbfb9a9b040214368, 0xbfb9a9b040214368, 0xbfb9a9b040214368, 0xbfb9a9b040214368, 0xbfb9a9b040214368, 0xbfb9a9b040214368
	/* poly_coeff8 */
	.align 64
	.quad	0x3fbc80666e249778, 0x3fbc80666e249778, 0x3fbc80666e249778, 0x3fbc80666e249778, 0x3fbc80666e249778, 0x3fbc80666e249778, 0x3fbc80666e249778, 0x3fbc80666e249778
	/* poly_coeff7 */
	.align 64
	.quad	0xbfbffffb8a054bc9, 0xbfbffffb8a054bc9, 0xbfbffffb8a054bc9, 0xbfbffffb8a054bc9, 0xbfbffffb8a054bc9, 0xbfbffffb8a054bc9, 0xbfbffffb8a054bc9, 0xbfbffffb8a054bc9
	/* poly_coeff6 */
	.align 64
	.quad	0x3fc24922f71256f1, 0x3fc24922f71256f1, 0x3fc24922f71256f1, 0x3fc24922f71256f1, 0x3fc24922f71256f1, 0x3fc24922f71256f1, 0x3fc24922f71256f1, 0x3fc24922f71256f1
	/* poly_coeff5 */
	.align 64
	.quad	0xbfc55555559ba736, 0xbfc55555559ba736, 0xbfc55555559ba736, 0xbfc55555559ba736, 0xbfc55555559ba736, 0xbfc55555559ba736, 0xbfc55555559ba736, 0xbfc55555559ba736
	/* poly_coeff4 */
	.align 64
	.quad	0x3fc9999999be77af, 0x3fc9999999be77af, 0x3fc9999999be77af, 0x3fc9999999be77af, 0x3fc9999999be77af, 0x3fc9999999be77af, 0x3fc9999999be77af, 0x3fc9999999be77af
	/* poly_coeff3 */
	.align 64
	.quad	0xbfcffffffffffc65, 0xbfcffffffffffc65, 0xbfcffffffffffc65, 0xbfcffffffffffc65, 0xbfcffffffffffc65, 0xbfcffffffffffc65, 0xbfcffffffffffc65, 0xbfcffffffffffc65
	/* poly_coeff2 */
	.align 64
	.quad	0x3fd55555555554c1, 0x3fd55555555554c1, 0x3fd55555555554c1, 0x3fd55555555554c1, 0x3fd55555555554c1, 0x3fd55555555554c1, 0x3fd55555555554c1, 0x3fd55555555554c1
	/* poly_coeff1 */
	.align 64
	.quad	0xbfe0000000000000, 0xbfe0000000000000, 0xbfe0000000000000, 0xbfe0000000000000, 0xbfe0000000000000, 0xbfe0000000000000, 0xbfe0000000000000, 0xbfe0000000000000
	/* L2H = log(2)_high */
	.align 64
	.quad	0x3fe62E42FEFA0000, 0x3fe62E42FEFA0000, 0x3fe62E42FEFA0000, 0x3fe62E42FEFA0000, 0x3fe62E42FEFA0000, 0x3fe62E42FEFA0000, 0x3fe62E42FEFA0000, 0x3fe62E42FEFA0000
	/* L2L = log(2)_low */
	.align 64
	.quad	0x3d7cf79abc9e0000, 0x3d7cf79abc9e0000, 0x3d7cf79abc9e0000, 0x3d7cf79abc9e0000, 0x3d7cf79abc9e0000, 0x3d7cf79abc9e0000, 0x3d7cf79abc9e0000, 0x3d7cf79abc9e0000
	.align 64
	.type	__svml_dasinh_data_internal_avx512, @object
	.size	__svml_dasinh_data_internal_avx512, .-__svml_dasinh_data_internal_avx512