1/* Function acosh vectorized with AVX-512.
2 Copyright (C) 2021-2024 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 https://www.gnu.org/licenses/. */
18
19/*
20 * ALGORITHM DESCRIPTION:
21 *
22 * Compute acosh(x) as log(x + sqrt(x*x - 1))
23 * using RSQRT instructions for starting the
24 * square root approximation, and small table lookups for log
25 * that map to AVX-512 permute instructions
26 *
27 * Special cases:
28 *
29 * acosh(NaN) = quiet NaN, and raise invalid exception
30 * acosh(-INF) = NaN
31 * acosh(+INF) = +INF
32 * acosh(x) = NaN if x < 1
33 * acosh(1) = +0
34 *
35 */
36
/* Offsets (in bytes) for data table __svml_dacosh_data_internal_avx512.
   Log_tbl_H and Log_tbl_L are 128 bytes each (16 doubles); every
   following field is one 64-byte ZMM vector holding a broadcast
   constant, hence the stride of 64 between consecutive offsets.  */
#define Log_tbl_H 0
#define Log_tbl_L 128
#define One 256
#define SmallThreshold 320
#define Threshold 384
#define LargeThreshold 448
#define ca2 512
#define ca1 576
#define c4s 640
#define c3s 704
#define c2s 768
#define c1s 832
#define AddB5 896
#define RcpBitMask 960
#define OneEighth 1024
#define Four 1088
#define poly_coeff9 1152
#define poly_coeff8 1216
#define poly_coeff7 1280
#define poly_coeff6 1344
#define poly_coeff5 1408
#define poly_coeff4 1472
#define poly_coeff3 1536
#define poly_coeff2 1600
#define poly_coeff1 1664
#define L2H 1728
#define L2L 1792
66
67#include <sysdep.h>
68
69 .section .text.evex512, "ax", @progbits
70ENTRY(_ZGVeN8v_acosh_skx)
71 pushq %rbp
72 cfi_def_cfa_offset(16)
73 movq %rsp, %rbp
74 cfi_def_cfa(6, 16)
75 cfi_offset(6, -16)
76 andq $-64, %rsp
77 subq $192, %rsp
78 vmovups One+__svml_dacosh_data_internal_avx512(%rip), %zmm5
79
80 /* polynomial computation for small inputs */
81 vmovups ca2+__svml_dacosh_data_internal_avx512(%rip), %zmm13
82 vmovups ca1+__svml_dacosh_data_internal_avx512(%rip), %zmm14
83
84 /*
85 * sqrt(1+x^2) ~ Sh + Sl + Sh*Eh*poly_s
86 * poly_s = c1+c2*Eh+c3*Eh^2
87 */
88 vmovups c4s+__svml_dacosh_data_internal_avx512(%rip), %zmm1
89 vmovups c2s+__svml_dacosh_data_internal_avx512(%rip), %zmm2
90 vmovups c1s+__svml_dacosh_data_internal_avx512(%rip), %zmm6
91
92 /* very large inputs ? */
93 vmovups Threshold+__svml_dacosh_data_internal_avx512(%rip), %zmm15
94
95 /* out of range inputs? */
96 vmovups LargeThreshold+__svml_dacosh_data_internal_avx512(%rip), %zmm3
97
98 /* not a very small input ? */
99 vmovups SmallThreshold+__svml_dacosh_data_internal_avx512(%rip), %zmm10
100 vmovaps %zmm0, %zmm12
101
102 /* x^2 - 1 */
103 vmovaps %zmm5, %zmm11
104 vfmsub231pd {rn-sae}, %zmm12, %zmm12, %zmm11
105 vcmppd $21, {sae}, %zmm15, %zmm12, %k2
106 vcmppd $22, {sae}, %zmm3, %zmm12, %k0
107 vcmppd $18, {sae}, %zmm5, %zmm12, %k1
108 vrsqrt14pd %zmm11, %zmm4
109 vcmppd $21, {sae}, %zmm10, %zmm11, %k3
110 vfmadd231pd {rn-sae}, %zmm11, %zmm13, %zmm14
111 vmovups c3s+__svml_dacosh_data_internal_avx512(%rip), %zmm13
112
113 /* Sh ~sqrt(-1+x^2) */
114 vmulpd {rn-sae}, %zmm4, %zmm11, %zmm9
115 vmulpd {rn-sae}, %zmm11, %zmm14, %zmm8
116
117 /* Sh+x */
118 vaddpd {rn-sae}, %zmm12, %zmm9, %zmm15
119
120 /* Shh */
121 vsubpd {rn-sae}, %zmm12, %zmm15, %zmm14
122
123 /* (Yh*R0)_low */
124 vmovaps %zmm11, %zmm0
125 korw %k0, %k1, %k0
126
127 /* rel. error term: Eh=1-Sh*R0 */
128 vmovaps %zmm5, %zmm7
129 vfmsub213pd {rn-sae}, %zmm9, %zmm4, %zmm0
130 vfnmadd231pd {rn-sae}, %zmm9, %zmm4, %zmm7
131
132 /* rel. error term: Eh=(1-Sh*R0)-Sl*R0 */
133 vfnmadd231pd {rn-sae}, %zmm0, %zmm4, %zmm7
134
135 /* Shl */
136 vsubpd {rn-sae}, %zmm14, %zmm9, %zmm4
137 vmovups poly_coeff7+__svml_dacosh_data_internal_avx512(%rip), %zmm14
138 vfmadd231pd {rn-sae}, %zmm7, %zmm1, %zmm13
139 vfmadd213pd {rn-sae}, %zmm2, %zmm7, %zmm13
140 vfmadd213pd {rn-sae}, %zmm6, %zmm7, %zmm13
141
142 /* Sh*Eh */
143 vmulpd {rn-sae}, %zmm7, %zmm9, %zmm7
144
145 /* Sl + Sh*Eh*poly_s */
146 vfmadd213pd {rn-sae}, %zmm0, %zmm13, %zmm7
147
148 /* polynomials */
149 vmovups poly_coeff9+__svml_dacosh_data_internal_avx512(%rip), %zmm13
150
151 /* polynomial computation for small inputs */
152 vaddpd {rn-sae}, %zmm7, %zmm9, %zmm0
153
154 /* Xin0+Sl+Sh*Eh*poly_s ~ x+sqrt(1+x^2) */
155 vaddpd {rn-sae}, %zmm7, %zmm15, %zmm6
156 vfmadd231pd {rn-sae}, %zmm0, %zmm8, %zmm0
157
158 /* fixup for very large inputs */
159 vmovups OneEighth+__svml_dacosh_data_internal_avx512(%rip), %zmm8
160
161 /* Sl_high */
162 vsubpd {rn-sae}, %zmm15, %zmm6, %zmm9
163 vmovups poly_coeff6+__svml_dacosh_data_internal_avx512(%rip), %zmm15
164 vmulpd {rn-sae}, %zmm8, %zmm12, %zmm6{%k2}
165
166 /* Sl_l */
167 vsubpd {rn-sae}, %zmm9, %zmm7, %zmm3
168 vrcp14pd %zmm6, %zmm1
169
170 /* Xin_low */
171 vaddpd {rn-sae}, %zmm4, %zmm3, %zmm7
172
173 /* Table lookups */
174 vmovups __svml_dacosh_data_internal_avx512(%rip), %zmm3
175
176 /* round reciprocal to 1+4b mantissas */
177 vpaddq AddB5+__svml_dacosh_data_internal_avx512(%rip), %zmm1, %zmm2
178
179 /* fixup for very large inputs */
180 vxorpd %zmm7, %zmm7, %zmm7{%k2}
181 vmovups poly_coeff8+__svml_dacosh_data_internal_avx512(%rip), %zmm1
182 vandpd RcpBitMask+__svml_dacosh_data_internal_avx512(%rip), %zmm2, %zmm8
183 vmovups Log_tbl_L+__svml_dacosh_data_internal_avx512(%rip), %zmm2
184
185 /* Prepare table index */
186 vpsrlq $48, %zmm8, %zmm9
187
188 /* reduced argument for log(): (Rcp*Xin-1)+Rcp*Xin_low */
189 vfmsub231pd {rn-sae}, %zmm8, %zmm6, %zmm5
190
191 /* exponents */
192 vgetexppd {sae}, %zmm8, %zmm4
193 vmovups Four+__svml_dacosh_data_internal_avx512(%rip), %zmm6
194 vpermt2pd Log_tbl_H+64+__svml_dacosh_data_internal_avx512(%rip), %zmm9, %zmm3
195 vpermt2pd Log_tbl_L+64+__svml_dacosh_data_internal_avx512(%rip), %zmm9, %zmm2
196 vsubpd {rn-sae}, %zmm6, %zmm4, %zmm4{%k2}
197 vfmadd231pd {rn-sae}, %zmm8, %zmm7, %zmm5
198 vmovups poly_coeff5+__svml_dacosh_data_internal_avx512(%rip), %zmm6
199 vmovups poly_coeff4+__svml_dacosh_data_internal_avx512(%rip), %zmm7
200
201 /* -K*L2H + Th */
202 vmovups L2H+__svml_dacosh_data_internal_avx512(%rip), %zmm8
203
204 /* -K*L2L + Tl */
205 vmovups L2L+__svml_dacosh_data_internal_avx512(%rip), %zmm9
206 vfmadd231pd {rn-sae}, %zmm5, %zmm13, %zmm1
207 vmovups poly_coeff2+__svml_dacosh_data_internal_avx512(%rip), %zmm13
208 vfnmadd231pd {rn-sae}, %zmm4, %zmm8, %zmm3
209 vfnmadd213pd {rn-sae}, %zmm2, %zmm9, %zmm4
210 vfmadd213pd {rn-sae}, %zmm14, %zmm5, %zmm1
211 vmovups poly_coeff3+__svml_dacosh_data_internal_avx512(%rip), %zmm2
212 vmovups poly_coeff1+__svml_dacosh_data_internal_avx512(%rip), %zmm14
213 vfmadd213pd {rn-sae}, %zmm15, %zmm5, %zmm1
214
215 /* R^2 */
216 vmulpd {rn-sae}, %zmm5, %zmm5, %zmm15
217 vfmadd213pd {rn-sae}, %zmm6, %zmm5, %zmm1
218 vfmadd213pd {rn-sae}, %zmm7, %zmm5, %zmm1
219 vfmadd213pd {rn-sae}, %zmm2, %zmm5, %zmm1
220 vfmadd213pd {rn-sae}, %zmm13, %zmm5, %zmm1
221 vfmadd213pd {rn-sae}, %zmm14, %zmm5, %zmm1
222
223 /* Tl + R^2*Poly */
224 vfmadd213pd {rn-sae}, %zmm4, %zmm15, %zmm1
225
226 /* R+Tl + R^2*Poly */
227 vaddpd {rn-sae}, %zmm5, %zmm1, %zmm5
228 vaddpd {rn-sae}, %zmm5, %zmm3, %zmm0{%k3}
229
230 /* Go to special inputs processing branch */
231 jne L(SPECIAL_VALUES_BRANCH)
232 # LOE rbx r12 r13 r14 r15 k0 zmm0 zmm12
233
234 /* Restore registers
235 * and exit the function
236 */
237
238L(EXIT):
239 movq %rbp, %rsp
240 popq %rbp
241 cfi_def_cfa(7, 8)
242 cfi_restore(6)
243 ret
244 cfi_def_cfa(6, 16)
245 cfi_offset(6, -16)
246
247 /* Branch to process
248 * special inputs
249 */
250
251L(SPECIAL_VALUES_BRANCH):
252 vmovups %zmm12, 64(%rsp)
253 vmovups %zmm0, 128(%rsp)
254 # LOE rbx r12 r13 r14 r15 k0 zmm0
255
256 xorl %eax, %eax
257 # LOE rbx r12 r13 r14 r15 eax k0
258
259 vzeroupper
260 movq %r12, 16(%rsp)
261 /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -176; DW_OP_plus) */
262 .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x50, 0xff, 0xff, 0xff, 0x22
263 movl %eax, %r12d
264 movq %r13, 8(%rsp)
265 /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -184; DW_OP_plus) */
266 .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x48, 0xff, 0xff, 0xff, 0x22
267 kmovd %k0, %r13d
268 movq %r14, (%rsp)
269 /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -192; DW_OP_plus) */
270 .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22
271 # LOE rbx r15 r12d r13d
272
273 /* Range mask
274 * bits check
275 */
276
277L(RANGEMASK_CHECK):
278 btl %r12d, %r13d
279
280 /* Call scalar math function */
281 jc L(SCALAR_MATH_CALL)
282 # LOE rbx r15 r12d r13d
283
284 /* Special inputs
285 * processing loop
286 */
287
288L(SPECIAL_VALUES_LOOP):
289 incl %r12d
290 cmpl $8, %r12d
291
292 /* Check bits in range mask */
293 jl L(RANGEMASK_CHECK)
294 # LOE rbx r15 r12d r13d
295
296 movq 16(%rsp), %r12
297 cfi_restore(12)
298 movq 8(%rsp), %r13
299 cfi_restore(13)
300 movq (%rsp), %r14
301 cfi_restore(14)
302 vmovups 128(%rsp), %zmm0
303
304 /* Go to exit */
305 jmp L(EXIT)
306 /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -176; DW_OP_plus) */
307 .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x50, 0xff, 0xff, 0xff, 0x22
308 /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -184; DW_OP_plus) */
309 .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x48, 0xff, 0xff, 0xff, 0x22
310 /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -192; DW_OP_plus) */
311 .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22
312 # LOE rbx r12 r13 r14 r15 zmm0
313
314 /* Scalar math function call
315 * to process special input
316 */
317
318L(SCALAR_MATH_CALL):
319 movl %r12d, %r14d
320 vmovsd 64(%rsp, %r14, 8), %xmm0
321 call acosh@PLT
322 # LOE rbx r14 r15 r12d r13d xmm0
323
324 vmovsd %xmm0, 128(%rsp, %r14, 8)
325
326 /* Process special inputs in loop */
327 jmp L(SPECIAL_VALUES_LOOP)
328 # LOE rbx r15 r12d r13d
329END(_ZGVeN8v_acosh_skx)
330
	.section .rodata, "a"
	.align	64

/* Layout description of the table below (not compiled as C here;
   guarded typedef kept for reference/tooling).  */
#ifdef __svml_dacosh_data_internal_avx512_typedef
typedef unsigned int VUINT32;
typedef struct {
	__declspec(align(64)) VUINT32 Log_tbl_H[16][2];
	__declspec(align(64)) VUINT32 Log_tbl_L[16][2];
	__declspec(align(64)) VUINT32 One[8][2];
	__declspec(align(64)) VUINT32 SmallThreshold[8][2];
	__declspec(align(64)) VUINT32 Threshold[8][2];
	__declspec(align(64)) VUINT32 LargeThreshold[8][2];
	__declspec(align(64)) VUINT32 ca2[8][2];
	__declspec(align(64)) VUINT32 ca1[8][2];
	__declspec(align(64)) VUINT32 c4s[8][2];
	__declspec(align(64)) VUINT32 c3s[8][2];
	__declspec(align(64)) VUINT32 c2s[8][2];
	__declspec(align(64)) VUINT32 c1s[8][2];
	__declspec(align(64)) VUINT32 AddB5[8][2];
	__declspec(align(64)) VUINT32 RcpBitMask[8][2];
	__declspec(align(64)) VUINT32 OneEighth[8][2];
	__declspec(align(64)) VUINT32 Four[8][2];
	__declspec(align(64)) VUINT32 poly_coeff9[8][2];
	__declspec(align(64)) VUINT32 poly_coeff8[8][2];
	__declspec(align(64)) VUINT32 poly_coeff7[8][2];
	__declspec(align(64)) VUINT32 poly_coeff6[8][2];
	__declspec(align(64)) VUINT32 poly_coeff5[8][2];
	__declspec(align(64)) VUINT32 poly_coeff4[8][2];
	__declspec(align(64)) VUINT32 poly_coeff3[8][2];
	__declspec(align(64)) VUINT32 poly_coeff2[8][2];
	__declspec(align(64)) VUINT32 poly_coeff1[8][2];
	__declspec(align(64)) VUINT32 L2H[8][2];
	__declspec(align(64)) VUINT32 L2L[8][2];
} __svml_dacosh_data_internal_avx512;
#endif
__svml_dacosh_data_internal_avx512:
	/* Log_tbl_H: high parts of the 16-entry log lookup table
	   (values match -log((16+j)/16), j = 0..15 — verify).  */
	.quad 0x0000000000000000
	.quad 0xbfaf0a30c0120000
	.quad 0xbfbe27076e2b0000
	.quad 0xbfc5ff3070a78000
	.quad 0xbfcc8ff7c79a8000
	.quad 0xbfd1675cababc000
	.quad 0xbfd4618bc21c4000
	.quad 0xbfd739d7f6bbc000
	.quad 0xbfd9f323ecbf8000
	.quad 0xbfdc8ff7c79a8000
	.quad 0xbfdf128f5faf0000
	.quad 0xbfe0be72e4252000
	.quad 0xbfe1e85f5e704000
	.quad 0xbfe307d7334f2000
	.quad 0xbfe41d8fe8468000
	.quad 0xbfe52a2d265bc000
	/* Log_tbl_L: low (tail) parts of the same table entries.  */
	.align 64
	.quad 0x0000000000000000
	.quad 0x3d53ab33d066d1d2
	.quad 0x3d2a342c2af0003c
	.quad 0xbd43d3c873e20a07
	.quad 0xbd4a21ac25d81ef3
	.quad 0x3d59f1fc63382a8f
	.quad 0xbd5ec27d0b7b37b3
	.quad 0xbd50069ce24c53fb
	.quad 0xbd584bf2b68d766f
	.quad 0xbd5a21ac25d81ef3
	.quad 0xbd3bb2cd720ec44c
	.quad 0xbd55056d312f7668
	.quad 0xbd1a07bd8b34be7c
	.quad 0x3d5e83c094debc15
	.quad 0x3d5aa33736867a17
	.quad 0xbd46abb9df22bc57
	/* One = 1.0 */
	.align 64
	.quad 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000
	/* SmallThreshold = 2^-16 */
	.align 64
	.quad 0x3ef0000000000000, 0x3ef0000000000000, 0x3ef0000000000000, 0x3ef0000000000000, 0x3ef0000000000000, 0x3ef0000000000000, 0x3ef0000000000000, 0x3ef0000000000000
	/* Threshold = 2^511 */
	.align 64
	.quad 0x5fe0000000000000, 0x5fe0000000000000, 0x5fe0000000000000, 0x5fe0000000000000, 0x5fe0000000000000, 0x5fe0000000000000, 0x5fe0000000000000, 0x5fe0000000000000
	/* LargeThreshold = DBL_MAX */
	.align 64
	.quad 0x7fefffffffffffff, 0x7fefffffffffffff, 0x7fefffffffffffff, 0x7fefffffffffffff, 0x7fefffffffffffff, 0x7fefffffffffffff, 0x7fefffffffffffff, 0x7fefffffffffffff
	/* ca2 ~= 0.075 (small-input acosh series coefficient) */
	.align 64
	.quad 0x3fb333220eaf02e7, 0x3fb333220eaf02e7, 0x3fb333220eaf02e7, 0x3fb333220eaf02e7, 0x3fb333220eaf02e7, 0x3fb333220eaf02e7, 0x3fb333220eaf02e7, 0x3fb333220eaf02e7
	/* ca1 ~= -1/6 (small-input acosh series coefficient) */
	.align 64
	.quad 0xbfc5555555521e7e, 0xbfc5555555521e7e, 0xbfc5555555521e7e, 0xbfc5555555521e7e, 0xbfc5555555521e7e, 0xbfc5555555521e7e, 0xbfc5555555521e7e, 0xbfc5555555521e7e
	/* c4s ~= 0.2734 (sqrt correction poly coefficient) */
	.align 64
	.quad 0x3fd1800001943612, 0x3fd1800001943612, 0x3fd1800001943612, 0x3fd1800001943612, 0x3fd1800001943612, 0x3fd1800001943612, 0x3fd1800001943612, 0x3fd1800001943612
	/* c3s ~= 0.3125 (sqrt correction poly coefficient) */
	.align 64
	.quad 0x3fd40000013b0000, 0x3fd40000013b0000, 0x3fd40000013b0000, 0x3fd40000013b0000, 0x3fd40000013b0000, 0x3fd40000013b0000, 0x3fd40000013b0000, 0x3fd40000013b0000
	/* c2s = 0.375 (sqrt correction poly coefficient) */
	.align 64
	.quad 0x3fd8000000000000, 0x3fd8000000000000, 0x3fd8000000000000, 0x3fd8000000000000, 0x3fd8000000000000, 0x3fd8000000000000, 0x3fd8000000000000, 0x3fd8000000000000
	/* c1s = 0.5 (sqrt correction poly coefficient) */
	.align 64
	.quad 0x3fe0000000000000, 0x3fe0000000000000, 0x3fe0000000000000, 0x3fe0000000000000, 0x3fe0000000000000, 0x3fe0000000000000, 0x3fe0000000000000, 0x3fe0000000000000
	/* AddB5: rounding bit for truncating the reciprocal to a
	   1+4-bit mantissa (added before masking with RcpBitMask).  */
	.align 64
	.quad 0x0000800000000000, 0x0000800000000000, 0x0000800000000000, 0x0000800000000000, 0x0000800000000000, 0x0000800000000000, 0x0000800000000000, 0x0000800000000000
	/* RcpBitMask: keep sign, exponent and top 4 mantissa bits.  */
	.align 64
	.quad 0xffff000000000000, 0xffff000000000000, 0xffff000000000000, 0xffff000000000000, 0xffff000000000000, 0xffff000000000000, 0xffff000000000000, 0xffff000000000000
	/* OneEighth = 0.125 (scales huge inputs before log) */
	.align 64
	.quad 0x3fc0000000000000, 0x3fc0000000000000, 0x3fc0000000000000, 0x3fc0000000000000, 0x3fc0000000000000, 0x3fc0000000000000, 0x3fc0000000000000, 0x3fc0000000000000
	/* Four = 4.0 (exponent fixup for the /8-scaled huge lanes) */
	.align 64
	.quad 0x4010000000000000, 0x4010000000000000, 0x4010000000000000, 0x4010000000000000, 0x4010000000000000, 0x4010000000000000, 0x4010000000000000, 0x4010000000000000
	/* log(1+R) minimax polynomial, Horner order (poly_coeff9 first).
	   Values track the Taylor pattern -1/2, 1/3, -1/4, ... from
	   poly_coeff1 upward.  */
	/* poly_coeff9 ~= -0.1003 */
	.align 64
	.quad 0xbfb9a9b040214368, 0xbfb9a9b040214368, 0xbfb9a9b040214368, 0xbfb9a9b040214368, 0xbfb9a9b040214368, 0xbfb9a9b040214368, 0xbfb9a9b040214368, 0xbfb9a9b040214368
	/* poly_coeff8 ~= 1/9 */
	.align 64
	.quad 0x3fbc80666e249778, 0x3fbc80666e249778, 0x3fbc80666e249778, 0x3fbc80666e249778, 0x3fbc80666e249778, 0x3fbc80666e249778, 0x3fbc80666e249778, 0x3fbc80666e249778
	/* poly_coeff7 ~= -1/8 */
	.align 64
	.quad 0xbfbffffb8a054bc9, 0xbfbffffb8a054bc9, 0xbfbffffb8a054bc9, 0xbfbffffb8a054bc9, 0xbfbffffb8a054bc9, 0xbfbffffb8a054bc9, 0xbfbffffb8a054bc9, 0xbfbffffb8a054bc9
	/* poly_coeff6 ~= 1/7 */
	.align 64
	.quad 0x3fc24922f71256f1, 0x3fc24922f71256f1, 0x3fc24922f71256f1, 0x3fc24922f71256f1, 0x3fc24922f71256f1, 0x3fc24922f71256f1, 0x3fc24922f71256f1, 0x3fc24922f71256f1
	/* poly_coeff5 ~= -1/6 */
	.align 64
	.quad 0xbfc55555559ba736, 0xbfc55555559ba736, 0xbfc55555559ba736, 0xbfc55555559ba736, 0xbfc55555559ba736, 0xbfc55555559ba736, 0xbfc55555559ba736, 0xbfc55555559ba736
	/* poly_coeff4 ~= 1/5 */
	.align 64
	.quad 0x3fc9999999be77af, 0x3fc9999999be77af, 0x3fc9999999be77af, 0x3fc9999999be77af, 0x3fc9999999be77af, 0x3fc9999999be77af, 0x3fc9999999be77af, 0x3fc9999999be77af
	/* poly_coeff3 ~= -1/4 */
	.align 64
	.quad 0xbfcffffffffffc65, 0xbfcffffffffffc65, 0xbfcffffffffffc65, 0xbfcffffffffffc65, 0xbfcffffffffffc65, 0xbfcffffffffffc65, 0xbfcffffffffffc65, 0xbfcffffffffffc65
	/* poly_coeff2 ~= 1/3 */
	.align 64
	.quad 0x3fd55555555554c1, 0x3fd55555555554c1, 0x3fd55555555554c1, 0x3fd55555555554c1, 0x3fd55555555554c1, 0x3fd55555555554c1, 0x3fd55555555554c1, 0x3fd55555555554c1
	/* poly_coeff1 = -0.5 */
	.align 64
	.quad 0xbfe0000000000000, 0xbfe0000000000000, 0xbfe0000000000000, 0xbfe0000000000000, 0xbfe0000000000000, 0xbfe0000000000000, 0xbfe0000000000000, 0xbfe0000000000000
	/* L2H = log(2)_high */
	.align 64
	.quad 0x3fe62E42FEFA0000, 0x3fe62E42FEFA0000, 0x3fe62E42FEFA0000, 0x3fe62E42FEFA0000, 0x3fe62E42FEFA0000, 0x3fe62E42FEFA0000, 0x3fe62E42FEFA0000, 0x3fe62E42FEFA0000
	/* L2L = log(2)_low */
	.align 64
	.quad 0x3d7cf79abc9e0000, 0x3d7cf79abc9e0000, 0x3d7cf79abc9e0000, 0x3d7cf79abc9e0000, 0x3d7cf79abc9e0000, 0x3d7cf79abc9e0000, 0x3d7cf79abc9e0000, 0x3d7cf79abc9e0000
	.align 64
	.type __svml_dacosh_data_internal_avx512, @object
	.size __svml_dacosh_data_internal_avx512, .-__svml_dacosh_data_internal_avx512
480

source code of glibc/sysdeps/x86_64/fpu/multiarch/svml_d_acosh8_core_avx512.S