1/* Function tanh vectorized with AVX-512.
2 Copyright (C) 2021-2024 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 https://www.gnu.org/licenses/. */
18
19/*
20 * ALGORITHM DESCRIPTION:
21 *
22 * NOTE: Since the hyperbolic tangent function is odd
23 * (tanh(x) = -tanh(-x)), below algorithm deals with the absolute
24 * value of the argument |x|: tanh(x) = sign(x) * tanh(|x|)
25 *
26 * We use a table lookup method to compute tanh(|x|).
27 * The basic idea is to split the input range into a number of subintervals
28 * and to approximate tanh(.) with a polynomial on each of them.
29 *
30 * IEEE SPECIAL CONDITIONS:
31 * x = [+, -]0, r = [+, -]0
32 * x = +Inf, r = +1
33 * x = -Inf, r = -1
34 * x = QNaN, r = QNaN
35 * x = SNaN, r = QNaN
36 *
37 *
38 * ALGORITHM DETAILS
39 * We handle special values in a callout function, aside from main path
40 * computations. "Special" for this algorithm are:
41 * INF, NAN, |x| > HUGE_THRESHOLD
42 *
43 *
44 * Main path computations are organized as follows:
45 * Actually we split the interval [0, SATURATION_THRESHOLD)
46 * into a number of subintervals. On each subinterval we approximate tanh(.)
47 * with a minimax polynomial of pre-defined degree. Polynomial coefficients
48 * are computed beforehand and stored in table. We also use
49 *
50 * y := |x| + B,
51 *
52 * here B depends on subinterval and is used to make argument
53 * closer to zero.
54 * We also add large fake interval [SATURATION_THRESHOLD, HUGE_THRESHOLD],
55 * where 1.0 + 0.0*y + 0.0*y^2 ... coefficients are stored - just to
56 * preserve main path computation logic but return 1.0 for all arguments.
57 *
58 * Hence reconstruction looks as follows:
59 * we extract proper polynomial and range reduction coefficients
60 * (Pj and B), corresponding to subinterval, to which |x| belongs,
61 * and return
62 *
63 * r := sign(x) * (P0 + P1 * y + ... + Pn * y^n)
64 *
65 * NOTE: we use multiprecision technique to multiply and sum the first
66 * K terms of the polynomial. So Pj, j = 0..K are stored in
67 * table each as a pair of target precision numbers (Pj and PLj) to
68 * achieve wider than target precision.
69 *
70 *
71 */
72
/* Byte offsets of the sub-tables inside __svml_dtanh_data_internal.

   _dC and _dP0 .. _dP17 each hold 16 doubles (128 bytes), so consecutive
   offsets differ by 128.  The mask rows that follow are 64 bytes apart.
   _dP1, _iExpMantMask, _iMinIdxOfsMask and _iMaxIdxMask are defined here
   but are not referenced by _ZGVeN8v_tanh_skx.  */

/* Range-reduction constant and polynomial coefficients.  */
#define _dC                     0
#define _dP0                    128
#define _dP1                    256
#define _dP2                    384
#define _dP3                    512
#define _dP4                    640
#define _dP5                    768
#define _dP6                    896
#define _dP7                    1024
#define _dP8                    1152
#define _dP9                    1280
#define _dP10                   1408
#define _dP11                   1536
#define _dP12                   1664
#define _dP13                   1792
#define _dP14                   1920
#define _dP15                   2048
#define _dP16                   2176
#define _dP17                   2304

/* 32-bit masks used to derive the table index from the high dword.  */
#define _iExpMantMask_UISA      2432
#define _iMinIdxOfsMask_UISA    2496
#define _iMaxIdxMask_UISA       2560

/* 64-bit sign / magnitude masks.  */
#define _dbSignMask             2624
#define _dbAbsMask              2688

/* Remaining 32-bit masks; _iExpMask is the special-input threshold.  */
#define _iExpMantMask           2752
#define _iExpMask               2816
#define _iMinIdxOfsMask         2880
#define _iMaxIdxMask            2944
103
104#include <sysdep.h>
105
/* _ZGVeN8v_tanh_skx: AVX-512 tanh kernel for eight packed doubles.

   In:   zmm0 = x (eight doubles).
   Out:  zmm0 = tanh (x), lane by lane.

   Fast path: a table index in [0, 15] is derived per lane from the high
   dword of x, one entry of every coefficient table is selected with
   vpermt2pd, and

     sign (x) | (P0 + y * (P2 + y * (P3 + ... + y * P17))),
     y = |x| - _dC[index]

   is evaluated as a Horner chain of fused multiply-adds.

   Slow path: every lane whose masked high dword is greater than _iExpMask
   is recomputed individually by calling scalar tanh.

   Frame (rsp aligned down to 64 bytes, then 320 bytes reserved):
     0(%rsp)    x as passed in; on the slow path reused for saved
                r14 / r13 / r12 at 0 / 8 / 16
     64(%rsp)   sign bits of x; on the slow path reused for a copy of x
     128(%rsp)  selected _dP0; on the slow path reused for the result
     192(%rsp)  selected _dP2
     256(%rsp)  selected _dP3  */

/* RIP-relative operand for the sub-table that starts OFFSET bytes into
   __svml_dtanh_data_internal.  */
#define TANH_TBL(offset)	offset+__svml_dtanh_data_internal(%rip)

	.section .text.evex512, "ax", @progbits
ENTRY(_ZGVeN8v_tanh_skx)
	/* Frame setup: keep the caller's rsp in rbp so the 64-byte
	   realignment can be undone on exit.  */
	pushq	%rbp
	cfi_def_cfa_offset(16)
	movq	%rsp, %rbp
	cfi_def_cfa(6, 16)
	cfi_offset(6, -16)
	andq	$-64, %rsp
	subq	$320, %rsp

	/* ymm5 = high dword of each lane; x itself is parked on the stack
	   for the slow path.  The low halves of _dC and _dP0 are loaded
	   early.  */
	vpsrlq	$32, %zmm0, %zmm4
	vmovups	%zmm0, (%rsp)
	vmovups	TANH_TBL(_dC), %zmm14
	vmovups	TANH_TBL(_dP0), %zmm15
	vpmovqd	%zmm4, %ymm5

	/* zmm13 = |x|, zmm3 = sign bits of x.  */
	vandpd	TANH_TBL(_dbAbsMask), %zmm0, %zmm13
	vandpd	TANH_TBL(_dbSignMask), %zmm0, %zmm3

	/* ymm7 = high dword with the sign and the low mantissa bits
	   removed; it feeds both the index and the special-input test.  */
	vpand	TANH_TBL(_iExpMantMask_UISA), %ymm5, %ymm7
	vmovups	TANH_TBL(_dP2), %zmm0
	vmovups	TANH_TBL(_dP16), %zmm4
	vmovups	TANH_TBL(_dP15), %zmm5
	vmovups	%zmm3, 64(%rsp)
	vmovups	TANH_TBL(_dP3), %zmm3
	vpsubd	TANH_TBL(_iMinIdxOfsMask_UISA), %ymm7, %ymm8

	/* Clamp the biased value to [0, _iMaxIdxMask_UISA] (signed) and
	   shift it down by 19 bits: ymm12 = table index 0 .. 15.  */
	vxorps	%ymm9, %ymm9, %ymm9
	vpmaxsd	%ymm9, %ymm8, %ymm10
	vpminsd	TANH_TBL(_iMaxIdxMask_UISA), %ymm10, %ymm11
	vpsrld	$19, %ymm11, %ymm12
	vmovups	TANH_TBL(_dP12), %zmm8
	vmovups	TANH_TBL(_dP11), %zmm9
	vmovups	TANH_TBL(_dP10), %zmm10
	vmovups	TANH_TBL(_dP9), %zmm11

	/* zmm2 = indices widened to 64 bits for vpermt2pd.  */
	vpmovzxdq %ymm12, %zmm2
	vmovups	TANH_TBL(_dP8), %zmm12

	/* Two-source permute: entries 0 .. 7 come from the register that
	   was preloaded with the low half of a table, entries 8 .. 15 from
	   the high half in memory (+64).  */
	vpermt2pd TANH_TBL(_dP2+64), %zmm2, %zmm0
	vpermt2pd TANH_TBL(_dC+64), %zmm2, %zmm14
	vpermt2pd TANH_TBL(_dP16+64), %zmm2, %zmm4
	vpermt2pd TANH_TBL(_dP15+64), %zmm2, %zmm5

	/* zmm1 = y = |x| - _dC[index].  */
	vsubpd	{rn-sae}, %zmm14, %zmm13, %zmm1
	vpermt2pd TANH_TBL(_dP12+64), %zmm2, %zmm8
	vpermt2pd TANH_TBL(_dP11+64), %zmm2, %zmm9
	vpermt2pd TANH_TBL(_dP10+64), %zmm2, %zmm10
	vpermt2pd TANH_TBL(_dP9+64), %zmm2, %zmm11
	vpermt2pd TANH_TBL(_dP8+64), %zmm2, %zmm12
	vpermt2pd TANH_TBL(_dP3+64), %zmm2, %zmm3
	vpermt2pd TANH_TBL(_dP0+64), %zmm2, %zmm15

	/* Spill the selected P2, P3 and P0 so that zmm0, zmm3 and zmm15 can
	   be reused for P17, P5 and P4.  */
	vmovups	%zmm0, 192(%rsp)
	vmovups	TANH_TBL(_dP17), %zmm0
	vmovups	TANH_TBL(_dP7), %zmm13
	vmovups	TANH_TBL(_dP6), %zmm14
	vmovups	%zmm3, 256(%rsp)
	vmovups	TANH_TBL(_dP5), %zmm3
	vmovups	%zmm15, 128(%rsp)
	vmovups	TANH_TBL(_dP4), %zmm15
	vpermt2pd TANH_TBL(_dP17+64), %zmm2, %zmm0
	vpermt2pd TANH_TBL(_dP7+64), %zmm2, %zmm13
	vpermt2pd TANH_TBL(_dP6+64), %zmm2, %zmm14
	vpermt2pd TANH_TBL(_dP5+64), %zmm2, %zmm3
	vpermt2pd TANH_TBL(_dP4+64), %zmm2, %zmm15

	/* Horner chain: zmm0 = zmm0 * y + next coefficient, from P17 down.
	   The special-input mask and the P14 / P13 lookups are interleaved
	   with its first steps.  */
	vfmadd213pd {rn-sae}, %zmm4, %zmm1, %zmm0

	/* edx bit i is set when lane i's masked high dword is greater
	   than _iExpMask.  */
	vpcmpgtd TANH_TBL(_iExpMask), %ymm7, %ymm6
	vmovmskps %ymm6, %edx
	vmovups	TANH_TBL(_dP14), %zmm6
	vfmadd213pd {rn-sae}, %zmm5, %zmm1, %zmm0
	vmovups	TANH_TBL(_dP13), %zmm7
	vpermt2pd TANH_TBL(_dP14+64), %zmm2, %zmm6
	vpermt2pd TANH_TBL(_dP13+64), %zmm2, %zmm7
	vfmadd213pd {rn-sae}, %zmm6, %zmm1, %zmm0

	/* The indices are no longer needed: zmm2 now carries spilled P3.  */
	vmovups	256(%rsp), %zmm2
	vfmadd213pd {rn-sae}, %zmm7, %zmm1, %zmm0
	vfmadd213pd {rn-sae}, %zmm8, %zmm1, %zmm0
	vfmadd213pd {rn-sae}, %zmm9, %zmm1, %zmm0
	vfmadd213pd {rn-sae}, %zmm10, %zmm1, %zmm0
	vfmadd213pd {rn-sae}, %zmm11, %zmm1, %zmm0
	vfmadd213pd {rn-sae}, %zmm12, %zmm1, %zmm0
	vfmadd213pd {rn-sae}, %zmm13, %zmm1, %zmm0
	vfmadd213pd {rn-sae}, %zmm14, %zmm1, %zmm0
	vfmadd213pd {rn-sae}, %zmm3, %zmm1, %zmm0

	/* zmm3 = spilled P0.  */
	vmovups	128(%rsp), %zmm3
	vfmadd213pd {rn-sae}, %zmm15, %zmm1, %zmm0
	vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0

	/* zmm2 = spilled P2.  */
	vmovups	192(%rsp), %zmm2
	vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
	vfmadd213pd {rn-sae}, %zmm3, %zmm1, %zmm0

	/* Put the sign of x back onto the result.  */
	vorpd	64(%rsp), %zmm0, %zmm0

	/* Any flagged lane sends us to the per-lane scalar fallback.  */
	testl	%edx, %edx
	jne	L(special_lanes)

	/* Common exit: drop the aligned frame and return zmm0.  */
L(epilogue):
	movq	%rbp, %rsp
	popq	%rbp
	cfi_def_cfa(7, 8)
	cfi_restore(6)
	ret
	cfi_def_cfa(6, 16)
	cfi_offset(6, -16)

	/* Slow path entry.  Live here: edx = lane mask, zmm0 = fast-path
	   result.  The result goes to 128(%rsp) and x is copied to
	   64(%rsp), which frees 0 .. 23(%rsp) for register saves.  */
L(special_lanes):
	vmovups	(%rsp), %zmm1
	vmovups	%zmm0, 128(%rsp)
	vmovups	%zmm1, 64(%rsp)

	/* eax = 0 is the initial lane number.  */
	xorl	%eax, %eax

	/* The vector result is already in memory; clear the upper vector
	   state before running scalar code.  */
	vzeroupper

	/* r12d = lane counter, r13d = lane mask, r14 = lane index used for
	   addressing.  They are callee-saved, so their incoming values are
	   stored first and described to the unwinder.  */
	movq	%r12, 16(%rsp)
	/*  DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -304; DW_OP_plus)  */
	.cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xd0, 0xfe, 0xff, 0xff, 0x22
	movl	%eax, %r12d
	movq	%r13, 8(%rsp)
	/*  DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -312; DW_OP_plus)  */
	.cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xc8, 0xfe, 0xff, 0xff, 0x22
	movl	%edx, %r13d
	movq	%r14, (%rsp)
	/*  DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -320; DW_OP_plus)  */
	.cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xc0, 0xfe, 0xff, 0xff, 0x22

	/* Is the current lane flagged?  If so, recompute it.  */
L(test_lane):
	btl	%r12d, %r13d
	jc	L(scalar_fallback)

	/* Advance to the next lane; leave the loop after lane 7.  */
L(next_lane):
	incl	%r12d
	cmpl	$8, %r12d
	jl	L(test_lane)

	/* All lanes visited: restore r12 - r14, reload the patched result
	   vector and leave through the common exit.  */
	movq	16(%rsp), %r12
	cfi_restore(12)
	movq	8(%rsp), %r13
	cfi_restore(13)
	movq	(%rsp), %r14
	cfi_restore(14)
	vmovups	128(%rsp), %zmm0
	jmp	L(epilogue)

	/* The code below is reached only while r12 - r14 are still saved
	   on the stack, so the unwind expressions are stated again.  */
	/*  DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -304; DW_OP_plus)  */
	.cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xd0, 0xfe, 0xff, 0xff, 0x22
	/*  DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -312; DW_OP_plus)  */
	.cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xc8, 0xfe, 0xff, 0xff, 0x22
	/*  DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -320; DW_OP_plus)  */
	.cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xc0, 0xfe, 0xff, 0xff, 0x22

	/* Scalar fallback for lane r12d: load x[lane] from the copy at
	   64(%rsp), call tanh, and overwrite result[lane] at 128(%rsp).
	   The 32-bit move zero-extends the lane number into r14.  */
L(scalar_fallback):
	movl	%r12d, %r14d
	vmovsd	64(%rsp, %r14, 8), %xmm0
	call	tanh@PLT
	vmovsd	%xmm0, 128(%rsp, %r14, 8)
	jmp	L(next_lane)
END(_ZGVeN8v_tanh_skx)
299
/* Read-only lookup data for _ZGVeN8v_tanh_skx.

   _dC and every _dPn hold 16 doubles, one per table index 0 .. 15; the
   kernel picks entry [index] from each of them with vpermt2pd.  Entry 15
   is the saturation entry: _dC is 0, the p0 value is 1.0 and all other
   coefficients are 0.  The mask rows after the coefficient tables are
   64 bytes each.  */
	.section .rodata, "a"
	.align 64

#ifdef __svml_dtanh_data_internal_typedef
typedef unsigned int VUINT32;
typedef struct {
	__declspec(align(64)) VUINT32 _dC[16][2];
	__declspec(align(64)) VUINT32 _dP0[16][2];
	__declspec(align(64)) VUINT32 _dP1[16][2];
	__declspec(align(64)) VUINT32 _dP2[16][2];
	__declspec(align(64)) VUINT32 _dP3[16][2];
	__declspec(align(64)) VUINT32 _dP4[16][2];
	__declspec(align(64)) VUINT32 _dP5[16][2];
	__declspec(align(64)) VUINT32 _dP6[16][2];
	__declspec(align(64)) VUINT32 _dP7[16][2];
	__declspec(align(64)) VUINT32 _dP8[16][2];
	__declspec(align(64)) VUINT32 _dP9[16][2];
	__declspec(align(64)) VUINT32 _dP10[16][2];
	__declspec(align(64)) VUINT32 _dP11[16][2];
	__declspec(align(64)) VUINT32 _dP12[16][2];
	__declspec(align(64)) VUINT32 _dP13[16][2];
	__declspec(align(64)) VUINT32 _dP14[16][2];
	__declspec(align(64)) VUINT32 _dP15[16][2];
	__declspec(align(64)) VUINT32 _dP16[16][2];
	__declspec(align(64)) VUINT32 _dP17[16][2];
	__declspec(align(64)) VUINT32 _iExpMantMask_UISA[16][1];
	__declspec(align(64)) VUINT32 _iMinIdxOfsMask_UISA[16][1];
	__declspec(align(64)) VUINT32 _iMaxIdxMask_UISA[16][1];
	__declspec(align(64)) VUINT32 _dbSignMask[8][2];
	__declspec(align(64)) VUINT32 _dbAbsMask[8][2];
	__declspec(align(64)) VUINT32 _iExpMantMask[16][1];
	__declspec(align(64)) VUINT32 _iExpMask[16][1];
	__declspec(align(64)) VUINT32 _iMinIdxOfsMask[16][1];
	__declspec(align(64)) VUINT32 _iMaxIdxMask[16][1];
} __svml_dtanh_data_internal;
#endif
__svml_dtanh_data_internal:
	/* _dC (offset 0): constant subtracted from |x| to form y.  */
	.quad	0x0000000000000000, 0x3fcc000000000000, 0x3fd4000000000000, 0x3fdc000000000000
	.quad	0x3fe4000000000000, 0x3fec000000000000, 0x3ff4000000000000, 0x3ffc000000000000
	.quad	0x4004000000000000, 0x400c000000000000, 0x4014000000000000, 0x401c000000000000
	.quad	0x4024000000000000, 0x402c000000000000, 0x4034000000000000, 0x0000000000000000
	/* p0 (_dP0): constant term, added last in the Horner chain.  */
	.align	64
	.quad	0x0000000000000000, 0x3fcb8fd0416a7c92, 0x3fd35f98a0ea650e, 0x3fda5729ee488037
	.quad	0x3fe1bf47eabb8f95, 0x3fe686650b8c2015, 0x3feb2523bb6b2dee, 0x3fee1fbf97e33527
	.quad	0x3fef9258260a71c2, 0x3feff112c63a9077, 0x3fefff419668df11, 0x3feffffc832750f2
	.quad	0x3feffffffdc96f35, 0x3fefffffffffcf58, 0x3ff0000000000000, 0x3ff0000000000000
	/* p1 (_dP1): not loaded by _ZGVeN8v_tanh_skx.  */
	.align	64
	.quad	0x0000000000000000, 0x3c65e23ebcd3bcbe, 0xbc4c600bac3adf00, 0x3c6c44091785d040
	.quad	0x3c8221d7a6e3674b, 0x3c69f89d2cf6b85c, 0x3c73b3e9ec0b8f1c, 0xbc7f8d4b0428aada
	.quad	0xbc7c52d880cf43c0, 0x3c7dd36e37096480, 0x3c7b4f6380c442ca, 0xbc729755de470096
	.quad	0x3c84cf852845efbd, 0x3c6fc4fb440a5378, 0xbc63981083b55870, 0x0000000000000000
	/* p2 (_dP2): coefficient of y.  */
	.align	64
	.quad	0x3ff0000000000000, 0x3fee842ca3f08532, 0x3fed11574af58f1b, 0x3fea945b9c24e4f9
	.quad	0x3fe6284c3374f815, 0x3fe02500a09f8d6e, 0x3fd1f25131e3a8c0, 0x3fbd22ca1c24a139
	.quad	0x3f9b3afe1fba5c76, 0x3f6dd37d19b22b21, 0x3f27ccec13a9ef96, 0x3ecbe6c3f33250ae
	.quad	0x3e41b4865394f75f, 0x3d8853f01bda5f28, 0x3c73953c0197ef58, 0x0000000000000000
	/* p3 (_dP3): coefficient of y^2.  */
	.align	64
	.quad	0xbbf0b3ea3fdfaa19, 0xbfca48aaeb53bc21, 0xbfd19921f4329916, 0xbfd5e0f09bef8011
	.quad	0xbfd893b59c35c882, 0xbfd6ba7cb7576538, 0xbfce7291743d7555, 0xbfbb6d85a01efb80
	.quad	0xbf9addae58c7141a, 0xbf6dc59376c7aa19, 0xbf27cc5e74677410, 0xbecbe6c0e8b4cc87
	.quad	0xbe41b486526b0565, 0xbd8853f01bef63a4, 0xbc73955be519be31, 0x0000000000000000
	/* p4 (_dP4): coefficient of y^3.  */
	.align	64
	.quad	0xbfd5555555555555, 0xbfd183afc292ba11, 0xbfcc1a4b039c9bfa, 0xbfc16e1e6d8d0be6
	.quad	0xbf92426c751e48a2, 0x3fb4f152b2bad124, 0x3fbbba40cbef72be, 0x3fb01ba038be6a3d
	.quad	0x3f916df44871efc8, 0x3f63c6869dfc8870, 0x3f1fb9aef915d828, 0x3ec299d1e27c6e11
	.quad	0x3e379b5ddcca334c, 0x3d8037f57bc62c9a, 0x3c6a2d4b50a2cff7, 0x0000000000000000
	/* p5 (_dP5): coefficient of y^4.  */
	.align	64
	.quad	0xbce6863ee44ed636, 0x3fc04dcd0476c75e, 0x3fc43d3449a80f08, 0x3fc5c26f3699b7e7
	.quad	0x3fc1a686f6ab2533, 0x3faf203c316ce730, 0xbf89c7a02788557c, 0xbf98157e26e0d541
	.quad	0xbf807b55c1c7d278, 0xbf53a18d5843190f, 0xbf0fb6bbc89b1a5b, 0xbeb299c9c684a963
	.quad	0xbe279b5dd4fb3d01, 0xbd7037f57ae72aa6, 0xbc5a2ca2bba78e86, 0x0000000000000000
	/* p6 (_dP6): coefficient of y^5.  */
	.align	64
	.quad	0x3fc1111111112ab5, 0x3fb5c19efdfc08ad, 0x3fa74c98dc34fbac, 0xbf790d6a8eff0a77
	.quad	0xbfac3c021789a786, 0xbfae2196b7326859, 0xbf93a7a011ff8c2a, 0x3f6e4709c7e8430e
	.quad	0x3f67682afa611151, 0x3f3ef2ee77717cbf, 0x3ef95a4482f180b7, 0x3e9dc2c27da3b603
	.quad	0x3e12e2afd9f7433e, 0x3d59f320348679ba, 0x3c44b61d9bbcc940, 0x0000000000000000
	/* p7 (_dP7): coefficient of y^6.  */
	.align	64
	.quad	0xbda1ea19ddddb3b4, 0xbfb0b8df995ce4df, 0xbfb2955cf41e8164, 0xbfaf9d05c309f7c6
	.quad	0xbf987d27ccff4291, 0x3f8b2ca62572b098, 0x3f8f1cf6c7f5b00a, 0x3f60379811e43dd5
	.quad	0xbf4793826f78537e, 0xbf2405695e36240f, 0xbee0e08de39ce756, 0xbe83d709ba5f714e
	.quad	0xbdf92e3fc5ee63e0, 0xbd414cc030f2110e, 0xbc2ba022e8d82a87, 0x0000000000000000
	/* p8 (_dP8): coefficient of y^7.  */
	.align	64
	.quad	0xbfaba1ba1990520b, 0xbf96e37bba52f6fc, 0x3ecff7df18455399, 0x3f97362834d33a4e
	.quad	0x3f9e7f8380184b45, 0x3f869543e7c420d4, 0xbf7326bd4914222a, 0xbf5fc15b0a9d98fa
	.quad	0x3f14cffcfa69fbb6, 0x3f057e48e5b79d10, 0x3ec33b66d7d77264, 0x3e66ac4e578b9b10
	.quad	0x3ddcc74b8d3d5c42, 0x3d23c589137f92b4, 0x3c107f8e2c8707a1, 0x0000000000000000
	/* p9 (_dP9): coefficient of y^8.  */
	.align	64
	.quad	0xbe351ca7f096011f, 0x3f9eaaf3320c3851, 0x3f9cf823fe761fc1, 0x3f9022271754ff1f
	.quad	0xbf731fe77c9c60af, 0xbf84a6046865ec7d, 0xbf4ca3f1f2b9192b, 0x3f4c77dee0afd227
	.quad	0x3f04055bce68597a, 0xbee2bf0cb4a71647, 0xbea31eaafe73efd5, 0xbe46abb02c4368ed
	.quad	0xbdbcc749ca8079dd, 0xbd03c5883836b9d2, 0xbbf07a5416264aec, 0x0000000000000000
	/* p10 (_dP10): coefficient of y^9.  */
	.align	64
	.quad	0x3f9664f94e6ac14e, 0xbf94d3343bae39dd, 0xbf7bc748e60df843, 0xbf8c89372b43ba85
	.quad	0xbf8129a092de747a, 0x3f60c85b4d538746, 0x3f5be9392199ec18, 0xbf2a0c68a4489f10
	.quad	0xbf00462601dc2faa, 0x3eb7b6a219dea9f4, 0x3e80cbcc8d4c5c8a, 0x3e2425bb231a5e29
	.quad	0x3d9992a4beac8662, 0x3ce191ba5ed3fb67, 0x3bc892450bad44c4, 0x0000000000000000
	/* p11 (_dP11): coefficient of y^10.  */
	.align	64
	.quad	0xbea8c4c1fd7852fe, 0xbfccce16b1046f13, 0xbf81a16f224bb7b6, 0xbf62cbf00406bc09
	.quad	0x3f75b29bb02cf69b, 0x3f607df0f9f90c17, 0xbf4b852a6e0758d5, 0xbf0078c63d1b8445
	.quad	0x3eec12eadd55be7a, 0xbe6fa600f593181b, 0xbe5a3c935dce3f7d, 0xbe001c6d95e3ae96
	.quad	0xbd74755a00ea1fd3, 0xbcbc1c6c063bb7ac, 0xbba3be9a4460fe00, 0x0000000000000000
	/* p12 (_dP12): coefficient of y^11.  */
	.align	64
	.quad	0xbf822404577aa9dd, 0x403d8b07f7a82aa3, 0xbf9f44ab92fbab0a, 0x3fb2eac604473d6a
	.quad	0x3f45f87d903aaac8, 0xbf5e104671036300, 0x3f19bc98ddf0f340, 0x3f0d4304bc9246e8
	.quad	0xbed13c415f7b9d41, 0xbe722b8d9720cdb0, 0x3e322666d739bec0, 0x3dd76a553d7e7918
	.quad	0x3d4de0fa59416a39, 0x3c948716cf3681b4, 0x3b873f9f2d2fda99, 0x0000000000000000
	/* p13 (_dP13): coefficient of y^12.  */
	.align	64
	.quad	0xbefdd99a221ed573, 0x4070593a3735bab4, 0xbfccab654e44835e, 0x3fd13ed80037dbac
	.quad	0xbf6045b9076cc487, 0x3f2085ee7e8ac170, 0x3f23524622610430, 0xbeff12a6626911b4
	.quad	0x3eab9008bca408af, 0x3e634df71865f620, 0xbe05bb1bcf83ca73, 0xbdaf2ac143fb6762
	.quad	0xbd23eae52a3dbf57, 0xbc6b5e3e9ca0955e, 0xbb5eca68e2c1ba2e, 0x0000000000000000
	/* p14 (_dP14): coefficient of y^13.  */
	.align	64
	.quad	0x3f6e3be689423841, 0xc0d263511f5baac1, 0x40169f73b15ebe5c, 0xc025c1dd41cd6cb5
	.quad	0xbf58fd89fe05e0d1, 0x3f73f7af01d5af7a, 0xbf1e40bdead17e6b, 0x3ee224cd6c4513e5
	.quad	0xbe24b645e68eeaa3, 0xbe4abfebfb72bc83, 0x3dd51c38f8695ed3, 0x3d8313ac38c6832b
	.quad	0x3cf7787935626685, 0x3c401ffc49c6bc29, 0xbabf0b21acfa52ab, 0x0000000000000000
	/* p15 (_dP15): coefficient of y^14.  */
	.align	64
	.quad	0xbf2a1306713a4f3a, 0xc1045e509116b066, 0x4041fab9250984ce, 0xc0458d090ec3de95
	.quad	0xbf74949d60113d63, 0x3f7c9fd6200d0ade, 0x3f02cd40e0ad0a9f, 0xbe858ab8e019f311
	.quad	0xbe792fa6323b7cf8, 0x3e2df04d67876402, 0xbd95c72be95e4d2c, 0xbd55a89c30203106
	.quad	0xbccad6b3bb9eff65, 0xbc12705ccd3dd884, 0xba8e0a4c47ae75f5, 0x0000000000000000
	/* p16 (_dP16): coefficient of y^15.  */
	.align	64
	.quad	0xbf55d7e76dc56871, 0x41528c38809c90c7, 0xc076d57fb5190b02, 0x4085f09f888f8ada
	.quad	0x3fa246332a2fcba5, 0xbfb29d851a896fcd, 0x3ed9065ae369b212, 0xbeb8e1ba4c98a030
	.quad	0x3e6ffd0766ad4016, 0xbe0c63c29f505f5b, 0xbd7fab216b9e0e49, 0x3d2826b62056aa27
	.quad	0x3ca313e31762f523, 0x3bea37aa21895319, 0x3ae5c7f1fd871496, 0x0000000000000000
	/* p17 (_dP17): coefficient of y^16, the start of the Horner chain.  */
	.align	64
	.quad	0x3f35e67ab76a26e7, 0x41848ee0627d8206, 0xc0a216d618b489ec, 0x40a5b89107c8af4f
	.quad	0x3fb69d8374520eda, 0xbfbded519f981716, 0xbef02d288b5b3371, 0x3eb290981209c1a6
	.quad	0xbe567e924bf5ff6e, 0x3de3f7f7de6b0eb6, 0x3d69ed18bae3ebbc, 0xbcf7534c4f3dfa71
	.quad	0xbc730b73f1eaff20, 0xbbba2cff8135d462, 0xbab5a71b5f7d9035, 0x0000000000000000
	/* _iExpMantMask_UISA: ANDed with the high dword of x before the
	   index and special-input computations.  */
	.align	64
	.long	0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000
	.long	0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000
	.long	0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000
	.long	0x7ff80000, 0x7ff80000, 0x7ff80000, 0x7ff80000
	/* _iMinIdxOfsMask_UISA: subtracted from the masked high dword.  */
	.align	64
	.long	0x3fc00000, 0x3fc00000, 0x3fc00000, 0x3fc00000
	.long	0x3fc00000, 0x3fc00000, 0x3fc00000, 0x3fc00000
	.long	0x3fc00000, 0x3fc00000, 0x3fc00000, 0x3fc00000
	.long	0x3fc00000, 0x3fc00000, 0x3fc00000, 0x3fc00000
	/* _iMaxIdxMask_UISA: upper clamp; 0x00780000 >> 19 is index 15.  */
	.align	64
	.long	0x00780000, 0x00780000, 0x00780000, 0x00780000
	.long	0x00780000, 0x00780000, 0x00780000, 0x00780000
	.long	0x00780000, 0x00780000, 0x00780000, 0x00780000
	.long	0x00780000, 0x00780000, 0x00780000, 0x00780000
	/* _dbSignMask: sign bit of each double.  */
	.align	64
	.quad	0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000
	.quad	0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000
	/* _dbAbsMask: every bit except the sign bit.  */
	.align	64
	.quad	0x7fffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff
	.quad	0x7fffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff, 0x7fffffffffffffff
	/* _iExpMantMask: not loaded by _ZGVeN8v_tanh_skx.  */
	.align	64
	.long	0x7ffe0000, 0x7ffe0000, 0x7ffe0000, 0x7ffe0000
	.long	0x7ffe0000, 0x7ffe0000, 0x7ffe0000, 0x7ffe0000
	.long	0x7ffe0000, 0x7ffe0000, 0x7ffe0000, 0x7ffe0000
	.long	0x7ffe0000, 0x7ffe0000, 0x7ffe0000, 0x7ffe0000
	/* _iExpMask: lanes whose masked high dword exceeds this value take
	   the scalar fallback.  */
	.align	64
	.long	0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000
	.long	0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000
	.long	0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000
	.long	0x7fe00000, 0x7fe00000, 0x7fe00000, 0x7fe00000
	/* _iMinIdxOfsMask: not loaded by _ZGVeN8v_tanh_skx.  */
	.align	64
	.long	0x3fbe0000, 0x3fbe0000, 0x3fbe0000, 0x3fbe0000
	.long	0x3fbe0000, 0x3fbe0000, 0x3fbe0000, 0x3fbe0000
	.long	0x3fbe0000, 0x3fbe0000, 0x3fbe0000, 0x3fbe0000
	.long	0x3fbe0000, 0x3fbe0000, 0x3fbe0000, 0x3fbe0000
	/* _iMaxIdxMask: not loaded by _ZGVeN8v_tanh_skx.  */
	.align	64
	.long	0x00760000, 0x00760000, 0x00760000, 0x00760000
	.long	0x00760000, 0x00760000, 0x00760000, 0x00760000
	.long	0x00760000, 0x00760000, 0x00760000, 0x00760000
	.long	0x00760000, 0x00760000, 0x00760000, 0x00760000
	.align	64
	.type	__svml_dtanh_data_internal, @object
	.size	__svml_dtanh_data_internal, .-__svml_dtanh_data_internal
471

/* Source: glibc/sysdeps/x86_64/fpu/multiarch/svml_d_tanh8_core_avx512.S  */