/* svml_s_asinhf8_core_avx2.S -- glibc/sysdeps/x86_64/fpu/multiarch/svml_s_asinhf8_core_avx2.S  */

/* Function asinhf vectorized with AVX2.
   Copyright (C) 2021-2024 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   https://www.gnu.org/licenses/.  */
18
/*
 * ALGORITHM DESCRIPTION:
 *
 *   Compute asinh(x) as log(x + sqrt(x*x + 1))
 *
 *   Special cases:
 *
 *   asinh(NaN) = quiet NaN, and raise invalid exception
 *   asinh(INF) = that INF
 *   asinh(0)   = that 0
 *
 */
31
/* Byte offsets into the data table __svml_sasinh_data_internal
 * (defined in .rodata at the end of this file).
 *
 * Every entry is one 32-byte row (8 x 32-bit words, the same value
 * replicated for each of the 8 lanes), so consecutive offsets differ
 * by 32.  The exception is sPoly, which holds 8 rows (P0..P7) and
 * therefore spans 256 bytes (64 .. 319).  These values must stay in
 * step with the order of the rows in the table.
 */
#define SgnMask				0
#define sOne				32
#define sPoly				64
#define iBrkValue			320
#define iOffExpoMask			352
#define sBigThreshold			384
#define sC2				416
#define sC3				448
#define sHalf				480
#define sLargestFinite			512
#define sLittleThreshold		544
#define sSign				576
#define sThirtyOne			608
#define sTopMask8			640
#define XScale				672
#define sLn2				704

#include <sysdep.h>
52
/*
 * _ZGVdN8v_asinhf_avx2 -- asinh on eight packed single-precision lanes.
 *
 * Syntax: GNU as, AT&T operand order (sources first, destination last),
 *         preprocessed by cpp (ENTRY/END/L/cfi_* come from <sysdep.h>).
 *
 * In:     %ymm0 = x (eight floats); copied to %ymm9 on entry.
 * Out:    %ymm0 = asinh of each lane.
 * Clobb:  %ymm0-%ymm15, %eax, %edx, flags.  %r12-%r14 are used only on
 *         the special-values path, where they are saved and restored.
 * Calls:  asinhf@PLT, once for every lane flagged in the range mask.
 *
 * Stack:  %rbp is the frame pointer; %rsp is aligned down to 32 bytes
 *         and 96 bytes are reserved.  The special-values path uses:
 *           0(%rsp)  saved %r14
 *           8(%rsp)  saved %r13
 *          16(%rsp)  saved %r12
 *          32(%rsp)  copy of the input vector  (32 bytes)
 *          64(%rsp)  result vector, patched lane by lane (32 bytes)
 *
 * Flow:   The vector path computes all eight lanes.  %edx receives one
 *         bit per lane for which |x| <= sLargestFinite does not hold
 *         (infinities and NaNs).  If %edx is non-zero, those lanes are
 *         recomputed with the scalar asinhf and stored over the vector
 *         result before returning.
 */
	.section .text.avx2, "ax", @progbits
ENTRY(_ZGVdN8v_asinhf_avx2)
	pushq	%rbp
	cfi_def_cfa_offset(16)
	movq	%rsp, %rbp
	cfi_def_cfa(6, 16)
	cfi_offset(6, -16)
	andq	$-32, %rsp
	subq	$96, %rsp
	vmovaps	%ymm0, %ymm9

	/* Load the constant 1.0 (ymm8) */
	vmovups	sOne+__svml_sasinh_data_internal(%rip), %ymm8

	/*
	 * No need to split X when FMA is available in hardware.
	 * ymm5 = X*X rounded; the residual X*X - ymm5 is recovered
	 * below with vfmsub213ps.
	 */
	vmulps	%ymm9, %ymm9, %ymm5
	vmovups	sTopMask8+__svml_sasinh_data_internal(%rip), %ymm1

	/*
	 * Finally, express Y + W = X^2 + 1 accurately where Y has <= 8 bits.
	 * If |X| <= 1 then |XHi| <= 1 and so |X2Hi| <= 1, so we can treat 1
	 * as the dominant component in the compensated summation. Otherwise,
	 * if |X| >= 1, then since X2Hi only has 22 significant bits, the basic
	 * addition will be exact anyway until we get to |X| >= 2^24. But by
	 * that time the log function is well-conditioned enough that the
	 * rounding error doesn't matter. Hence we can treat 1 as dominant even
	 * if it literally isn't.
	 */
	vaddps	%ymm5, %ymm8, %ymm13
	vandps	%ymm1, %ymm13, %ymm2
	vmovaps	%ymm9, %ymm4
	vsubps	%ymm13, %ymm8, %ymm11
	vsubps	%ymm2, %ymm13, %ymm15

	/*
	 * Compute R = 1/sqrt(Y + W) * (1 + d)
	 * Force R to <= 8 significant bits.
	 * This means that R * Y and R^2 * Y are exactly representable.
	 */
	vrsqrtps %ymm2, %ymm0
	vfmsub213ps %ymm5, %ymm9, %ymm4
	vaddps	%ymm11, %ymm5, %ymm12

	/*
	 * Get the absolute value of the input, since we will exploit antisymmetry
	 * and mostly assume X >= 0 in the core computation
	 */
	vandps	SgnMask+__svml_sasinh_data_internal(%rip), %ymm9, %ymm6

	/*
	 * Check whether the input is finite, by checking |X| <= MaxFloat
	 * Otherwise set the rangemask so that the callout will get used.
	 * Note that this will also use the callout for NaNs since not(NaN <= MaxFloat)
	 */
	vcmpnle_uqps sLargestFinite+__svml_sasinh_data_internal(%rip), %ymm6, %ymm10
	vaddps	%ymm12, %ymm4, %ymm14

	/*
	 * Unfortunately, we can still be in trouble if |X| <= 2^-5, since
	 * the absolute error 2^-(7+24)-ish in sqrt(1 + X^2) gets scaled up
	 * by 1/X and comes close to our threshold. Hence if |X| <= 2^-4,
	 * perform an alternative computation
	 * sqrt(1 + X^2) - 1 = X^2/2 - X^4/8 + X^6/16
	 * X2 = X^2
	 */
	vaddps	%ymm4, %ymm5, %ymm4

	/*
	 * The following computation can go wrong for very large X, basically
	 * because X^2 overflows. But for large X we have
	 * asinh(X) / log(2 X) - 1 =~= 1/(4 * X^2), so for X >= 2^30
	 * we can just later stick X back into the log and tweak up the exponent.
	 * Actually we scale X by 2^-30 and tweak the exponent up by 31,
	 * to stay in the safe range for the later log computation.
	 * Compute a flag now telling us when to do this.
	 * (ymm7 is all-ones in lanes where |X| < sBigThreshold, i.e. the
	 * lanes that do NOT need the rescaling.)
	 */
	vcmplt_oqps sBigThreshold+__svml_sasinh_data_internal(%rip), %ymm6, %ymm7
	vaddps	%ymm15, %ymm14, %ymm3

	/*
	 * Now       1 / (1 + d)
	 * = 1 / (1 + (sqrt(1 - e) - 1))
	 * = 1 / sqrt(1 - e)
	 * = 1 + 1/2 * e + 3/8 * e^2 + 5/16 * e^3 + 35/128 * e^4 + ...
	 * So compute the first three nonconstant terms of that, so that
	 * we have a relative correction (1 + Corr) to apply to S etc.
	 * C1 = 1/2
	 * C2 = 3/8
	 * C3 = 5/16
	 */
	vmovups	sC3+__svml_sasinh_data_internal(%rip), %ymm12
	/* edx = one bit per lane that must go through the scalar callout */
	vmovmskps %ymm10, %edx
	vandps	%ymm1, %ymm0, %ymm10

	/*
	 * Compute S = (Y/sqrt(Y + W)) * (1 + d)
	 * and T = (W/sqrt(Y + W)) * (1 + d)
	 * so that S + T = sqrt(Y + W) * (1 + d)
	 * S is exact, and the rounding error in T is OK.
	 */
	vmulps	%ymm10, %ymm2, %ymm15
	vmulps	%ymm3, %ymm10, %ymm14
	vmovups	sHalf+__svml_sasinh_data_internal(%rip), %ymm3
	vsubps	%ymm8, %ymm15, %ymm0

	/*
	 * Obtain sqrt(1 + X^2) - 1 in two pieces
	 * sqrt(1 + X^2) - 1
	 * = sqrt(Y + W) - 1
	 * = (S + T) * (1 + Corr) - 1
	 * = [S - 1] + [T + (S + T) * Corr]
	 * We need a compensated summation for the last part. We treat S - 1
	 * as the larger part; it certainly is until about X < 2^-4, and in that
	 * case, the error is affordable since X dominates over sqrt(1 + X^2) - 1
	 * Final sum is dTmp5 (hi) + dTmp7 (lo)
	 */
	vaddps	%ymm14, %ymm15, %ymm13

	/*
	 * Compute e = -(2 * d + d^2)
	 * The first FMA is exact, and the rounding error in the other is acceptable
	 * since d and e are ~ 2^-8
	 */
	vmovaps	%ymm8, %ymm11
	vfnmadd231ps %ymm15, %ymm10, %ymm11
	vfnmadd231ps %ymm14, %ymm10, %ymm11
	vfmadd213ps sC2+__svml_sasinh_data_internal(%rip), %ymm11, %ymm12
	vfmadd213ps %ymm3, %ymm11, %ymm12
	vmulps	%ymm12, %ymm11, %ymm1

	/*
	 * Now multiplex the two possible computations
	 * (ymm11 is all-ones in lanes where |X| <= sLittleThreshold)
	 */
	vcmple_oqps sLittleThreshold+__svml_sasinh_data_internal(%rip), %ymm6, %ymm11
	vfmadd213ps %ymm14, %ymm13, %ymm1
	vaddps	%ymm0, %ymm1, %ymm2
	vsubps	%ymm2, %ymm0, %ymm10

	/* sX2over2 = X^2/2 */
	vmulps	%ymm4, %ymm3, %ymm0
	vaddps	%ymm10, %ymm1, %ymm1

	/* sX4over4 = X^4/4 */
	vmulps	%ymm0, %ymm0, %ymm5

	/* sX46 = -X^4/4 + X^6/8 */
	vfmsub231ps %ymm0, %ymm5, %ymm5

	/* sX46over2 = -X^4/8 + X^6/16 */
	vmulps	%ymm5, %ymm3, %ymm3
	vaddps	%ymm3, %ymm0, %ymm5
	vblendvps %ymm11, %ymm5, %ymm2, %ymm2
	vsubps	%ymm5, %ymm0, %ymm4

	/*
	 * Now do another compensated sum to add |X| + [sqrt(1 + X^2) - 1].
	 * It's always safe to assume |X| is larger.
	 * This is the final 2-part argument to the log1p function
	 */
	vaddps	%ymm2, %ymm6, %ymm14

	/*
	 * Now resume the main code.
	 * reduction: compute r, n
	 */
	vmovups	iBrkValue+__svml_sasinh_data_internal(%rip), %ymm5
	vaddps	%ymm4, %ymm3, %ymm10

	/*
	 * Now we feed into the log1p code, using H in place of _VARG1 and
	 * also adding L into Xl.
	 * compute 1+x as high, low parts
	 */
	vmaxps	%ymm14, %ymm8, %ymm15
	vminps	%ymm14, %ymm8, %ymm0
	vblendvps %ymm11, %ymm10, %ymm1, %ymm12
	vsubps	%ymm14, %ymm6, %ymm1
	vaddps	%ymm0, %ymm15, %ymm3

	/* Now multiplex to the case X = 2^-30 * input, Xl = sL = 0 in the "big" case. */
	vmulps	XScale+__svml_sasinh_data_internal(%rip), %ymm6, %ymm6
	vaddps	%ymm1, %ymm2, %ymm13
	vsubps	%ymm3, %ymm15, %ymm15
	vaddps	%ymm13, %ymm12, %ymm1
	vaddps	%ymm15, %ymm0, %ymm2
	vblendvps %ymm7, %ymm3, %ymm6, %ymm0
	vaddps	%ymm2, %ymm1, %ymm4
	/*
	 * Integer manipulation of the float bit pattern: subtract
	 * iBrkValue, take the arithmetic shift by 23 as the exponent n
	 * (ymm6), and rebuild the reduced argument from the masked
	 * significand bits plus iBrkValue (ymm13).
	 */
	vpsubd	%ymm5, %ymm0, %ymm1
	vpsrad	$23, %ymm1, %ymm6
	vpand	iOffExpoMask+__svml_sasinh_data_internal(%rip), %ymm1, %ymm2
	vmovups	sPoly+224+__svml_sasinh_data_internal(%rip), %ymm1
	vpslld	$23, %ymm6, %ymm10
	vpaddd	%ymm5, %ymm2, %ymm13
	vcvtdq2ps %ymm6, %ymm0
	vpsubd	%ymm10, %ymm8, %ymm12

	/* polynomial evaluation */
	vsubps	%ymm8, %ymm13, %ymm8

	/* Add 31 to the exponent in the "large" case to get log(2 * input) */
	vaddps	sThirtyOne+__svml_sasinh_data_internal(%rip), %ymm0, %ymm3
	vandps	%ymm7, %ymm4, %ymm11
	vmulps	%ymm12, %ymm11, %ymm14
	vblendvps %ymm7, %ymm0, %ymm3, %ymm0
	vaddps	%ymm8, %ymm14, %ymm2
	/* Horner scheme over sPoly rows P7 (already in ymm1) down to P0 */
	vfmadd213ps sPoly+192+__svml_sasinh_data_internal(%rip), %ymm2, %ymm1
	vfmadd213ps sPoly+160+__svml_sasinh_data_internal(%rip), %ymm2, %ymm1
	vfmadd213ps sPoly+128+__svml_sasinh_data_internal(%rip), %ymm2, %ymm1
	vfmadd213ps sPoly+96+__svml_sasinh_data_internal(%rip), %ymm2, %ymm1
	vfmadd213ps sPoly+64+__svml_sasinh_data_internal(%rip), %ymm2, %ymm1
	vfmadd213ps sPoly+32+__svml_sasinh_data_internal(%rip), %ymm2, %ymm1
	vfmadd213ps sPoly+__svml_sasinh_data_internal(%rip), %ymm2, %ymm1
	vmulps	%ymm1, %ymm2, %ymm4
	vfmadd213ps %ymm2, %ymm2, %ymm4

	/* final reconstruction: ymm0 = n * ln(2) + polynomial part */
	vfmadd132ps sLn2+__svml_sasinh_data_internal(%rip), %ymm4, %ymm0

	/* Finally, reincorporate the original sign. */
	vandps	sSign+__svml_sasinh_data_internal(%rip), %ymm9, %ymm7
	vxorps	%ymm0, %ymm7, %ymm0
	testl	%edx, %edx

	/* Go to special inputs processing branch */
	jne	L(SPECIAL_VALUES_BRANCH)
	# LOE rbx r12 r13 r14 r15 edx ymm0 ymm9

	/* Restore registers
	 * and exit the function
	 */

L(EXIT):
	movq	%rbp, %rsp
	popq	%rbp
	cfi_def_cfa(7, 8)
	cfi_restore(6)
	ret
	/* Re-establish the in-frame CFI state for the code after the ret */
	cfi_def_cfa(6, 16)
	cfi_offset(6, -16)

	/* Branch to process
	 * special inputs
	 */

L(SPECIAL_VALUES_BRANCH):
	/* Spill input (32(%rsp)) and vector result (64(%rsp)) */
	vmovups	%ymm9, 32(%rsp)
	vmovups	%ymm0, 64(%rsp)
	# LOE rbx r12 r13 r14 r15 edx ymm0

	xorl	%eax, %eax
	# LOE rbx r12 r13 r14 r15 eax edx

	vzeroupper
	movq	%r12, 16(%rsp)
	/*  DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -80; DW_OP_plus)  */
	.cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xb0, 0xff, 0xff, 0xff, 0x22
	/* r12d = lane index, starting at 0 */
	movl	%eax, %r12d
	movq	%r13, 8(%rsp)
	/*  DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -88; DW_OP_plus)  */
	.cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa8, 0xff, 0xff, 0xff, 0x22
	/* r13d = range mask (survives the scalar calls) */
	movl	%edx, %r13d
	movq	%r14, (%rsp)
	/*  DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -96; DW_OP_plus)  */
	.cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa0, 0xff, 0xff, 0xff, 0x22
	# LOE rbx r15 r12d r13d

	/* Range mask
	 * bits check
	 */

L(RANGEMASK_CHECK):
	/* CF = bit r12d of the range mask */
	btl	%r12d, %r13d

	/* Call scalar math function */
	jc	L(SCALAR_MATH_CALL)
	# LOE rbx r15 r12d r13d

	/* Special inputs
	 * processing loop
	 */

L(SPECIAL_VALUES_LOOP):
	incl	%r12d
	cmpl	$8, %r12d

	/* Check bits in range mask */
	jl	L(RANGEMASK_CHECK)
	# LOE rbx r15 r12d r13d

	movq	16(%rsp), %r12
	cfi_restore(12)
	movq	8(%rsp), %r13
	cfi_restore(13)
	movq	(%rsp), %r14
	cfi_restore(14)
	/* Reload the result vector, including any lanes patched by asinhf */
	vmovups	64(%rsp), %ymm0

	/* Go to exit */
	jmp	L(EXIT)
	/*  DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -80; DW_OP_plus)  */
	.cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xb0, 0xff, 0xff, 0xff, 0x22
	/*  DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -88; DW_OP_plus)  */
	.cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa8, 0xff, 0xff, 0xff, 0x22
	/*  DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -96; DW_OP_plus)  */
	.cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa0, 0xff, 0xff, 0xff, 0x22
	# LOE rbx r12 r13 r14 r15 ymm0

	/* Scalar math function call
	 * to process special input
	 */

L(SCALAR_MATH_CALL):
	/* The 32-bit move zero-extends the lane index into r14 for addressing */
	movl	%r12d, %r14d
	vmovss	32(%rsp, %r14, 4), %xmm0
	call	asinhf@PLT
	# LOE rbx r14 r15 r12d r13d xmm0

	/* Overwrite this lane of the saved result with the scalar value */
	vmovss	%xmm0, 64(%rsp, %r14, 4)

	/* Process special inputs in loop */
	jmp	L(SPECIAL_VALUES_LOOP)
	# LOE rbx r15 r12d r13d
END(_ZGVdN8v_asinhf_avx2)
374
/*
 * Constant table for the AVX2 asinhf kernel.
 *
 * Each row is 32-byte aligned and holds one 32-bit value replicated
 * eight times (one copy per lane).  sPoly is eight consecutive rows,
 * P0..P7.  The row order fixes the byte offsets that the code uses
 * (the SgnMask .. sLn2 #defines near the top of this file), so rows
 * must not be reordered or resized without updating those offsets.
 */
	.section .rodata, "a"
	.align	32

#ifdef __svml_sasinh_data_internal_typedef
typedef unsigned int VUINT32;
typedef struct {
	__declspec(align(32)) VUINT32 SgnMask[8][1];
	__declspec(align(32)) VUINT32 sOne[8][1];
	__declspec(align(32)) VUINT32 sPoly[8][8][1];
	__declspec(align(32)) VUINT32 iBrkValue[8][1];
	__declspec(align(32)) VUINT32 iOffExpoMask[8][1];
	__declspec(align(32)) VUINT32 sBigThreshold[8][1];
	__declspec(align(32)) VUINT32 sC2[8][1];
	__declspec(align(32)) VUINT32 sC3[8][1];
	__declspec(align(32)) VUINT32 sHalf[8][1];
	__declspec(align(32)) VUINT32 sLargestFinite[8][1];
	__declspec(align(32)) VUINT32 sLittleThreshold[8][1];
	__declspec(align(32)) VUINT32 sSign[8][1];
	__declspec(align(32)) VUINT32 sThirtyOne[8][1];
	__declspec(align(32)) VUINT32 sTopMask8[8][1];
	__declspec(align(32)) VUINT32 XScale[8][1];
	__declspec(align(32)) VUINT32 sLn2[8][1];
} __svml_sasinh_data_internal;
#endif
__svml_sasinh_data_internal:
	/* SgnMask = all bits except the sign bit (used to form |x|) */
	.long	0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff
	/* sOne = SP 1.0 */
	.align	32
	.long	0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000
	/* sPoly[] = SP polynomial */
	.align	32
	.long	0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000 /* -5.0000000000000000000000000e-01 P0 */
	.long	0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94 /* 3.3333265781402587890625000e-01 P1 */
	.long	0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e /* -2.5004237890243530273437500e-01 P2 */
	.long	0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190 /* 2.0007920265197753906250000e-01 P3 */
	.long	0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37 /* -1.6472326219081878662109375e-01 P4 */
	.long	0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12 /* 1.4042308926582336425781250e-01 P5 */
	.long	0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3 /* -1.5122179687023162841796875e-01 P6 */
	.long	0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed /* 1.3820238411426544189453125e-01 P7 */
	/* iBrkValue = SP 2/3 */
	.align	32
	.long	0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab
	/* iOffExpoMask = SP significand mask */
	.align	32
	.long	0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff
	/* sBigThreshold = SP 2^30 */
	.align	32
	.long	0x4E800000, 0x4E800000, 0x4E800000, 0x4E800000, 0x4E800000, 0x4E800000, 0x4E800000, 0x4E800000
	/* sC2 = SP 3/8 */
	.align	32
	.long	0x3EC00000, 0x3EC00000, 0x3EC00000, 0x3EC00000, 0x3EC00000, 0x3EC00000, 0x3EC00000, 0x3EC00000
	/* sC3 = SP 5/16 */
	.align	32
	.long	0x3EA00000, 0x3EA00000, 0x3EA00000, 0x3EA00000, 0x3EA00000, 0x3EA00000, 0x3EA00000, 0x3EA00000
	/* sHalf = SP 0.5 */
	.align	32
	.long	0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000
	/* sLargestFinite = largest finite SP value */
	.align	32
	.long	0x7F7FFFFF, 0x7F7FFFFF, 0x7F7FFFFF, 0x7F7FFFFF, 0x7F7FFFFF, 0x7F7FFFFF, 0x7F7FFFFF, 0x7F7FFFFF
	/* sLittleThreshold = SP 2^-4 */
	.align	32
	.long	0x3D800000, 0x3D800000, 0x3D800000, 0x3D800000, 0x3D800000, 0x3D800000, 0x3D800000, 0x3D800000
	/* sSign = sign bit only */
	.align	32
	.long	0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000
	/* sThirtyOne = SP 31.0 */
	.align	32
	.long	0x41F80000, 0x41F80000, 0x41F80000, 0x41F80000, 0x41F80000, 0x41F80000, 0x41F80000, 0x41F80000
	/* sTopMask8 = keeps the upper 16 bits of each word (sign, exponent, top 7 fraction bits) */
	.align	32
	.long	0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000
	/* XScale = SP 2^-30 */
	.align	32
	.long	0x30800000, 0x30800000, 0x30800000, 0x30800000, 0x30800000, 0x30800000, 0x30800000, 0x30800000
	/* sLn2 = SP ln(2) */
	.align	32
	.long	0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218
	.align	32
	.type	__svml_sasinh_data_internal, @object
	.size	__svml_sasinh_data_internal, .-__svml_sasinh_data_internal
457

/* End of glibc/sysdeps/x86_64/fpu/multiarch/svml_s_asinhf8_core_avx2.S  */