/* Function asinhf vectorized with AVX2.
   Copyright (C) 2021-2024 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   https://www.gnu.org/licenses/.  */

/*
 * ALGORITHM DESCRIPTION:
 *
 *   Compute asinh(x) as log(x + sqrt(x*x + 1))
 *
 *   Special cases:
 *
 *   asinh(NaN) = quiet NaN, and raise invalid exception
 *   asinh(INF) = that INF
 *   asinh(0)   = that 0
 *
 */
31 | |
32 | /* Offsets for data table __svml_sasinh_data_internal |
33 | */ |
34 | #define SgnMask 0 |
35 | #define sOne 32 |
36 | #define sPoly 64 |
37 | #define iBrkValue 320 |
38 | #define iOffExpoMask 352 |
39 | #define sBigThreshold 384 |
40 | #define sC2 416 |
41 | #define sC3 448 |
42 | #define sHalf 480 |
43 | #define sLargestFinite 512 |
44 | #define sLittleThreshold 544 |
45 | #define sSign 576 |
46 | #define sThirtyOne 608 |
47 | #define sTopMask8 640 |
48 | #define XScale 672 |
49 | #define sLn2 704 |
50 | |
51 | #include <sysdep.h> |
52 | |
53 | .section .text.avx2, "ax" , @progbits |
54 | ENTRY(_ZGVdN8v_asinhf_avx2) |
55 | pushq %rbp |
56 | cfi_def_cfa_offset(16) |
57 | movq %rsp, %rbp |
58 | cfi_def_cfa(6, 16) |
59 | cfi_offset(6, -16) |
60 | andq $-32, %rsp |
61 | subq $96, %rsp |
62 | vmovaps %ymm0, %ymm9 |
63 | |
64 | /* Load the constant 1 and a sign mask */ |
65 | vmovups sOne+__svml_sasinh_data_internal(%rip), %ymm8 |
66 | |
67 | /* No need to split X when FMA is available in hardware. */ |
68 | vmulps %ymm9, %ymm9, %ymm5 |
69 | vmovups sTopMask8+__svml_sasinh_data_internal(%rip), %ymm1 |
70 | |
71 | /* |
72 | * Finally, express Y + W = X^2 + 1 accurately where Y has <= 8 bits. |
73 | * If |X| <= 1 then |XHi| <= 1 and so |X2Hi| <= 1, so we can treat 1 |
74 | * as the dominant component in the compensated summation. Otherwise, |
75 | * if |X| >= 1, then since X2Hi only has 22 significant bits, the basic |
76 | * addition will be exact anyway until we get to |X| >= 2^24. But by |
77 | * that time the log function is well-conditioned enough that the |
78 | * rounding error doesn't matter. Hence we can treat 1 as dominant even |
79 | * if it literally isn't. |
80 | */ |
81 | vaddps %ymm5, %ymm8, %ymm13 |
82 | vandps %ymm1, %ymm13, %ymm2 |
83 | vmovaps %ymm9, %ymm4 |
84 | vsubps %ymm13, %ymm8, %ymm11 |
85 | vsubps %ymm2, %ymm13, %ymm15 |
86 | |
87 | /* |
88 | * Compute R = 1/sqrt(Y + W) * (1 + d) |
89 | * Force R to <= 8 significant bits. |
90 | * This means that R * Y and R^2 * Y are exactly representable. |
91 | */ |
92 | vrsqrtps %ymm2, %ymm0 |
93 | vfmsub213ps %ymm5, %ymm9, %ymm4 |
94 | vaddps %ymm11, %ymm5, %ymm12 |
95 | |
96 | /* |
97 | * Get the absolute value of the input, since we will exploit antisymmetry |
98 | * and mostly assume X >= 0 in the core computation |
99 | */ |
100 | vandps SgnMask+__svml_sasinh_data_internal(%rip), %ymm9, %ymm6 |
101 | |
102 | /* |
103 | * Check whether the input is finite, by checking |X| <= MaxFloat |
104 | * Otherwise set the rangemask so that the callout will get used. |
105 | * Note that this will also use the callout for NaNs since not(NaN <= MaxFloat) |
106 | */ |
107 | vcmpnle_uqps sLargestFinite+__svml_sasinh_data_internal(%rip), %ymm6, %ymm10 |
108 | vaddps %ymm12, %ymm4, %ymm14 |
109 | |
110 | /* |
111 | * Unfortunately, we can still be in trouble if |X| <= 2^-5, since |
112 | * the absolute error 2^-(7+24)-ish in sqrt(1 + X^2) gets scaled up |
113 | * by 1/X and comes close to our threshold. Hence if |X| <= 2^-4, |
114 | * perform an alternative computation |
115 | * sqrt(1 + X^2) - 1 = X^2/2 - X^4/8 + X^6/16 |
116 | * X2 = X^2 |
117 | */ |
118 | vaddps %ymm4, %ymm5, %ymm4 |
119 | |
120 | /* |
121 | * The following computation can go wrong for very large X, basically |
122 | * because X^2 overflows. But for large X we have |
123 | * asinh(X) / log(2 X) - 1 =~= 1/(4 * X^2), so for X >= 2^30 |
124 | * we can just later stick X back into the log and tweak up the exponent. |
125 | * Actually we scale X by 2^-30 and tweak the exponent up by 31, |
126 | * to stay in the safe range for the later log computation. |
127 | * Compute a flag now telling us when do do this. |
128 | */ |
129 | vcmplt_oqps sBigThreshold+__svml_sasinh_data_internal(%rip), %ymm6, %ymm7 |
130 | vaddps %ymm15, %ymm14, %ymm3 |
131 | |
132 | /* |
133 | * Now 1 / (1 + d) |
134 | * = 1 / (1 + (sqrt(1 - e) - 1)) |
135 | * = 1 / sqrt(1 - e) |
136 | * = 1 + 1/2 * e + 3/8 * e^2 + 5/16 * e^3 + 35/128 * e^4 + ... |
137 | * So compute the first three nonconstant terms of that, so that |
138 | * we have a relative correction (1 + Corr) to apply to S etc. |
139 | * C1 = 1/2 |
140 | * C2 = 3/8 |
141 | * C3 = 5/16 |
142 | */ |
143 | vmovups sC3+__svml_sasinh_data_internal(%rip), %ymm12 |
144 | vmovmskps %ymm10, %edx |
145 | vandps %ymm1, %ymm0, %ymm10 |
146 | |
147 | /* |
148 | * Compute S = (Y/sqrt(Y + W)) * (1 + d) |
149 | * and T = (W/sqrt(Y + W)) * (1 + d) |
150 | * so that S + T = sqrt(Y + W) * (1 + d) |
151 | * S is exact, and the rounding error in T is OK. |
152 | */ |
153 | vmulps %ymm10, %ymm2, %ymm15 |
154 | vmulps %ymm3, %ymm10, %ymm14 |
155 | vmovups sHalf+__svml_sasinh_data_internal(%rip), %ymm3 |
156 | vsubps %ymm8, %ymm15, %ymm0 |
157 | |
158 | /* |
159 | * Obtain sqrt(1 + X^2) - 1 in two pieces |
160 | * sqrt(1 + X^2) - 1 |
161 | * = sqrt(Y + W) - 1 |
162 | * = (S + T) * (1 + Corr) - 1 |
163 | * = [S - 1] + [T + (S + T) * Corr] |
164 | * We need a compensated summation for the last part. We treat S - 1 |
165 | * as the larger part; it certainly is until about X < 2^-4, and in that |
166 | * case, the error is affordable since X dominates over sqrt(1 + X^2) - 1 |
167 | * Final sum is dTmp5 (hi) + dTmp7 (lo) |
168 | */ |
169 | vaddps %ymm14, %ymm15, %ymm13 |
170 | |
171 | /* |
172 | * Compute e = -(2 * d + d^2) |
173 | * The first FMR is exact, and the rounding error in the other is acceptable |
174 | * since d and e are ~ 2^-8 |
175 | */ |
176 | vmovaps %ymm8, %ymm11 |
177 | vfnmadd231ps %ymm15, %ymm10, %ymm11 |
178 | vfnmadd231ps %ymm14, %ymm10, %ymm11 |
179 | vfmadd213ps sC2+__svml_sasinh_data_internal(%rip), %ymm11, %ymm12 |
180 | vfmadd213ps %ymm3, %ymm11, %ymm12 |
181 | vmulps %ymm12, %ymm11, %ymm1 |
182 | |
183 | /* Now multiplex the two possible computations */ |
184 | vcmple_oqps sLittleThreshold+__svml_sasinh_data_internal(%rip), %ymm6, %ymm11 |
185 | vfmadd213ps %ymm14, %ymm13, %ymm1 |
186 | vaddps %ymm0, %ymm1, %ymm2 |
187 | vsubps %ymm2, %ymm0, %ymm10 |
188 | |
189 | /* sX2over2 = X^2/2 */ |
190 | vmulps %ymm4, %ymm3, %ymm0 |
191 | vaddps %ymm10, %ymm1, %ymm1 |
192 | |
193 | /* sX4over4 = X^4/4 */ |
194 | vmulps %ymm0, %ymm0, %ymm5 |
195 | |
196 | /* sX46 = -X^4/4 + X^6/8 */ |
197 | vfmsub231ps %ymm0, %ymm5, %ymm5 |
198 | |
199 | /* sX46over2 = -X^4/8 + x^6/16 */ |
200 | vmulps %ymm5, %ymm3, %ymm3 |
201 | vaddps %ymm3, %ymm0, %ymm5 |
202 | vblendvps %ymm11, %ymm5, %ymm2, %ymm2 |
203 | vsubps %ymm5, %ymm0, %ymm4 |
204 | |
205 | /* |
206 | * Now do another compensated sum to add |X| + [sqrt(1 + X^2) - 1]. |
207 | * It's always safe to assume |X| is larger. |
208 | * This is the final 2-part argument to the log1p function |
209 | */ |
210 | vaddps %ymm2, %ymm6, %ymm14 |
211 | |
212 | /* |
213 | * Now resume the main code. |
214 | * reduction: compute r, n |
215 | */ |
216 | vmovups iBrkValue+__svml_sasinh_data_internal(%rip), %ymm5 |
217 | vaddps %ymm4, %ymm3, %ymm10 |
218 | |
219 | /* |
220 | * Now we feed into the log1p code, using H in place of _VARG1 and |
221 | * also adding L into Xl. |
222 | * compute 1+x as high, low parts |
223 | */ |
224 | vmaxps %ymm14, %ymm8, %ymm15 |
225 | vminps %ymm14, %ymm8, %ymm0 |
226 | vblendvps %ymm11, %ymm10, %ymm1, %ymm12 |
227 | vsubps %ymm14, %ymm6, %ymm1 |
228 | vaddps %ymm0, %ymm15, %ymm3 |
229 | |
230 | /* Now multiplex to the case X = 2^-30 * input, Xl = sL = 0 in the "big" case. */ |
231 | vmulps XScale+__svml_sasinh_data_internal(%rip), %ymm6, %ymm6 |
232 | vaddps %ymm1, %ymm2, %ymm13 |
233 | vsubps %ymm3, %ymm15, %ymm15 |
234 | vaddps %ymm13, %ymm12, %ymm1 |
235 | vaddps %ymm15, %ymm0, %ymm2 |
236 | vblendvps %ymm7, %ymm3, %ymm6, %ymm0 |
237 | vaddps %ymm2, %ymm1, %ymm4 |
238 | vpsubd %ymm5, %ymm0, %ymm1 |
239 | vpsrad $23, %ymm1, %ymm6 |
240 | vpand iOffExpoMask+__svml_sasinh_data_internal(%rip), %ymm1, %ymm2 |
241 | vmovups sPoly+224+__svml_sasinh_data_internal(%rip), %ymm1 |
242 | vpslld $23, %ymm6, %ymm10 |
243 | vpaddd %ymm5, %ymm2, %ymm13 |
244 | vcvtdq2ps %ymm6, %ymm0 |
245 | vpsubd %ymm10, %ymm8, %ymm12 |
246 | |
247 | /* polynomial evaluation */ |
248 | vsubps %ymm8, %ymm13, %ymm8 |
249 | |
250 | /* Add 31 to the exponent in the "large" case to get log(2 * input) */ |
251 | vaddps sThirtyOne+__svml_sasinh_data_internal(%rip), %ymm0, %ymm3 |
252 | vandps %ymm7, %ymm4, %ymm11 |
253 | vmulps %ymm12, %ymm11, %ymm14 |
254 | vblendvps %ymm7, %ymm0, %ymm3, %ymm0 |
255 | vaddps %ymm8, %ymm14, %ymm2 |
256 | vfmadd213ps sPoly+192+__svml_sasinh_data_internal(%rip), %ymm2, %ymm1 |
257 | vfmadd213ps sPoly+160+__svml_sasinh_data_internal(%rip), %ymm2, %ymm1 |
258 | vfmadd213ps sPoly+128+__svml_sasinh_data_internal(%rip), %ymm2, %ymm1 |
259 | vfmadd213ps sPoly+96+__svml_sasinh_data_internal(%rip), %ymm2, %ymm1 |
260 | vfmadd213ps sPoly+64+__svml_sasinh_data_internal(%rip), %ymm2, %ymm1 |
261 | vfmadd213ps sPoly+32+__svml_sasinh_data_internal(%rip), %ymm2, %ymm1 |
262 | vfmadd213ps sPoly+__svml_sasinh_data_internal(%rip), %ymm2, %ymm1 |
263 | vmulps %ymm1, %ymm2, %ymm4 |
264 | vfmadd213ps %ymm2, %ymm2, %ymm4 |
265 | |
266 | /* final reconstruction */ |
267 | vfmadd132ps sLn2+__svml_sasinh_data_internal(%rip), %ymm4, %ymm0 |
268 | |
269 | /* Finally, reincorporate the original sign. */ |
270 | vandps sSign+__svml_sasinh_data_internal(%rip), %ymm9, %ymm7 |
271 | vxorps %ymm0, %ymm7, %ymm0 |
272 | testl %edx, %edx |
273 | |
274 | /* Go to special inputs processing branch */ |
275 | jne L(SPECIAL_VALUES_BRANCH) |
276 | # LOE rbx r12 r13 r14 r15 edx ymm0 ymm9 |
277 | |
278 | /* Restore registers |
279 | * and exit the function |
280 | */ |
281 | |
282 | L(EXIT): |
283 | movq %rbp, %rsp |
284 | popq %rbp |
285 | cfi_def_cfa(7, 8) |
286 | cfi_restore(6) |
287 | ret |
288 | cfi_def_cfa(6, 16) |
289 | cfi_offset(6, -16) |
290 | |
291 | /* Branch to process |
292 | * special inputs |
293 | */ |
294 | |
295 | L(SPECIAL_VALUES_BRANCH): |
296 | vmovups %ymm9, 32(%rsp) |
297 | vmovups %ymm0, 64(%rsp) |
298 | # LOE rbx r12 r13 r14 r15 edx ymm0 |
299 | |
300 | xorl %eax, %eax |
301 | # LOE rbx r12 r13 r14 r15 eax edx |
302 | |
303 | vzeroupper |
304 | movq %r12, 16(%rsp) |
305 | /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -80; DW_OP_plus) */ |
306 | .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xb0, 0xff, 0xff, 0xff, 0x22 |
307 | movl %eax, %r12d |
308 | movq %r13, 8(%rsp) |
309 | /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -88; DW_OP_plus) */ |
310 | .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa8, 0xff, 0xff, 0xff, 0x22 |
311 | movl %edx, %r13d |
312 | movq %r14, (%rsp) |
313 | /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -96; DW_OP_plus) */ |
314 | .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa0, 0xff, 0xff, 0xff, 0x22 |
315 | # LOE rbx r15 r12d r13d |
316 | |
317 | /* Range mask |
318 | * bits check |
319 | */ |
320 | |
321 | L(RANGEMASK_CHECK): |
322 | btl %r12d, %r13d |
323 | |
324 | /* Call scalar math function */ |
325 | jc L(SCALAR_MATH_CALL) |
326 | # LOE rbx r15 r12d r13d |
327 | |
328 | /* Special inputs |
329 | * processing loop |
330 | */ |
331 | |
332 | L(SPECIAL_VALUES_LOOP): |
333 | incl %r12d |
334 | cmpl $8, %r12d |
335 | |
336 | /* Check bits in range mask */ |
337 | jl L(RANGEMASK_CHECK) |
338 | # LOE rbx r15 r12d r13d |
339 | |
340 | movq 16(%rsp), %r12 |
341 | cfi_restore(12) |
342 | movq 8(%rsp), %r13 |
343 | cfi_restore(13) |
344 | movq (%rsp), %r14 |
345 | cfi_restore(14) |
346 | vmovups 64(%rsp), %ymm0 |
347 | |
348 | /* Go to exit */ |
349 | jmp L(EXIT) |
350 | /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -80; DW_OP_plus) */ |
351 | .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xb0, 0xff, 0xff, 0xff, 0x22 |
352 | /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -88; DW_OP_plus) */ |
353 | .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa8, 0xff, 0xff, 0xff, 0x22 |
354 | /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -32; DW_OP_and; DW_OP_const4s: -96; DW_OP_plus) */ |
355 | .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xe0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0xa0, 0xff, 0xff, 0xff, 0x22 |
356 | # LOE rbx r12 r13 r14 r15 ymm0 |
357 | |
358 | /* Scalar math function call |
359 | * to process special input |
360 | */ |
361 | |
362 | L(SCALAR_MATH_CALL): |
363 | movl %r12d, %r14d |
364 | vmovss 32(%rsp, %r14, 4), %xmm0 |
365 | call asinhf@PLT |
366 | # LOE rbx r14 r15 r12d r13d xmm0 |
367 | |
368 | vmovss %xmm0, 64(%rsp, %r14, 4) |
369 | |
370 | /* Process special inputs in loop */ |
371 | jmp L(SPECIAL_VALUES_LOOP) |
372 | # LOE rbx r15 r12d r13d |
373 | END(_ZGVdN8v_asinhf_avx2) |
374 | |
375 | .section .rodata, "a" |
376 | .align 32 |
377 | |
378 | #ifdef __svml_sasinh_data_internal_typedef |
379 | typedef unsigned int VUINT32; |
380 | typedef struct { |
381 | __declspec(align(32)) VUINT32 SgnMask[8][1]; |
382 | __declspec(align(32)) VUINT32 sOne[8][1]; |
383 | __declspec(align(32)) VUINT32 sPoly[8][8][1]; |
384 | __declspec(align(32)) VUINT32 iBrkValue[8][1]; |
385 | __declspec(align(32)) VUINT32 iOffExpoMask[8][1]; |
386 | __declspec(align(32)) VUINT32 sBigThreshold[8][1]; |
387 | __declspec(align(32)) VUINT32 sC2[8][1]; |
388 | __declspec(align(32)) VUINT32 sC3[8][1]; |
389 | __declspec(align(32)) VUINT32 sHalf[8][1]; |
390 | __declspec(align(32)) VUINT32 sLargestFinite[8][1]; |
391 | __declspec(align(32)) VUINT32 sLittleThreshold[8][1]; |
392 | __declspec(align(32)) VUINT32 sSign[8][1]; |
393 | __declspec(align(32)) VUINT32 sThirtyOne[8][1]; |
394 | __declspec(align(32)) VUINT32 sTopMask8[8][1]; |
395 | __declspec(align(32)) VUINT32 XScale[8][1]; |
396 | __declspec(align(32)) VUINT32 sLn2[8][1]; |
397 | } __svml_sasinh_data_internal; |
398 | #endif |
399 | __svml_sasinh_data_internal: |
400 | /* SgnMask */ |
401 | .long 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff |
402 | /* sOne = SP 1.0 */ |
403 | .align 32 |
404 | .long 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000 |
405 | /* sPoly[] = SP polynomial */ |
406 | .align 32 |
407 | .long 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000 /* -5.0000000000000000000000000e-01 P0 */ |
408 | .long 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94, 0x3eaaaa94 /* 3.3333265781402587890625000e-01 P1 */ |
409 | .long 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e, 0xbe80058e /* -2.5004237890243530273437500e-01 P2 */ |
410 | .long 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190, 0x3e4ce190 /* 2.0007920265197753906250000e-01 P3 */ |
411 | .long 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37, 0xbe28ad37 /* -1.6472326219081878662109375e-01 P4 */ |
412 | .long 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12, 0x3e0fcb12 /* 1.4042308926582336425781250e-01 P5 */ |
413 | .long 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3, 0xbe1ad9e3 /* -1.5122179687023162841796875e-01 P6 */ |
414 | .long 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed, 0x3e0d84ed /* 1.3820238411426544189453125e-01 P7 */ |
415 | /* iBrkValue = SP 2/3 */ |
416 | .align 32 |
417 | .long 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab, 0x3f2aaaab |
418 | /* iOffExpoMask = SP significand mask */ |
419 | .align 32 |
420 | .long 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff |
421 | /* sBigThreshold */ |
422 | .align 32 |
423 | .long 0x4E800000, 0x4E800000, 0x4E800000, 0x4E800000, 0x4E800000, 0x4E800000, 0x4E800000, 0x4E800000 |
424 | /* sC2 */ |
425 | .align 32 |
426 | .long 0x3EC00000, 0x3EC00000, 0x3EC00000, 0x3EC00000, 0x3EC00000, 0x3EC00000, 0x3EC00000, 0x3EC00000 |
427 | /* sC3 */ |
428 | .align 32 |
429 | .long 0x3EA00000, 0x3EA00000, 0x3EA00000, 0x3EA00000, 0x3EA00000, 0x3EA00000, 0x3EA00000, 0x3EA00000 |
430 | /* sHalf */ |
431 | .align 32 |
432 | .long 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000 |
433 | /* sLargestFinite */ |
434 | .align 32 |
435 | .long 0x7F7FFFFF, 0x7F7FFFFF, 0x7F7FFFFF, 0x7F7FFFFF, 0x7F7FFFFF, 0x7F7FFFFF, 0x7F7FFFFF, 0x7F7FFFFF |
436 | /* sLittleThreshold */ |
437 | .align 32 |
438 | .long 0x3D800000, 0x3D800000, 0x3D800000, 0x3D800000, 0x3D800000, 0x3D800000, 0x3D800000, 0x3D800000 |
439 | /* sSign */ |
440 | .align 32 |
441 | .long 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000 |
442 | /* sThirtyOne */ |
443 | .align 32 |
444 | .long 0x41F80000, 0x41F80000, 0x41F80000, 0x41F80000, 0x41F80000, 0x41F80000, 0x41F80000, 0x41F80000 |
445 | /* sTopMask8 */ |
446 | .align 32 |
447 | .long 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000, 0xFFFF0000 |
448 | /* XScale */ |
449 | .align 32 |
450 | .long 0x30800000, 0x30800000, 0x30800000, 0x30800000, 0x30800000, 0x30800000, 0x30800000, 0x30800000 |
451 | /* sLn2 = SP ln(2) */ |
452 | .align 32 |
453 | .long 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218, 0x3f317218 |
454 | .align 32 |
455 | .type __svml_sasinh_data_internal, @object |
456 | .size __svml_sasinh_data_internal, .-__svml_sasinh_data_internal |
457 | |