1 | /* Function tanf vectorized with AVX-512. |
2 | Copyright (C) 2021-2024 Free Software Foundation, Inc. |
3 | This file is part of the GNU C Library. |
4 | |
5 | The GNU C Library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either |
8 | version 2.1 of the License, or (at your option) any later version. |
9 | |
10 | The GNU C Library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | Lesser General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU Lesser General Public |
16 | License along with the GNU C Library; if not, see |
17 | https://www.gnu.org/licenses/. */ |
18 | |
19 | /* |
20 | * ALGORITHM DESCRIPTION: |
21 | * |
22 | * ( optimized for throughput, with small table lookup, works when HW FMA is available ) |
23 | * |
24 | * Implementation reduces argument x to |R|<pi/64 |
25 | * 32-entry tables used to store high and low parts of tan(x0) |
26 | * Argument x = N*pi + x0 + (R); x0 = k*pi/32, with k in {0, 1, ..., 31} |
27 | * (very large arguments reduction resolved in _vsreduction_core.i) |
28 | * Compute result as (tan(x0) + tan(R))/(1-tan(x0)*tan(R)) |
29 | * _HA_ version keeps extra precision for numerator, denominator, and during |
30 | * final NR-iteration computing quotient. |
31 | * |
32 | * |
33 | */ |
34 | |
35 | /* Offsets for data table __svml_stan_data_internal |
36 | */ |
37 | #define _sInvPI_uisa 0 |
38 | #define _sPI1_uisa 64 |
39 | #define _sPI2_uisa 128 |
40 | #define _sPI3_uisa 192 |
41 | #define Th_tbl_uisa 256 |
42 | #define _sPC3_uisa 384 |
43 | #define _sPC5_uisa 448 |
44 | #define _sRangeReductionVal_uisa 512 |
45 | #define _sAbsMask 576 |
46 | #define _sRangeVal 640 |
47 | #define _sRShifter 704 |
48 | #define _sOne 768 |
49 | #define _sRangeReductionVal 832 |
50 | #define _sPI1 896 |
51 | #define _sPI2 960 |
52 | #define _sPI3 1024 |
53 | |
54 | #include <sysdep.h> |
55 | |
56 | .section .text.evex512, "ax" , @progbits |
/* float32x16 tan(), AVX-512 variant.
   Vector SysV ABI: packed input in %zmm0, packed result returned in %zmm0.
   Register roles held across the whole function:
     %zmm11 - original argument x (preserved; also reread for sign bits)
     %edx   - 16-bit lane mask of inputs (Inf/NaN) that must be recomputed
              by calling the scalar tanf per lane
     %k6    - lane mask of |x| above the fast-path reduction threshold
   Stack frame (64-byte aligned, 192 bytes):
     0(%rsp)   - spilled main-path reduced argument R (reused later for
                 GPR saves in the special-values path)
     64(%rsp)  - copy of the input vector
     128(%rsp) - result vector being patched lane-by-lane  */
ENTRY(_ZGVeN16v_tanf_skx)
	pushq	%rbp
	cfi_def_cfa_offset(16)
	movq	%rsp, %rbp
	cfi_def_cfa(6, 16)
	cfi_offset(6, -16)
	andq	$-64, %rsp		/* Align frame for zmm spills.  */
	subq	$192, %rsp
	xorl	%edx, %edx		/* No special (Inf/NaN) lanes yet.  */

	/* Large values check */
	vmovups	_sRangeReductionVal_uisa+__svml_stan_data_internal(%rip), %zmm10

	/*
	 *
	 *  Main path
	 *
	 *  start arg. reduction
	 */
	vmovups	_sRShifter+__svml_stan_data_internal(%rip), %zmm1
	vmovups	_sPI1_uisa+__svml_stan_data_internal(%rip), %zmm4
	vmovups	_sPI2_uisa+__svml_stan_data_internal(%rip), %zmm2
	vmovups	_sPI3_uisa+__svml_stan_data_internal(%rip), %zmm3
	vmovaps	%zmm0, %zmm11		/* Keep original x in zmm11.  */
	vandps	_sAbsMask+__svml_stan_data_internal(%rip), %zmm11, %zmm0
	/* Predicate 22 = NLE_UQ: k6 marks lanes where |x| is NOT <= the
	   range-reduction threshold (i.e. large or NaN); those lanes take
	   the auxiliary large-argument reduction below.  */
	vcmpps	$22, {sae}, %zmm10, %zmm0, %k6
	/* zmm10 = _sInvPI_uisa (table offset 0) = 32/pi.  */
	vmovups	__svml_stan_data_internal(%rip), %zmm10

	/*
	 *
	 *  End of main path
	 */

	kortestw %k6, %k6
	/* N = rint(x * 32/pi) via the 2^23+2^22 right-shifter trick;
	   zmm10 keeps N + shifter (its low bits are the table index k),
	   zmm5 gets N as a float.  */
	vfmadd213ps {rn-sae}, %zmm1, %zmm11, %zmm10
	vsubps	{rn-sae}, %zmm1, %zmm10, %zmm5
	/* Cody-Waite: R = x - N*(pi/32) with pi/32 split into three parts
	   (_sPI1/2/3_uisa) for extra precision; result in zmm5.  */
	vfnmadd213ps {rn-sae}, %zmm11, %zmm5, %zmm4
	vfnmadd231ps {rn-sae}, %zmm5, %zmm2, %zmm4
	vfnmadd213ps {rn-sae}, %zmm4, %zmm3, %zmm5

	/* Go to auxiliary branch (taken if kortestw found any k6 lane set) */
	jne	L(AUX_BRANCH)
	# LOE rbx r12 r13 r14 r15 edx zmm0 zmm5 zmm10 zmm11 k6

	/* Return from auxiliary branch
	 * for out of main path inputs
	 */

L(AUX_BRANCH_RETURN):
	/* Table lookup: Th = tan(k*pi/32); vpermt2ps selects one of the
	   32 table entries per lane using the low bits of N in zmm10.  */
	vmovups	Th_tbl_uisa+__svml_stan_data_internal(%rip), %zmm3
	vmovups	_sPC3_uisa+__svml_stan_data_internal(%rip), %zmm0
	vmulps	{rn-sae}, %zmm5, %zmm5, %zmm1	/* R^2 */
	vpermt2ps Th_tbl_uisa+64+__svml_stan_data_internal(%rip), %zmm10, %zmm3
	vmovups	_sPC5_uisa+__svml_stan_data_internal(%rip), %zmm10
	/* Minimax polynomial: P = tan(R) ~= R + R^3*(c3 + c5*R^2).  */
	vfmadd231ps {rn-sae}, %zmm1, %zmm10, %zmm0
	vmulps	{rn-sae}, %zmm5, %zmm0, %zmm4
	vfmadd213ps {rn-sae}, %zmm5, %zmm1, %zmm4

	/*
	 * Compute Denominator:
	 * sDenominator - sDlow ~= 1-(sTh+sTl)*(sP+sPlow)
	 */
	vmovups	_sOne+__svml_stan_data_internal(%rip), %zmm5
	vmulps	{rn-sae}, %zmm4, %zmm3, %zmm7	/* Th*P */

	/*
	 * Compute Numerator:
	 * sNumerator + sNlow ~= sTh+sTl+sP+sPlow
	 */
	vaddps	{rn-sae}, %zmm3, %zmm4, %zmm8	/* Th+P */
	vsubps	{rn-sae}, %zmm7, %zmm5, %zmm9	/* 1 - Th*P */
	vsubps	{rn-sae}, %zmm3, %zmm8, %zmm2	/* rounding error of Th+P */

	/*
	 * Now computes (sNumerator + sNlow)/(sDenominator - sDlow)
	 * Choose NR iteration instead of hardware division
	 */
	vrcp14ps %zmm9, %zmm14			/* ~14-bit 1/denominator seed */
	vsubps	{rn-sae}, %zmm5, %zmm9, %zmm6
	vsubps	{rn-sae}, %zmm2, %zmm4, %zmm13
	vmulps	{rn-sae}, %zmm8, %zmm14, %zmm15	/* first quotient estimate */
	vaddps	{rn-sae}, %zmm7, %zmm6, %zmm12

	/* One NR iteration to refine sQuotient */
	vfmsub213ps {rn-sae}, %zmm8, %zmm15, %zmm9
	vfnmadd213ps {rn-sae}, %zmm9, %zmm15, %zmm12
	vsubps	{rn-sae}, %zmm13, %zmm12, %zmm0
	vfnmadd213ps {rn-sae}, %zmm15, %zmm14, %zmm0
	testl	%edx, %edx		/* Any Inf/NaN lanes flagged?  */

	/* Go to special inputs processing branch */
	jne	L(SPECIAL_VALUES_BRANCH)
	# LOE rbx r12 r13 r14 r15 edx zmm0 zmm11

	/* Restore registers
	 * and exit the function
	 */

L(EXIT):
	movq	%rbp, %rsp
	popq	%rbp
	cfi_def_cfa(7, 8)
	cfi_restore(6)
	ret
	cfi_def_cfa(6, 16)
	cfi_offset(6, -16)

	/* Branch to process
	 * special inputs
	 */

L(SPECIAL_VALUES_BRANCH):
	/* Spill input and current result so individual lanes can be
	   patched by scalar tanf calls below.  */
	vmovups	%zmm11, 64(%rsp)
	vmovups	%zmm0, 128(%rsp)
	# LOE rbx r12 r13 r14 r15 edx zmm0

	xorl	%eax, %eax		/* eax = lane index, starting at 0.  */
	# LOE rbx r12 r13 r14 r15 eax edx

	vzeroupper
	movq	%r12, 16(%rsp)
	/* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -176; DW_OP_plus) */
	.cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x50, 0xff, 0xff, 0xff, 0x22
	movl	%eax, %r12d		/* r12d = current lane.  */
	movq	%r13, 8(%rsp)
	/* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -184; DW_OP_plus) */
	.cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x48, 0xff, 0xff, 0xff, 0x22
	movl	%edx, %r13d		/* r13d = special-lane bitmask.  */
	movq	%r14, (%rsp)
	/* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -192; DW_OP_plus) */
	.cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22
	# LOE rbx r15 r12d r13d

	/* Range mask
	 * bits check
	 */

L(RANGEMASK_CHECK):
	btl	%r12d, %r13d		/* Is lane r12d flagged special?  */

	/* Call scalar math function */
	jc	L(SCALAR_MATH_CALL)
	# LOE rbx r15 r12d r13d

	/* Special inputs
	 * processing loop
	 */

L(SPECIAL_VALUES_LOOP):
	incl	%r12d
	cmpl	$16, %r12d		/* 16 lanes in a zmm of floats.  */

	/* Check bits in range mask */
	jl	L(RANGEMASK_CHECK)
	# LOE rbx r15 r12d r13d

	movq	16(%rsp), %r12
	cfi_restore(12)
	movq	8(%rsp), %r13
	cfi_restore(13)
	movq	(%rsp), %r14
	cfi_restore(14)
	vmovups	128(%rsp), %zmm0	/* Reload fully-patched result.  */

	/* Go to exit */
	jmp	L(EXIT)
	/* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -176; DW_OP_plus) */
	.cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x50, 0xff, 0xff, 0xff, 0x22
	/* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -184; DW_OP_plus) */
	.cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x48, 0xff, 0xff, 0xff, 0x22
	/* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -192; DW_OP_plus) */
	.cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22
	# LOE rbx r12 r13 r14 r15 zmm0

	/* Scalar math function call
	 * to process special input
	 */

L(SCALAR_MATH_CALL):
	movl	%r12d, %r14d
	vmovss	64(%rsp, %r14, 4), %xmm0	/* Load this lane's x.  */
	call	tanf@PLT
	# LOE rbx r14 r15 r12d r13d xmm0

	vmovss	%xmm0, 128(%rsp, %r14, 4)	/* Patch lane in result.  */

	/* Process special inputs in loop */
	jmp	L(SPECIAL_VALUES_LOOP)
	cfi_restore(12)
	cfi_restore(13)
	cfi_restore(14)
	# LOE rbx r15 r12d r13d

	/* Auxiliary branch
	 * for out of main path inputs
	 * (large-argument reduction via the (2^a/2pi) mod 1 table).
	 */

L(AUX_BRANCH):
	vmovups	_sRangeVal+__svml_stan_data_internal(%rip), %zmm6

	/*
	 * Get the (2^a / 2pi) mod 1 values from the table.
	 * Because doesn't have I-type gather, we need a trivial cast
	 */
	lea	__svml_stan_reduction_data_internal(%rip), %rax
	vmovups	%zmm5, (%rsp)		/* Spill main-path R; reloaded at end.  */
	/* k0 = lanes whose exponent field is all ones (Inf/NaN);
	   recorded in edx for the scalar fix-up path.  */
	vandps	%zmm0, %zmm6, %zmm14
	vcmpps	$0, {sae}, %zmm6, %zmm14, %k0

	/*
	 * Break the P_xxx and m into 16-bit chunks ready for
	 * the long multiplication via 16x16->32 multiplications
	 */
	vmovups	.FLT_15(%rip), %zmm6	/* 0xffff chunk mask */
	kxnorw	%k0, %k0, %k1		/* All-ones masks for the gathers.  */
	kxnorw	%k0, %k0, %k2
	kxnorw	%k0, %k0, %k3
	kmovw	%k0, %edx
	/* Table entries are 3 dwords (12 bytes); index = exponent*12.  */
	vpandd	.FLT_12(%rip), %zmm11, %zmm5
	vpsrld	$23, %zmm5, %zmm7	/* biased exponent */
	vpslld	$1, %zmm7, %zmm8
	vpaddd	%zmm7, %zmm8, %zmm9	/* exponent*3 */
	vpslld	$2, %zmm9, %zmm4	/* *4 -> byte offset */
	vpxord	%zmm3, %zmm3, %zmm3
	vpxord	%zmm15, %zmm15, %zmm15
	vpxord	%zmm2, %zmm2, %zmm2
	vgatherdps (%rax, %zmm4), %zmm3{%k1}	/* P_hi */
	vgatherdps 4(%rax, %zmm4), %zmm15{%k2}	/* P_med */
	vgatherdps 8(%rax, %zmm4), %zmm2{%k3}	/* P_lo */
	vpsrld	$16, %zmm3, %zmm5
	vpsrld	$16, %zmm2, %zmm13

	/*
	 * Also get the significand as an integer
	 * NB: adding in the integer bit is wrong for denorms!
	 * To make this work for denorms we should do something slightly different
	 */
	vpandd	.FLT_13(%rip), %zmm11, %zmm0
	vpaddd	.FLT_14(%rip), %zmm0, %zmm1	/* mantissa | implicit bit */
	vpsrld	$16, %zmm15, %zmm0
	vpsrld	$16, %zmm1, %zmm8
	vpandd	%zmm6, %zmm3, %zmm9
	vpandd	%zmm6, %zmm15, %zmm12
	vpandd	%zmm6, %zmm2, %zmm7
	vpandd	%zmm6, %zmm1, %zmm14

	/* Now do the big multiplication and carry propagation */
	vpmulld	%zmm9, %zmm8, %zmm4
	vpmulld	%zmm0, %zmm8, %zmm3
	vpmulld	%zmm12, %zmm8, %zmm2
	vpmulld	%zmm13, %zmm8, %zmm1
	vpmulld	%zmm7, %zmm8, %zmm8
	vpmulld	%zmm5, %zmm14, %zmm7
	vpmulld	%zmm9, %zmm14, %zmm5
	vpmulld	%zmm0, %zmm14, %zmm9
	vpmulld	%zmm12, %zmm14, %zmm0
	vpmulld	%zmm13, %zmm14, %zmm12
	vpsrld	$16, %zmm12, %zmm14
	vpsrld	$16, %zmm0, %zmm13
	vpsrld	$16, %zmm9, %zmm15
	vpsrld	$16, %zmm5, %zmm12
	vpsrld	$16, %zmm8, %zmm8
	vpaddd	%zmm14, %zmm1, %zmm1
	vpaddd	%zmm13, %zmm2, %zmm2
	vpaddd	%zmm15, %zmm3, %zmm15
	vpaddd	%zmm12, %zmm4, %zmm3
	vpandd	%zmm6, %zmm0, %zmm13
	vpaddd	%zmm1, %zmm13, %zmm4
	vpaddd	%zmm4, %zmm8, %zmm14
	vpsrld	$16, %zmm14, %zmm0
	vpandd	%zmm6, %zmm9, %zmm9
	vpaddd	%zmm2, %zmm9, %zmm1
	vpaddd	%zmm1, %zmm0, %zmm8

	/*
	 * Now round at the 2^-8 bit position for reduction mod pi/2^7
	 * instead of the original 2pi (but still with the same 2pi scaling).
	 * Use a shifter of 2^15 + 2^14.
	 * The N we get is our final version; it has an offset of
	 * 2^8 because of the implicit integer bit, and anyway for negative
	 * starting value it's a 2s complement thing. But we need to mask
	 * off the exponent part anyway so it's fine.
	 */
	vmovups	.FLT_18(%rip), %zmm1	/* shifter 2^15+2^14 */
	vpandd	%zmm6, %zmm7, %zmm7
	vpaddd	%zmm3, %zmm7, %zmm13
	vpsrld	$16, %zmm8, %zmm3
	vpandd	%zmm6, %zmm5, %zmm5
	vpaddd	%zmm15, %zmm5, %zmm2
	vpaddd	%zmm2, %zmm3, %zmm15
	vpsrld	$16, %zmm15, %zmm12
	vpaddd	%zmm13, %zmm12, %zmm5

	/* Assemble reduced argument from the pieces */
	vpandd	%zmm6, %zmm14, %zmm9
	vpandd	%zmm6, %zmm15, %zmm7
	vpslld	$16, %zmm5, %zmm6
	vpslld	$16, %zmm8, %zmm5
	vpaddd	%zmm7, %zmm6, %zmm4	/* high 32 fraction bits */
	vpaddd	%zmm9, %zmm5, %zmm9	/* low 32 fraction bits */
	vpsrld	$9, %zmm4, %zmm6

	/*
	 * We want to incorporate the original sign now too.
	 * Do it here for convenience in getting the right N value,
	 * though we could wait right to the end if we were prepared
	 * to modify the sign of N later too.
	 * So get the appropriate sign mask now (or sooner).
	 */
	vpandd	.FLT_16(%rip), %zmm11, %zmm0	/* sign bit of x */
	vpandd	.FLT_21(%rip), %zmm9, %zmm13
	vpslld	$5, %zmm13, %zmm14

	/*
	 * Create floating-point high part, implicitly adding integer bit 1
	 * Incorporate overall sign at this stage too.
	 */
	vpxord	.FLT_17(%rip), %zmm0, %zmm8	/* sign ^ 1.0f pattern */
	vpord	%zmm8, %zmm6, %zmm2
	vaddps	{rn-sae}, %zmm2, %zmm1, %zmm12	/* N via shifter */
	vsubps	{rn-sae}, %zmm1, %zmm12, %zmm3
	vsubps	{rn-sae}, %zmm3, %zmm2, %zmm7	/* high-part remainder */

	/*
	 * Create floating-point low and medium parts, respectively
	 * lo_17, ... lo_0, 0, ..., 0
	 * hi_8, ... hi_0, lo_31, ..., lo_18
	 * then subtract off the implicitly added integer bits,
	 * 2^-46 and 2^-23, respectively.
	 * Put the original sign into all of them at this stage.
	 */
	vpxord	.FLT_20(%rip), %zmm0, %zmm6	/* sign ^ 2^-46 base */
	vpord	%zmm6, %zmm14, %zmm15
	vpandd	.FLT_23(%rip), %zmm4, %zmm4
	vsubps	{rn-sae}, %zmm6, %zmm15, %zmm8	/* low part */
	vandps	.FLT_26(%rip), %zmm11, %zmm15	/* |x| */
	vpsrld	$18, %zmm9, %zmm6

	/*
	 * If the magnitude of the input is <= 2^-20, then
	 * just pass through the input, since no reduction will be needed and
	 * the main path will only work accurately if the reduced argument is
	 * about >= 2^-40 (which it is for all large pi multiples)
	 */
	vmovups	.FLT_27(%rip), %zmm14	/* 2^-20 threshold */
	vcmpps	$26, {sae}, %zmm14, %zmm15, %k4	/* NGT_UQ: |x| <= 2^-20 (or NaN) */
	vcmpps	$22, {sae}, %zmm14, %zmm15, %k5	/* NLE_UQ: |x| > 2^-20 (or NaN) */
	vpxord	.FLT_22(%rip), %zmm0, %zmm1	/* sign ^ 2^-23 base */
	vpslld	$14, %zmm4, %zmm0
	vpord	%zmm6, %zmm0, %zmm0
	vpord	%zmm1, %zmm0, %zmm4
	vsubps	{rn-sae}, %zmm1, %zmm4, %zmm2	/* medium part */
	vpternlogd $255, %zmm6, %zmm6, %zmm6	/* zmm6 = all ones */

	/* Now add them up into 2 reasonably aligned pieces */
	vaddps	{rn-sae}, %zmm2, %zmm7, %zmm13	/* RHi */
	vsubps	{rn-sae}, %zmm13, %zmm7, %zmm7
	vaddps	{rn-sae}, %zmm7, %zmm2, %zmm3	/* RLo accumulation */

	/*
	 * The output is _VRES_R (high) + _VRES_E (low), and the integer part is _VRES_IND
	 * Set sRp2 = _VRES_R^2 and then resume the original code.
	 */
	vmovups	.FLT_28(%rip), %zmm2	/* integer 2 (rounding bias for N) */
	vaddps	{rn-sae}, %zmm8, %zmm3, %zmm1
	vmovups	.FLT_25(%rip), %zmm8	/* 2*pi trailing part */

	/* Grab our final N value as an integer, appropriately masked mod 2^8 */
	vpandd	.FLT_19(%rip), %zmm12, %zmm5

	/*
	 * Now multiply those numbers all by 2 pi, reasonably accurately.
	 * (RHi + RLo) * (pi_lead + pi_trail) ~=
	 * RHi * pi_lead + (RHi * pi_trail + RLo * pi_lead)
	 */
	vmovups	.FLT_24(%rip), %zmm12	/* 2*pi leading part */
	vmulps	{rn-sae}, %zmm12, %zmm13, %zmm0
	vmovaps	%zmm12, %zmm9
	vfmsub213ps {rn-sae}, %zmm0, %zmm13, %zmm9
	vfmadd213ps {rn-sae}, %zmm9, %zmm8, %zmm13
	vmovaps	%zmm6, %zmm8
	vfmadd213ps {rn-sae}, %zmm13, %zmm12, %zmm1
	/* Build complementary lane selectors: zmm8 keeps all-ones only
	   where the reduction result is used, zmm6 only where the tiny
	   input (|x| <= 2^-20) is passed through unchanged.  */
	vpandnd	%zmm15, %zmm15, %zmm8{%k4}
	vpandnd	%zmm15, %zmm15, %zmm6{%k5}
	vandps	%zmm11, %zmm6, %zmm14	/* pass-through x for tiny lanes */
	vandps	%zmm0, %zmm8, %zmm15
	vandps	%zmm1, %zmm8, %zmm12
	vorps	%zmm15, %zmm14, %zmm6
	/* Fold the result back into the main path's (N, R) form:
	   adjust N by (2 - sign) and split off the sub-quadrant bits.  */
	vpsrld	$31, %zmm6, %zmm3	/* sign of reduced argument */
	vpsubd	%zmm3, %zmm2, %zmm4
	vpaddd	%zmm4, %zmm5, %zmm7
	vpsrld	$2, %zmm7, %zmm13	/* replacement N for large lanes */
	vpslld	$2, %zmm13, %zmm9

	/*
	 *
	 *  End of large arguments path
	 *
	 *  Merge results from main and large paths:
	 */
	vblendmps %zmm13, %zmm10, %zmm10{%k6}
	vpsubd	%zmm9, %zmm5, %zmm5	/* residual N mod 4 */
	vmovups	.FLT_29(%rip), %zmm9	/* 2*pi/256 leading part */
	vcvtdq2ps {rn-sae}, %zmm5, %zmm0
	vmovups	.FLT_30(%rip), %zmm5	/* 2*pi/256 trailing part */
	vfmadd231ps {rn-sae}, %zmm0, %zmm5, %zmm12
	vmovups	(%rsp), %zmm5		/* reload main-path R */
	vaddps	{rn-sae}, %zmm6, %zmm12, %zmm6
	vfmadd213ps {rn-sae}, %zmm6, %zmm9, %zmm0	/* replacement R */
	vblendmps %zmm0, %zmm5, %zmm5{%k6}

	/* Return to main vector processing path */
	jmp	L(AUX_BRANCH_RETURN)
	# LOE rbx r12 r13 r14 r15 edx zmm5 zmm10 zmm11
END(_ZGVeN16v_tanf_skx)
473 | |
	.section .rodata, "a"
	.align	64

/* 0x7f800000: exponent-field mask; extracts the biased exponent used to
   index the (2^a/2pi) mod 1 reduction table.  */
.FLT_12:
	.long	0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000
	.type	.FLT_12, @object
	.size	.FLT_12, 64
	.align	64

/* 0x007fffff: mantissa (significand fraction) mask.  */
.FLT_13:
	.long	0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff
	.type	.FLT_13, @object
	.size	.FLT_13, 64
	.align	64

/* 0x00800000: implicit leading significand bit, added back to the
   mantissa (the code notes this is wrong for denormals).  */
.FLT_14:
	.long	0x00800000, 0x00800000, 0x00800000, 0x00800000, 0x00800000, 0x00800000, 0x00800000, 0x00800000, 0x00800000, 0x00800000, 0x00800000, 0x00800000, 0x00800000, 0x00800000, 0x00800000, 0x00800000
	.type	.FLT_14, @object
	.size	.FLT_14, 64
	.align	64

/* 0x0000ffff: low-16-bit chunk mask for the 16x16->32 long multiply.  */
.FLT_15:
	.long	0x0000ffff, 0x0000ffff, 0x0000ffff, 0x0000ffff, 0x0000ffff, 0x0000ffff, 0x0000ffff, 0x0000ffff, 0x0000ffff, 0x0000ffff, 0x0000ffff, 0x0000ffff, 0x0000ffff, 0x0000ffff, 0x0000ffff, 0x0000ffff
	.type	.FLT_15, @object
	.size	.FLT_15, 64
	.align	64

/* 0x80000000: sign-bit mask.  */
.FLT_16:
	.long	0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000
	.type	.FLT_16, @object
	.size	.FLT_16, 64
	.align	64

/* 0x3f800000 = 1.0f: exponent/implicit-bit pattern OR'd onto the high
   fraction bits to build the floating-point high part.  */
.FLT_17:
	.long	0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000
	.type	.FLT_17, @object
	.size	.FLT_17, 64
	.align	64

/* 0x47400000 = 49152.0f = 2^15 + 2^14: rounding shifter for N.  */
.FLT_18:
	.long	0x47400000, 0x47400000, 0x47400000, 0x47400000, 0x47400000, 0x47400000, 0x47400000, 0x47400000, 0x47400000, 0x47400000, 0x47400000, 0x47400000, 0x47400000, 0x47400000, 0x47400000, 0x47400000
	.type	.FLT_18, @object
	.size	.FLT_18, 64
	.align	64

/* 0x000000ff: take N mod 2^8.  */
.FLT_19:
	.long	0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff
	.type	.FLT_19, @object
	.size	.FLT_19, 64
	.align	64

/* 0x28800000 = 2^-46: implicit-integer-bit base subtracted off to form
   the low part (see the in-code comment about 2^-46).  */
.FLT_20:
	.long	0x28800000, 0x28800000, 0x28800000, 0x28800000, 0x28800000, 0x28800000, 0x28800000, 0x28800000, 0x28800000, 0x28800000, 0x28800000, 0x28800000, 0x28800000, 0x28800000, 0x28800000, 0x28800000
	.type	.FLT_20, @object
	.size	.FLT_20, 64
	.align	64

/* 0x0003ffff: 18-bit mask of low fraction bits (lo_17 ... lo_0).  */
.FLT_21:
	.long	0x0003ffff, 0x0003ffff, 0x0003ffff, 0x0003ffff, 0x0003ffff, 0x0003ffff, 0x0003ffff, 0x0003ffff, 0x0003ffff, 0x0003ffff, 0x0003ffff, 0x0003ffff, 0x0003ffff, 0x0003ffff, 0x0003ffff, 0x0003ffff
	.type	.FLT_21, @object
	.size	.FLT_21, 64
	.align	64

/* 0x34000000 = 2^-23: implicit-integer-bit base for the medium part.  */
.FLT_22:
	.long	0x34000000, 0x34000000, 0x34000000, 0x34000000, 0x34000000, 0x34000000, 0x34000000, 0x34000000, 0x34000000, 0x34000000, 0x34000000, 0x34000000, 0x34000000, 0x34000000, 0x34000000, 0x34000000
	.type	.FLT_22, @object
	.size	.FLT_22, 64
	.align	64

/* 0x000001ff: 9-bit mask (hi_8 ... hi_0) for assembling the medium part.  */
.FLT_23:
	.long	0x000001ff, 0x000001ff, 0x000001ff, 0x000001ff, 0x000001ff, 0x000001ff, 0x000001ff, 0x000001ff, 0x000001ff, 0x000001ff, 0x000001ff, 0x000001ff, 0x000001ff, 0x000001ff, 0x000001ff, 0x000001ff
	.type	.FLT_23, @object
	.size	.FLT_23, 64
	.align	64

/* 0x40c90fdb = 6.2831855f: 2*pi leading part.  */
.FLT_24:
	.long	0x40c90fdb, 0x40c90fdb, 0x40c90fdb, 0x40c90fdb, 0x40c90fdb, 0x40c90fdb, 0x40c90fdb, 0x40c90fdb, 0x40c90fdb, 0x40c90fdb, 0x40c90fdb, 0x40c90fdb, 0x40c90fdb, 0x40c90fdb, 0x40c90fdb, 0x40c90fdb
	.type	.FLT_24, @object
	.size	.FLT_24, 64
	.align	64

/* 0xb43bbd2e ~= -1.748e-7: 2*pi trailing part (2*pi minus .FLT_24).  */
.FLT_25:
	.long	0xb43bbd2e, 0xb43bbd2e, 0xb43bbd2e, 0xb43bbd2e, 0xb43bbd2e, 0xb43bbd2e, 0xb43bbd2e, 0xb43bbd2e, 0xb43bbd2e, 0xb43bbd2e, 0xb43bbd2e, 0xb43bbd2e, 0xb43bbd2e, 0xb43bbd2e, 0xb43bbd2e, 0xb43bbd2e
	.type	.FLT_25, @object
	.size	.FLT_25, 64
	.align	64

/* 0x7fffffff: absolute-value mask.  */
.FLT_26:
	.long	0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff
	.type	.FLT_26, @object
	.size	.FLT_26, 64
	.align	64

/* 0x35800000 = 2^-20: tiny-argument pass-through threshold.  */
.FLT_27:
	.long	0x35800000, 0x35800000, 0x35800000, 0x35800000, 0x35800000, 0x35800000, 0x35800000, 0x35800000, 0x35800000, 0x35800000, 0x35800000, 0x35800000, 0x35800000, 0x35800000, 0x35800000, 0x35800000
	.type	.FLT_27, @object
	.size	.FLT_27, 64
	.align	64

/* Integer 2: rounding bias added to N before the >>2 quadrant split.  */
.FLT_28:
	.long	0x00000002, 0x00000002, 0x00000002, 0x00000002, 0x00000002, 0x00000002, 0x00000002, 0x00000002, 0x00000002, 0x00000002, 0x00000002, 0x00000002, 0x00000002, 0x00000002, 0x00000002, 0x00000002
	.type	.FLT_28, @object
	.size	.FLT_28, 64
	.align	64

/* 0x3cc90fdb ~= 0.024543693f = 2*pi/256: leading part of the
   per-sub-quadrant step.  */
.FLT_29:
	.long	0x3cc90fdb, 0x3cc90fdb, 0x3cc90fdb, 0x3cc90fdb, 0x3cc90fdb, 0x3cc90fdb, 0x3cc90fdb, 0x3cc90fdb, 0x3cc90fdb, 0x3cc90fdb, 0x3cc90fdb, 0x3cc90fdb, 0x3cc90fdb, 0x3cc90fdb, 0x3cc90fdb, 0x3cc90fdb
	.type	.FLT_29, @object
	.size	.FLT_29, 64
	.align	64

/* 0xb03bbd2e: trailing part of 2*pi/256 (.FLT_25 scaled by 1/256).  */
.FLT_30:
	.long	0xb03bbd2e, 0xb03bbd2e, 0xb03bbd2e, 0xb03bbd2e, 0xb03bbd2e, 0xb03bbd2e, 0xb03bbd2e, 0xb03bbd2e, 0xb03bbd2e, 0xb03bbd2e, 0xb03bbd2e, 0xb03bbd2e, 0xb03bbd2e, 0xb03bbd2e, 0xb03bbd2e, 0xb03bbd2e
	.type	.FLT_30, @object
	.size	.FLT_30, 64
	.align	64
/* Layout documentation for the table below; the typedef is only compiled
   when __svml_stan_data_internal_typedef is defined.  Each field is one
   64-byte (16-lane) broadcast constant, except the 32-entry tan table.  */
#ifdef __svml_stan_data_internal_typedef
typedef unsigned int VUINT32;
typedef struct {
	__declspec(align(64)) VUINT32 _sInvPI_uisa[16][1];
	__declspec(align(64)) VUINT32 _sPI1_uisa[16][1];
	__declspec(align(64)) VUINT32 _sPI2_uisa[16][1];
	__declspec(align(64)) VUINT32 _sPI3_uisa[16][1];
	__declspec(align(64)) VUINT32 Th_tbl_uisa[32][1];
	__declspec(align(64)) VUINT32 _sPC3_uisa[16][1];
	__declspec(align(64)) VUINT32 _sPC5_uisa[16][1];
	__declspec(align(64)) VUINT32 _sRangeReductionVal_uisa[16][1];
	__declspec(align(64)) VUINT32 _sAbsMask[16][1];
	__declspec(align(64)) VUINT32 _sRangeVal[16][1];
	__declspec(align(64)) VUINT32 _sRShifter[16][1];
	__declspec(align(64)) VUINT32 _sOne[16][1];
	__declspec(align(64)) VUINT32 _sRangeReductionVal[16][1];
	__declspec(align(64)) VUINT32 _sPI1[16][1];
	__declspec(align(64)) VUINT32 _sPI2[16][1];
	__declspec(align(64)) VUINT32 _sPI3[16][1];
} __svml_stan_data_internal;
#endif
__svml_stan_data_internal:
	/* UISA */
	.long	0x4122f983, 0x4122f983, 0x4122f983, 0x4122f983, 0x4122f983, 0x4122f983, 0x4122f983, 0x4122f983, 0x4122f983, 0x4122f983, 0x4122f983, 0x4122f983, 0x4122f983, 0x4122f983, 0x4122f983, 0x4122f983 /* _sInvPI_uisa */
	.align	64
	.long	0x3dc90fda, 0x3dc90fda, 0x3dc90fda, 0x3dc90fda, 0x3dc90fda, 0x3dc90fda, 0x3dc90fda, 0x3dc90fda, 0x3dc90fda, 0x3dc90fda, 0x3dc90fda, 0x3dc90fda, 0x3dc90fda, 0x3dc90fda, 0x3dc90fda, 0x3dc90fda /* _sPI1_uisa */
	.align	64
	.long	0x31a22168, 0x31a22168, 0x31a22168, 0x31a22168, 0x31a22168, 0x31a22168, 0x31a22168, 0x31a22168, 0x31a22168, 0x31a22168, 0x31a22168, 0x31a22168, 0x31a22168, 0x31a22168, 0x31a22168, 0x31a22168 /* _sPI2_uisa */
	.align	64
	.long	0x25c234c5, 0x25c234c5, 0x25c234c5, 0x25c234c5, 0x25c234c5, 0x25c234c5, 0x25c234c5, 0x25c234c5, 0x25c234c5, 0x25c234c5, 0x25c234c5, 0x25c234c5, 0x25c234c5, 0x25c234c5, 0x25c234c5, 0x25c234c5 /* _sPI3_uisa */
	/* Th_tbl_uisa for i from 0 to 31 do printsingle(tan(i*Pi/32));
	   Entry 0 is -0.0; entry 16 (tan(pi/2) pole) is 0xff7fffff.  */
	.align	64
	.long	0x80000000, 0x3dc9b5dc, 0x3e4bafaf, 0x3e9b5042
	.long	0x3ed413cd, 0x3f08d5b9, 0x3f2b0dc1, 0x3f521801
	.long	0x3f800000, 0x3f9bf7ec, 0x3fbf90c7, 0x3fef789e
	.long	0x401a827a, 0x4052facf, 0x40a0dff7, 0x41227363
	.long	0xff7fffff, 0xc1227363, 0xc0a0dff7, 0xc052facf
	.long	0xc01a827a, 0xbfef789e, 0xbfbf90c7, 0xbf9bf7ec
	.long	0xbf800000, 0xbf521801, 0xbf2b0dc1, 0xbf08d5b9
	.long	0xbed413cd, 0xbe9b5042, 0xbe4bafaf, 0xbdc9b5dc
	.align	64
	.long	0x3eaaaaa6, 0x3eaaaaa6, 0x3eaaaaa6, 0x3eaaaaa6, 0x3eaaaaa6, 0x3eaaaaa6, 0x3eaaaaa6, 0x3eaaaaa6, 0x3eaaaaa6, 0x3eaaaaa6, 0x3eaaaaa6, 0x3eaaaaa6, 0x3eaaaaa6, 0x3eaaaaa6, 0x3eaaaaa6, 0x3eaaaaa6 /* _sPC3_uisa */
	.align	64
	.long	0x3e08b888, 0x3e08b888, 0x3e08b888, 0x3e08b888, 0x3e08b888, 0x3e08b888, 0x3e08b888, 0x3e08b888, 0x3e08b888, 0x3e08b888, 0x3e08b888, 0x3e08b888, 0x3e08b888, 0x3e08b888, 0x3e08b888, 0x3e08b888 /* _sPC5_uisa */
	.align	64
	.long	0x46010000, 0x46010000, 0x46010000, 0x46010000, 0x46010000, 0x46010000, 0x46010000, 0x46010000, 0x46010000, 0x46010000, 0x46010000, 0x46010000, 0x46010000, 0x46010000, 0x46010000, 0x46010000 /* _sRangeReductionVal_uisa */
	.align	64
	.long	0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF /* _sAbsMask */
	.align	64
	.long	0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000 /* _sRangeVal */
	.align	64
	.long	0x4B400000, 0x4B400000, 0x4B400000, 0x4B400000, 0x4B400000, 0x4B400000, 0x4B400000, 0x4B400000, 0x4B400000, 0x4B400000, 0x4B400000, 0x4B400000, 0x4B400000, 0x4B400000, 0x4B400000, 0x4B400000 /* _sRShifter */
	.align	64
	.long	0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000 /* _sOne */
	.align	64
	.long	0x46010000, 0x46010000, 0x46010000, 0x46010000, 0x46010000, 0x46010000, 0x46010000, 0x46010000, 0x46010000, 0x46010000, 0x46010000, 0x46010000, 0x46010000, 0x46010000, 0x46010000, 0x46010000 /* _sRangeReductionVal */
	.align	64
	.long	0x3FC90000, 0x3FC90000, 0x3FC90000, 0x3FC90000, 0x3FC90000, 0x3FC90000, 0x3FC90000, 0x3FC90000, 0x3FC90000, 0x3FC90000, 0x3FC90000, 0x3FC90000, 0x3FC90000, 0x3FC90000, 0x3FC90000, 0x3FC90000 /* _sPI1 */
	.align	64
	.long	0x39FDA000, 0x39FDA000, 0x39FDA000, 0x39FDA000, 0x39FDA000, 0x39FDA000, 0x39FDA000, 0x39FDA000, 0x39FDA000, 0x39FDA000, 0x39FDA000, 0x39FDA000, 0x39FDA000, 0x39FDA000, 0x39FDA000, 0x39FDA000 /* _sPI2 */
	.align	64
	.long	0x33A22000, 0x33A22000, 0x33A22000, 0x33A22000, 0x33A22000, 0x33A22000, 0x33A22000, 0x33A22000, 0x33A22000, 0x33A22000, 0x33A22000, 0x33A22000, 0x33A22000, 0x33A22000, 0x33A22000, 0x33A22000 /* _sPI3 */
	.align	64
	.type	__svml_stan_data_internal, @object
	.size	__svml_stan_data_internal, .-__svml_stan_data_internal
656 | .align 64 |
657 | |
658 | #ifdef __svml_stan_reduction_data_internal_typedef |
659 | typedef unsigned int VUINT32; |
660 | typedef struct { |
661 | __declspec(align(64)) VUINT32 _sPtable[256][3][1]; |
662 | } __svml_stan_reduction_data_internal; |
663 | #endif |
664 | __svml_stan_reduction_data_internal: |
665 | /* P_hi P_med P_lo */ |
666 | .long 0x00000000, 0x00000000, 0x00000000 /* 0 */ |
667 | .long 0x00000000, 0x00000000, 0x00000000 /* 1 */ |
668 | .long 0x00000000, 0x00000000, 0x00000000 /* 2 */ |
669 | .long 0x00000000, 0x00000000, 0x00000000 /* 3 */ |
670 | .long 0x00000000, 0x00000000, 0x00000000 /* 4 */ |
671 | .long 0x00000000, 0x00000000, 0x00000000 /* 5 */ |
672 | .long 0x00000000, 0x00000000, 0x00000000 /* 6 */ |
673 | .long 0x00000000, 0x00000000, 0x00000000 /* 7 */ |
674 | .long 0x00000000, 0x00000000, 0x00000000 /* 8 */ |
675 | .long 0x00000000, 0x00000000, 0x00000000 /* 9 */ |
676 | .long 0x00000000, 0x00000000, 0x00000000 /* 10 */ |
677 | .long 0x00000000, 0x00000000, 0x00000000 /* 11 */ |
678 | .long 0x00000000, 0x00000000, 0x00000000 /* 12 */ |
679 | .long 0x00000000, 0x00000000, 0x00000000 /* 13 */ |
680 | .long 0x00000000, 0x00000000, 0x00000000 /* 14 */ |
681 | .long 0x00000000, 0x00000000, 0x00000000 /* 15 */ |
682 | .long 0x00000000, 0x00000000, 0x00000000 /* 16 */ |
683 | .long 0x00000000, 0x00000000, 0x00000000 /* 17 */ |
684 | .long 0x00000000, 0x00000000, 0x00000000 /* 18 */ |
685 | .long 0x00000000, 0x00000000, 0x00000000 /* 19 */ |
686 | .long 0x00000000, 0x00000000, 0x00000000 /* 20 */ |
687 | .long 0x00000000, 0x00000000, 0x00000000 /* 21 */ |
688 | .long 0x00000000, 0x00000000, 0x00000000 /* 22 */ |
689 | .long 0x00000000, 0x00000000, 0x00000000 /* 23 */ |
690 | .long 0x00000000, 0x00000000, 0x00000000 /* 24 */ |
691 | .long 0x00000000, 0x00000000, 0x00000000 /* 25 */ |
692 | .long 0x00000000, 0x00000000, 0x00000000 /* 26 */ |
693 | .long 0x00000000, 0x00000000, 0x00000000 /* 27 */ |
694 | .long 0x00000000, 0x00000000, 0x00000000 /* 28 */ |
695 | .long 0x00000000, 0x00000000, 0x00000000 /* 29 */ |
696 | .long 0x00000000, 0x00000000, 0x00000000 /* 30 */ |
697 | .long 0x00000000, 0x00000000, 0x00000000 /* 31 */ |
698 | .long 0x00000000, 0x00000000, 0x00000000 /* 32 */ |
699 | .long 0x00000000, 0x00000000, 0x00000000 /* 33 */ |
700 | .long 0x00000000, 0x00000000, 0x00000000 /* 34 */ |
701 | .long 0x00000000, 0x00000000, 0x00000000 /* 35 */ |
702 | .long 0x00000000, 0x00000000, 0x00000000 /* 36 */ |
703 | .long 0x00000000, 0x00000000, 0x00000000 /* 37 */ |
704 | .long 0x00000000, 0x00000000, 0x00000000 /* 38 */ |
705 | .long 0x00000000, 0x00000000, 0x00000000 /* 39 */ |
706 | .long 0x00000000, 0x00000000, 0x00000000 /* 40 */ |
707 | .long 0x00000000, 0x00000000, 0x00000000 /* 41 */ |
708 | .long 0x00000000, 0x00000000, 0x00000000 /* 42 */ |
709 | .long 0x00000000, 0x00000000, 0x00000000 /* 43 */ |
710 | .long 0x00000000, 0x00000000, 0x00000000 /* 44 */ |
711 | .long 0x00000000, 0x00000000, 0x00000000 /* 45 */ |
712 | .long 0x00000000, 0x00000000, 0x00000000 /* 46 */ |
713 | .long 0x00000000, 0x00000000, 0x00000000 /* 47 */ |
714 | .long 0x00000000, 0x00000000, 0x00000000 /* 48 */ |
715 | .long 0x00000000, 0x00000000, 0x00000000 /* 49 */ |
716 | .long 0x00000000, 0x00000000, 0x00000000 /* 50 */ |
717 | .long 0x00000000, 0x00000000, 0x00000000 /* 51 */ |
718 | .long 0x00000000, 0x00000000, 0x00000000 /* 52 */ |
719 | .long 0x00000000, 0x00000000, 0x00000000 /* 53 */ |
720 | .long 0x00000000, 0x00000000, 0x00000000 /* 54 */ |
721 | .long 0x00000000, 0x00000000, 0x00000000 /* 55 */ |
722 | .long 0x00000000, 0x00000000, 0x00000000 /* 56 */ |
723 | .long 0x00000000, 0x00000000, 0x00000001 /* 57 */ |
724 | .long 0x00000000, 0x00000000, 0x00000002 /* 58 */ |
725 | .long 0x00000000, 0x00000000, 0x00000005 /* 59 */ |
726 | .long 0x00000000, 0x00000000, 0x0000000A /* 60 */ |
727 | .long 0x00000000, 0x00000000, 0x00000014 /* 61 */ |
728 | .long 0x00000000, 0x00000000, 0x00000028 /* 62 */ |
729 | .long 0x00000000, 0x00000000, 0x00000051 /* 63 */ |
730 | .long 0x00000000, 0x00000000, 0x000000A2 /* 64 */ |
731 | .long 0x00000000, 0x00000000, 0x00000145 /* 65 */ |
732 | .long 0x00000000, 0x00000000, 0x0000028B /* 66 */ |
733 | .long 0x00000000, 0x00000000, 0x00000517 /* 67 */ |
734 | .long 0x00000000, 0x00000000, 0x00000A2F /* 68 */ |
735 | .long 0x00000000, 0x00000000, 0x0000145F /* 69 */ |
736 | .long 0x00000000, 0x00000000, 0x000028BE /* 70 */ |
737 | .long 0x00000000, 0x00000000, 0x0000517C /* 71 */ |
738 | .long 0x00000000, 0x00000000, 0x0000A2F9 /* 72 */ |
739 | .long 0x00000000, 0x00000000, 0x000145F3 /* 73 */ |
740 | .long 0x00000000, 0x00000000, 0x00028BE6 /* 74 */ |
741 | .long 0x00000000, 0x00000000, 0x000517CC /* 75 */ |
742 | .long 0x00000000, 0x00000000, 0x000A2F98 /* 76 */ |
743 | .long 0x00000000, 0x00000000, 0x00145F30 /* 77 */ |
744 | .long 0x00000000, 0x00000000, 0x0028BE60 /* 78 */ |
745 | .long 0x00000000, 0x00000000, 0x00517CC1 /* 79 */ |
746 | .long 0x00000000, 0x00000000, 0x00A2F983 /* 80 */ |
747 | .long 0x00000000, 0x00000000, 0x0145F306 /* 81 */ |
748 | .long 0x00000000, 0x00000000, 0x028BE60D /* 82 */ |
749 | .long 0x00000000, 0x00000000, 0x0517CC1B /* 83 */ |
750 | .long 0x00000000, 0x00000000, 0x0A2F9836 /* 84 */ |
751 | .long 0x00000000, 0x00000000, 0x145F306D /* 85 */ |
752 | .long 0x00000000, 0x00000000, 0x28BE60DB /* 86 */ |
753 | .long 0x00000000, 0x00000000, 0x517CC1B7 /* 87 */ |
754 | .long 0x00000000, 0x00000000, 0xA2F9836E /* 88 */ |
755 | .long 0x00000000, 0x00000001, 0x45F306DC /* 89 */ |
756 | .long 0x00000000, 0x00000002, 0x8BE60DB9 /* 90 */ |
757 | .long 0x00000000, 0x00000005, 0x17CC1B72 /* 91 */ |
758 | .long 0x00000000, 0x0000000A, 0x2F9836E4 /* 92 */ |
759 | .long 0x00000000, 0x00000014, 0x5F306DC9 /* 93 */ |
760 | .long 0x00000000, 0x00000028, 0xBE60DB93 /* 94 */ |
761 | .long 0x00000000, 0x00000051, 0x7CC1B727 /* 95 */ |
762 | .long 0x00000000, 0x000000A2, 0xF9836E4E /* 96 */ |
763 | .long 0x00000000, 0x00000145, 0xF306DC9C /* 97 */ |
764 | .long 0x00000000, 0x0000028B, 0xE60DB939 /* 98 */ |
765 | .long 0x00000000, 0x00000517, 0xCC1B7272 /* 99 */ |
766 | .long 0x00000000, 0x00000A2F, 0x9836E4E4 /* 100 */ |
767 | .long 0x00000000, 0x0000145F, 0x306DC9C8 /* 101 */ |
768 | .long 0x00000000, 0x000028BE, 0x60DB9391 /* 102 */ |
769 | .long 0x00000000, 0x0000517C, 0xC1B72722 /* 103 */ |
770 | .long 0x00000000, 0x0000A2F9, 0x836E4E44 /* 104 */ |
771 | .long 0x00000000, 0x000145F3, 0x06DC9C88 /* 105 */ |
772 | .long 0x00000000, 0x00028BE6, 0x0DB93910 /* 106 */ |
773 | .long 0x00000000, 0x000517CC, 0x1B727220 /* 107 */ |
774 | .long 0x00000000, 0x000A2F98, 0x36E4E441 /* 108 */ |
775 | .long 0x00000000, 0x00145F30, 0x6DC9C882 /* 109 */ |
776 | .long 0x00000000, 0x0028BE60, 0xDB939105 /* 110 */ |
777 | .long 0x00000000, 0x00517CC1, 0xB727220A /* 111 */ |
778 | .long 0x00000000, 0x00A2F983, 0x6E4E4415 /* 112 */ |
779 | .long 0x00000000, 0x0145F306, 0xDC9C882A /* 113 */ |
780 | .long 0x00000000, 0x028BE60D, 0xB9391054 /* 114 */ |
781 | .long 0x00000000, 0x0517CC1B, 0x727220A9 /* 115 */ |
782 | .long 0x00000000, 0x0A2F9836, 0xE4E44152 /* 116 */ |
783 | .long 0x00000000, 0x145F306D, 0xC9C882A5 /* 117 */ |
784 | .long 0x00000000, 0x28BE60DB, 0x9391054A /* 118 */ |
785 | .long 0x00000000, 0x517CC1B7, 0x27220A94 /* 119 */ |
786 | .long 0x00000000, 0xA2F9836E, 0x4E441529 /* 120 */ |
787 | .long 0x00000001, 0x45F306DC, 0x9C882A53 /* 121 */ |
788 | .long 0x00000002, 0x8BE60DB9, 0x391054A7 /* 122 */ |
789 | .long 0x00000005, 0x17CC1B72, 0x7220A94F /* 123 */ |
790 | .long 0x0000000A, 0x2F9836E4, 0xE441529F /* 124 */ |
791 | .long 0x00000014, 0x5F306DC9, 0xC882A53F /* 125 */ |
792 | .long 0x00000028, 0xBE60DB93, 0x91054A7F /* 126 */ |
793 | .long 0x00000051, 0x7CC1B727, 0x220A94FE /* 127 */ |
794 | .long 0x000000A2, 0xF9836E4E, 0x441529FC /* 128 */ |
795 | .long 0x00000145, 0xF306DC9C, 0x882A53F8 /* 129 */ |
796 | .long 0x0000028B, 0xE60DB939, 0x1054A7F0 /* 130 */ |
797 | .long 0x00000517, 0xCC1B7272, 0x20A94FE1 /* 131 */ |
798 | .long 0x00000A2F, 0x9836E4E4, 0x41529FC2 /* 132 */ |
799 | .long 0x0000145F, 0x306DC9C8, 0x82A53F84 /* 133 */ |
800 | .long 0x000028BE, 0x60DB9391, 0x054A7F09 /* 134 */ |
801 | .long 0x0000517C, 0xC1B72722, 0x0A94FE13 /* 135 */ |
802 | .long 0x0000A2F9, 0x836E4E44, 0x1529FC27 /* 136 */ |
803 | .long 0x000145F3, 0x06DC9C88, 0x2A53F84E /* 137 */ |
804 | .long 0x00028BE6, 0x0DB93910, 0x54A7F09D /* 138 */ |
805 | .long 0x000517CC, 0x1B727220, 0xA94FE13A /* 139 */ |
806 | .long 0x000A2F98, 0x36E4E441, 0x529FC275 /* 140 */ |
807 | .long 0x00145F30, 0x6DC9C882, 0xA53F84EA /* 141 */ |
808 | .long 0x0028BE60, 0xDB939105, 0x4A7F09D5 /* 142 */ |
809 | .long 0x00517CC1, 0xB727220A, 0x94FE13AB /* 143 */ |
810 | .long 0x00A2F983, 0x6E4E4415, 0x29FC2757 /* 144 */ |
811 | .long 0x0145F306, 0xDC9C882A, 0x53F84EAF /* 145 */ |
812 | .long 0x028BE60D, 0xB9391054, 0xA7F09D5F /* 146 */ |
813 | .long 0x0517CC1B, 0x727220A9, 0x4FE13ABE /* 147 */ |
814 | .long 0x0A2F9836, 0xE4E44152, 0x9FC2757D /* 148 */ |
815 | .long 0x145F306D, 0xC9C882A5, 0x3F84EAFA /* 149 */ |
816 | .long 0x28BE60DB, 0x9391054A, 0x7F09D5F4 /* 150 */ |
817 | .long 0x517CC1B7, 0x27220A94, 0xFE13ABE8 /* 151 */ |
818 | .long 0xA2F9836E, 0x4E441529, 0xFC2757D1 /* 152 */ |
819 | .long 0x45F306DC, 0x9C882A53, 0xF84EAFA3 /* 153 */ |
820 | .long 0x8BE60DB9, 0x391054A7, 0xF09D5F47 /* 154 */ |
821 | .long 0x17CC1B72, 0x7220A94F, 0xE13ABE8F /* 155 */ |
822 | .long 0x2F9836E4, 0xE441529F, 0xC2757D1F /* 156 */ |
823 | .long 0x5F306DC9, 0xC882A53F, 0x84EAFA3E /* 157 */ |
824 | .long 0xBE60DB93, 0x91054A7F, 0x09D5F47D /* 158 */ |
825 | .long 0x7CC1B727, 0x220A94FE, 0x13ABE8FA /* 159 */ |
826 | .long 0xF9836E4E, 0x441529FC, 0x2757D1F5 /* 160 */ |
827 | .long 0xF306DC9C, 0x882A53F8, 0x4EAFA3EA /* 161 */ |
828 | .long 0xE60DB939, 0x1054A7F0, 0x9D5F47D4 /* 162 */ |
829 | .long 0xCC1B7272, 0x20A94FE1, 0x3ABE8FA9 /* 163 */ |
830 | .long 0x9836E4E4, 0x41529FC2, 0x757D1F53 /* 164 */ |
831 | .long 0x306DC9C8, 0x82A53F84, 0xEAFA3EA6 /* 165 */ |
832 | .long 0x60DB9391, 0x054A7F09, 0xD5F47D4D /* 166 */ |
833 | .long 0xC1B72722, 0x0A94FE13, 0xABE8FA9A /* 167 */ |
834 | .long 0x836E4E44, 0x1529FC27, 0x57D1F534 /* 168 */ |
835 | .long 0x06DC9C88, 0x2A53F84E, 0xAFA3EA69 /* 169 */ |
836 | .long 0x0DB93910, 0x54A7F09D, 0x5F47D4D3 /* 170 */ |
837 | .long 0x1B727220, 0xA94FE13A, 0xBE8FA9A6 /* 171 */ |
838 | .long 0x36E4E441, 0x529FC275, 0x7D1F534D /* 172 */ |
839 | .long 0x6DC9C882, 0xA53F84EA, 0xFA3EA69B /* 173 */ |
840 | .long 0xDB939105, 0x4A7F09D5, 0xF47D4D37 /* 174 */ |
841 | .long 0xB727220A, 0x94FE13AB, 0xE8FA9A6E /* 175 */ |
842 | .long 0x6E4E4415, 0x29FC2757, 0xD1F534DD /* 176 */ |
843 | .long 0xDC9C882A, 0x53F84EAF, 0xA3EA69BB /* 177 */ |
844 | .long 0xB9391054, 0xA7F09D5F, 0x47D4D377 /* 178 */ |
845 | .long 0x727220A9, 0x4FE13ABE, 0x8FA9A6EE /* 179 */ |
846 | .long 0xE4E44152, 0x9FC2757D, 0x1F534DDC /* 180 */ |
847 | .long 0xC9C882A5, 0x3F84EAFA, 0x3EA69BB8 /* 181 */ |
848 | .long 0x9391054A, 0x7F09D5F4, 0x7D4D3770 /* 182 */ |
849 | .long 0x27220A94, 0xFE13ABE8, 0xFA9A6EE0 /* 183 */ |
850 | .long 0x4E441529, 0xFC2757D1, 0xF534DDC0 /* 184 */ |
851 | .long 0x9C882A53, 0xF84EAFA3, 0xEA69BB81 /* 185 */ |
852 | .long 0x391054A7, 0xF09D5F47, 0xD4D37703 /* 186 */ |
853 | .long 0x7220A94F, 0xE13ABE8F, 0xA9A6EE06 /* 187 */ |
854 | .long 0xE441529F, 0xC2757D1F, 0x534DDC0D /* 188 */ |
855 | .long 0xC882A53F, 0x84EAFA3E, 0xA69BB81B /* 189 */ |
856 | .long 0x91054A7F, 0x09D5F47D, 0x4D377036 /* 190 */ |
857 | .long 0x220A94FE, 0x13ABE8FA, 0x9A6EE06D /* 191 */ |
858 | .long 0x441529FC, 0x2757D1F5, 0x34DDC0DB /* 192 */ |
859 | .long 0x882A53F8, 0x4EAFA3EA, 0x69BB81B6 /* 193 */ |
860 | .long 0x1054A7F0, 0x9D5F47D4, 0xD377036D /* 194 */ |
861 | .long 0x20A94FE1, 0x3ABE8FA9, 0xA6EE06DB /* 195 */ |
862 | .long 0x41529FC2, 0x757D1F53, 0x4DDC0DB6 /* 196 */ |
863 | .long 0x82A53F84, 0xEAFA3EA6, 0x9BB81B6C /* 197 */ |
864 | .long 0x054A7F09, 0xD5F47D4D, 0x377036D8 /* 198 */ |
865 | .long 0x0A94FE13, 0xABE8FA9A, 0x6EE06DB1 /* 199 */ |
866 | .long 0x1529FC27, 0x57D1F534, 0xDDC0DB62 /* 200 */ |
867 | .long 0x2A53F84E, 0xAFA3EA69, 0xBB81B6C5 /* 201 */ |
868 | .long 0x54A7F09D, 0x5F47D4D3, 0x77036D8A /* 202 */ |
869 | .long 0xA94FE13A, 0xBE8FA9A6, 0xEE06DB14 /* 203 */ |
870 | .long 0x529FC275, 0x7D1F534D, 0xDC0DB629 /* 204 */ |
871 | .long 0xA53F84EA, 0xFA3EA69B, 0xB81B6C52 /* 205 */ |
872 | .long 0x4A7F09D5, 0xF47D4D37, 0x7036D8A5 /* 206 */ |
873 | .long 0x94FE13AB, 0xE8FA9A6E, 0xE06DB14A /* 207 */ |
874 | .long 0x29FC2757, 0xD1F534DD, 0xC0DB6295 /* 208 */ |
875 | .long 0x53F84EAF, 0xA3EA69BB, 0x81B6C52B /* 209 */ |
876 | .long 0xA7F09D5F, 0x47D4D377, 0x036D8A56 /* 210 */ |
877 | .long 0x4FE13ABE, 0x8FA9A6EE, 0x06DB14AC /* 211 */ |
878 | .long 0x9FC2757D, 0x1F534DDC, 0x0DB62959 /* 212 */ |
879 | .long 0x3F84EAFA, 0x3EA69BB8, 0x1B6C52B3 /* 213 */ |
880 | .long 0x7F09D5F4, 0x7D4D3770, 0x36D8A566 /* 214 */ |
881 | .long 0xFE13ABE8, 0xFA9A6EE0, 0x6DB14ACC /* 215 */ |
882 | .long 0xFC2757D1, 0xF534DDC0, 0xDB629599 /* 216 */ |
883 | .long 0xF84EAFA3, 0xEA69BB81, 0xB6C52B32 /* 217 */ |
884 | .long 0xF09D5F47, 0xD4D37703, 0x6D8A5664 /* 218 */ |
885 | .long 0xE13ABE8F, 0xA9A6EE06, 0xDB14ACC9 /* 219 */ |
886 | .long 0xC2757D1F, 0x534DDC0D, 0xB6295993 /* 220 */ |
887 | .long 0x84EAFA3E, 0xA69BB81B, 0x6C52B327 /* 221 */ |
888 | .long 0x09D5F47D, 0x4D377036, 0xD8A5664F /* 222 */ |
889 | .long 0x13ABE8FA, 0x9A6EE06D, 0xB14ACC9E /* 223 */ |
890 | .long 0x2757D1F5, 0x34DDC0DB, 0x6295993C /* 224 */ |
891 | .long 0x4EAFA3EA, 0x69BB81B6, 0xC52B3278 /* 225 */ |
892 | .long 0x9D5F47D4, 0xD377036D, 0x8A5664F1 /* 226 */ |
893 | .long 0x3ABE8FA9, 0xA6EE06DB, 0x14ACC9E2 /* 227 */ |
894 | .long 0x757D1F53, 0x4DDC0DB6, 0x295993C4 /* 228 */ |
895 | .long 0xEAFA3EA6, 0x9BB81B6C, 0x52B32788 /* 229 */ |
896 | .long 0xD5F47D4D, 0x377036D8, 0xA5664F10 /* 230 */ |
897 | .long 0xABE8FA9A, 0x6EE06DB1, 0x4ACC9E21 /* 231 */ |
898 | .long 0x57D1F534, 0xDDC0DB62, 0x95993C43 /* 232 */ |
899 | .long 0xAFA3EA69, 0xBB81B6C5, 0x2B327887 /* 233 */ |
900 | .long 0x5F47D4D3, 0x77036D8A, 0x5664F10E /* 234 */ |
901 | .long 0xBE8FA9A6, 0xEE06DB14, 0xACC9E21C /* 235 */ |
902 | .long 0x7D1F534D, 0xDC0DB629, 0x5993C439 /* 236 */ |
903 | .long 0xFA3EA69B, 0xB81B6C52, 0xB3278872 /* 237 */ |
904 | .long 0xF47D4D37, 0x7036D8A5, 0x664F10E4 /* 238 */ |
905 | .long 0xE8FA9A6E, 0xE06DB14A, 0xCC9E21C8 /* 239 */ |
906 | .long 0xD1F534DD, 0xC0DB6295, 0x993C4390 /* 240 */ |
907 | .long 0xA3EA69BB, 0x81B6C52B, 0x32788720 /* 241 */ |
908 | .long 0x47D4D377, 0x036D8A56, 0x64F10E41 /* 242 */ |
909 | .long 0x8FA9A6EE, 0x06DB14AC, 0xC9E21C82 /* 243 */ |
910 | .long 0x1F534DDC, 0x0DB62959, 0x93C43904 /* 244 */ |
911 | .long 0x3EA69BB8, 0x1B6C52B3, 0x27887208 /* 245 */ |
912 | .long 0x7D4D3770, 0x36D8A566, 0x4F10E410 /* 246 */ |
913 | .long 0xFA9A6EE0, 0x6DB14ACC, 0x9E21C820 /* 247 */ |
914 | .long 0xF534DDC0, 0xDB629599, 0x3C439041 /* 248 */ |
915 | .long 0xEA69BB81, 0xB6C52B32, 0x78872083 /* 249 */ |
916 | .long 0xD4D37703, 0x6D8A5664, 0xF10E4107 /* 250 */ |
917 | .long 0xA9A6EE06, 0xDB14ACC9, 0xE21C820F /* 251 */ |
918 | .long 0x534DDC0D, 0xB6295993, 0xC439041F /* 252 */ |
919 | .long 0xA69BB81B, 0x6C52B327, 0x8872083F /* 253 */ |
920 | .long 0x4D377036, 0xD8A5664F, 0x10E4107F /* 254 */ |
921 | .long 0x9A6EE06D, 0xB14ACC9E, 0x21C820FF /* 255 */ |
922 | .align 64 |
923 | .type __svml_stan_reduction_data_internal, @object |
924 | .size __svml_stan_reduction_data_internal, .-__svml_stan_reduction_data_internal |
925 | |