1 | /* Function atan2f vectorized with AVX-512. |
2 | Copyright (C) 2021-2024 Free Software Foundation, Inc. |
3 | This file is part of the GNU C Library. |
4 | |
5 | The GNU C Library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either |
8 | version 2.1 of the License, or (at your option) any later version. |
9 | |
10 | The GNU C Library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | Lesser General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU Lesser General Public |
16 | License along with the GNU C Library; if not, see |
17 | https://www.gnu.org/licenses/. */ |
18 | |
19 | /* |
20 | * ALGORITHM DESCRIPTION: |
21 | * For 0.0 <= x <= 7.0/16.0: atan(x) = atan(0.0) + atan(s), where s=(x-0.0)/(1.0+0.0*x) |
22 | * For 7.0/16.0 <= x <= 11.0/16.0: atan(x) = atan(0.5) + atan(s), where s=(x-0.5)/(1.0+0.5*x) |
23 | * For 11.0/16.0 <= x <= 19.0/16.0: atan(x) = atan(1.0) + atan(s), where s=(x-1.0)/(1.0+1.0*x) |
24 | * For 19.0/16.0 <= x <= 39.0/16.0: atan(x) = atan(1.5) + atan(s), where s=(x-1.5)/(1.0+1.5*x) |
25 | * For 39.0/16.0 <= x <= inf : atan(x) = atan(inf) + atan(s), where s=-1.0/x |
26 | * Where atan(s) ~= s+s^3*Poly11(s^2) on interval |s|<7.0/16.0. |
27 | * |
28 | * |
29 | */ |
30 | |
31 | /* Byte offsets into data table __svml_satan2_data_internal: each entry is one 64-byte row (16 x 32-bit lanes, one full zmm load), so consecutive offsets advance by 64. |
32 | */ |
33 | #define sZERO 0 |
34 | #define sONE 64 |
35 | #define sSIGN_MASK 128 |
36 | #define sABS_MASK 192 |
37 | #define sPIO2 256 |
38 | #define sPI 320 |
39 | #define sPC8 384 |
40 | #define sPC7 448 |
41 | #define sPC6 512 |
42 | #define sPC5 576 |
43 | #define sPC4 640 |
44 | #define sPC3 704 |
45 | #define sPC2 768 |
46 | #define sPC1 832 |
47 | #define sPC0 896 |
48 | #define iCHK_WORK_SUB 960 |
49 | #define iCHK_WORK_CMP 1024 |
50 | |
51 | #include <sysdep.h> |
52 | |
53 | .section .text.evex512, "ax" , @progbits |
54 | ENTRY(_ZGVeN16vv_atan2f_skx) |
55 | pushq %rbp |
56 | cfi_def_cfa_offset(16) |
57 | movq %rsp, %rbp |
58 | cfi_def_cfa(6, 16) |
59 | cfi_offset(6, -16) |
60 | andq $-64, %rsp |
61 | subq $256, %rsp |
62 | xorl %edx, %edx |
63 | |
64 | /* |
65 | * Vector atan2f over 16 single-precision lanes: zmm0 = y, zmm1 = x, |
66 | * result returned in zmm0. The prologue above aligns rsp to 64 bytes |
67 | * and reserves 256 bytes of scratch; edx is cleared and later holds the |
68 | * per-lane mask of inputs that must be redone with scalar atan2f. |
69 | * Register roles set up below: zmm8 = x, zmm2 = |x|, zmm1 = |y|, |
70 | * zmm7 = sign bit of x, zmm6 = sign bit of y. |
71 | * Arguments signs |
72 | */ |
73 | vmovups sABS_MASK+__svml_satan2_data_internal(%rip), %zmm6 |
74 | vmovups sONE+__svml_satan2_data_internal(%rip), %zmm3 |
75 | |
76 | /* Testing on working interval: a lane is in range when (|v| - iCHK_WORK_SUB) < iCHK_WORK_CMP as signed 32-bit integers; k2 checks |x|, k3 checks |y|. */ |
77 | vmovups iCHK_WORK_SUB+__svml_satan2_data_internal(%rip), %zmm9 |
78 | vmovups iCHK_WORK_CMP+__svml_satan2_data_internal(%rip), %zmm14 |
79 | |
80 | /* |
81 | * 1) If |y| < |x| (k1 set) then a = |y|, b = |x|, PIO2 = 0 |
82 | * 2) Otherwise (k1 clear) a = -|x|, b = |y|, PIO2 = Pi/2 |
83 | */ |
84 | vmovups sPIO2+__svml_satan2_data_internal(%rip), %zmm4 |
85 | vpternlogd $255, %zmm13, %zmm13, %zmm13 |
86 | vmovaps %zmm1, %zmm8 |
87 | vandps %zmm6, %zmm8, %zmm2 |
88 | vandps %zmm6, %zmm0, %zmm1 |
89 | vorps sSIGN_MASK+__svml_satan2_data_internal(%rip), %zmm2, %zmm5 |
90 | vpsubd %zmm9, %zmm2, %zmm10 |
91 | vpsubd %zmm9, %zmm1, %zmm12 |
92 | vxorps %zmm2, %zmm8, %zmm7 |
93 | vxorps %zmm1, %zmm0, %zmm6 |
94 | vcmpps $17, {sae}, %zmm2, %zmm1, %k1 |
95 | vpcmpgtd %zmm10, %zmm14, %k2 |
96 | vpcmpgtd %zmm12, %zmm14, %k3 |
97 | vmovups sPC6+__svml_satan2_data_internal(%rip), %zmm14 |
98 | vblendmps %zmm1, %zmm5, %zmm11{%k1} |
99 | vblendmps %zmm2, %zmm1, %zmm5{%k1} |
100 | vxorps %zmm4, %zmm4, %zmm4{%k1} |
101 | |
102 | /* |
103 | * Division a/b (a in zmm11, b in zmm5): vrcp14ps estimate of 1/b, |
104 | * one Newton-Raphson refinement of the reciprocal, then a residual |
105 | * correction of the quotient a*(1/b). The final quotient s is in zmm15. |
106 | */ |
107 | vrcp14ps %zmm5, %zmm15 |
108 | vfnmadd231ps {rn-sae}, %zmm5, %zmm15, %zmm3 |
109 | vfmadd213ps {rn-sae}, %zmm15, %zmm3, %zmm15 |
110 | vmulps {rn-sae}, %zmm15, %zmm11, %zmm3 |
111 | vfnmadd231ps {rn-sae}, %zmm5, %zmm3, %zmm11 |
112 | vfmadd213ps {rn-sae}, %zmm3, %zmm11, %zmm15 |
113 | vmovups sPC8+__svml_satan2_data_internal(%rip), %zmm11 |
114 | vpternlogd $255, %zmm3, %zmm3, %zmm3 |
115 | |
116 | /* Polynomial P(s^2) with coefficients sPC8..sPC0: zmm9 = s^2, zmm10 = s^4, two Horner chains in s^4 (zmm14 and zmm11) joined at the end. The interleaved vpandnd/vpord/vptestmd build the out-of-range lane mask in zmm3 and k0. */ |
117 | vmulps {rn-sae}, %zmm15, %zmm15, %zmm9 |
118 | vpandnd %zmm10, %zmm10, %zmm13{%k2} |
119 | vmulps {rn-sae}, %zmm9, %zmm9, %zmm10 |
120 | vfmadd231ps {rn-sae}, %zmm10, %zmm11, %zmm14 |
121 | vmovups sPC5+__svml_satan2_data_internal(%rip), %zmm11 |
122 | vpandnd %zmm12, %zmm12, %zmm3{%k3} |
123 | vpord %zmm3, %zmm13, %zmm3 |
124 | vmovups sPC4+__svml_satan2_data_internal(%rip), %zmm13 |
125 | vmovups sPC7+__svml_satan2_data_internal(%rip), %zmm12 |
126 | vptestmd %zmm3, %zmm3, %k0 |
127 | vfmadd213ps {rn-sae}, %zmm13, %zmm10, %zmm14 |
128 | vfmadd231ps {rn-sae}, %zmm10, %zmm12, %zmm11 |
129 | vmovups sPC3+__svml_satan2_data_internal(%rip), %zmm12 |
130 | vmovups sPC2+__svml_satan2_data_internal(%rip), %zmm13 |
131 | |
132 | /* Special branch for fast (vector) processing of zero arguments: kortestw sets ZF from the out-of-range mask k0 here, and the jne under "Go to auxiliary branch" consumes it; none of the vector instructions in between modify EFLAGS. */ |
133 | kortestw %k0, %k0 |
134 | vfmadd213ps {rn-sae}, %zmm12, %zmm10, %zmm11 |
135 | vmovups sPC1+__svml_satan2_data_internal(%rip), %zmm12 |
136 | vfmadd213ps {rn-sae}, %zmm13, %zmm10, %zmm14 |
137 | vmovups sPC0+__svml_satan2_data_internal(%rip), %zmm13 |
138 | vfmadd213ps {rn-sae}, %zmm12, %zmm10, %zmm11 |
139 | vfmadd213ps {rn-sae}, %zmm13, %zmm10, %zmm14 |
140 | vfmadd213ps {rn-sae}, %zmm14, %zmm9, %zmm11 |
141 | |
142 | /* Reconstruction: zmm11 = s * P(s^2) + PIO2 (PIO2 is 0 or Pi/2 per lane, see zmm4). */ |
143 | vfmadd213ps {rn-sae}, %zmm4, %zmm15, %zmm11 |
144 | |
145 | /* OR in the sign of x, then add Pi in lanes where x <= 0 (mask k4, compare against the sZERO row), and finally OR in the sign of y; the main-path result ends up in zmm10. */ |
146 | vmovups __svml_satan2_data_internal(%rip), %zmm15 |
147 | vorps %zmm7, %zmm11, %zmm9 |
148 | vcmpps $18, {sae}, %zmm15, %zmm8, %k4 |
149 | vmovups sPI+__svml_satan2_data_internal(%rip), %zmm11 |
150 | vaddps {rn-sae}, %zmm11, %zmm9, %zmm9{%k4} |
151 | vorps %zmm6, %zmm9, %zmm10 |
152 | |
153 | /* Go to auxiliary branch */ |
154 | jne L(AUX_BRANCH) |
155 | # LOE rbx r12 r13 r14 r15 edx zmm0 zmm1 zmm2 zmm3 zmm4 zmm5 zmm6 zmm7 zmm8 zmm10 zmm11 |
156 | |
157 | /* Return from auxiliary branch |
158 | * for out of main path inputs |
159 | */ |
160 | |
161 | L(AUX_BRANCH_RETURN): |
162 | /* |
163 | * edx holds one bit per lane that still has to be recomputed by scalar |
164 | * atan2f; it stays zero when the auxiliary branch was not taken. The end of implementation |
165 | */ |
166 | testl %edx, %edx |
167 | |
168 | /* Go to special inputs processing branch */ |
169 | jne L(SPECIAL_VALUES_BRANCH) |
170 | # LOE rbx r12 r13 r14 r15 edx zmm0 zmm8 zmm10 |
171 | |
172 | /* Restore registers |
173 | * and exit the function (result vector zmm10 is moved to zmm0) |
174 | */ |
175 | |
176 | L(EXIT): |
177 | vmovaps %zmm10, %zmm0 |
178 | movq %rbp, %rsp |
179 | popq %rbp |
180 | cfi_def_cfa(7, 8) |
181 | cfi_restore(6) |
182 | ret |
183 | cfi_def_cfa(6, 16) |
184 | cfi_offset(6, -16) |
185 | |
186 | /* Branch to process special inputs: spill y (zmm0), x (zmm8) and the vector result (zmm10) to 64/128/192(%rsp), |
187 | * save r12/r13/r14 at 16/8/0(%rsp), then patch flagged lanes one at a time (r12d = lane index, r13d = lane mask from edx) |
188 | */ |
189 | |
190 | L(SPECIAL_VALUES_BRANCH): |
191 | vmovups %zmm0, 64(%rsp) |
192 | vmovups %zmm8, 128(%rsp) |
193 | vmovups %zmm10, 192(%rsp) |
194 | # LOE rbx r12 r13 r14 r15 edx zmm10 |
195 | |
196 | xorl %eax, %eax |
197 | # LOE rbx r12 r13 r14 r15 eax edx |
198 | |
199 | vzeroupper |
200 | movq %r12, 16(%rsp) |
201 | /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -240; DW_OP_plus) */ |
202 | .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x10, 0xff, 0xff, 0xff, 0x22 |
203 | movl %eax, %r12d |
204 | movq %r13, 8(%rsp) |
205 | /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -248; DW_OP_plus) */ |
206 | .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x08, 0xff, 0xff, 0xff, 0x22 |
207 | movl %edx, %r13d |
208 | movq %r14, (%rsp) |
209 | /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -256; DW_OP_plus) */ |
210 | .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x00, 0xff, 0xff, 0xff, 0x22 |
211 | # LOE rbx r15 r12d r13d |
212 | |
213 | /* Range mask |
214 | * bits check: CF = bit r12d of the lane mask in r13d |
215 | */ |
216 | |
217 | L(RANGEMASK_CHECK): |
218 | btl %r12d, %r13d |
219 | |
220 | /* Call scalar math function */ |
221 | jc L(SCALAR_MATH_CALL) |
222 | # LOE rbx r15 r12d r13d |
223 | |
224 | /* Special inputs |
225 | * processing loop: advance to the next lane, 16 lanes in total |
226 | */ |
227 | |
228 | L(SPECIAL_VALUES_LOOP): |
229 | incl %r12d |
230 | cmpl $16, %r12d |
231 | |
232 | /* Check bits in range mask */ |
233 | jl L(RANGEMASK_CHECK) |
234 | # LOE rbx r15 r12d r13d |
235 | |
236 | movq 16(%rsp), %r12 |
237 | cfi_restore(12) |
238 | movq 8(%rsp), %r13 |
239 | cfi_restore(13) |
240 | movq (%rsp), %r14 |
241 | cfi_restore(14) |
242 | vmovups 192(%rsp), %zmm10 |
243 | |
244 | /* Go to exit */ |
245 | jmp L(EXIT) |
246 | /* DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -240; DW_OP_plus) */ |
247 | .cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x10, 0xff, 0xff, 0xff, 0x22 |
248 | /* DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -248; DW_OP_plus) */ |
249 | .cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x08, 0xff, 0xff, 0xff, 0x22 |
250 | /* DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -256; DW_OP_plus) */ |
251 | .cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x00, 0xff, 0xff, 0xff, 0x22 |
252 | # LOE rbx r12 r13 r14 r15 zmm10 |
253 | |
254 | /* Scalar math function call |
255 | * to process special input: r14 = lane index; loads y[lane] into xmm0 and x[lane] into xmm1 from the spill area, calls atan2f, and stores the result into the spilled result vector |
256 | */ |
257 | |
258 | L(SCALAR_MATH_CALL): |
259 | movl %r12d, %r14d |
260 | vmovss 64(%rsp, %r14, 4), %xmm0 |
261 | vmovss 128(%rsp, %r14, 4), %xmm1 |
262 | call atan2f@PLT |
263 | # LOE rbx r14 r15 r12d r13d xmm0 |
264 | |
265 | vmovss %xmm0, 192(%rsp, %r14, 4) |
266 | |
267 | /* Process special inputs in loop */ |
268 | jmp L(SPECIAL_VALUES_LOOP) |
269 | cfi_restore(12) |
270 | cfi_restore(13) |
271 | cfi_restore(14) |
272 | # LOE rbx r15 r12d r13d |
273 | |
274 | /* Auxiliary branch |
275 | * for out of main path inputs: builds the result for lanes where x or y is zero and neither is NaN, and reports every other flagged lane in edx for the scalar fallback |
276 | */ |
277 | |
278 | L(AUX_BRANCH): |
279 | /* Check if at least one of X or Y is zero: iAXAYZERO (zmm9 = zero vector; k3/k4 are set where |x|/|y| is nonzero) */ |
280 | vmovups __svml_satan2_data_internal(%rip), %zmm9 |
281 | |
282 | /* Check if both X & Y are not NaNs: iXYnotNAN (k1/k2 are set for NaN lanes and inverted into zmm12/zmm13 further down) */ |
283 | vcmpps $3, {sae}, %zmm8, %zmm8, %k1 |
284 | vcmpps $3, {sae}, %zmm0, %zmm0, %k2 |
285 | vpcmpd $4, %zmm9, %zmm2, %k3 |
286 | vpcmpd $4, %zmm9, %zmm1, %k4 |
287 | |
288 | /* |
289 | * Path for zero arguments (at least one of both) |
290 | * Check if both args are zeros (den. is zero) |
291 | */ |
292 | vcmpps $4, {sae}, %zmm9, %zmm5, %k5 |
293 | |
294 | /* Res = sign(Y)*(X<0)?(PIO2+PI):PIO2; k6 marks lanes whose x has its sign bit set (signed integer compare 0 > x), zmm11 still holds sPI from the main path */ |
295 | vpcmpgtd %zmm8, %zmm9, %k6 |
296 | vpternlogd $255, %zmm14, %zmm14, %zmm14 |
297 | vpternlogd $255, %zmm12, %zmm12, %zmm12 |
298 | vpternlogd $255, %zmm13, %zmm13, %zmm13 |
299 | vpandnd %zmm2, %zmm2, %zmm14{%k3} |
300 | vpternlogd $255, %zmm2, %zmm2, %zmm2 |
301 | vpandnd %zmm1, %zmm1, %zmm2{%k4} |
302 | vpord %zmm2, %zmm14, %zmm15 |
303 | vpternlogd $255, %zmm2, %zmm2, %zmm2 |
304 | vpandnd %zmm5, %zmm5, %zmm2{%k5} |
305 | |
306 | /* Set sPIO2 to zero if den. is zero */ |
307 | vpandnd %zmm4, %zmm2, %zmm4 |
308 | vpandd %zmm2, %zmm9, %zmm5 |
309 | vpord %zmm5, %zmm4, %zmm2 |
310 | vorps %zmm7, %zmm2, %zmm7 |
311 | vaddps {rn-sae}, %zmm11, %zmm7, %zmm7{%k6} |
312 | vorps %zmm6, %zmm7, %zmm6 |
313 | vpandnd %zmm8, %zmm8, %zmm12{%k1} |
314 | vpandnd %zmm0, %zmm0, %zmm13{%k2} |
315 | vandps %zmm13, %zmm12, %zmm12 |
316 | |
317 | /* Check if at least one of X or Y is zero and not NaN: iAXAYZEROnotNAN */ |
318 | vpandd %zmm12, %zmm15, %zmm1 |
319 | |
320 | /* Exclude from previous callout mask zero (and not NaN) arguments */ |
321 | vpandnd %zmm3, %zmm1, %zmm3 |
322 | |
323 | /* Go to callout: edx receives one bit per lane that still needs scalar atan2f */ |
324 | vptestmd %zmm3, %zmm3, %k0 |
325 | kmovw %k0, %edx |
326 | |
327 | /* Merge results from main and spec path */ |
328 | vpandnd %zmm10, %zmm1, %zmm10 |
329 | vpandd %zmm1, %zmm6, %zmm11 |
330 | vpord %zmm11, %zmm10, %zmm10 |
331 | |
332 | /* Return to main vector processing path */ |
333 | jmp L(AUX_BRANCH_RETURN) |
334 | # LOE rbx r12 r13 r14 r15 edx zmm0 zmm8 zmm10 |
335 | END(_ZGVeN16vv_atan2f_skx) |
336 | |
337 | .section .rodata, "a" |
338 | .align 64 |
339 | |
340 | #ifdef __svml_satan2_data_internal_typedef |
341 | typedef unsigned int VUINT32; |
342 | typedef struct { |
343 | __declspec(align(64)) VUINT32 sZERO[16][1]; |
344 | __declspec(align(64)) VUINT32 sONE[16][1]; |
345 | __declspec(align(64)) VUINT32 sSIGN_MASK[16][1]; |
346 | __declspec(align(64)) VUINT32 sABS_MASK[16][1]; |
347 | __declspec(align(64)) VUINT32 sPIO2[16][1]; |
348 | __declspec(align(64)) VUINT32 sPI[16][1]; |
349 | __declspec(align(64)) VUINT32 sPC8[16][1]; |
350 | __declspec(align(64)) VUINT32 sPC7[16][1]; |
351 | __declspec(align(64)) VUINT32 sPC6[16][1]; |
352 | __declspec(align(64)) VUINT32 sPC5[16][1]; |
353 | __declspec(align(64)) VUINT32 sPC4[16][1]; |
354 | __declspec(align(64)) VUINT32 sPC3[16][1]; |
355 | __declspec(align(64)) VUINT32 sPC2[16][1]; |
356 | __declspec(align(64)) VUINT32 sPC1[16][1]; |
357 | __declspec(align(64)) VUINT32 sPC0[16][1]; |
358 | __declspec(align(64)) VUINT32 iCHK_WORK_SUB[16][1]; |
359 | __declspec(align(64)) VUINT32 iCHK_WORK_CMP[16][1]; |
360 | } __svml_satan2_data_internal; |
361 | #endif |
362 | __svml_satan2_data_internal: |
363 | .long 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000 // sZERO (0.0f in every lane) |
364 | .align 64 |
365 | .long 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000 // sONE (1.0f) |
366 | .align 64 |
367 | .long 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000 // sSIGN_MASK (sign bit only) |
368 | .align 64 |
369 | .long 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF // sABS_MASK (all bits except sign) |
370 | .align 64 |
371 | .long 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB, 0x3FC90FDB // sPIO2 (pi/2) |
372 | .align 64 |
373 | .long 0x40490FDB, 0x40490FDB, 0x40490FDB, 0x40490FDB, 0x40490FDB, 0x40490FDB, 0x40490FDB, 0x40490FDB, 0x40490FDB, 0x40490FDB, 0x40490FDB, 0x40490FDB, 0x40490FDB, 0x40490FDB, 0x40490FDB, 0x40490FDB // sPI (pi) |
374 | .align 64 |
375 | .long 0x3B322CC0, 0x3B322CC0, 0x3B322CC0, 0x3B322CC0, 0x3B322CC0, 0x3B322CC0, 0x3B322CC0, 0x3B322CC0, 0x3B322CC0, 0x3B322CC0, 0x3B322CC0, 0x3B322CC0, 0x3B322CC0, 0x3B322CC0, 0x3B322CC0, 0x3B322CC0 // sPC8 |
376 | .align 64 |
377 | .long 0xBC7F2631, 0xBC7F2631, 0xBC7F2631, 0xBC7F2631, 0xBC7F2631, 0xBC7F2631, 0xBC7F2631, 0xBC7F2631, 0xBC7F2631, 0xBC7F2631, 0xBC7F2631, 0xBC7F2631, 0xBC7F2631, 0xBC7F2631, 0xBC7F2631, 0xBC7F2631 // sPC7 |
378 | .align 64 |
379 | .long 0x3D2BC384, 0x3D2BC384, 0x3D2BC384, 0x3D2BC384, 0x3D2BC384, 0x3D2BC384, 0x3D2BC384, 0x3D2BC384, 0x3D2BC384, 0x3D2BC384, 0x3D2BC384, 0x3D2BC384, 0x3D2BC384, 0x3D2BC384, 0x3D2BC384, 0x3D2BC384 // sPC6 |
380 | .align 64 |
381 | .long 0xBD987629, 0xBD987629, 0xBD987629, 0xBD987629, 0xBD987629, 0xBD987629, 0xBD987629, 0xBD987629, 0xBD987629, 0xBD987629, 0xBD987629, 0xBD987629, 0xBD987629, 0xBD987629, 0xBD987629, 0xBD987629 // sPC5 |
382 | .align 64 |
383 | .long 0x3DD96474, 0x3DD96474, 0x3DD96474, 0x3DD96474, 0x3DD96474, 0x3DD96474, 0x3DD96474, 0x3DD96474, 0x3DD96474, 0x3DD96474, 0x3DD96474, 0x3DD96474, 0x3DD96474, 0x3DD96474, 0x3DD96474, 0x3DD96474 // sPC4 |
384 | .align 64 |
385 | .long 0xBE1161F8, 0xBE1161F8, 0xBE1161F8, 0xBE1161F8, 0xBE1161F8, 0xBE1161F8, 0xBE1161F8, 0xBE1161F8, 0xBE1161F8, 0xBE1161F8, 0xBE1161F8, 0xBE1161F8, 0xBE1161F8, 0xBE1161F8, 0xBE1161F8, 0xBE1161F8 // sPC3 |
386 | .align 64 |
387 | .long 0x3E4CB79F, 0x3E4CB79F, 0x3E4CB79F, 0x3E4CB79F, 0x3E4CB79F, 0x3E4CB79F, 0x3E4CB79F, 0x3E4CB79F, 0x3E4CB79F, 0x3E4CB79F, 0x3E4CB79F, 0x3E4CB79F, 0x3E4CB79F, 0x3E4CB79F, 0x3E4CB79F, 0x3E4CB79F // sPC2 |
388 | .align 64 |
389 | .long 0xBEAAAA49, 0xBEAAAA49, 0xBEAAAA49, 0xBEAAAA49, 0xBEAAAA49, 0xBEAAAA49, 0xBEAAAA49, 0xBEAAAA49, 0xBEAAAA49, 0xBEAAAA49, 0xBEAAAA49, 0xBEAAAA49, 0xBEAAAA49, 0xBEAAAA49, 0xBEAAAA49, 0xBEAAAA49 // sPC1 |
390 | .align 64 |
391 | .long 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000 // sPC0 (1.0f) |
392 | .align 64 |
393 | .long 0x81000000, 0x81000000, 0x81000000, 0x81000000, 0x81000000, 0x81000000, 0x81000000, 0x81000000, 0x81000000, 0x81000000, 0x81000000, 0x81000000, 0x81000000, 0x81000000, 0x81000000, 0x81000000 // iCHK_WORK_SUB (integer bias subtracted from |x| and |y| bit patterns) |
394 | .align 64 |
395 | .long 0xFC000000, 0xFC000000, 0xFC000000, 0xFC000000, 0xFC000000, 0xFC000000, 0xFC000000, 0xFC000000, 0xFC000000, 0xFC000000, 0xFC000000, 0xFC000000, 0xFC000000, 0xFC000000, 0xFC000000, 0xFC000000 // iCHK_WORK_CMP (signed upper bound for the biased value) |
396 | .align 64 |
397 | .type __svml_satan2_data_internal, @object |
398 | .size __svml_satan2_data_internal, .-__svml_satan2_data_internal |
399 | |