/* Function sincos vectorized with AVX-512. Wrapper to AVX2 version.
   Copyright (C) 2014-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include "svml_d_wrapper_impl.h"

	.text
ENTRY (_ZGVeN8vl8l8_sincos)
WRAPPER_IMPL_AVX512_fFF _ZGVdN4vl8l8_sincos
END (_ZGVeN8vl8l8_sincos)

/* AVX512 ISA version as wrapper to AVX2 ISA version (for vector
   function declared with #pragma omp declare simd notinbranch).  */
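/* Illustrative note, not part of the original source: a minimal, hedged
   sketch of how a caller may end up in _ZGVeN8vvv_sincos.  With glibc's
   <math.h> declaring sincos via "#pragma omp declare simd notinbranch"
   and a compiler targeting AVX-512 with OpenMP SIMD enabled (for example
   GCC with -O2 -fopenmp-simd -mavx512f -ffast-math), a loop such as the
   hypothetical helper below can be vectorized into calls to the 8-lane
   variant defined in this file; the exact flags and whether the compiler
   picks this entry point depend on the toolchain:

       #define _GNU_SOURCE
       #include <math.h>

       void
       vec_sincos (const double *x, double *s, double *c, int n)
       {
         for (int i = 0; i < n; i++)
           sincos (x[i], &s[i], &c[i]);
       }

   Because each lane carries its own destination addresses, the compiler
   passes vectors of pointers, hence the "vvv" signature.  */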
.macro WRAPPER_IMPL_AVX512_fFF_vvv callee
#ifndef __ILP32__
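	/* LP64 path.  On entry %zmm0 holds the eight double inputs, while
	   %zmm1 and %zmm2 hold eight 64-bit pointers each: the per-lane
	   destinations for the sine and cosine results, respectively.  */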
	pushq %rbp
	cfi_adjust_cfa_offset (8)
	cfi_rel_offset (%rbp, 0)
	movq %rsp, %rbp
	cfi_def_cfa_register (%rbp)
	andq $-64, %rsp
	subq $320, %rsp
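	/* 320-byte scratch area on the 64-byte-aligned stack:
	     0(%rsp)    sine results, lanes 0-3 (first callee call)
	     32(%rsp)   sine results, lanes 4-7 (second callee call)
	     64(%rsp)   cosine results, lanes 0-3
	     96(%rsp)   cosine results, lanes 4-7
	     128(%rsp)  saved %zmm1 (eight sine result pointers)
	     192(%rsp)  saved %zmm2 (eight cosine result pointers)
	     256(%rsp)  saved %zmm0 (eight inputs; high half at 288).  */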
	vmovups %zmm0, 256(%rsp)
	lea (%rsp), %rdi
	vmovups %zmm1, 128(%rdi)
	vmovups %zmm2, 192(%rdi)
	lea 64(%rsp), %rsi
	call HIDDEN_JUMPTARGET(\callee)
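	/* The first call consumed lanes 0-3 (the low half of %zmm0 is the
	   callee's %ymm0 argument).  Reload lanes 4-7 and advance the
	   result pointers to the second halves of the scratch buffers.  */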
	vmovdqu 288(%rsp), %ymm0
	lea 32(%rsp), %rdi
	lea 96(%rsp), %rsi
	call HIDDEN_JUMPTARGET(\callee)
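	/* Scatter the sixteen results: load each saved 64-bit destination
	   pointer from 128(%rsp)/192(%rsp) and store the matching double
	   computed at 0(%rsp)-120(%rsp) through it.  */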
	movq 128(%rsp), %rdx
	movq 192(%rsp), %rsi
	movq 136(%rsp), %r8
	movq 200(%rsp), %r10
	movq (%rsp), %rax
	movq 64(%rsp), %rcx
	movq 8(%rsp), %rdi
	movq 72(%rsp), %r9
	movq %rax, (%rdx)
	movq %rcx, (%rsi)
	movq 144(%rsp), %rax
	movq 208(%rsp), %rcx
	movq %rdi, (%r8)
	movq %r9, (%r10)
	movq 152(%rsp), %rdi
	movq 216(%rsp), %r9
	movq 16(%rsp), %r11
	movq 80(%rsp), %rdx
	movq 24(%rsp), %rsi
	movq 88(%rsp), %r8
	movq %r11, (%rax)
	movq %rdx, (%rcx)
	movq 160(%rsp), %r11
	movq 224(%rsp), %rdx
	movq %rsi, (%rdi)
	movq %r8, (%r9)
	movq 168(%rsp), %rsi
	movq 232(%rsp), %r8
	movq 32(%rsp), %r10
	movq 96(%rsp), %rax
	movq 40(%rsp), %rcx
	movq 104(%rsp), %rdi
	movq %r10, (%r11)
	movq %rax, (%rdx)
	movq 176(%rsp), %r10
	movq 240(%rsp), %rax
	movq %rcx, (%rsi)
	movq %rdi, (%r8)
	movq 184(%rsp), %rcx
	movq 248(%rsp), %rdi
	movq 48(%rsp), %r9
	movq 112(%rsp), %r11
	movq 56(%rsp), %rdx
	movq 120(%rsp), %rsi
	movq %r9, (%r10)
	movq %r11, (%rax)
	movq %rdx, (%rcx)
	movq %rsi, (%rdi)
	movq %rbp, %rsp
	cfi_def_cfa_register (%rsp)
	popq %rbp
	cfi_adjust_cfa_offset (-8)
	cfi_restore (%rbp)
	ret
#else
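	/* x32 (ILP32) path.  Pointers are 32 bits wide, so the eight sine
	   and cosine destination pointers arrive packed in %ymm1 and %ymm2,
	   and the results are scattered below one double at a time with
	   vmovsd through those 32-bit addresses.  */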
	leal 8(%rsp), %r10d
	.cfi_def_cfa 10, 0
	andl $-64, %esp
	pushq -8(%r10d)
	pushq %rbp
	.cfi_escape 0x10,0x6,0x2,0x76,0
	movl %esp, %ebp
	pushq %r12
	leal -112(%rbp), %esi
	pushq %r10
	.cfi_escape 0xf,0x3,0x76,0x70,0x6
	.cfi_escape 0x10,0xc,0x2,0x76,0x78
	leal -176(%rbp), %edi
	movq %rsi, %r12
	pushq %rbx
	.cfi_escape 0x10,0x3,0x2,0x76,0x68
	movq %rdi, %rbx
	subl $280, %esp
	vmovdqa %ymm1, -208(%ebp)
	vmovdqa %ymm2, -240(%ebp)
	vmovapd %zmm0, -304(%ebp)
	call HIDDEN_JUMPTARGET(\callee)
	leal 32(%r12), %esi
	vmovupd -272(%ebp), %ymm0
	leal 32(%rbx), %edi
	call HIDDEN_JUMPTARGET(\callee)
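	/* Both halves are done: sine results sit at -176(%ebp), cosine
	   results at -112(%ebp).  Load each 32-bit destination pointer
	   saved at -208(%ebp) (sine) and -240(%ebp) (cosine) and store
	   the corresponding double through it.  */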
	movl -208(%ebp), %eax
	vmovsd -176(%ebp), %xmm0
	vmovsd %xmm0, (%eax)
	movl -204(%ebp), %eax
	vmovsd -168(%ebp), %xmm0
	vmovsd %xmm0, (%eax)
	movl -200(%ebp), %eax
	vmovsd -160(%ebp), %xmm0
	vmovsd %xmm0, (%eax)
	movl -196(%ebp), %eax
	vmovsd -152(%ebp), %xmm0
	vmovsd %xmm0, (%eax)
	movl -192(%ebp), %eax
	vmovsd -144(%ebp), %xmm0
	vmovsd %xmm0, (%eax)
	movl -188(%ebp), %eax
	vmovsd -136(%ebp), %xmm0
	vmovsd %xmm0, (%eax)
	movl -184(%ebp), %eax
	vmovsd -128(%ebp), %xmm0
	vmovsd %xmm0, (%eax)
	movl -180(%ebp), %eax
	vmovsd -120(%ebp), %xmm0
	vmovsd %xmm0, (%eax)
	movl -240(%ebp), %eax
	vmovsd -112(%ebp), %xmm0
	vmovsd %xmm0, (%eax)
	movl -236(%ebp), %eax
	vmovsd -104(%ebp), %xmm0
	vmovsd %xmm0, (%eax)
	movl -232(%ebp), %eax
	vmovsd -96(%ebp), %xmm0
	vmovsd %xmm0, (%eax)
	movl -228(%ebp), %eax
	vmovsd -88(%ebp), %xmm0
	vmovsd %xmm0, (%eax)
	movl -224(%ebp), %eax
	vmovsd -80(%ebp), %xmm0
	vmovsd %xmm0, (%eax)
	movl -220(%ebp), %eax
	vmovsd -72(%ebp), %xmm0
	vmovsd %xmm0, (%eax)
	movl -216(%ebp), %eax
	vmovsd -64(%ebp), %xmm0
	vmovsd %xmm0, (%eax)
	movl -212(%ebp), %eax
	vmovsd -56(%ebp), %xmm0
	vmovsd %xmm0, (%eax)
	addl $280, %esp
	popq %rbx
	popq %r10
	.cfi_def_cfa 10, 0
	popq %r12
	popq %rbp
	leal -8(%r10), %esp
	.cfi_def_cfa 7, 8
	ret
#endif
.endm

ENTRY (_ZGVeN8vvv_sincos)
WRAPPER_IMPL_AVX512_fFF_vvv _ZGVdN4vl8l8_sincos
END (_ZGVeN8vvv_sincos)
