1/* Wrapper implementations of vector math functions.
2 Copyright (C) 2014-2022 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
18
/* SSE2 ISA version as wrapper to scalar.

   Expands to a complete function body: calls the scalar routine
   "callee" once for each of the two double lanes of %xmm0 and returns
   both results packed in %xmm0.  The expansion ends in ret.

   Stack frame (24 bytes):
     0(%rsp)  .. 15(%rsp)   saved input vector
     16(%rsp) .. 23(%rsp)   result of the first call

   NOTE(review): assumes %rsp is 8 mod 16 when the expansion starts
   (i.e. it sits at function entry), so that subtracting 24 leaves the
   frame 16-byte aligned for movaps and for both calls -- confirm at
   the use sites.  */
.macro WRAPPER_IMPL_SSE2 callee
	subq $24, %rsp
	cfi_adjust_cfa_offset (24)
	/* Keep the whole input; lane 1 is reloaded after the first call.  */
	movaps %xmm0, (%rsp)
	/* Lane 0 is already in the low half of %xmm0.  */
	call JUMPTARGET(\callee)
	movsd %xmm0, 16(%rsp)
	/* Lane 1.  */
	movsd 8(%rsp), %xmm0
	call JUMPTARGET(\callee)
	/* Pack { first result, second result } and return it in %xmm0.
	   The second result is merged straight from %xmm0; it does not
	   need to be spilled.  */
	movsd 16(%rsp), %xmm1
	unpcklpd %xmm0, %xmm1
	movaps %xmm1, %xmm0
	addq $24, %rsp
	cfi_adjust_cfa_offset (-24)
	ret
.endm
36
/* 2 argument SSE2 ISA version as wrapper to scalar.

   Expands to a complete function body: calls the scalar routine
   "callee" once per double lane, passing the matching lanes of %xmm0
   and %xmm1 as its two arguments, and returns both results packed in
   %xmm0.  The expansion ends in ret.

   Stack frame (40 bytes):
     0(%rsp)  .. 15(%rsp)   saved first argument vector
     16(%rsp) .. 31(%rsp)   saved second argument vector
     32(%rsp) .. 39(%rsp)   result of the first call

   NOTE(review): assumes %rsp is 8 mod 16 when the expansion starts
   (i.e. it sits at function entry), so that subtracting 40 leaves the
   frame 16-byte aligned for movaps and for both calls -- confirm at
   the use sites.  */
.macro WRAPPER_IMPL_SSE2_ff callee
	subq $40, %rsp
	cfi_adjust_cfa_offset (40)
	/* Keep both inputs; lane 1 of each is reloaded after the first
	   call.  */
	movaps %xmm0, (%rsp)
	movaps %xmm1, 16(%rsp)
	/* Lane 0 of both arguments is already in place.  */
	call JUMPTARGET(\callee)
	movsd %xmm0, 32(%rsp)
	/* Lane 1 of both arguments.  */
	movsd 8(%rsp), %xmm0
	movsd 24(%rsp), %xmm1
	call JUMPTARGET(\callee)
	/* Pack { first result, second result } and return it in %xmm0.
	   The second result is merged straight from %xmm0; it does not
	   need to be spilled.  */
	movsd 32(%rsp), %xmm1
	unpcklpd %xmm0, %xmm1
	movaps %xmm1, %xmm0
	addq $40, %rsp
	cfi_adjust_cfa_offset (-40)
	ret
.endm
56
/* 3 argument SSE2 ISA version as wrapper to scalar.

   In:  %xmm0 = two double lanes
        %rdi  = first output pointer
        %rsi  = second output pointer

   "callee" is called once per lane with %rdi and %rsi pointing at
   8-byte stack temporaries.  After each call the temporaries are
   copied to the caller's outputs: offset 0 after the first call,
   offset 8 after the second.

   Stack frame below the two pushes (40 bytes):
     0(%rsp)  .. 15(%rsp)   saved input vector
     16(%rsp) .. 23(%rsp)   temporary addressed by %rsi
     24(%rsp) .. 31(%rsp)   temporary addressed by %rdi  */
.macro WRAPPER_IMPL_SSE2_fFF callee
	pushq %rbp
	cfi_adjust_cfa_offset (8)
	cfi_rel_offset (%rbp, 0)
	pushq %rbx
	cfi_adjust_cfa_offset (8)
	cfi_rel_offset (%rbx, 0)
	/* The output pointers must survive both calls, so keep them in
	   callee-saved registers: %rbp = first, %rbx = second.  */
	movq %rdi, %rbp
	movq %rsi, %rbx
	subq $40, %rsp
	cfi_adjust_cfa_offset(40)
	/* Point the callee at the stack temporaries.  */
	leaq 16(%rsp), %rsi
	leaq 24(%rsp), %rdi
	movaps %xmm0, (%rsp)
	/* Lane 0 is already in the low half of %xmm0.  */
	call JUMPTARGET(\callee)
	/* Re-create the temporary pointers for the second call.  */
	leaq 16(%rsp), %rsi
	leaq 24(%rsp), %rdi
	/* Copy the first call's temporaries to offset 0 of each output,
	   and meanwhile move lane 1 of the input into the low half of
	   %xmm1.  */
	movsd 24(%rsp), %xmm0
	movapd (%rsp), %xmm1
	movsd %xmm0, 0(%rbp)
	unpckhpd %xmm1, %xmm1
	movsd 16(%rsp), %xmm0
	movsd %xmm0, (%rbx)
	/* Lane 1 becomes the argument of the second call.  */
	movapd %xmm1, %xmm0
	call JUMPTARGET(\callee)
	/* Copy the second call's temporaries to offset 8 of each
	   output.  */
	movsd 24(%rsp), %xmm0
	movsd %xmm0, 8(%rbp)
	movsd 16(%rsp), %xmm0
	movsd %xmm0, 8(%rbx)
	addq $40, %rsp
	cfi_adjust_cfa_offset(-40)
	popq %rbx
	cfi_adjust_cfa_offset (-8)
	cfi_restore (%rbx)
	popq %rbp
	cfi_adjust_cfa_offset (-8)
	cfi_restore (%rbp)
	ret
.endm
97
/* AVX/AVX2 ISA version as wrapper to SSE ISA version.

   Splits the 256-bit input in %ymm0 into two 128-bit halves, calls
   "callee" on each half, and joins the two 128-bit results into
   %ymm0.

   Stack frame (32 bytes, 32-byte aligned):
     0(%rsp)  .. 15(%rsp)   high half of the input
     16(%rsp) .. 31(%rsp)   result for the low half  */
.macro WRAPPER_IMPL_AVX callee
	pushq %rbp
	cfi_adjust_cfa_offset (8)
	cfi_rel_offset (%rbp, 0)
	/* %rbp remembers the incoming stack pointer so the realignment
	   below can be undone in the epilogue.  */
	movq %rsp, %rbp
	cfi_def_cfa_register (%rbp)
	andq $-32, %rsp
	subq $32, %rsp
	/* Spill the high half; the low half stays in %xmm0 as the
	   argument of the first call.  */
	vextractf128 $1, %ymm0, (%rsp)
	/* Clear the upper register halves before entering the SSE-level
	   callee.  */
	vzeroupper
	call HIDDEN_JUMPTARGET(\callee)
	/* Save the low-half result and process the high half.  */
	vmovapd %xmm0, 16(%rsp)
	vmovaps (%rsp), %xmm0
	call HIDDEN_JUMPTARGET(\callee)
	/* %ymm0 = { low-half result, high-half result }.  */
	vmovapd %xmm0, %xmm1
	vmovapd 16(%rsp), %xmm0
	vinsertf128 $1, %xmm1, %ymm0, %ymm0
	movq %rbp, %rsp
	cfi_def_cfa_register (%rsp)
	popq %rbp
	cfi_adjust_cfa_offset (-8)
	cfi_restore (%rbp)
	ret
.endm
123
/* 2 argument AVX/AVX2 ISA version as wrapper to SSE ISA version.

   Splits the 256-bit inputs in %ymm0 and %ymm1 into 128-bit halves,
   calls "callee" on the two low halves and then on the two high
   halves, and joins the two 128-bit results into %ymm0.

   Stack frame (64 bytes, 32-byte aligned):
     0(%rsp)  .. 15(%rsp)   high half of the second argument
     16(%rsp) .. 31(%rsp)   high half of the first argument
     32(%rsp) .. 47(%rsp)   result for the low halves  */
.macro WRAPPER_IMPL_AVX_ff callee
	pushq %rbp
	cfi_adjust_cfa_offset (8)
	cfi_rel_offset (%rbp, 0)
	/* %rbp remembers the incoming stack pointer so the realignment
	   below can be undone in the epilogue.  */
	movq %rsp, %rbp
	cfi_def_cfa_register (%rbp)
	andq $-32, %rsp
	subq $64, %rsp
	/* Spill the high halves; the low halves stay in %xmm0/%xmm1 as
	   the arguments of the first call.  */
	vextractf128 $1, %ymm0, 16(%rsp)
	vextractf128 $1, %ymm1, (%rsp)
	/* Clear the upper register halves before entering the SSE-level
	   callee.  */
	vzeroupper
	call HIDDEN_JUMPTARGET(\callee)
	/* Save the low-half result and process the high halves.  */
	vmovaps %xmm0, 32(%rsp)
	vmovaps 16(%rsp), %xmm0
	vmovaps (%rsp), %xmm1
	call HIDDEN_JUMPTARGET(\callee)
	/* %ymm0 = { low-half result, high-half result }.  */
	vmovaps %xmm0, %xmm1
	vmovaps 32(%rsp), %xmm0
	vinsertf128 $1, %xmm1, %ymm0, %ymm0
	movq %rbp, %rsp
	cfi_def_cfa_register (%rsp)
	popq %rbp
	cfi_adjust_cfa_offset (-8)
	cfi_restore (%rbp)
	ret
.endm
151
/* 3 argument AVX/AVX2 ISA version as wrapper to SSE ISA version.

   In:  %ymm0 = 256-bit input
        %rdi  = first output pointer
        %rsi  = second output pointer

   The first call to "callee" handles the low 128 bits and is given
   the caller's output pointers unchanged.  The second call handles the
   high 128 bits and is given stack temporaries, which are then copied
   to offset 16 of each output.

   %r13 and %r14 are pushed before the stack is realigned so that they
   sit at fixed offsets from %rbp.  The CFA stays %rbp-based for the
   whole body, so it must not be adjusted on those pushes, and the
   save slots are described relative to %rbp.

   Stack frame (64 bytes, 32-byte aligned):
     0(%rsp)  .. 15(%rsp)   temporary for the first output
     16(%rsp) .. 31(%rsp)   temporary for the second output
     32(%rsp) .. 47(%rsp)   high half of the input  */
.macro WRAPPER_IMPL_AVX_fFF callee
	pushq %rbp
	cfi_adjust_cfa_offset (8)
	cfi_rel_offset (%rbp, 0)
	movq %rsp, %rbp
	cfi_def_cfa_register (%rbp)
	/* Saved at -8(%rbp) and -16(%rbp).  */
	pushq %r13
	cfi_rel_offset (%r13, -8)
	pushq %r14
	cfi_rel_offset (%r14, -16)
	andq $-32, %rsp
	subq $64, %rsp
	/* The output pointers must survive both calls.  */
	movq %rsi, %r14
	movq %rdi, %r13
	/* Spill the high half; the low half stays in %xmm0 as the
	   argument of the first call.  */
	vextractf128 $1, %ymm0, 32(%rsp)
	/* Clear the upper register halves before entering the SSE-level
	   callee.  */
	vzeroupper
	call HIDDEN_JUMPTARGET(\callee)
	/* High half, with the outputs redirected to the temporaries.  */
	vmovaps 32(%rsp), %xmm0
	lea (%rsp), %rdi
	lea 16(%rsp), %rsi
	call HIDDEN_JUMPTARGET(\callee)
	vmovapd (%rsp), %xmm0
	vmovapd 16(%rsp), %xmm1
	/* The caller's buffers are not known here to be 16-byte aligned,
	   so use unaligned stores.  */
	vmovupd %xmm0, 16(%r13)
	vmovupd %xmm1, 16(%r14)
	/* Drop the aligned frame and return to the register save area.  */
	leaq -16(%rbp), %rsp
	popq %r14
	cfi_restore (%r14)
	popq %r13
	cfi_restore (%r13)
	/* %rsp equals %rbp again here, so the CFA can go back to being
	   %rsp-based before %rbp is restored.  */
	cfi_def_cfa_register (%rsp)
	popq %rbp
	cfi_adjust_cfa_offset (-8)
	cfi_restore (%rbp)
	ret
.endm
194
/* AVX512 ISA version as wrapper to AVX2 ISA version.

   Splits the 512-bit input in %zmm0 into two 256-bit halves via the
   stack, calls "callee" on each half, and reloads the two 256-bit
   results as one 512-bit value in %zmm0.

   Stack frame (128 bytes, 64-byte aligned):
     0(%rsp)  .. 63(%rsp)    input (low half, then high half)
     64(%rsp) .. 127(%rsp)   result (low half, then high half)  */
.macro WRAPPER_IMPL_AVX512 callee
	pushq %rbp
	cfi_adjust_cfa_offset (8)
	cfi_rel_offset (%rbp, 0)
	/* %rbp remembers the incoming stack pointer so the realignment
	   below can be undone in the epilogue.  */
	movq %rsp, %rbp
	cfi_def_cfa_register (%rbp)
	andq $-64, %rsp
	subq $128, %rsp
	/* Spill the whole input, then pass its low half.  */
	vmovups %zmm0, (%rsp)
	vmovupd (%rsp), %ymm0
	call HIDDEN_JUMPTARGET(\callee)
	/* Store the low-half result and pass the high half.  */
	vmovupd %ymm0, 64(%rsp)
	vmovupd 32(%rsp), %ymm0
	call HIDDEN_JUMPTARGET(\callee)
	/* Store the high-half result next to the low one and load both
	   as a single 512-bit return value.  */
	vmovupd %ymm0, 96(%rsp)
	vmovups 64(%rsp), %zmm0
	movq %rbp, %rsp
	cfi_def_cfa_register (%rsp)
	popq %rbp
	cfi_adjust_cfa_offset (-8)
	cfi_restore (%rbp)
	ret
.endm
219
/* 2 argument AVX512 ISA version as wrapper to AVX2 ISA version.

   Splits the 512-bit inputs in %zmm0 and %zmm1 into 256-bit halves via
   the stack, calls "callee" on the two low halves and then on the two
   high halves, and reloads the two 256-bit results as one 512-bit
   value in %zmm0.

   Stack frame (192 bytes, 64-byte aligned):
     0(%rsp)   .. 63(%rsp)    first argument (low half, then high half)
     64(%rsp)  .. 127(%rsp)   second argument (low half, then high half)
     128(%rsp) .. 191(%rsp)   result (low half, then high half)  */
.macro WRAPPER_IMPL_AVX512_ff callee
	pushq %rbp
	cfi_adjust_cfa_offset (8)
	cfi_rel_offset (%rbp, 0)
	/* %rbp remembers the incoming stack pointer so the realignment
	   below can be undone in the epilogue.  */
	movq %rsp, %rbp
	cfi_def_cfa_register (%rbp)
	andq $-64, %rsp
	subq $192, %rsp
	/* Spill both inputs, then pass their low halves.  */
	vmovups %zmm0, (%rsp)
	vmovups %zmm1, 64(%rsp)
	vmovupd (%rsp), %ymm0
	vmovupd 64(%rsp), %ymm1
	call HIDDEN_JUMPTARGET(\callee)
	/* Store the low-half result and pass the high halves.  */
	vmovupd %ymm0, 128(%rsp)
	vmovupd 32(%rsp), %ymm0
	vmovupd 96(%rsp), %ymm1
	call HIDDEN_JUMPTARGET(\callee)
	/* Store the high-half result next to the low one and load both
	   as a single 512-bit return value.  */
	vmovupd %ymm0, 160(%rsp)
	vmovups 128(%rsp), %zmm0
	movq %rbp, %rsp
	cfi_def_cfa_register (%rsp)
	popq %rbp
	cfi_adjust_cfa_offset (-8)
	cfi_restore (%rbp)
	ret
.endm
247
/* 3 argument AVX512 ISA version as wrapper to AVX2 ISA version.

   In:  %zmm0 = 512-bit input
        %rdi  = first output pointer
        %rsi  = second output pointer

   The first call to "callee" handles the low 256 bits and is given
   the caller's output pointers unchanged.  The second call handles the
   high 256 bits and is given stack temporaries, which are then copied
   to offset 32 of each output.

   %r12 and %r13 are pushed before the stack is realigned so that they
   sit at fixed offsets from %rbp.  The CFA stays %rbp-based for the
   whole body, so it must not be adjusted on those pushes, and the
   save slots are described relative to %rbp.

   Stack frame (128 bytes, 64-byte aligned):
     0(%rsp)  .. 63(%rsp)    input (low half, then high half)
     64(%rsp) .. 95(%rsp)    temporary for the first output
     96(%rsp) .. 127(%rsp)   temporary for the second output  */
.macro WRAPPER_IMPL_AVX512_fFF callee
	pushq %rbp
	cfi_adjust_cfa_offset (8)
	cfi_rel_offset (%rbp, 0)
	movq %rsp, %rbp
	cfi_def_cfa_register (%rbp)
	/* Saved at -8(%rbp) and -16(%rbp).  */
	pushq %r12
	cfi_rel_offset (%r12, -8)
	pushq %r13
	cfi_rel_offset (%r13, -16)
	andq $-64, %rsp
	subq $128, %rsp
	/* The output pointers must survive both calls; spill the whole
	   input and pass its low half.  */
	movq %rsi, %r13
	vmovups %zmm0, (%rsp)
	movq %rdi, %r12
	vmovupd (%rsp), %ymm0
	call HIDDEN_JUMPTARGET(\callee)
	/* High half, with the outputs redirected to the temporaries.  */
	vmovupd 32(%rsp), %ymm0
	lea 64(%rsp), %rdi
	lea 96(%rsp), %rsi
	call HIDDEN_JUMPTARGET(\callee)
	vmovupd 64(%rsp), %ymm0
	vmovupd 96(%rsp), %ymm1
	vmovupd %ymm0, 32(%r12)
	vmovupd %ymm1, 32(%r13)
	/* No vector value is returned, so clear the upper register
	   halves before going back to the caller.  */
	vzeroupper
	/* Drop the aligned frame and return to the register save area.  */
	leaq -16(%rbp), %rsp
	popq %r13
	cfi_restore (%r13)
	popq %r12
	cfi_restore (%r12)
	/* %rsp equals %rbp again here, so the CFA can go back to being
	   %rsp-based before %rbp is restored.  */
	cfi_def_cfa_register (%rsp)
	popq %rbp
	cfi_adjust_cfa_offset (-8)
	cfi_restore (%rbp)
	ret
.endm
291

/* Source: glibc/sysdeps/x86_64/fpu/svml_d_wrapper_impl.h  */