/* Function sincosf vectorized in AVX ISA as wrapper to SSE4 ISA version.
   Copyright (C) 2014-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include "svml_s_wrapper_impl.h"

	.text
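/* The vl4l4 variant returns its results through two linear output
   arrays, so the generic AVX wrapper can simply run the 4-lane SSE4
   callee twice, once per 128-bit half of the input vector.  */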
ENTRY (_ZGVcN8vl4l4_sincosf)
WRAPPER_IMPL_AVX_fFF _ZGVbN4vl4l4_sincosf
END (_ZGVcN8vl4l4_sincosf)

/* AVX ISA version as wrapper to SSE ISA version (for vector
   function declared with #pragma omp declare simd notinbranch).  */
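
/* For reference, the scalar prototype this wrapper vectorizes looks
   roughly like the sketch below (an illustrative declaration, not the
   exact glibc header text):

     #pragma omp declare simd notinbranch
     extern void sincosf (float x, float *sinp, float *cosp);

   With 8 lanes each pointer parameter becomes a vector of 8 pointers,
   which is why the code below has to scatter the results lane by
   lane.  */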
.macro WRAPPER_IMPL_AVX_fFF_vvv callee
#ifndef __ILP32__
	pushq	%rbp
	movq	%rsp, %rbp
	andq	$-32, %rsp
	subq	$224, %rsp
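	/* Local frame layout (offsets from the 32-byte aligned %rsp):
	     0   ... 31   sin results (low half, then high half)
	     32  ... 63   cos results (low half, then high half)
	     64  ... 95   the 8 input floats from %ymm0
	     96  ... 207  the 14 result pointers passed in %xmm1-%xmm7
	   The remaining two pointers stay on the caller's stack and are
	   read from 16(%rbp) and 24(%rbp) after the calls.  */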
	vmovups	%ymm0, 64(%rsp)
	lea	(%rsp), %rdi
	vmovdqu	%xmm1, 96(%rdi)
	vmovdqu	%xmm2, 112(%rdi)
	vmovdqu	%xmm3, 128(%rdi)
	vmovdqu	%xmm4, 144(%rdi)
	vmovdqu	%xmm5, 160(%rdi)
	lea	32(%rsp), %rsi
	vmovdqu	%xmm6, 144(%rsi)
	vmovdqu	%xmm7, 160(%rsi)
	vzeroupper
	call	HIDDEN_JUMPTARGET(\callee)
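	/* The first call processed the low 128 bits of the input; now run
	   the SSE callee on the high 128 bits, placing its results right
	   after the first set.  */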
	vmovdqu	80(%rsp), %xmm0
	lea	16(%rsp), %rdi
	lea	48(%rsp), %rsi
	call	HIDDEN_JUMPTARGET(\callee)
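	/* Scatter the 16 results: load the saved destination pointers and
	   store each 32-bit result through its pointer, all sin lanes
	   first, then all cos lanes.  */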
	movq	96(%rsp), %rdx
	movq	104(%rsp), %rsi
	movq	112(%rsp), %r8
	movq	120(%rsp), %r10
	movl	(%rsp), %eax
	movl	4(%rsp), %ecx
	movl	8(%rsp), %edi
	movl	12(%rsp), %r9d
	movl	%eax, (%rdx)
	movl	%ecx, (%rsi)
	movq	128(%rsp), %rax
	movq	136(%rsp), %rcx
	movl	%edi, (%r8)
	movl	%r9d, (%r10)
	movq	144(%rsp), %rdi
	movq	152(%rsp), %r9
	movl	16(%rsp), %r11d
	movl	20(%rsp), %edx
	movl	24(%rsp), %esi
	movl	28(%rsp), %r8d
	movl	%r11d, (%rax)
	movl	%edx, (%rcx)
	movq	160(%rsp), %r11
	movq	168(%rsp), %rdx
	movl	%esi, (%rdi)
	movl	%r8d, (%r9)
	movq	176(%rsp), %rsi
	movq	184(%rsp), %r8
	movl	32(%rsp), %r10d
	movl	36(%rsp), %eax
	movl	40(%rsp), %ecx
	movl	44(%rsp), %edi
	movl	%r10d, (%r11)
	movl	%eax, (%rdx)
	movq	192(%rsp), %r10
	movq	200(%rsp), %rax
	movl	%ecx, (%rsi)
	movl	%edi, (%r8)
	movq	16(%rbp), %rcx
	movq	24(%rbp), %rdi
	movl	48(%rsp), %r9d
	movl	52(%rsp), %r11d
	movl	56(%rsp), %edx
	movl	60(%rsp), %esi
	movl	%r9d, (%r10)
	movl	%r11d, (%rax)
	movl	%edx, (%rcx)
	movl	%esi, (%rdi)
	movq	%rbp, %rsp
	popq	%rbp
	ret
#else
	leal	8(%rsp), %r10d
	.cfi_def_cfa 10, 0
	andl	$-32, %esp
	pushq	-8(%r10d)
	pushq	%rbp
	.cfi_escape 0x10,0x6,0x2,0x76,0
	movl	%esp, %ebp
	pushq	%r12
	leal	-80(%rbp), %esi
	pushq	%r10
	.cfi_escape 0xf,0x3,0x76,0x70,0x6
	.cfi_escape 0x10,0xc,0x2,0x76,0x78
	leal	-112(%rbp), %edi
	movq	%rsi, %r12
	pushq	%rbx
	.cfi_escape 0x10,0x3,0x2,0x76,0x68
	movq	%rdi, %rbx
	subl	$184, %esp
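	/* x32: pointers are 32-bit, so the 8 sin pointers arrive in
	   %xmm1/%xmm2 and the 8 cos pointers in %xmm3/%xmm4.  Spill them
	   together with the input vector below %rbp:
	     -208(%ebp) input %ymm0
	     -176(%ebp) cos ptrs 4-7    -160(%ebp) cos ptrs 0-3
	     -144(%ebp) sin ptrs 4-7    -128(%ebp) sin ptrs 0-3
	     -112(%ebp) sin results     -80(%ebp)  cos results  */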
	vmovaps	%xmm1, -128(%ebp)
	vmovaps	%xmm2, -144(%ebp)
	vmovaps	%xmm3, -160(%ebp)
	vmovaps	%xmm4, -176(%ebp)
	vmovaps	%ymm0, -208(%ebp)
	vzeroupper
	call	HIDDEN_JUMPTARGET(\callee)
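	/* Repeat for the high 128 bits of the input, directing the results
	   16 bytes past the first set.  */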
	leal	16(%r12), %esi
	vmovups	-192(%ebp), %xmm0
	leal	16(%rbx), %edi
	call	HIDDEN_JUMPTARGET(\callee)
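	/* Scatter the results: each destination is either the low dword of
	   a saved pointer pair loaded with movq or extracted with vpextrd,
	   and each result is stored through it with vmovss.  */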
	movq	-128(%ebp), %rax
	vmovss	-112(%ebp), %xmm0
	vmovdqa	-128(%ebp), %xmm7
	vmovdqa	-144(%ebp), %xmm3
	vmovss	%xmm0, (%eax)
	vmovss	-108(%ebp), %xmm0
	vpextrd	$1, %xmm7, %eax
	vmovss	%xmm0, (%eax)
	movq	-120(%ebp), %rax
	vmovss	-104(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	vmovss	-100(%ebp), %xmm0
	vpextrd	$3, %xmm7, %eax
	vmovdqa	-160(%ebp), %xmm7
	vmovss	%xmm0, (%eax)
	movq	-144(%ebp), %rax
	vmovss	-96(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	vmovss	-92(%ebp), %xmm0
	vpextrd	$1, %xmm3, %eax
	vmovss	%xmm0, (%eax)
	movq	-136(%ebp), %rax
	vmovss	-88(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	vmovss	-84(%ebp), %xmm0
	vpextrd	$3, %xmm3, %eax
	vmovss	%xmm0, (%eax)
	movq	-160(%ebp), %rax
	vmovss	-80(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	vmovss	-76(%ebp), %xmm0
	vpextrd	$1, %xmm7, %eax
	vmovss	%xmm0, (%eax)
	movq	-152(%ebp), %rax
	vmovss	-72(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	vmovss	-68(%ebp), %xmm0
	vpextrd	$3, %xmm7, %eax
	vmovss	%xmm0, (%eax)
	movq	-176(%ebp), %rax
	vmovss	-64(%ebp), %xmm0
	vmovdqa	-176(%ebp), %xmm3
	vmovss	%xmm0, (%eax)
	vmovss	-60(%ebp), %xmm0
	vpextrd	$1, %xmm3, %eax
	vmovss	%xmm0, (%eax)
	movq	-168(%ebp), %rax
	vmovss	-56(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	vmovss	-52(%ebp), %xmm0
	vpextrd	$3, %xmm3, %eax
	vmovss	%xmm0, (%eax)
	addl	$184, %esp
	popq	%rbx
	popq	%r10
	.cfi_def_cfa 10, 0
	popq	%r12
	popq	%rbp
	leal	-8(%r10), %esp
	.cfi_def_cfa 7, 8
	ret
#endif
.endm

ENTRY (_ZGVcN8vvv_sincosf)
WRAPPER_IMPL_AVX_fFF_vvv _ZGVbN4vl4l4_sincosf
END (_ZGVcN8vvv_sincosf)
