/* Function sincosf vectorized with AVX2, wrapper version.
   Copyright (C) 2014-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include "svml_s_wrapper_impl.h"

	.text
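/* _ZGVdN8vl4l4_sincosf: 8 floats in %ymm0, sin and cos results stored
   through the two pointer arguments; implemented by the generic AVX
   wrapper, which runs the SSE 4-lane kernel twice.  */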
ENTRY (_ZGVdN8vl4l4_sincosf)
WRAPPER_IMPL_AVX_fFF _ZGVbN4vl4l4_sincosf
END (_ZGVdN8vl4l4_sincosf)
libmvec_hidden_def (_ZGVdN8vl4l4_sincosf)

/* AVX2 ISA version as wrapper to SSE ISA version (for vector
   function declared with #pragma omp declare simd notinbranch).  */
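/* For illustration (names x, s, c, n are hypothetical), a scalar
   source such as

     #pragma omp declare simd notinbranch
     extern void sincosf (float, float *, float *);

     for (int i = 0; i < n; i++)
       sincosf (x[i], &s[i], &c[i]);

   may, when compiled for AVX2, be vectorized into calls to
   _ZGVdN8vvv_sincosf, with the 8 inputs packed in %ymm0 and the 16
   result pointers packed in %ymm1-%ymm4 (LP64) or %ymm1-%ymm2 (x32).  */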
.macro WRAPPER_IMPL_AVX2_fFF_vvv callee
#ifndef __ILP32__
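/* LP64: pointers are 8 bytes, so the 8 sin-result pointers arrive in
   %ymm1:%ymm2 and the 8 cos-result pointers in %ymm3:%ymm4.  */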
	pushq	%rbp
	cfi_adjust_cfa_offset (8)
	cfi_rel_offset (%rbp, 0)
	movq	%rsp, %rbp
	cfi_def_cfa_register (%rbp)
	andq	$-32, %rsp
	subq	$224, %rsp
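/* Local frame (relative to the aligned %rsp):
     0..31    sin results (lanes 0-3, then 4-7)
     32..63   cos results (lanes 0-3, then 4-7)
     64..127  sin-result pointers (%ymm1:%ymm2)
     128..191 cos-result pointers (%ymm3:%ymm4)
     192..223 input vector.  */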
	vmovups	%ymm0, 192(%rsp)
	lea	(%rsp), %rdi
	vmovdqu	%ymm1, 64(%rdi)
	vmovdqu	%ymm2, 96(%rdi)
	vmovdqu	%ymm3, 128(%rdi)
	vmovdqu	%ymm4, 160(%rdi)
	lea	32(%rsp), %rsi
	vzeroupper
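/* First call: lanes 0-3.  %xmm0 already holds the low half of the
   input; %rdi/%rsi point at the first sin/cos result slots.  */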
	call	HIDDEN_JUMPTARGET(\callee)
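/* Second call: lanes 4-7.  Reload the high half of the input and
   advance both result slots by 16 bytes.  */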
	vmovups	208(%rsp), %xmm0
	lea	16(%rsp), %rdi
	lea	48(%rsp), %rsi
	call	HIDDEN_JUMPTARGET(\callee)
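/* Scatter the 16 results through the 16 saved pointers.  */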
	movq	64(%rsp), %rdx
	movq	72(%rsp), %rsi
	movq	80(%rsp), %r8
	movq	88(%rsp), %r10
	movl	(%rsp), %eax
	movl	4(%rsp), %ecx
	movl	8(%rsp), %edi
	movl	12(%rsp), %r9d
	movl	%eax, (%rdx)
	movl	%ecx, (%rsi)
	movq	96(%rsp), %rax
	movq	104(%rsp), %rcx
	movl	%edi, (%r8)
	movl	%r9d, (%r10)
	movq	112(%rsp), %rdi
	movq	120(%rsp), %r9
	movl	16(%rsp), %r11d
	movl	20(%rsp), %edx
	movl	24(%rsp), %esi
	movl	28(%rsp), %r8d
	movl	%r11d, (%rax)
	movl	%edx, (%rcx)
	movq	128(%rsp), %r11
	movq	136(%rsp), %rdx
	movl	%esi, (%rdi)
	movl	%r8d, (%r9)
	movq	144(%rsp), %rsi
	movq	152(%rsp), %r8
	movl	32(%rsp), %r10d
	movl	36(%rsp), %eax
	movl	40(%rsp), %ecx
	movl	44(%rsp), %edi
	movl	%r10d, (%r11)
	movl	%eax, (%rdx)
	movq	160(%rsp), %r10
	movq	168(%rsp), %rax
	movl	%ecx, (%rsi)
	movl	%edi, (%r8)
	movq	176(%rsp), %rcx
	movq	184(%rsp), %rdi
	movl	48(%rsp), %r9d
	movl	52(%rsp), %r11d
	movl	56(%rsp), %edx
	movl	60(%rsp), %esi
	movl	%r9d, (%r10)
	movl	%r11d, (%rax)
	movl	%edx, (%rcx)
	movl	%esi, (%rdi)
	movq	%rbp, %rsp
	cfi_def_cfa_register (%rsp)
	popq	%rbp
	cfi_adjust_cfa_offset (-8)
	cfi_restore (%rbp)
	ret
#else
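/* x32: pointers are 4 bytes, so all 8 sin-result pointers arrive in
   %ymm1 and all 8 cos-result pointers in %ymm2.  */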
	leal	8(%rsp), %r10d
	.cfi_def_cfa 10, 0
	andl	$-32, %esp
	pushq	-8(%r10d)
	pushq	%rbp
	.cfi_escape 0x10,0x6,0x2,0x76,0
	movl	%esp, %ebp
	pushq	%r12
	leal	-80(%rbp), %esi
	pushq	%r10
	.cfi_escape 0xf,0x3,0x76,0x70,0x6
	.cfi_escape 0x10,0xc,0x2,0x76,0x78
	leal	-112(%rbp), %edi
	movq	%rsi, %r12
	pushq	%rbx
	.cfi_escape 0x10,0x3,0x2,0x76,0x68
	movq	%rdi, %rbx
	subl	$184, %esp
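/* Local frame (relative to %rbp): input vector at -208, cos-result
   pointers at -176, sin-result pointers at -144, sin results at -112
   (cached in %rbx), cos results at -80 (cached in %r12).  */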
	vmovdqa	%ymm1, -144(%ebp)
	vmovdqa	%ymm2, -176(%ebp)
	vmovaps	%ymm0, -208(%ebp)
	vzeroupper
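/* First call: lanes 0-3.  %xmm0 is the low half of the input;
   %rdi/%rsi point at the sin/cos result buffers.  */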
	call	HIDDEN_JUMPTARGET(\callee)
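/* Second call: lanes 4-7.  Reload the high half of the input and
   advance both result slots by 16 bytes.  */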
	leal	16(%r12), %esi
	vmovups	-192(%ebp), %xmm0
	leal	16(%rbx), %edi
	call	HIDDEN_JUMPTARGET(\callee)
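/* Scatter: store each sin result through its pointer from %ymm1,
   then each cos result through its pointer from %ymm2.  */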
	movl	-144(%ebp), %eax
	vmovss	-112(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	movl	-140(%ebp), %eax
	vmovss	-108(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	movl	-136(%ebp), %eax
	vmovss	-104(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	movl	-132(%ebp), %eax
	vmovss	-100(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	movl	-128(%ebp), %eax
	vmovss	-96(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	movl	-124(%ebp), %eax
	vmovss	-92(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	movl	-120(%ebp), %eax
	vmovss	-88(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	movl	-116(%ebp), %eax
	vmovss	-84(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	movl	-176(%ebp), %eax
	vmovss	-80(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	movl	-172(%ebp), %eax
	vmovss	-76(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	movl	-168(%ebp), %eax
	vmovss	-72(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	movl	-164(%ebp), %eax
	vmovss	-68(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	movl	-160(%ebp), %eax
	vmovss	-64(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	movl	-156(%ebp), %eax
	vmovss	-60(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	movl	-152(%ebp), %eax
	vmovss	-56(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	movl	-148(%ebp), %eax
	vmovss	-52(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	addl	$184, %esp
	popq	%rbx
	popq	%r10
	.cfi_def_cfa 10, 0
	popq	%r12
	popq	%rbp
	leal	-8(%r10), %esp
	.cfi_def_cfa 7, 8
	ret
#endif
.endm

ENTRY (_ZGVdN8vvv_sincosf)
WRAPPER_IMPL_AVX2_fFF_vvv _ZGVbN4vl4l4_sincosf
END (_ZGVdN8vvv_sincosf)

#ifndef USE_MULTIARCH
libmvec_hidden_def (_ZGVdN8vvv_sincosf)
#endif
