/* Function sincos vectorized with AVX-512. Wrapper to AVX2 version.
   Copyright (C) 2014-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include "svml_d_wrapper_impl.h"

	.text
ENTRY (_ZGVeN8vl8l8_sincos)
WRAPPER_IMPL_AVX512_fFF _ZGVdN4vl8l8_sincos
END (_ZGVeN8vl8l8_sincos)

/* AVX512 ISA version as wrapper to AVX2 ISA version (for vector
   function declared with #pragma omp declare simd notinbranch).  */
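/* Illustrative note, not part of the original source: a minimal, hedged
   sketch of how a caller may end up in _ZGVeN8vvv_sincos.  With glibc's
   <math.h> declaring sincos via "#pragma omp declare simd notinbranch"
   and a compiler targeting AVX-512 with OpenMP SIMD enabled (for example
   GCC with -O2 -fopenmp-simd -mavx512f -ffast-math), a loop such as the
   hypothetical helper below can be vectorized into calls to the 8-lane
   variant defined in this file; the exact flags and whether the compiler
   picks this entry point depend on the toolchain:

       #define _GNU_SOURCE
       #include <math.h>

       void
       vec_sincos (const double *x, double *s, double *c, int n)
       {
         for (int i = 0; i < n; i++)
           sincos (x[i], &s[i], &c[i]);
       }

   Because each lane carries its own destination addresses, the compiler
   passes vectors of pointers, hence the "vvv" signature.  */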
.macro WRAPPER_IMPL_AVX512_fFF_vvv callee
#ifndef __ILP32__
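	/* LP64 path.  On entry %zmm0 holds the eight double inputs, while
	   %zmm1 and %zmm2 hold eight 64-bit pointers each: the per-lane
	   destinations for the sine and cosine results, respectively.  */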
	pushq %rbp
	cfi_adjust_cfa_offset (8)
	cfi_rel_offset (%rbp, 0)
	movq %rsp, %rbp
	cfi_def_cfa_register (%rbp)
	andq $-64, %rsp
	subq $320, %rsp
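	/* 320-byte scratch area on the 64-byte-aligned stack:
	     0(%rsp)    sine results, lanes 0-3 (first callee call)
	     32(%rsp)   sine results, lanes 4-7 (second callee call)
	     64(%rsp)   cosine results, lanes 0-3
	     96(%rsp)   cosine results, lanes 4-7
	     128(%rsp)  saved %zmm1 (eight sine result pointers)
	     192(%rsp)  saved %zmm2 (eight cosine result pointers)
	     256(%rsp)  saved %zmm0 (eight inputs; high half at 288).  */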
	vmovups %zmm0, 256(%rsp)
	lea (%rsp), %rdi
	vmovups %zmm1, 128(%rdi)
	vmovups %zmm2, 192(%rdi)
	lea 64(%rsp), %rsi
	call HIDDEN_JUMPTARGET(\callee)
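	/* The first call consumed lanes 0-3 (the low half of %zmm0 is the
	   callee's %ymm0 argument).  Reload lanes 4-7 and advance the
	   result pointers to the second halves of the scratch buffers.  */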
	vmovdqu 288(%rsp), %ymm0
	lea 32(%rsp), %rdi
	lea 96(%rsp), %rsi
	call HIDDEN_JUMPTARGET(\callee)
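	/* Scatter the sixteen results: load each saved 64-bit destination
	   pointer from 128(%rsp)/192(%rsp) and store the matching double
	   computed at 0(%rsp)-120(%rsp) through it.  */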
	movq 128(%rsp), %rdx
	movq 192(%rsp), %rsi
	movq 136(%rsp), %r8
	movq 200(%rsp), %r10
	movq (%rsp), %rax
	movq 64(%rsp), %rcx
	movq 8(%rsp), %rdi
	movq 72(%rsp), %r9
	movq %rax, (%rdx)
	movq %rcx, (%rsi)
	movq 144(%rsp), %rax
	movq 208(%rsp), %rcx
	movq %rdi, (%r8)
	movq %r9, (%r10)
	movq 152(%rsp), %rdi
	movq 216(%rsp), %r9
	movq 16(%rsp), %r11
	movq 80(%rsp), %rdx
	movq 24(%rsp), %rsi
	movq 88(%rsp), %r8
	movq %r11, (%rax)
	movq %rdx, (%rcx)
	movq 160(%rsp), %r11
	movq 224(%rsp), %rdx
	movq %rsi, (%rdi)
	movq %r8, (%r9)
	movq 168(%rsp), %rsi
	movq 232(%rsp), %r8
	movq 32(%rsp), %r10
	movq 96(%rsp), %rax
	movq 40(%rsp), %rcx
	movq 104(%rsp), %rdi
	movq %r10, (%r11)
	movq %rax, (%rdx)
	movq 176(%rsp), %r10
	movq 240(%rsp), %rax
	movq %rcx, (%rsi)
	movq %rdi, (%r8)
	movq 184(%rsp), %rcx
	movq 248(%rsp), %rdi
	movq 48(%rsp), %r9
	movq 112(%rsp), %r11
	movq 56(%rsp), %rdx
	movq 120(%rsp), %rsi
	movq %r9, (%r10)
	movq %r11, (%rax)
	movq %rdx, (%rcx)
	movq %rsi, (%rdi)
	movq %rbp, %rsp
	cfi_def_cfa_register (%rsp)
	popq %rbp
	cfi_adjust_cfa_offset (-8)
	cfi_restore (%rbp)
	ret
#else
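	/* x32 (ILP32) path.  Pointers are 32 bits wide, so the eight sine
	   and cosine destination pointers arrive packed in %ymm1 and %ymm2,
	   and the results are scattered below one double at a time with
	   vmovsd through those 32-bit addresses.  */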
	leal 8(%rsp), %r10d
	.cfi_def_cfa 10, 0
	andl $-64, %esp
	pushq -8(%r10d)
	pushq %rbp
	.cfi_escape 0x10,0x6,0x2,0x76,0
	movl %esp, %ebp
	pushq %r12
	leal -112(%rbp), %esi
	pushq %r10
	.cfi_escape 0xf,0x3,0x76,0x70,0x6
	.cfi_escape 0x10,0xc,0x2,0x76,0x78
	leal -176(%rbp), %edi
	movq %rsi, %r12
	pushq %rbx
	.cfi_escape 0x10,0x3,0x2,0x76,0x68
	movq %rdi, %rbx
	subl $280, %esp
	vmovdqa %ymm1, -208(%ebp)
	vmovdqa %ymm2, -240(%ebp)
	vmovapd %zmm0, -304(%ebp)
	call HIDDEN_JUMPTARGET(\callee)
	leal 32(%r12), %esi
	vmovupd -272(%ebp), %ymm0
	leal 32(%rbx), %edi
	call HIDDEN_JUMPTARGET(\callee)
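	/* Both halves are done: sine results sit at -176(%ebp), cosine
	   results at -112(%ebp).  Load each 32-bit destination pointer
	   saved at -208(%ebp) (sine) and -240(%ebp) (cosine) and store
	   the corresponding double through it.  */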
	movl -208(%ebp), %eax
	vmovsd -176(%ebp), %xmm0
	vmovsd %xmm0, (%eax)
	movl -204(%ebp), %eax
	vmovsd -168(%ebp), %xmm0
	vmovsd %xmm0, (%eax)
	movl -200(%ebp), %eax
	vmovsd -160(%ebp), %xmm0
	vmovsd %xmm0, (%eax)
	movl -196(%ebp), %eax
	vmovsd -152(%ebp), %xmm0
	vmovsd %xmm0, (%eax)
	movl -192(%ebp), %eax
	vmovsd -144(%ebp), %xmm0
	vmovsd %xmm0, (%eax)
	movl -188(%ebp), %eax
	vmovsd -136(%ebp), %xmm0
	vmovsd %xmm0, (%eax)
	movl -184(%ebp), %eax
	vmovsd -128(%ebp), %xmm0
	vmovsd %xmm0, (%eax)
	movl -180(%ebp), %eax
	vmovsd -120(%ebp), %xmm0
	vmovsd %xmm0, (%eax)
	movl -240(%ebp), %eax
	vmovsd -112(%ebp), %xmm0
	vmovsd %xmm0, (%eax)
	movl -236(%ebp), %eax
	vmovsd -104(%ebp), %xmm0
	vmovsd %xmm0, (%eax)
	movl -232(%ebp), %eax
	vmovsd -96(%ebp), %xmm0
	vmovsd %xmm0, (%eax)
	movl -228(%ebp), %eax
	vmovsd -88(%ebp), %xmm0
	vmovsd %xmm0, (%eax)
	movl -224(%ebp), %eax
	vmovsd -80(%ebp), %xmm0
	vmovsd %xmm0, (%eax)
	movl -220(%ebp), %eax
	vmovsd -72(%ebp), %xmm0
	vmovsd %xmm0, (%eax)
	movl -216(%ebp), %eax
	vmovsd -64(%ebp), %xmm0
	vmovsd %xmm0, (%eax)
	movl -212(%ebp), %eax
	vmovsd -56(%ebp), %xmm0
	vmovsd %xmm0, (%eax)
	addl $280, %esp
	popq %rbx
	popq %r10
	.cfi_def_cfa 10, 0
	popq %r12
	popq %rbp
	leal -8(%r10), %esp
	.cfi_def_cfa 7, 8
	ret
#endif
.endm

ENTRY (_ZGVeN8vvv_sincos)
WRAPPER_IMPL_AVX512_fFF_vvv _ZGVdN4vl8l8_sincos
END (_ZGVeN8vvv_sincos)
