/* Function sincos vectorized in AVX ISA as wrapper to SSE4 ISA version.
   Copyright (C) 2014-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include "svml_d_wrapper_impl.h"

	.text
ENTRY (_ZGVcN4vl8l8_sincos)
WRAPPER_IMPL_AVX_fFF _ZGVbN2vl8l8_sincos
END (_ZGVcN4vl8l8_sincos)
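
/* Note on the symbol name: libmvec's vector-function mangling encodes
   the variant in the name: '_ZGV' prefix, 'c' for the AVX ISA, 'N' for
   notinbranch (unmasked), '4' lanes, then one token per argument ('v'
   vector, 'l8' linear pointer with stride 8, i.e. sizeof (double)).
   WRAPPER_IMPL_AVX_fFF comes from svml_d_wrapper_impl.h; it splits the
   four doubles in %ymm0 into two halves and runs the SSE4 two-lane
   kernel _ZGVbN2vl8l8_sincos on each half.

   A minimal C-level sketch of a call (illustrative only; the exact
   prototype is an assumption, not a glibc-documented interface;
   compile with -mavx):

	#include <immintrin.h>
	// hypothetical prototype: 4 inputs in a YMM register, results
	// stored to two arrays of 4 doubles
	extern void _ZGVcN4vl8l8_sincos (__m256d x, double *s, double *c);

	void
	use (void)
	{
	  double s[4], c[4];
	  _ZGVcN4vl8l8_sincos (_mm256_set_pd (4.0, 3.0, 2.0, 1.0), s, c);
	}  */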

/* AVX ISA version as wrapper to SSE ISA version (for vector
   function declared with #pragma omp declare simd notinbranch).  */
.macro WRAPPER_IMPL_AVX_fFF_vvv callee
#ifndef __ILP32__
	pushq	%rbp
	movq	%rsp, %rbp
	/* Align the stack to 32 bytes and reserve a scratch frame.  */
	andq	$-32, %rsp
	subq	$160, %rsp
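	/* Scratch-frame layout used below (offsets from %rsp; this
	   comment is a reading of the code, not from the original
	   source):
	     0 ... 31		sin results (4 doubles)
	     32 ... 63		cos results (4 doubles)
	     64 ... 95		saved input vector (%ymm0)
	     96 ... 127		4 sin result pointers (%xmm1, %xmm2)
	     128 ... 159	4 cos result pointers (%xmm3, %xmm4).  */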
	vmovupd	%ymm0, 64(%rsp)
	lea	(%rsp), %rdi
	vmovdqu	%xmm1, 96(%rdi)
	vmovdqu	%xmm2, 112(%rdi)
	vmovdqu	%xmm3, 128(%rdi)
	vmovdqu	%xmm4, 144(%rdi)
	lea	32(%rsp), %rsi
	vzeroupper
	/* First call: low half of the input; sin results go to
	   0(%rsp), cos results to 32(%rsp).  */
	call	HIDDEN_JUMPTARGET(\callee)
	/* Second call: high half of the input (saved at 80(%rsp));
	   sin results go to 16(%rsp), cos results to 48(%rsp).  */
	vmovdqu	80(%rsp), %xmm0
	lea	16(%rsp), %rdi
	lea	48(%rsp), %rsi
	call	HIDDEN_JUMPTARGET(\callee)
	/* Scatter the eight results through the caller-supplied
	   pointers.  */
	movq	96(%rsp), %rdx
	movq	104(%rsp), %rsi
	movq	112(%rsp), %r8
	movq	120(%rsp), %r10
	movq	(%rsp), %rax
	movq	8(%rsp), %rcx
	movq	16(%rsp), %rdi
	movq	24(%rsp), %r9
	movq	%rax, (%rdx)
	movq	%rcx, (%rsi)
	movq	128(%rsp), %rax
	movq	136(%rsp), %rcx
	movq	%rdi, (%r8)
	movq	%r9, (%r10)
	movq	144(%rsp), %rdi
	movq	152(%rsp), %r9
	movq	32(%rsp), %r11
	movq	40(%rsp), %rdx
	movq	48(%rsp), %rsi
	movq	56(%rsp), %r8
	movq	%r11, (%rax)
	movq	%rdx, (%rcx)
	movq	%rsi, (%rdi)
	movq	%r8, (%r9)
	movq	%rbp, %rsp
	popq	%rbp
	ret
#else
	/* x32: pointers are 32 bits, so all four sin pointers arrive
	   in %xmm1 and all four cos pointers in %xmm2.  */
	leal	8(%rsp), %r10d
	.cfi_def_cfa 10, 0
	andl	$-32, %esp
	pushq	-8(%r10d)
	pushq	%rbp
	.cfi_escape 0x10,0x6,0x2,0x76,0
	movl	%esp, %ebp
	pushq	%r12
	leal	-80(%rbp), %esi
	pushq	%r10
	.cfi_escape 0xf,0x3,0x76,0x70,0x6
	.cfi_escape 0x10,0xc,0x2,0x76,0x78
	leal	-112(%rbp), %edi
	movq	%rsi, %r12
	pushq	%rbx
	.cfi_escape 0x10,0x3,0x2,0x76,0x68
	movq	%rdi, %rbx
	subl	$152, %esp
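	/* Frame layout used below (offsets from %rbp; this comment is
	   a reading of the code, not from the original source):
	     -176	saved input vector (%ymm0)
	     -144	4 cos result pointers (%xmm2)
	     -128	4 sin result pointers (%xmm1)
	     -112	sin results (4 doubles, base kept in %rbx)
	     -80	cos results (4 doubles, base kept in %r12).  */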
	vmovaps	%xmm1, -128(%ebp)
	vmovaps	%xmm2, -144(%ebp)
	vmovapd	%ymm0, -176(%ebp)
	vzeroupper
	/* First call: low half of the input.  */
	call	HIDDEN_JUMPTARGET(\callee)
	leal	16(%r12), %esi
	vmovupd	-160(%ebp), %xmm0
	leal	16(%rbx), %edi
	/* Second call: high half of the input.  */
	call	HIDDEN_JUMPTARGET(\callee)
	/* Scatter the results through the 32-bit pointers: each movq
	   loads a pair of pointers and uses the even-indexed one via
	   %eax; vpextrd extracts the odd-indexed ones.  */
	movq	-128(%ebp), %rax
	vmovsd	-112(%ebp), %xmm0
	vmovdqa	-128(%ebp), %xmm5
	vmovdqa	-144(%ebp), %xmm1
	vmovsd	%xmm0, (%eax)
	vmovsd	-104(%ebp), %xmm0
	vpextrd	$1, %xmm5, %eax
	vmovsd	%xmm0, (%eax)
	movq	-120(%ebp), %rax
	vmovsd	-96(%ebp), %xmm0
	vmovsd	%xmm0, (%eax)
	vmovsd	-88(%ebp), %xmm0
	vpextrd	$3, %xmm5, %eax
	vmovsd	%xmm0, (%eax)
	movq	-144(%ebp), %rax
	vmovsd	-80(%ebp), %xmm0
	vmovsd	%xmm0, (%eax)
	vmovsd	-72(%ebp), %xmm0
	vpextrd	$1, %xmm1, %eax
	vmovsd	%xmm0, (%eax)
	movq	-136(%ebp), %rax
	vmovsd	-64(%ebp), %xmm0
	vmovsd	%xmm0, (%eax)
	vmovsd	-56(%ebp), %xmm0
	vpextrd	$3, %xmm1, %eax
	vmovsd	%xmm0, (%eax)
	addl	$152, %esp
	popq	%rbx
	popq	%r10
	.cfi_def_cfa 10, 0
	popq	%r12
	popq	%rbp
	leal	-8(%r10), %esp
	.cfi_def_cfa 7, 8
	ret
#endif
.endm

ENTRY (_ZGVcN4vvv_sincos)
WRAPPER_IMPL_AVX_fFF_vvv _ZGVbN2vl8l8_sincos
END (_ZGVcN4vvv_sincos)
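
/* The vvv variant above exists because, for a function declared with
   '#pragma omp declare simd notinbranch', the compiler passes even the
   pointer arguments as vectors: four 64-bit pointers in %xmm1-%xmm4 on
   x86_64, four 32-bit pointers in %xmm1/%xmm2 on x32.  A sketch of C
   code that a vectorizing compiler could turn into calls to
   _ZGVcN4vvv_sincos (illustrative only; the helper 'apply' is
   hypothetical, and the exact flags, e.g. -O2 -mavx -fopenmp-simd,
   depend on the compiler):

	#pragma omp declare simd notinbranch
	extern void sincos (double x, double *s, double *c);

	void
	apply (const double *x, double *s, double *c, int n)
	{
	  // with no linear clause on the pragma, the pointer
	  // arguments are vectorized too, hence the 'vvv' mangling
	  for (int i = 0; i < n; i++)
	    sincos (x[i], &s[i], &c[i]);
	}  */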