1/* Function sincosf vectorized with SSE2.
2 Copyright (C) 2014-2022 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
18
19#include <sysdep.h>
20#include "svml_s_wrapper_impl.h"
21
	.text
/* _ZGVbN4vl4l4_sincosf: SSE2 ("b") variant, 4-lane ("N4"), one float
   vector in ("v") and two linear output pointers ("l4l4") for the sine
   and cosine results.  Expanded from the shared wrapper macro, which
   implements the vector function by calling scalar sincosf per lane.  */
ENTRY (_ZGVbN4vl4l4_sincosf)
WRAPPER_IMPL_SSE2_fFF sincosf
END (_ZGVbN4vl4l4_sincosf)
/* Intra-libmvec alias so internal callers bypass the PLT.  */
libmvec_hidden_def (_ZGVbN4vl4l4_sincosf)
27
/* SSE2 ISA version as wrapper to scalar (for vector
   function declared with #pragma omp declare simd notinbranch).
   "vvv" variant: the sine/cosine destinations arrive as VECTORS OF
   POINTERS (one pointer per lane), not as two linear arrays, so after
   the four scalar calls each result must be scattered through its own
   lane pointer.

   Register roles on entry (SysV AMD64 vector ABI):
     xmm0          = 4 input floats
     LP64:  xmm1:xmm2 = 4 x 64-bit sine-result pointers,
            xmm3:xmm4 = 4 x 64-bit cosine-result pointers
     x32:   xmm1 = 4 x 32-bit sine pointers, xmm2 = 4 x 32-bit cosine
            pointers (pointers are 32-bit under __ILP32__).  */
.macro WRAPPER_IMPL_SSE2_fFF_vvv callee
#ifndef __ILP32__
	/* Frame layout (120 bytes; rsp stays 16-aligned for the calls):
	     0(%rsp)..15   scratch: 4 sine results (one float per lane)
	     16(%rsp)..31  scratch: 4 cosine results
	     32(%rsp)..63  saved xmm1:xmm2 = sine pointer vector
	     64(%rsp)..95  saved xmm3:xmm4 = cosine pointer vector
	     96(%rsp)..111 saved xmm0 = input floats  */
	subq	$120, %rsp
	cfi_adjust_cfa_offset(120)
	movaps	%xmm0, 96(%rsp)
	lea	(%rsp), %rdi
	movdqa	%xmm1, 32(%rdi)
	lea	16(%rsp), %rsi
	movdqa	%xmm2, 32(%rsi)
	movdqa	%xmm3, 48(%rsi)
	movdqa	%xmm4, 64(%rsi)
	/* Lane 0: xmm0 low float is still the first input;
	   rdi/rsi already point at the lane-0 scratch slots.
	   Callee is scalar sincosf (float, float *sin, float *cos).  */
	call	JUMPTARGET(\callee)
	/* Lanes 1-3: reload the input float from the spill area and bump
	   the scratch pointers by 4 bytes per lane.  rdi/rsi/xmm0 are
	   call-clobbered, so they are re-set before every call.  */
	movss	100(%rsp), %xmm0
	lea	4(%rsp), %rdi
	lea	20(%rsp), %rsi
	call	JUMPTARGET(\callee)
	movss	104(%rsp), %xmm0
	lea	8(%rsp), %rdi
	lea	24(%rsp), %rsi
	call	JUMPTARGET(\callee)
	movss	108(%rsp), %xmm0
	lea	12(%rsp), %rdi
	lea	28(%rsp), %rsi
	call	JUMPTARGET(\callee)
	/* Scatter phase: load the 8 saved destination pointers and the 8
	   scratch results, then store each result through its lane's
	   pointer.  Loads are batched ahead of stores to overlap them.  */
	movq	32(%rsp), %rdx
	movq	40(%rsp), %rsi
	movq	48(%rsp), %r8
	movq	56(%rsp), %r10
	movl	(%rsp), %eax
	movl	4(%rsp), %ecx
	movl	8(%rsp), %edi
	movl	12(%rsp), %r9d
	movl	%eax, (%rdx)
	movl	%ecx, (%rsi)
	movq	64(%rsp), %rax
	movq	72(%rsp), %rcx
	movl	%edi, (%r8)
	movl	%r9d, (%r10)
	movq	80(%rsp), %rdi
	movq	88(%rsp), %r9
	movl	16(%rsp), %r11d
	movl	20(%rsp), %edx
	movl	24(%rsp), %esi
	movl	28(%rsp), %r8d
	movl	%r11d, (%rax)
	movl	%edx, (%rcx)
	movl	%esi, (%rdi)
	movl	%r8d, (%r9)
	addq	$120, %rsp
	cfi_adjust_cfa_offset(-120)
	ret
#else
	/* x32 path.  rbp/rbx (callee-saved) keep the scratch base
	   addresses live across the four scalar calls.  */
	pushq	%rbp
	.cfi_def_cfa_offset 16
	.cfi_offset 6, -16
	pushq	%rbx
	.cfi_def_cfa_offset 24
	.cfi_offset 3, -24
	/* Frame layout (88 bytes):
	     0(%esp)..15   saved xmm1 = 4 x 32-bit sine pointers
	     16(%esp)..31  saved xmm2 = 4 x 32-bit cosine pointers
	     32(%esp)..47  saved xmm0 = input floats
	     48(%esp)..63  scratch: 4 sine results (rdi -> 48, rbx)
	     64(%esp)..79  scratch: 4 cosine results (rsi -> 64, rbp)  */
	subl	$88, %esp
	.cfi_def_cfa_offset 112
	leal	64(%rsp), %esi
	movaps	%xmm1, (%esp)
	leal	48(%rsp), %edi
	movaps	%xmm2, 16(%esp)
	movq	%rsi, %rbp
	movq	%rdi, %rbx
	movaps	%xmm0, 32(%esp)
	/* Lane 0: xmm0 low float is the first input.  */
	call	JUMPTARGET(\callee)
	/* Lanes 1-3: reload next input from the spill at 32(%esp)+4*lane
	   (movups; only the low float matters to the scalar callee) and
	   step both scratch pointers by 4 bytes per lane.  */
	movups	36(%esp), %xmm0
	leal	4(%rbp), %esi
	leal	4(%rbx), %edi
	call	JUMPTARGET(\callee)
	movups	40(%esp), %xmm0
	leal	8(%rbp), %esi
	leal	8(%rbx), %edi
	call	JUMPTARGET(\callee)
	movups	44(%esp), %xmm0
	leal	12(%rbp), %esi
	leal	12(%rbx), %edi
	call	JUMPTARGET(\callee)
	/* Scatter: lane pointers 0/2 come from 64-bit loads (low dword
	   used as the address), lanes 1/3 are extracted from the saved
	   pointer vectors with pextrd.
	   NOTE(review): pextrd is SSE4.1, not SSE2 — presumably
	   acceptable on every x32-capable CPU this path targets, but
	   worth confirming against the ISA baseline.  */
	movq	(%esp), %rax
	movss	48(%esp), %xmm0
	movdqa	(%esp), %xmm4
	movdqa	16(%esp), %xmm7
	movss	%xmm0, (%eax)
	movss	52(%esp), %xmm0
	pextrd	$1, %xmm4, %eax
	movss	%xmm0, (%eax)
	movq	8(%esp), %rax
	movss	56(%esp), %xmm0
	movss	%xmm0, (%eax)
	movss	60(%esp), %xmm0
	pextrd	$3, %xmm4, %eax
	movss	%xmm0, (%eax)
	movq	16(%esp), %rax
	movss	64(%esp), %xmm0
	movss	%xmm0, (%eax)
	movss	68(%esp), %xmm0
	pextrd	$1, %xmm7, %eax
	movss	%xmm0, (%eax)
	movq	24(%esp), %rax
	movss	72(%esp), %xmm0
	movss	%xmm0, (%eax)
	movss	76(%esp), %xmm0
	pextrd	$3, %xmm7, %eax
	movss	%xmm0, (%eax)
	addl	$88, %esp
	.cfi_def_cfa_offset 24
	popq	%rbx
	.cfi_def_cfa_offset 16
	popq	%rbp
	.cfi_def_cfa_offset 8
	ret
#endif
.endm
145
/* _ZGVbN4vvv_sincosf: SSE2 4-lane variant taking per-lane destination
   POINTER VECTORS ("vvv"), expanded from the scatter wrapper macro.  */
ENTRY (_ZGVbN4vvv_sincosf)
WRAPPER_IMPL_SSE2_fFF_vvv sincosf
END (_ZGVbN4vvv_sincosf)

/* With multiarch enabled the hidden alias is provided by the selected
   IFUNC implementation instead of here.  */
#ifndef USE_MULTIARCH
 libmvec_hidden_def (_ZGVbN4vvv_sincosf)
#endif
153

source code of glibc/sysdeps/x86_64/fpu/svml_s_sincosf4_core.S