1/* Function sincosf vectorized with SSE2.
2 Copyright (C) 2014-2022 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
18
19#include <sysdep.h>
20#include "svml_s_wrapper_impl.h"
21
	.text
/* _ZGVbN4vl4l4_sincosf: SSE2 ("b") variant, 4-lane ("N4"), one float
   vector in ("v") and two linear output pointers ("l4l4") for the sine
   and cosine results.  Expanded from the shared wrapper macro, which
   implements the vector function by calling scalar sincosf per lane.  */
ENTRY (_ZGVbN4vl4l4_sincosf)
WRAPPER_IMPL_SSE2_fFF sincosf
END (_ZGVbN4vl4l4_sincosf)
/* Intra-libmvec alias so internal callers bypass the PLT.  */
libmvec_hidden_def (_ZGVbN4vl4l4_sincosf)
27
/* SSE2 ISA version as wrapper to scalar (for vector
   function declared with #pragma omp declare simd notinbranch).
   "vvv" variant: the sine/cosine destinations arrive as VECTORS OF
   POINTERS (one pointer per lane), not as two linear arrays, so after
   the four scalar calls each result must be scattered through its own
   lane pointer.

   Register roles on entry (SysV AMD64 vector ABI):
     xmm0          = 4 input floats
     LP64:  xmm1:xmm2 = 4 x 64-bit sine-result pointers,
            xmm3:xmm4 = 4 x 64-bit cosine-result pointers
     x32:   xmm1 = 4 x 32-bit sine pointers, xmm2 = 4 x 32-bit cosine
            pointers (pointers are 32-bit under __ILP32__).  */
.macro WRAPPER_IMPL_SSE2_fFF_vvv callee
#ifndef __ILP32__
	/* Frame layout (120 bytes; rsp stays 16-aligned for the calls):
	     0(%rsp)..15   scratch: 4 sine results (one float per lane)
	     16(%rsp)..31  scratch: 4 cosine results
	     32(%rsp)..63  saved xmm1:xmm2 = sine pointer vector
	     64(%rsp)..95  saved xmm3:xmm4 = cosine pointer vector
	     96(%rsp)..111 saved xmm0 = input floats  */
	subq	$120, %rsp
	cfi_adjust_cfa_offset(120)
	movaps	%xmm0, 96(%rsp)
	lea	(%rsp), %rdi
	movdqa	%xmm1, 32(%rdi)
	lea	16(%rsp), %rsi
	movdqa	%xmm2, 32(%rsi)
	movdqa	%xmm3, 48(%rsi)
	movdqa	%xmm4, 64(%rsi)
	/* Lane 0: xmm0 low float is still the first input;
	   rdi/rsi already point at the lane-0 scratch slots.
	   Callee is scalar sincosf (float, float *sin, float *cos).  */
	call	JUMPTARGET(\callee)
	/* Lanes 1-3: reload the input float from the spill area and bump
	   the scratch pointers by 4 bytes per lane.  rdi/rsi/xmm0 are
	   call-clobbered, so they are re-set before every call.  */
	movss	100(%rsp), %xmm0
	lea	4(%rsp), %rdi
	lea	20(%rsp), %rsi
	call	JUMPTARGET(\callee)
	movss	104(%rsp), %xmm0
	lea	8(%rsp), %rdi
	lea	24(%rsp), %rsi
	call	JUMPTARGET(\callee)
	movss	108(%rsp), %xmm0
	lea	12(%rsp), %rdi
	lea	28(%rsp), %rsi
	call	JUMPTARGET(\callee)
	/* Scatter phase: load the 8 saved destination pointers and the 8
	   scratch results, then store each result through its lane's
	   pointer.  Loads are batched ahead of stores to overlap them.  */
	movq	32(%rsp), %rdx
	movq	40(%rsp), %rsi
	movq	48(%rsp), %r8
	movq	56(%rsp), %r10
	movl	(%rsp), %eax
	movl	4(%rsp), %ecx
	movl	8(%rsp), %edi
	movl	12(%rsp), %r9d
	movl	%eax, (%rdx)
	movl	%ecx, (%rsi)
	movq	64(%rsp), %rax
	movq	72(%rsp), %rcx
	movl	%edi, (%r8)
	movl	%r9d, (%r10)
	movq	80(%rsp), %rdi
	movq	88(%rsp), %r9
	movl	16(%rsp), %r11d
	movl	20(%rsp), %edx
	movl	24(%rsp), %esi
	movl	28(%rsp), %r8d
	movl	%r11d, (%rax)
	movl	%edx, (%rcx)
	movl	%esi, (%rdi)
	movl	%r8d, (%r9)
	addq	$120, %rsp
	cfi_adjust_cfa_offset(-120)
	ret
#else
	/* x32 path.  rbp/rbx (callee-saved) keep the scratch base
	   addresses live across the four scalar calls.  */
	pushq	%rbp
	.cfi_def_cfa_offset 16
	.cfi_offset 6, -16
	pushq	%rbx
	.cfi_def_cfa_offset 24
	.cfi_offset 3, -24
	/* Frame layout (88 bytes):
	     0(%esp)..15   saved xmm1 = 4 x 32-bit sine pointers
	     16(%esp)..31  saved xmm2 = 4 x 32-bit cosine pointers
	     32(%esp)..47  saved xmm0 = input floats
	     48(%esp)..63  scratch: 4 sine results (rdi -> 48, rbx)
	     64(%esp)..79  scratch: 4 cosine results (rsi -> 64, rbp)  */
	subl	$88, %esp
	.cfi_def_cfa_offset 112
	leal	64(%rsp), %esi
	movaps	%xmm1, (%esp)
	leal	48(%rsp), %edi
	movaps	%xmm2, 16(%esp)
	movq	%rsi, %rbp
	movq	%rdi, %rbx
	movaps	%xmm0, 32(%esp)
	/* Lane 0: xmm0 low float is the first input.  */
	call	JUMPTARGET(\callee)
	/* Lanes 1-3: reload next input from the spill at 32(%esp)+4*lane
	   (movups; only the low float matters to the scalar callee) and
	   step both scratch pointers by 4 bytes per lane.  */
	movups	36(%esp), %xmm0
	leal	4(%rbp), %esi
	leal	4(%rbx), %edi
	call	JUMPTARGET(\callee)
	movups	40(%esp), %xmm0
	leal	8(%rbp), %esi
	leal	8(%rbx), %edi
	call	JUMPTARGET(\callee)
	movups	44(%esp), %xmm0
	leal	12(%rbp), %esi
	leal	12(%rbx), %edi
	call	JUMPTARGET(\callee)
	/* Scatter: lane pointers 0/2 come from 64-bit loads (low dword
	   used as the address), lanes 1/3 are extracted from the saved
	   pointer vectors with pextrd.
	   NOTE(review): pextrd is SSE4.1, not SSE2 — presumably
	   acceptable on every x32-capable CPU this path targets, but
	   worth confirming against the ISA baseline.  */
	movq	(%esp), %rax
	movss	48(%esp), %xmm0
	movdqa	(%esp), %xmm4
	movdqa	16(%esp), %xmm7
	movss	%xmm0, (%eax)
	movss	52(%esp), %xmm0
	pextrd	$1, %xmm4, %eax
	movss	%xmm0, (%eax)
	movq	8(%esp), %rax
	movss	56(%esp), %xmm0
	movss	%xmm0, (%eax)
	movss	60(%esp), %xmm0
	pextrd	$3, %xmm4, %eax
	movss	%xmm0, (%eax)
	movq	16(%esp), %rax
	movss	64(%esp), %xmm0
	movss	%xmm0, (%eax)
	movss	68(%esp), %xmm0
	pextrd	$1, %xmm7, %eax
	movss	%xmm0, (%eax)
	movq	24(%esp), %rax
	movss	72(%esp), %xmm0
	movss	%xmm0, (%eax)
	movss	76(%esp), %xmm0
	pextrd	$3, %xmm7, %eax
	movss	%xmm0, (%eax)
	addl	$88, %esp
	.cfi_def_cfa_offset 24
	popq	%rbx
	.cfi_def_cfa_offset 16
	popq	%rbp
	.cfi_def_cfa_offset 8
	ret
#endif
.endm
145
/* _ZGVbN4vvv_sincosf: SSE2 4-lane variant taking per-lane destination
   POINTER VECTORS ("vvv"), expanded from the scatter wrapper macro.  */
ENTRY (_ZGVbN4vvv_sincosf)
WRAPPER_IMPL_SSE2_fFF_vvv sincosf
END (_ZGVbN4vvv_sincosf)

/* With multiarch enabled the hidden alias is provided by the selected
   IFUNC implementation instead of here.  */
#ifndef USE_MULTIARCH
 libmvec_hidden_def (_ZGVbN4vvv_sincosf)
#endif
153

source code of glibc/sysdeps/x86_64/fpu/svml_s_sincosf4_core.S