/* Function sincos vectorized in AVX ISA as wrapper to SSE4 ISA version.
   Copyright (C) 2014-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include "svml_d_wrapper_impl.h"

	.text
ENTRY (_ZGVcN4vl8l8_sincos)
WRAPPER_IMPL_AVX_fFF _ZGVbN2vl8l8_sincos
END (_ZGVcN4vl8l8_sincos)
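
/* Note on the symbol name: libmvec's vector-function mangling encodes
   the variant in the name: '_ZGV' prefix, 'c' for the AVX ISA, 'N' for
   notinbranch (unmasked), '4' lanes, then one token per argument ('v'
   vector, 'l8' linear pointer with stride 8, i.e. sizeof (double)).
   WRAPPER_IMPL_AVX_fFF comes from svml_d_wrapper_impl.h; it splits the
   four doubles in %ymm0 into two halves and runs the SSE4 two-lane
   kernel _ZGVbN2vl8l8_sincos on each half.

   A minimal C-level sketch of a call (illustrative only; the exact
   prototype is an assumption, not a glibc-documented interface;
   compile with -mavx):

	#include <immintrin.h>
	// hypothetical prototype: 4 inputs in a YMM register, results
	// stored to two arrays of 4 doubles
	extern void _ZGVcN4vl8l8_sincos (__m256d x, double *s, double *c);

	void
	use (void)
	{
	  double s[4], c[4];
	  _ZGVcN4vl8l8_sincos (_mm256_set_pd (4.0, 3.0, 2.0, 1.0), s, c);
	}  */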

/* AVX ISA version as wrapper to SSE ISA version (for vector
   function declared with #pragma omp declare simd notinbranch).  */
.macro WRAPPER_IMPL_AVX_fFF_vvv callee
#ifndef __ILP32__
	pushq	%rbp
	movq	%rsp, %rbp
	/* Align the stack to 32 bytes and reserve a scratch frame.  */
	andq	$-32, %rsp
	subq	$160, %rsp
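	/* Scratch-frame layout used below (offsets from %rsp; this
	   comment is a reading of the code, not from the original
	   source):
	     0 ... 31		sin results (4 doubles)
	     32 ... 63		cos results (4 doubles)
	     64 ... 95		saved input vector (%ymm0)
	     96 ... 127		4 sin result pointers (%xmm1, %xmm2)
	     128 ... 159	4 cos result pointers (%xmm3, %xmm4).  */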
	vmovupd	%ymm0, 64(%rsp)
	lea	(%rsp), %rdi
	vmovdqu	%xmm1, 96(%rdi)
	vmovdqu	%xmm2, 112(%rdi)
	vmovdqu	%xmm3, 128(%rdi)
	vmovdqu	%xmm4, 144(%rdi)
	lea	32(%rsp), %rsi
	vzeroupper
	/* First call: low half of the input; sin results go to
	   0(%rsp), cos results to 32(%rsp).  */
	call	HIDDEN_JUMPTARGET(\callee)
	/* Second call: high half of the input (saved at 80(%rsp));
	   sin results go to 16(%rsp), cos results to 48(%rsp).  */
	vmovdqu	80(%rsp), %xmm0
	lea	16(%rsp), %rdi
	lea	48(%rsp), %rsi
	call	HIDDEN_JUMPTARGET(\callee)
	/* Scatter the eight results through the caller-supplied
	   pointers.  */
	movq	96(%rsp), %rdx
	movq	104(%rsp), %rsi
	movq	112(%rsp), %r8
	movq	120(%rsp), %r10
	movq	(%rsp), %rax
	movq	8(%rsp), %rcx
	movq	16(%rsp), %rdi
	movq	24(%rsp), %r9
	movq	%rax, (%rdx)
	movq	%rcx, (%rsi)
	movq	128(%rsp), %rax
	movq	136(%rsp), %rcx
	movq	%rdi, (%r8)
	movq	%r9, (%r10)
	movq	144(%rsp), %rdi
	movq	152(%rsp), %r9
	movq	32(%rsp), %r11
	movq	40(%rsp), %rdx
	movq	48(%rsp), %rsi
	movq	56(%rsp), %r8
	movq	%r11, (%rax)
	movq	%rdx, (%rcx)
	movq	%rsi, (%rdi)
	movq	%r8, (%r9)
	movq	%rbp, %rsp
	popq	%rbp
	ret
#else
	/* x32: pointers are 32 bits, so all four sin pointers arrive
	   in %xmm1 and all four cos pointers in %xmm2.  */
	leal	8(%rsp), %r10d
	.cfi_def_cfa 10, 0
	andl	$-32, %esp
	pushq	-8(%r10d)
	pushq	%rbp
	.cfi_escape 0x10,0x6,0x2,0x76,0
	movl	%esp, %ebp
	pushq	%r12
	leal	-80(%rbp), %esi
	pushq	%r10
	.cfi_escape 0xf,0x3,0x76,0x70,0x6
	.cfi_escape 0x10,0xc,0x2,0x76,0x78
	leal	-112(%rbp), %edi
	movq	%rsi, %r12
	pushq	%rbx
	.cfi_escape 0x10,0x3,0x2,0x76,0x68
	movq	%rdi, %rbx
	subl	$152, %esp
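	/* Frame layout used below (offsets from %rbp; this comment is
	   a reading of the code, not from the original source):
	     -176	saved input vector (%ymm0)
	     -144	4 cos result pointers (%xmm2)
	     -128	4 sin result pointers (%xmm1)
	     -112	sin results (4 doubles, base kept in %rbx)
	     -80	cos results (4 doubles, base kept in %r12).  */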
	vmovaps	%xmm1, -128(%ebp)
	vmovaps	%xmm2, -144(%ebp)
	vmovapd	%ymm0, -176(%ebp)
	vzeroupper
	/* First call: low half of the input.  */
	call	HIDDEN_JUMPTARGET(\callee)
	leal	16(%r12), %esi
	vmovupd	-160(%ebp), %xmm0
	leal	16(%rbx), %edi
	/* Second call: high half of the input.  */
	call	HIDDEN_JUMPTARGET(\callee)
	/* Scatter the results through the 32-bit pointers: each movq
	   loads a pair of pointers and uses the even-indexed one via
	   %eax; vpextrd extracts the odd-indexed ones.  */
	movq	-128(%ebp), %rax
	vmovsd	-112(%ebp), %xmm0
	vmovdqa	-128(%ebp), %xmm5
	vmovdqa	-144(%ebp), %xmm1
	vmovsd	%xmm0, (%eax)
	vmovsd	-104(%ebp), %xmm0
	vpextrd	$1, %xmm5, %eax
	vmovsd	%xmm0, (%eax)
	movq	-120(%ebp), %rax
	vmovsd	-96(%ebp), %xmm0
	vmovsd	%xmm0, (%eax)
	vmovsd	-88(%ebp), %xmm0
	vpextrd	$3, %xmm5, %eax
	vmovsd	%xmm0, (%eax)
	movq	-144(%ebp), %rax
	vmovsd	-80(%ebp), %xmm0
	vmovsd	%xmm0, (%eax)
	vmovsd	-72(%ebp), %xmm0
	vpextrd	$1, %xmm1, %eax
	vmovsd	%xmm0, (%eax)
	movq	-136(%ebp), %rax
	vmovsd	-64(%ebp), %xmm0
	vmovsd	%xmm0, (%eax)
	vmovsd	-56(%ebp), %xmm0
	vpextrd	$3, %xmm1, %eax
	vmovsd	%xmm0, (%eax)
	addl	$152, %esp
	popq	%rbx
	popq	%r10
	.cfi_def_cfa 10, 0
	popq	%r12
	popq	%rbp
	leal	-8(%r10), %esp
	.cfi_def_cfa 7, 8
	ret
#endif
.endm

ENTRY (_ZGVcN4vvv_sincos)
WRAPPER_IMPL_AVX_fFF_vvv _ZGVbN2vl8l8_sincos
END (_ZGVcN4vvv_sincos)
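
/* The vvv variant above exists because, for a function declared with
   '#pragma omp declare simd notinbranch', the compiler passes even the
   pointer arguments as vectors: four 64-bit pointers in %xmm1-%xmm4 on
   x86_64, four 32-bit pointers in %xmm1/%xmm2 on x32.  A sketch of C
   code that a vectorizing compiler could turn into calls to
   _ZGVcN4vvv_sincos (illustrative only; the helper 'apply' is
   hypothetical, and the exact flags, e.g. -O2 -mavx -fopenmp-simd,
   depend on the compiler):

	#pragma omp declare simd notinbranch
	extern void sincos (double x, double *s, double *c);

	void
	apply (const double *x, double *s, double *c, int n)
	{
	  // with no linear clause on the pragma, the pointer
	  // arguments are vectorized too, hence the 'vvv' mangling
	  for (int i = 0; i < n; i++)
	    sincos (x[i], &s[i], &c[i]);
	}  */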