Warning: This file is not a C or C++ file. It does not have highlighting.
1 | /*===---- fma4intrin.h - FMA4 intrinsics -----------------------------------=== |
---|---|
2 | * |
3 | * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | * See https://llvm.org/LICENSE.txt for license information. |
5 | * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | * |
7 | *===-----------------------------------------------------------------------=== |
8 | */ |
9 | |
10 | #ifndef __X86INTRIN_H |
11 | #error "Never use <fma4intrin.h> directly; include <x86intrin.h> instead." |
12 | #endif |
13 | |
14 | #ifndef __FMA4INTRIN_H |
15 | #define __FMA4INTRIN_H |
16 | |
17 | #include <pmmintrin.h> |
18 | |
19 | /* Default attribute sets applied to every intrinsic defined in this file: each is marked __always_inline__ and __nodebug__, is built for the "fma4" target, and declares a minimum vector width of 128 bits (ATTRS128) or 256 bits (ATTRS256). */ |
20 | #define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("fma4"), __min_vector_width__(128))) |
21 | #define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("fma4"), __min_vector_width__(256))) |
22 | |
23 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
24 | _mm_macc_ps(__m128 __A, __m128 __B, __m128 __C) |
25 | { |
26 | return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); |
27 | } |
28 | |
29 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
30 | _mm_macc_pd(__m128d __A, __m128d __B, __m128d __C) |
31 | { |
32 | return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C); |
33 | } |
34 | |
35 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
36 | _mm_macc_ss(__m128 __A, __m128 __B, __m128 __C) |
37 | { |
38 | return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); |
39 | } |
40 | |
41 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
42 | _mm_macc_sd(__m128d __A, __m128d __B, __m128d __C) |
43 | { |
44 | return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, (__v2df)__C); |
45 | } |
46 | |
47 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
48 | _mm_msub_ps(__m128 __A, __m128 __B, __m128 __C) |
49 | { |
50 | return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); |
51 | } |
52 | |
53 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
54 | _mm_msub_pd(__m128d __A, __m128d __B, __m128d __C) |
55 | { |
56 | return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, -(__v2df)__C); |
57 | } |
58 | |
59 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
60 | _mm_msub_ss(__m128 __A, __m128 __B, __m128 __C) |
61 | { |
62 | return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); |
63 | } |
64 | |
65 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
66 | _mm_msub_sd(__m128d __A, __m128d __B, __m128d __C) |
67 | { |
68 | return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, -(__v2df)__C); |
69 | } |
70 | |
71 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
72 | _mm_nmacc_ps(__m128 __A, __m128 __B, __m128 __C) |
73 | { |
74 | return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C); |
75 | } |
76 | |
77 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
78 | _mm_nmacc_pd(__m128d __A, __m128d __B, __m128d __C) |
79 | { |
80 | return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, (__v2df)__C); |
81 | } |
82 | |
83 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
84 | _mm_nmacc_ss(__m128 __A, __m128 __B, __m128 __C) |
85 | { |
86 | return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C); |
87 | } |
88 | |
89 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
90 | _mm_nmacc_sd(__m128d __A, __m128d __B, __m128d __C) |
91 | { |
92 | return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B, (__v2df)__C); |
93 | } |
94 | |
95 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
96 | _mm_nmsub_ps(__m128 __A, __m128 __B, __m128 __C) |
97 | { |
98 | return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); |
99 | } |
100 | |
101 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
102 | _mm_nmsub_pd(__m128d __A, __m128d __B, __m128d __C) |
103 | { |
104 | return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C); |
105 | } |
106 | |
107 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
108 | _mm_nmsub_ss(__m128 __A, __m128 __B, __m128 __C) |
109 | { |
110 | return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); |
111 | } |
112 | |
113 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
114 | _mm_nmsub_sd(__m128d __A, __m128d __B, __m128d __C) |
115 | { |
116 | return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C); |
117 | } |
118 | |
119 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
120 | _mm_maddsub_ps(__m128 __A, __m128 __B, __m128 __C) |
121 | { |
122 | return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); |
123 | } |
124 | |
125 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
126 | _mm_maddsub_pd(__m128d __A, __m128d __B, __m128d __C) |
127 | { |
128 | return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C); |
129 | } |
130 | |
131 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
132 | _mm_msubadd_ps(__m128 __A, __m128 __B, __m128 __C) |
133 | { |
134 | return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); |
135 | } |
136 | |
137 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
138 | _mm_msubadd_pd(__m128d __A, __m128d __B, __m128d __C) |
139 | { |
140 | return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, -(__v2df)__C); |
141 | } |
142 | |
143 | static __inline__ __m256 __DEFAULT_FN_ATTRS256 |
144 | _mm256_macc_ps(__m256 __A, __m256 __B, __m256 __C) |
145 | { |
146 | return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); |
147 | } |
148 | |
149 | static __inline__ __m256d __DEFAULT_FN_ATTRS256 |
150 | _mm256_macc_pd(__m256d __A, __m256d __B, __m256d __C) |
151 | { |
152 | return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C); |
153 | } |
154 | |
155 | static __inline__ __m256 __DEFAULT_FN_ATTRS256 |
156 | _mm256_msub_ps(__m256 __A, __m256 __B, __m256 __C) |
157 | { |
158 | return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); |
159 | } |
160 | |
161 | static __inline__ __m256d __DEFAULT_FN_ATTRS256 |
162 | _mm256_msub_pd(__m256d __A, __m256d __B, __m256d __C) |
163 | { |
164 | return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C); |
165 | } |
166 | |
167 | static __inline__ __m256 __DEFAULT_FN_ATTRS256 |
168 | _mm256_nmacc_ps(__m256 __A, __m256 __B, __m256 __C) |
169 | { |
170 | return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C); |
171 | } |
172 | |
173 | static __inline__ __m256d __DEFAULT_FN_ATTRS256 |
174 | _mm256_nmacc_pd(__m256d __A, __m256d __B, __m256d __C) |
175 | { |
176 | return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, (__v4df)__C); |
177 | } |
178 | |
179 | static __inline__ __m256 __DEFAULT_FN_ATTRS256 |
180 | _mm256_nmsub_ps(__m256 __A, __m256 __B, __m256 __C) |
181 | { |
182 | return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); |
183 | } |
184 | |
185 | static __inline__ __m256d __DEFAULT_FN_ATTRS256 |
186 | _mm256_nmsub_pd(__m256d __A, __m256d __B, __m256d __C) |
187 | { |
188 | return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, -(__v4df)__C); |
189 | } |
190 | |
191 | static __inline__ __m256 __DEFAULT_FN_ATTRS256 |
192 | _mm256_maddsub_ps(__m256 __A, __m256 __B, __m256 __C) |
193 | { |
194 | return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); |
195 | } |
196 | |
197 | static __inline__ __m256d __DEFAULT_FN_ATTRS256 |
198 | _mm256_maddsub_pd(__m256d __A, __m256d __B, __m256d __C) |
199 | { |
200 | return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C); |
201 | } |
202 | |
203 | static __inline__ __m256 __DEFAULT_FN_ATTRS256 |
204 | _mm256_msubadd_ps(__m256 __A, __m256 __B, __m256 __C) |
205 | { |
206 | return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); |
207 | } |
208 | |
209 | static __inline__ __m256d __DEFAULT_FN_ATTRS256 |
210 | _mm256_msubadd_pd(__m256d __A, __m256d __B, __m256d __C) |
211 | { |
212 | return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C); |
213 | } |
214 | |
215 | #undef __DEFAULT_FN_ATTRS128 |
216 | #undef __DEFAULT_FN_ATTRS256 |
217 | |
218 | #endif /* __FMA4INTRIN_H */ |
219 |
Warning: This file is not a C or C++ file. It does not have highlighting.