Warning: This file is not a C or C++ file. It does not have highlighting.
1 | /*===---- fmaintrin.h - FMA intrinsics -------------------------------------=== |
---|---|
2 | * |
3 | * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | * See https://llvm.org/LICENSE.txt for license information. |
5 | * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | * |
7 | *===-----------------------------------------------------------------------=== |
8 | */ |
9 | |
10 | #ifndef __IMMINTRIN_H |
11 | #error "Never use <fmaintrin.h> directly; include <immintrin.h> instead." |
12 | #endif |
13 | |
14 | #ifndef __FMAINTRIN_H |
15 | #define __FMAINTRIN_H |
16 | |
17 | /* Define the default attributes for the functions in this file. */ |
18 | #define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("fma"), __min_vector_width__(128))) |
19 | #define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("fma"), __min_vector_width__(256))) |
20 | |
21 | /// Computes a multiply-add of 128-bit vectors of [4 x float]. |
22 | /// For each element, computes <c> (__A * __B) + __C </c>. |
23 | /// |
24 | /// \headerfile <immintrin.h> |
25 | /// |
26 | /// This intrinsic corresponds to the \c VFMADD213PS instruction. |
27 | /// |
28 | /// \param __A |
29 | /// A 128-bit vector of [4 x float] containing the multiplicand. |
30 | /// \param __B |
31 | /// A 128-bit vector of [4 x float] containing the multiplier. |
32 | /// \param __C |
33 | /// A 128-bit vector of [4 x float] containing the addend. |
34 | /// \returns A 128-bit vector of [4 x float] containing the result. |
35 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
36 | _mm_fmadd_ps(__m128 __A, __m128 __B, __m128 __C) |
37 | { |
38 | return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); |
39 | } |
40 | |
41 | /// Computes a multiply-add of 128-bit vectors of [2 x double]. |
42 | /// For each element, computes <c> (__A * __B) + __C </c>. |
43 | /// |
44 | /// \headerfile <immintrin.h> |
45 | /// |
46 | /// This intrinsic corresponds to the \c VFMADD213PD instruction. |
47 | /// |
48 | /// \param __A |
49 | /// A 128-bit vector of [2 x double] containing the multiplicand. |
50 | /// \param __B |
51 | /// A 128-bit vector of [2 x double] containing the multiplier. |
52 | /// \param __C |
53 | /// A 128-bit vector of [2 x double] containing the addend. |
54 | /// \returns A 128-bit [2 x double] vector containing the result. |
55 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
56 | _mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C) |
57 | { |
58 | return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C); |
59 | } |
60 | |
61 | /// Computes a scalar multiply-add of the single-precision values in the |
62 | /// low 32 bits of 128-bit vectors of [4 x float]. |
63 | /// \code |
64 | /// result[31:0] = (__A[31:0] * __B[31:0]) + __C[31:0] |
65 | /// result[127:32] = __A[127:32] |
66 | /// \endcode |
67 | /// |
68 | /// \headerfile <immintrin.h> |
69 | /// |
70 | /// This intrinsic corresponds to the \c VFMADD213SS instruction. |
71 | /// |
72 | /// \param __A |
73 | /// A 128-bit vector of [4 x float] containing the multiplicand in the low |
74 | /// 32 bits. |
75 | /// \param __B |
76 | /// A 128-bit vector of [4 x float] containing the multiplier in the low |
77 | /// 32 bits. |
78 | /// \param __C |
79 | /// A 128-bit vector of [4 x float] containing the addend in the low |
80 | /// 32 bits. |
81 | /// \returns A 128-bit vector of [4 x float] containing the result in the low |
82 | /// 32 bits and a copy of \a __A[127:32] in the upper 96 bits. |
83 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
84 | _mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C) |
85 | { |
86 | return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); |
87 | } |
88 | |
89 | /// Computes a scalar multiply-add of the double-precision values in the |
90 | /// low 64 bits of 128-bit vectors of [2 x double]. |
91 | /// \code |
92 | /// result[63:0] = (__A[63:0] * __B[63:0]) + __C[63:0] |
93 | /// result[127:64] = __A[127:64] |
94 | /// \endcode |
95 | /// |
96 | /// \headerfile <immintrin.h> |
97 | /// |
98 | /// This intrinsic corresponds to the \c VFMADD213SD instruction. |
99 | /// |
100 | /// \param __A |
101 | /// A 128-bit vector of [2 x double] containing the multiplicand in the low |
102 | /// 64 bits. |
103 | /// \param __B |
104 | /// A 128-bit vector of [2 x double] containing the multiplier in the low |
105 | /// 64 bits. |
106 | /// \param __C |
107 | /// A 128-bit vector of [2 x double] containing the addend in the low |
108 | /// 64 bits. |
109 | /// \returns A 128-bit vector of [2 x double] containing the result in the low |
110 | /// 64 bits and a copy of \a __A[127:64] in the upper 64 bits. |
111 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
112 | _mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C) |
113 | { |
114 | return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, (__v2df)__C); |
115 | } |
116 | |
117 | /// Computes a multiply-subtract of 128-bit vectors of [4 x float]. |
118 | /// For each element, computes <c> (__A * __B) - __C </c>. |
119 | /// |
120 | /// \headerfile <immintrin.h> |
121 | /// |
122 | /// This intrinsic corresponds to the \c VFMSUB213PS instruction. |
123 | /// |
124 | /// \param __A |
125 | /// A 128-bit vector of [4 x float] containing the multiplicand. |
126 | /// \param __B |
127 | /// A 128-bit vector of [4 x float] containing the multiplier. |
128 | /// \param __C |
129 | /// A 128-bit vector of [4 x float] containing the subtrahend. |
130 | /// \returns A 128-bit vector of [4 x float] containing the result. |
131 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
132 | _mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C) |
133 | { |
134 | return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); |
135 | } |
136 | |
137 | /// Computes a multiply-subtract of 128-bit vectors of [2 x double]. |
138 | /// For each element, computes <c> (__A * __B) - __C </c>. |
139 | /// |
140 | /// \headerfile <immintrin.h> |
141 | /// |
142 | /// This intrinsic corresponds to the \c VFMSUB213PD instruction. |
143 | /// |
144 | /// \param __A |
145 | /// A 128-bit vector of [2 x double] containing the multiplicand. |
146 | /// \param __B |
147 | /// A 128-bit vector of [2 x double] containing the multiplier. |
148 | /// \param __C |
149 | /// A 128-bit vector of [2 x double] containing the addend. |
150 | /// \returns A 128-bit vector of [2 x double] containing the result. |
151 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
152 | _mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C) |
153 | { |
154 | return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, -(__v2df)__C); |
155 | } |
156 | |
157 | /// Computes a scalar multiply-subtract of the single-precision values in |
158 | /// the low 32 bits of 128-bit vectors of [4 x float]. |
159 | /// \code |
160 | /// result[31:0] = (__A[31:0] * __B[31:0]) - __C[31:0] |
161 | /// result[127:32] = __A[127:32] |
162 | /// \endcode |
163 | /// |
164 | /// \headerfile <immintrin.h> |
165 | /// |
166 | /// This intrinsic corresponds to the \c VFMSUB213SS instruction. |
167 | /// |
168 | /// \param __A |
169 | /// A 128-bit vector of [4 x float] containing the multiplicand in the low |
170 | /// 32 bits. |
171 | /// \param __B |
172 | /// A 128-bit vector of [4 x float] containing the multiplier in the low |
173 | /// 32 bits. |
174 | /// \param __C |
175 | /// A 128-bit vector of [4 x float] containing the subtrahend in the low |
176 | /// 32 bits. |
177 | /// \returns A 128-bit vector of [4 x float] containing the result in the low |
178 | /// 32 bits, and a copy of \a __A[127:32] in the upper 96 bits. |
179 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
180 | _mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C) |
181 | { |
182 | return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); |
183 | } |
184 | |
185 | /// Computes a scalar multiply-subtract of the double-precision values in |
186 | /// the low 64 bits of 128-bit vectors of [2 x double]. |
187 | /// \code |
188 | /// result[63:0] = (__A[63:0] * __B[63:0]) - __C[63:0] |
189 | /// result[127:64] = __A[127:64] |
190 | /// \endcode |
191 | /// |
192 | /// \headerfile <immintrin.h> |
193 | /// |
194 | /// This intrinsic corresponds to the \c VFMSUB213SD instruction. |
195 | /// |
196 | /// \param __A |
197 | /// A 128-bit vector of [2 x double] containing the multiplicand in the low |
198 | /// 64 bits. |
199 | /// \param __B |
200 | /// A 128-bit vector of [2 x double] containing the multiplier in the low |
201 | /// 64 bits. |
202 | /// \param __C |
203 | /// A 128-bit vector of [2 x double] containing the subtrahend in the low |
204 | /// 64 bits. |
205 | /// \returns A 128-bit vector of [2 x double] containing the result in the low |
206 | /// 64 bits, and a copy of \a __A[127:64] in the upper 64 bits. |
207 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
208 | _mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C) |
209 | { |
210 | return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, -(__v2df)__C); |
211 | } |
212 | |
213 | /// Computes a negated multiply-add of 128-bit vectors of [4 x float]. |
214 | /// For each element, computes <c> -(__A * __B) + __C </c>. |
215 | /// |
216 | /// \headerfile <immintrin.h> |
217 | /// |
218 | /// This intrinsic corresponds to the \c VFNMADD213DPS instruction. |
219 | /// |
220 | /// \param __A |
221 | /// A 128-bit vector of [4 x float] containing the multiplicand. |
222 | /// \param __B |
223 | /// A 128-bit vector of [4 x float] containing the multiplier. |
224 | /// \param __C |
225 | /// A 128-bit vector of [4 x float] containing the addend. |
226 | /// \returns A 128-bit [4 x float] vector containing the result. |
227 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
228 | _mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C) |
229 | { |
230 | return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C); |
231 | } |
232 | |
233 | /// Computes a negated multiply-add of 128-bit vectors of [2 x double]. |
234 | /// For each element, computes <c> -(__A * __B) + __C </c>. |
235 | /// |
236 | /// \headerfile <immintrin.h> |
237 | /// |
238 | /// This intrinsic corresponds to the \c VFNMADD213PD instruction. |
239 | /// |
240 | /// \param __A |
241 | /// A 128-bit vector of [2 x double] containing the multiplicand. |
242 | /// \param __B |
243 | /// A 128-bit vector of [2 x double] containing the multiplier. |
244 | /// \param __C |
245 | /// A 128-bit vector of [2 x double] containing the addend. |
246 | /// \returns A 128-bit vector of [2 x double] containing the result. |
247 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
248 | _mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C) |
249 | { |
250 | return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, (__v2df)__C); |
251 | } |
252 | |
253 | /// Computes a scalar negated multiply-add of the single-precision values in |
254 | /// the low 32 bits of 128-bit vectors of [4 x float]. |
255 | /// \code |
256 | /// result[31:0] = -(__A[31:0] * __B[31:0]) + __C[31:0] |
257 | /// result[127:32] = __A[127:32] |
258 | /// \endcode |
259 | /// |
260 | /// \headerfile <immintrin.h> |
261 | /// |
262 | /// This intrinsic corresponds to the \c VFNMADD213SS instruction. |
263 | /// |
264 | /// \param __A |
265 | /// A 128-bit vector of [4 x float] containing the multiplicand in the low |
266 | /// 32 bits. |
267 | /// \param __B |
268 | /// A 128-bit vector of [4 x float] containing the multiplier in the low |
269 | /// 32 bits. |
270 | /// \param __C |
271 | /// A 128-bit vector of [4 x float] containing the addend in the low |
272 | /// 32 bits. |
273 | /// \returns A 128-bit vector of [4 x float] containing the result in the low |
274 | /// 32 bits, and a copy of \a __A[127:32] in the upper 96 bits. |
275 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
276 | _mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C) |
277 | { |
278 | return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, (__v4sf)__C); |
279 | } |
280 | |
281 | /// Computes a scalar negated multiply-add of the double-precision values |
282 | /// in the low 64 bits of 128-bit vectors of [2 x double]. |
283 | /// \code |
284 | /// result[63:0] = -(__A[63:0] * __B[63:0]) + __C[63:0] |
285 | /// result[127:64] = __A[127:64] |
286 | /// \endcode |
287 | /// |
288 | /// \headerfile <immintrin.h> |
289 | /// |
290 | /// This intrinsic corresponds to the \c VFNMADD213SD instruction. |
291 | /// |
292 | /// \param __A |
293 | /// A 128-bit vector of [2 x double] containing the multiplicand in the low |
294 | /// 64 bits. |
295 | /// \param __B |
296 | /// A 128-bit vector of [2 x double] containing the multiplier in the low |
297 | /// 64 bits. |
298 | /// \param __C |
299 | /// A 128-bit vector of [2 x double] containing the addend in the low |
300 | /// 64 bits. |
301 | /// \returns A 128-bit vector of [2 x double] containing the result in the low |
302 | /// 64 bits, and a copy of \a __A[127:64] in the upper 64 bits. |
303 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
304 | _mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C) |
305 | { |
306 | return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, (__v2df)__C); |
307 | } |
308 | |
309 | /// Computes a negated multiply-subtract of 128-bit vectors of [4 x float]. |
310 | /// For each element, computes <c> -(__A * __B) - __C </c>. |
311 | /// |
312 | /// \headerfile <immintrin.h> |
313 | /// |
314 | /// This intrinsic corresponds to the \c VFNMSUB213PS instruction. |
315 | /// |
316 | /// \param __A |
317 | /// A 128-bit vector of [4 x float] containing the multiplicand. |
318 | /// \param __B |
319 | /// A 128-bit vector of [4 x float] containing the multiplier. |
320 | /// \param __C |
321 | /// A 128-bit vector of [4 x float] containing the subtrahend. |
322 | /// \returns A 128-bit vector of [4 x float] containing the result. |
323 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
324 | _mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C) |
325 | { |
326 | return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); |
327 | } |
328 | |
329 | /// Computes a negated multiply-subtract of 128-bit vectors of [2 x double]. |
330 | /// For each element, computes <c> -(__A * __B) - __C </c>. |
331 | /// |
332 | /// \headerfile <immintrin.h> |
333 | /// |
334 | /// This intrinsic corresponds to the \c VFNMSUB213PD instruction. |
335 | /// |
336 | /// \param __A |
337 | /// A 128-bit vector of [2 x double] containing the multiplicand. |
338 | /// \param __B |
339 | /// A 128-bit vector of [2 x double] containing the multiplier. |
340 | /// \param __C |
341 | /// A 128-bit vector of [2 x double] containing the subtrahend. |
342 | /// \returns A 128-bit vector of [2 x double] containing the result. |
343 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
344 | _mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C) |
345 | { |
346 | return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C); |
347 | } |
348 | |
349 | /// Computes a scalar negated multiply-subtract of the single-precision |
350 | /// values in the low 32 bits of 128-bit vectors of [4 x float]. |
351 | /// \code |
352 | /// result[31:0] = -(__A[31:0] * __B[31:0]) - __C[31:0] |
353 | /// result[127:32] = __A[127:32] |
354 | /// \endcode |
355 | /// |
356 | /// \headerfile <immintrin.h> |
357 | /// |
358 | /// This intrinsic corresponds to the \c VFNMSUB213SS instruction. |
359 | /// |
360 | /// \param __A |
361 | /// A 128-bit vector of [4 x float] containing the multiplicand in the low |
362 | /// 32 bits. |
363 | /// \param __B |
364 | /// A 128-bit vector of [4 x float] containing the multiplier in the low |
365 | /// 32 bits. |
366 | /// \param __C |
367 | /// A 128-bit vector of [4 x float] containing the subtrahend in the low |
368 | /// 32 bits. |
369 | /// \returns A 128-bit vector of [4 x float] containing the result in the low |
370 | /// 32 bits, and a copy of \a __A[127:32] in the upper 96 bits. |
371 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
372 | _mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C) |
373 | { |
374 | return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, -(__v4sf)__C); |
375 | } |
376 | |
377 | /// Computes a scalar negated multiply-subtract of the double-precision |
378 | /// values in the low 64 bits of 128-bit vectors of [2 x double]. |
379 | /// \code |
380 | /// result[63:0] = -(__A[63:0] * __B[63:0]) - __C[63:0] |
381 | /// result[127:64] = __A[127:64] |
382 | /// \endcode |
383 | /// |
384 | /// \headerfile <immintrin.h> |
385 | /// |
386 | /// This intrinsic corresponds to the \c VFNMSUB213SD instruction. |
387 | /// |
388 | /// \param __A |
389 | /// A 128-bit vector of [2 x double] containing the multiplicand in the low |
390 | /// 64 bits. |
391 | /// \param __B |
392 | /// A 128-bit vector of [2 x double] containing the multiplier in the low |
393 | /// 64 bits. |
394 | /// \param __C |
395 | /// A 128-bit vector of [2 x double] containing the subtrahend in the low |
396 | /// 64 bits. |
397 | /// \returns A 128-bit vector of [2 x double] containing the result in the low |
398 | /// 64 bits, and a copy of \a __A[127:64] in the upper 64 bits. |
399 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
400 | _mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C) |
401 | { |
402 | return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, -(__v2df)__C); |
403 | } |
404 | |
405 | /// Computes a multiply with alternating add/subtract of 128-bit vectors of |
406 | /// [4 x float]. |
407 | /// \code |
408 | /// result[31:0] = (__A[31:0] * __B[31:0]) - __C[31:0] |
409 | /// result[63:32] = (__A[63:32] * __B[63:32]) + __C[63:32] |
410 | /// result[95:64] = (__A[95:64] * __B[95:64]) - __C[95:64] |
411 | /// result[127:96] = (__A[127:96] * __B[127:96]) + __C[127:96] |
412 | /// \endcode |
413 | /// |
414 | /// \headerfile <immintrin.h> |
415 | /// |
416 | /// This intrinsic corresponds to the \c VFMADDSUB213PS instruction. |
417 | /// |
418 | /// \param __A |
419 | /// A 128-bit vector of [4 x float] containing the multiplicand. |
420 | /// \param __B |
421 | /// A 128-bit vector of [4 x float] containing the multiplier. |
422 | /// \param __C |
423 | /// A 128-bit vector of [4 x float] containing the addend/subtrahend. |
424 | /// \returns A 128-bit vector of [4 x float] containing the result. |
425 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
426 | _mm_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C) |
427 | { |
428 | return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); |
429 | } |
430 | |
431 | /// Computes a multiply with alternating add/subtract of 128-bit vectors of |
432 | /// [2 x double]. |
433 | /// \code |
434 | /// result[63:0] = (__A[63:0] * __B[63:0]) - __C[63:0] |
435 | /// result[127:64] = (__A[127:64] * __B[127:64]) + __C[127:64] |
436 | /// \endcode |
437 | /// |
438 | /// \headerfile <immintrin.h> |
439 | /// |
440 | /// This intrinsic corresponds to the \c VFMADDSUB213PD instruction. |
441 | /// |
442 | /// \param __A |
443 | /// A 128-bit vector of [2 x double] containing the multiplicand. |
444 | /// \param __B |
445 | /// A 128-bit vector of [2 x double] containing the multiplier. |
446 | /// \param __C |
447 | /// A 128-bit vector of [2 x double] containing the addend/subtrahend. |
448 | /// \returns A 128-bit vector of [2 x double] containing the result. |
449 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
450 | _mm_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C) |
451 | { |
452 | return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C); |
453 | } |
454 | |
455 | /// Computes a multiply with alternating add/subtract of 128-bit vectors of |
456 | /// [4 x float]. |
457 | /// \code |
458 | /// result[31:0] = (__A[31:0] * __B[31:0]) + __C[31:0] |
459 | /// result[63:32] = (__A[63:32] * __B[63:32]) - __C[63:32] |
460 | /// result[95:64] = (__A[95:64] * __B[95:64]) + __C[95:64] |
461 | /// result[127:96 = (__A[127:96] * __B[127:96]) - __C[127:96] |
462 | /// \endcode |
463 | /// |
464 | /// \headerfile <immintrin.h> |
465 | /// |
466 | /// This intrinsic corresponds to the \c VFMSUBADD213PS instruction. |
467 | /// |
468 | /// \param __A |
469 | /// A 128-bit vector of [4 x float] containing the multiplicand. |
470 | /// \param __B |
471 | /// A 128-bit vector of [4 x float] containing the multiplier. |
472 | /// \param __C |
473 | /// A 128-bit vector of [4 x float] containing the addend/subtrahend. |
474 | /// \returns A 128-bit vector of [4 x float] containing the result. |
475 | static __inline__ __m128 __DEFAULT_FN_ATTRS128 |
476 | _mm_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C) |
477 | { |
478 | return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); |
479 | } |
480 | |
481 | /// Computes a multiply with alternating add/subtract of 128-bit vectors of |
482 | /// [2 x double]. |
483 | /// \code |
484 | /// result[63:0] = (__A[63:0] * __B[63:0]) + __C[63:0] |
485 | /// result[127:64] = (__A[127:64] * __B[127:64]) - __C[127:64] |
486 | /// \endcode |
487 | /// |
488 | /// \headerfile <immintrin.h> |
489 | /// |
490 | /// This intrinsic corresponds to the \c VFMADDSUB213PD instruction. |
491 | /// |
492 | /// \param __A |
493 | /// A 128-bit vector of [2 x double] containing the multiplicand. |
494 | /// \param __B |
495 | /// A 128-bit vector of [2 x double] containing the multiplier. |
496 | /// \param __C |
497 | /// A 128-bit vector of [2 x double] containing the addend/subtrahend. |
498 | /// \returns A 128-bit vector of [2 x double] containing the result. |
499 | static __inline__ __m128d __DEFAULT_FN_ATTRS128 |
500 | _mm_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C) |
501 | { |
502 | return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, -(__v2df)__C); |
503 | } |
504 | |
505 | /// Computes a multiply-add of 256-bit vectors of [8 x float]. |
506 | /// For each element, computes <c> (__A * __B) + __C </c>. |
507 | /// |
508 | /// \headerfile <immintrin.h> |
509 | /// |
510 | /// This intrinsic corresponds to the \c VFMADD213PS instruction. |
511 | /// |
512 | /// \param __A |
513 | /// A 256-bit vector of [8 x float] containing the multiplicand. |
514 | /// \param __B |
515 | /// A 256-bit vector of [8 x float] containing the multiplier. |
516 | /// \param __C |
517 | /// A 256-bit vector of [8 x float] containing the addend. |
518 | /// \returns A 256-bit vector of [8 x float] containing the result. |
519 | static __inline__ __m256 __DEFAULT_FN_ATTRS256 |
520 | _mm256_fmadd_ps(__m256 __A, __m256 __B, __m256 __C) |
521 | { |
522 | return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); |
523 | } |
524 | |
525 | /// Computes a multiply-add of 256-bit vectors of [4 x double]. |
526 | /// For each element, computes <c> (__A * __B) + __C </c>. |
527 | /// |
528 | /// \headerfile <immintrin.h> |
529 | /// |
530 | /// This intrinsic corresponds to the \c VFMADD213PD instruction. |
531 | /// |
532 | /// \param __A |
533 | /// A 256-bit vector of [4 x double] containing the multiplicand. |
534 | /// \param __B |
535 | /// A 256-bit vector of [4 x double] containing the multiplier. |
536 | /// \param __C |
537 | /// A 256-bit vector of [4 x double] containing the addend. |
538 | /// \returns A 256-bit vector of [4 x double] containing the result. |
539 | static __inline__ __m256d __DEFAULT_FN_ATTRS256 |
540 | _mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C) |
541 | { |
542 | return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C); |
543 | } |
544 | |
545 | /// Computes a multiply-subtract of 256-bit vectors of [8 x float]. |
546 | /// For each element, computes <c> (__A * __B) - __C </c>. |
547 | /// |
548 | /// \headerfile <immintrin.h> |
549 | /// |
550 | /// This intrinsic corresponds to the \c VFMSUB213PS instruction. |
551 | /// |
552 | /// \param __A |
553 | /// A 256-bit vector of [8 x float] containing the multiplicand. |
554 | /// \param __B |
555 | /// A 256-bit vector of [8 x float] containing the multiplier. |
556 | /// \param __C |
557 | /// A 256-bit vector of [8 x float] containing the subtrahend. |
558 | /// \returns A 256-bit vector of [8 x float] containing the result. |
559 | static __inline__ __m256 __DEFAULT_FN_ATTRS256 |
560 | _mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C) |
561 | { |
562 | return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); |
563 | } |
564 | |
565 | /// Computes a multiply-subtract of 256-bit vectors of [4 x double]. |
566 | /// For each element, computes <c> (__A * __B) - __C </c>. |
567 | /// |
568 | /// \headerfile <immintrin.h> |
569 | /// |
570 | /// This intrinsic corresponds to the \c VFMSUB213PD instruction. |
571 | /// |
572 | /// \param __A |
573 | /// A 256-bit vector of [4 x double] containing the multiplicand. |
574 | /// \param __B |
575 | /// A 256-bit vector of [4 x double] containing the multiplier. |
576 | /// \param __C |
577 | /// A 256-bit vector of [4 x double] containing the subtrahend. |
578 | /// \returns A 256-bit vector of [4 x double] containing the result. |
579 | static __inline__ __m256d __DEFAULT_FN_ATTRS256 |
580 | _mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C) |
581 | { |
582 | return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C); |
583 | } |
584 | |
585 | /// Computes a negated multiply-add of 256-bit vectors of [8 x float]. |
586 | /// For each element, computes <c> -(__A * __B) + __C </c>. |
587 | /// |
588 | /// \headerfile <immintrin.h> |
589 | /// |
590 | /// This intrinsic corresponds to the \c VFNMADD213PS instruction. |
591 | /// |
592 | /// \param __A |
593 | /// A 256-bit vector of [8 x float] containing the multiplicand. |
594 | /// \param __B |
595 | /// A 256-bit vector of [8 x float] containing the multiplier. |
596 | /// \param __C |
597 | /// A 256-bit vector of [8 x float] containing the addend. |
598 | /// \returns A 256-bit vector of [8 x float] containing the result. |
599 | static __inline__ __m256 __DEFAULT_FN_ATTRS256 |
600 | _mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C) |
601 | { |
602 | return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C); |
603 | } |
604 | |
605 | /// Computes a negated multiply-add of 256-bit vectors of [4 x double]. |
606 | /// For each element, computes <c> -(__A * __B) + __C </c>. |
607 | /// |
608 | /// \headerfile <immintrin.h> |
609 | /// |
610 | /// This intrinsic corresponds to the \c VFNMADD213PD instruction. |
611 | /// |
612 | /// \param __A |
613 | /// A 256-bit vector of [4 x double] containing the multiplicand. |
614 | /// \param __B |
615 | /// A 256-bit vector of [4 x double] containing the multiplier. |
616 | /// \param __C |
617 | /// A 256-bit vector of [4 x double] containing the addend. |
618 | /// \returns A 256-bit vector of [4 x double] containing the result. |
619 | static __inline__ __m256d __DEFAULT_FN_ATTRS256 |
620 | _mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C) |
621 | { |
622 | return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, (__v4df)__C); |
623 | } |
624 | |
625 | /// Computes a negated multiply-subtract of 256-bit vectors of [8 x float]. |
626 | /// For each element, computes <c> -(__A * __B) - __C </c>. |
627 | /// |
628 | /// \headerfile <immintrin.h> |
629 | /// |
630 | /// This intrinsic corresponds to the \c VFNMSUB213PS instruction. |
631 | /// |
632 | /// \param __A |
633 | /// A 256-bit vector of [8 x float] containing the multiplicand. |
634 | /// \param __B |
635 | /// A 256-bit vector of [8 x float] containing the multiplier. |
636 | /// \param __C |
637 | /// A 256-bit vector of [8 x float] containing the subtrahend. |
638 | /// \returns A 256-bit vector of [8 x float] containing the result. |
639 | static __inline__ __m256 __DEFAULT_FN_ATTRS256 |
640 | _mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C) |
641 | { |
642 | return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); |
643 | } |
644 | |
645 | /// Computes a negated multiply-subtract of 256-bit vectors of [4 x double]. |
646 | /// For each element, computes <c> -(__A * __B) - __C </c>. |
647 | /// |
648 | /// \headerfile <immintrin.h> |
649 | /// |
650 | /// This intrinsic corresponds to the \c VFNMSUB213PD instruction. |
651 | /// |
652 | /// \param __A |
653 | /// A 256-bit vector of [4 x double] containing the multiplicand. |
654 | /// \param __B |
655 | /// A 256-bit vector of [4 x double] containing the multiplier. |
656 | /// \param __C |
657 | /// A 256-bit vector of [4 x double] containing the subtrahend. |
658 | /// \returns A 256-bit vector of [4 x double] containing the result. |
659 | static __inline__ __m256d __DEFAULT_FN_ATTRS256 |
660 | _mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C) |
661 | { |
662 | return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, -(__v4df)__C); |
663 | } |
664 | |
665 | /// Computes a multiply with alternating add/subtract of 256-bit vectors of |
666 | /// [8 x float]. |
667 | /// \code |
668 | /// result[31:0] = (__A[31:0] * __B[31:0]) - __C[31:0] |
669 | /// result[63:32] = (__A[63:32] * __B[63:32]) + __C[63:32] |
670 | /// result[95:64] = (__A[95:64] * __B[95:64]) - __C[95:64] |
671 | /// result[127:96] = (__A[127:96] * __B[127:96]) + __C[127:96] |
672 | /// result[159:128] = (__A[159:128] * __B[159:128]) - __C[159:128] |
673 | /// result[191:160] = (__A[191:160] * __B[191:160]) + __C[191:160] |
674 | /// result[223:192] = (__A[223:192] * __B[223:192]) - __C[223:192] |
675 | /// result[255:224] = (__A[255:224] * __B[255:224]) + __C[255:224] |
676 | /// \endcode |
677 | /// |
678 | /// \headerfile <immintrin.h> |
679 | /// |
680 | /// This intrinsic corresponds to the \c VFMADDSUB213PS instruction. |
681 | /// |
682 | /// \param __A |
683 | /// A 256-bit vector of [8 x float] containing the multiplicand. |
684 | /// \param __B |
685 | /// A 256-bit vector of [8 x float] containing the multiplier. |
686 | /// \param __C |
687 | /// A 256-bit vector of [8 x float] containing the addend/subtrahend. |
688 | /// \returns A 256-bit vector of [8 x float] containing the result. |
689 | static __inline__ __m256 __DEFAULT_FN_ATTRS256 |
690 | _mm256_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C) |
691 | { |
692 | return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); |
693 | } |
694 | |
695 | /// Computes a multiply with alternating add/subtract of 256-bit vectors of |
696 | /// [4 x double]. |
697 | /// \code |
698 | /// result[63:0] = (__A[63:0] * __B[63:0]) - __C[63:0] |
699 | /// result[127:64] = (__A[127:64] * __B[127:64]) + __C[127:64] |
700 | /// result[191:128] = (__A[191:128] * __B[191:128]) - __C[191:128] |
701 | /// result[255:192] = (__A[255:192] * __B[255:192]) + __C[255:192] |
702 | /// \endcode |
703 | /// |
704 | /// \headerfile <immintrin.h> |
705 | /// |
706 | /// This intrinsic corresponds to the \c VFMADDSUB213PD instruction. |
707 | /// |
708 | /// \param __A |
709 | /// A 256-bit vector of [4 x double] containing the multiplicand. |
710 | /// \param __B |
711 | /// A 256-bit vector of [4 x double] containing the multiplier. |
712 | /// \param __C |
713 | /// A 256-bit vector of [4 x double] containing the addend/subtrahend. |
714 | /// \returns A 256-bit vector of [4 x double] containing the result. |
715 | static __inline__ __m256d __DEFAULT_FN_ATTRS256 |
716 | _mm256_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C) |
717 | { |
718 | return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C); |
719 | } |
720 | |
721 | /// Computes a vector multiply with alternating add/subtract of 256-bit |
722 | /// vectors of [8 x float]. |
723 | /// \code |
724 | /// result[31:0] = (__A[31:0] * __B[31:0]) + __C[31:0] |
725 | /// result[63:32] = (__A[63:32] * __B[63:32]) - __C[63:32] |
726 | /// result[95:64] = (__A[95:64] * __B[95:64]) + __C[95:64] |
727 | /// result[127:96] = (__A[127:96] * __B[127:96]) - __C[127:96] |
728 | /// result[159:128] = (__A[159:128] * __B[159:128]) + __C[159:128] |
729 | /// result[191:160] = (__A[191:160] * __B[191:160]) - __C[191:160] |
730 | /// result[223:192] = (__A[223:192] * __B[223:192]) + __C[223:192] |
731 | /// result[255:224] = (__A[255:224] * __B[255:224]) - __C[255:224] |
732 | /// \endcode |
733 | /// |
734 | /// \headerfile <immintrin.h> |
735 | /// |
736 | /// This intrinsic corresponds to the \c VFMSUBADD213PS instruction. |
737 | /// |
738 | /// \param __A |
739 | /// A 256-bit vector of [8 x float] containing the multiplicand. |
740 | /// \param __B |
741 | /// A 256-bit vector of [8 x float] containing the multiplier. |
742 | /// \param __C |
743 | /// A 256-bit vector of [8 x float] containing the addend/subtrahend. |
744 | /// \returns A 256-bit vector of [8 x float] containing the result. |
745 | static __inline__ __m256 __DEFAULT_FN_ATTRS256 |
746 | _mm256_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C) |
747 | { |
748 | return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); |
749 | } |
750 | |
751 | /// Computes a vector multiply with alternating add/subtract of 256-bit |
752 | /// vectors of [4 x double]. |
753 | /// \code |
754 | /// result[63:0] = (__A[63:0] * __B[63:0]) + __C[63:0] |
755 | /// result[127:64] = (__A[127:64] * __B[127:64]) - __C[127:64] |
756 | /// result[191:128] = (__A[191:128] * __B[191:128]) + __C[191:128] |
757 | /// result[255:192] = (__A[255:192] * __B[255:192]) - __C[255:192] |
758 | /// \endcode |
759 | /// |
760 | /// \headerfile <immintrin.h> |
761 | /// |
762 | /// This intrinsic corresponds to the \c VFMSUBADD213PD instruction. |
763 | /// |
764 | /// \param __A |
765 | /// A 256-bit vector of [4 x double] containing the multiplicand. |
766 | /// \param __B |
767 | /// A 256-bit vector of [4 x double] containing the multiplier. |
768 | /// \param __C |
769 | /// A 256-bit vector of [4 x double] containing the addend/subtrahend. |
770 | /// \returns A 256-bit vector of [4 x double] containing the result. |
771 | static __inline__ __m256d __DEFAULT_FN_ATTRS256 |
772 | _mm256_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C) |
773 | { |
774 | return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C); |
775 | } |
776 | |
777 | #undef __DEFAULT_FN_ATTRS128 |
778 | #undef __DEFAULT_FN_ATTRS256 |
779 | |
780 | #endif /* __FMAINTRIN_H */ |
781 |
Warning: This file is not a C or C++ file. It does not have highlighting.