Warning: This file is not a C or C++ file. It does not have highlighting.

1/*===---- fmaintrin.h - FMA intrinsics -------------------------------------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9
10#ifndef __IMMINTRIN_H
11#error "Never use <fmaintrin.h> directly; include <immintrin.h> instead."
12#endif
13
14#ifndef __FMAINTRIN_H
15#define __FMAINTRIN_H
16
/* Attribute sets shared by every intrinsic defined in this header: forced
 * inlining, no debug info, the "fma" target feature, and a minimum vector
 * width of 128 or 256 bits respectively. */
#define __DEFAULT_FN_ATTRS128                                                  \
  __attribute__((__always_inline__, __nodebug__, __target__("fma"),            \
                 __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256                                                  \
  __attribute__((__always_inline__, __nodebug__, __target__("fma"),            \
                 __min_vector_width__(256)))
20
21/// Computes a multiply-add of 128-bit vectors of [4 x float].
22/// For each element, computes <c> (__A * __B) + __C </c>.
23///
24/// \headerfile <immintrin.h>
25///
26/// This intrinsic corresponds to the \c VFMADD213PS instruction.
27///
28/// \param __A
29/// A 128-bit vector of [4 x float] containing the multiplicand.
30/// \param __B
31/// A 128-bit vector of [4 x float] containing the multiplier.
32/// \param __C
33/// A 128-bit vector of [4 x float] containing the addend.
34/// \returns A 128-bit vector of [4 x float] containing the result.
35static __inline__ __m128 __DEFAULT_FN_ATTRS128
36_mm_fmadd_ps(__m128 __A, __m128 __B, __m128 __C)
37{
38 return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
39}
40
41/// Computes a multiply-add of 128-bit vectors of [2 x double].
42/// For each element, computes <c> (__A * __B) + __C </c>.
43///
44/// \headerfile <immintrin.h>
45///
46/// This intrinsic corresponds to the \c VFMADD213PD instruction.
47///
48/// \param __A
49/// A 128-bit vector of [2 x double] containing the multiplicand.
50/// \param __B
51/// A 128-bit vector of [2 x double] containing the multiplier.
52/// \param __C
53/// A 128-bit vector of [2 x double] containing the addend.
54/// \returns A 128-bit [2 x double] vector containing the result.
55static __inline__ __m128d __DEFAULT_FN_ATTRS128
56_mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C)
57{
58 return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
59}
60
61/// Computes a scalar multiply-add of the single-precision values in the
62/// low 32 bits of 128-bit vectors of [4 x float].
63/// \code
64/// result[31:0] = (__A[31:0] * __B[31:0]) + __C[31:0]
65/// result[127:32] = __A[127:32]
66/// \endcode
67///
68/// \headerfile <immintrin.h>
69///
70/// This intrinsic corresponds to the \c VFMADD213SS instruction.
71///
72/// \param __A
73/// A 128-bit vector of [4 x float] containing the multiplicand in the low
74/// 32 bits.
75/// \param __B
76/// A 128-bit vector of [4 x float] containing the multiplier in the low
77/// 32 bits.
78/// \param __C
79/// A 128-bit vector of [4 x float] containing the addend in the low
80/// 32 bits.
81/// \returns A 128-bit vector of [4 x float] containing the result in the low
82/// 32 bits and a copy of \a __A[127:32] in the upper 96 bits.
83static __inline__ __m128 __DEFAULT_FN_ATTRS128
84_mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C)
85{
86 return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
87}
88
89/// Computes a scalar multiply-add of the double-precision values in the
90/// low 64 bits of 128-bit vectors of [2 x double].
91/// \code
92/// result[63:0] = (__A[63:0] * __B[63:0]) + __C[63:0]
93/// result[127:64] = __A[127:64]
94/// \endcode
95///
96/// \headerfile <immintrin.h>
97///
98/// This intrinsic corresponds to the \c VFMADD213SD instruction.
99///
100/// \param __A
101/// A 128-bit vector of [2 x double] containing the multiplicand in the low
102/// 64 bits.
103/// \param __B
104/// A 128-bit vector of [2 x double] containing the multiplier in the low
105/// 64 bits.
106/// \param __C
107/// A 128-bit vector of [2 x double] containing the addend in the low
108/// 64 bits.
109/// \returns A 128-bit vector of [2 x double] containing the result in the low
110/// 64 bits and a copy of \a __A[127:64] in the upper 64 bits.
111static __inline__ __m128d __DEFAULT_FN_ATTRS128
112_mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C)
113{
114 return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, (__v2df)__C);
115}
116
117/// Computes a multiply-subtract of 128-bit vectors of [4 x float].
118/// For each element, computes <c> (__A * __B) - __C </c>.
119///
120/// \headerfile <immintrin.h>
121///
122/// This intrinsic corresponds to the \c VFMSUB213PS instruction.
123///
124/// \param __A
125/// A 128-bit vector of [4 x float] containing the multiplicand.
126/// \param __B
127/// A 128-bit vector of [4 x float] containing the multiplier.
128/// \param __C
129/// A 128-bit vector of [4 x float] containing the subtrahend.
130/// \returns A 128-bit vector of [4 x float] containing the result.
131static __inline__ __m128 __DEFAULT_FN_ATTRS128
132_mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C)
133{
134 return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
135}
136
137/// Computes a multiply-subtract of 128-bit vectors of [2 x double].
138/// For each element, computes <c> (__A * __B) - __C </c>.
139///
140/// \headerfile <immintrin.h>
141///
142/// This intrinsic corresponds to the \c VFMSUB213PD instruction.
143///
144/// \param __A
145/// A 128-bit vector of [2 x double] containing the multiplicand.
146/// \param __B
147/// A 128-bit vector of [2 x double] containing the multiplier.
148/// \param __C
149/// A 128-bit vector of [2 x double] containing the addend.
150/// \returns A 128-bit vector of [2 x double] containing the result.
151static __inline__ __m128d __DEFAULT_FN_ATTRS128
152_mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C)
153{
154 return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
155}
156
157/// Computes a scalar multiply-subtract of the single-precision values in
158/// the low 32 bits of 128-bit vectors of [4 x float].
159/// \code
160/// result[31:0] = (__A[31:0] * __B[31:0]) - __C[31:0]
161/// result[127:32] = __A[127:32]
162/// \endcode
163///
164/// \headerfile <immintrin.h>
165///
166/// This intrinsic corresponds to the \c VFMSUB213SS instruction.
167///
168/// \param __A
169/// A 128-bit vector of [4 x float] containing the multiplicand in the low
170/// 32 bits.
171/// \param __B
172/// A 128-bit vector of [4 x float] containing the multiplier in the low
173/// 32 bits.
174/// \param __C
175/// A 128-bit vector of [4 x float] containing the subtrahend in the low
176/// 32 bits.
177/// \returns A 128-bit vector of [4 x float] containing the result in the low
178/// 32 bits, and a copy of \a __A[127:32] in the upper 96 bits.
179static __inline__ __m128 __DEFAULT_FN_ATTRS128
180_mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C)
181{
182 return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
183}
184
185/// Computes a scalar multiply-subtract of the double-precision values in
186/// the low 64 bits of 128-bit vectors of [2 x double].
187/// \code
188/// result[63:0] = (__A[63:0] * __B[63:0]) - __C[63:0]
189/// result[127:64] = __A[127:64]
190/// \endcode
191///
192/// \headerfile <immintrin.h>
193///
194/// This intrinsic corresponds to the \c VFMSUB213SD instruction.
195///
196/// \param __A
197/// A 128-bit vector of [2 x double] containing the multiplicand in the low
198/// 64 bits.
199/// \param __B
200/// A 128-bit vector of [2 x double] containing the multiplier in the low
201/// 64 bits.
202/// \param __C
203/// A 128-bit vector of [2 x double] containing the subtrahend in the low
204/// 64 bits.
205/// \returns A 128-bit vector of [2 x double] containing the result in the low
206/// 64 bits, and a copy of \a __A[127:64] in the upper 64 bits.
207static __inline__ __m128d __DEFAULT_FN_ATTRS128
208_mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C)
209{
210 return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, -(__v2df)__C);
211}
212
213/// Computes a negated multiply-add of 128-bit vectors of [4 x float].
214/// For each element, computes <c> -(__A * __B) + __C </c>.
215///
216/// \headerfile <immintrin.h>
217///
218/// This intrinsic corresponds to the \c VFNMADD213DPS instruction.
219///
220/// \param __A
221/// A 128-bit vector of [4 x float] containing the multiplicand.
222/// \param __B
223/// A 128-bit vector of [4 x float] containing the multiplier.
224/// \param __C
225/// A 128-bit vector of [4 x float] containing the addend.
226/// \returns A 128-bit [4 x float] vector containing the result.
227static __inline__ __m128 __DEFAULT_FN_ATTRS128
228_mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C)
229{
230 return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
231}
232
233/// Computes a negated multiply-add of 128-bit vectors of [2 x double].
234/// For each element, computes <c> -(__A * __B) + __C </c>.
235///
236/// \headerfile <immintrin.h>
237///
238/// This intrinsic corresponds to the \c VFNMADD213PD instruction.
239///
240/// \param __A
241/// A 128-bit vector of [2 x double] containing the multiplicand.
242/// \param __B
243/// A 128-bit vector of [2 x double] containing the multiplier.
244/// \param __C
245/// A 128-bit vector of [2 x double] containing the addend.
246/// \returns A 128-bit vector of [2 x double] containing the result.
247static __inline__ __m128d __DEFAULT_FN_ATTRS128
248_mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C)
249{
250 return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, (__v2df)__C);
251}
252
253/// Computes a scalar negated multiply-add of the single-precision values in
254/// the low 32 bits of 128-bit vectors of [4 x float].
255/// \code
256/// result[31:0] = -(__A[31:0] * __B[31:0]) + __C[31:0]
257/// result[127:32] = __A[127:32]
258/// \endcode
259///
260/// \headerfile <immintrin.h>
261///
262/// This intrinsic corresponds to the \c VFNMADD213SS instruction.
263///
264/// \param __A
265/// A 128-bit vector of [4 x float] containing the multiplicand in the low
266/// 32 bits.
267/// \param __B
268/// A 128-bit vector of [4 x float] containing the multiplier in the low
269/// 32 bits.
270/// \param __C
271/// A 128-bit vector of [4 x float] containing the addend in the low
272/// 32 bits.
273/// \returns A 128-bit vector of [4 x float] containing the result in the low
274/// 32 bits, and a copy of \a __A[127:32] in the upper 96 bits.
275static __inline__ __m128 __DEFAULT_FN_ATTRS128
276_mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C)
277{
278 return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, (__v4sf)__C);
279}
280
281/// Computes a scalar negated multiply-add of the double-precision values
282/// in the low 64 bits of 128-bit vectors of [2 x double].
283/// \code
284/// result[63:0] = -(__A[63:0] * __B[63:0]) + __C[63:0]
285/// result[127:64] = __A[127:64]
286/// \endcode
287///
288/// \headerfile <immintrin.h>
289///
290/// This intrinsic corresponds to the \c VFNMADD213SD instruction.
291///
292/// \param __A
293/// A 128-bit vector of [2 x double] containing the multiplicand in the low
294/// 64 bits.
295/// \param __B
296/// A 128-bit vector of [2 x double] containing the multiplier in the low
297/// 64 bits.
298/// \param __C
299/// A 128-bit vector of [2 x double] containing the addend in the low
300/// 64 bits.
301/// \returns A 128-bit vector of [2 x double] containing the result in the low
302/// 64 bits, and a copy of \a __A[127:64] in the upper 64 bits.
303static __inline__ __m128d __DEFAULT_FN_ATTRS128
304_mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C)
305{
306 return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, (__v2df)__C);
307}
308
309/// Computes a negated multiply-subtract of 128-bit vectors of [4 x float].
310/// For each element, computes <c> -(__A * __B) - __C </c>.
311///
312/// \headerfile <immintrin.h>
313///
314/// This intrinsic corresponds to the \c VFNMSUB213PS instruction.
315///
316/// \param __A
317/// A 128-bit vector of [4 x float] containing the multiplicand.
318/// \param __B
319/// A 128-bit vector of [4 x float] containing the multiplier.
320/// \param __C
321/// A 128-bit vector of [4 x float] containing the subtrahend.
322/// \returns A 128-bit vector of [4 x float] containing the result.
323static __inline__ __m128 __DEFAULT_FN_ATTRS128
324_mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C)
325{
326 return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
327}
328
329/// Computes a negated multiply-subtract of 128-bit vectors of [2 x double].
330/// For each element, computes <c> -(__A * __B) - __C </c>.
331///
332/// \headerfile <immintrin.h>
333///
334/// This intrinsic corresponds to the \c VFNMSUB213PD instruction.
335///
336/// \param __A
337/// A 128-bit vector of [2 x double] containing the multiplicand.
338/// \param __B
339/// A 128-bit vector of [2 x double] containing the multiplier.
340/// \param __C
341/// A 128-bit vector of [2 x double] containing the subtrahend.
342/// \returns A 128-bit vector of [2 x double] containing the result.
343static __inline__ __m128d __DEFAULT_FN_ATTRS128
344_mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C)
345{
346 return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C);
347}
348
349/// Computes a scalar negated multiply-subtract of the single-precision
350/// values in the low 32 bits of 128-bit vectors of [4 x float].
351/// \code
352/// result[31:0] = -(__A[31:0] * __B[31:0]) - __C[31:0]
353/// result[127:32] = __A[127:32]
354/// \endcode
355///
356/// \headerfile <immintrin.h>
357///
358/// This intrinsic corresponds to the \c VFNMSUB213SS instruction.
359///
360/// \param __A
361/// A 128-bit vector of [4 x float] containing the multiplicand in the low
362/// 32 bits.
363/// \param __B
364/// A 128-bit vector of [4 x float] containing the multiplier in the low
365/// 32 bits.
366/// \param __C
367/// A 128-bit vector of [4 x float] containing the subtrahend in the low
368/// 32 bits.
369/// \returns A 128-bit vector of [4 x float] containing the result in the low
370/// 32 bits, and a copy of \a __A[127:32] in the upper 96 bits.
371static __inline__ __m128 __DEFAULT_FN_ATTRS128
372_mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C)
373{
374 return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, -(__v4sf)__C);
375}
376
377/// Computes a scalar negated multiply-subtract of the double-precision
378/// values in the low 64 bits of 128-bit vectors of [2 x double].
379/// \code
380/// result[63:0] = -(__A[63:0] * __B[63:0]) - __C[63:0]
381/// result[127:64] = __A[127:64]
382/// \endcode
383///
384/// \headerfile <immintrin.h>
385///
386/// This intrinsic corresponds to the \c VFNMSUB213SD instruction.
387///
388/// \param __A
389/// A 128-bit vector of [2 x double] containing the multiplicand in the low
390/// 64 bits.
391/// \param __B
392/// A 128-bit vector of [2 x double] containing the multiplier in the low
393/// 64 bits.
394/// \param __C
395/// A 128-bit vector of [2 x double] containing the subtrahend in the low
396/// 64 bits.
397/// \returns A 128-bit vector of [2 x double] containing the result in the low
398/// 64 bits, and a copy of \a __A[127:64] in the upper 64 bits.
399static __inline__ __m128d __DEFAULT_FN_ATTRS128
400_mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C)
401{
402 return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, -(__v2df)__C);
403}
404
405/// Computes a multiply with alternating add/subtract of 128-bit vectors of
406/// [4 x float].
407/// \code
408/// result[31:0] = (__A[31:0] * __B[31:0]) - __C[31:0]
409/// result[63:32] = (__A[63:32] * __B[63:32]) + __C[63:32]
410/// result[95:64] = (__A[95:64] * __B[95:64]) - __C[95:64]
411/// result[127:96] = (__A[127:96] * __B[127:96]) + __C[127:96]
412/// \endcode
413///
414/// \headerfile <immintrin.h>
415///
416/// This intrinsic corresponds to the \c VFMADDSUB213PS instruction.
417///
418/// \param __A
419/// A 128-bit vector of [4 x float] containing the multiplicand.
420/// \param __B
421/// A 128-bit vector of [4 x float] containing the multiplier.
422/// \param __C
423/// A 128-bit vector of [4 x float] containing the addend/subtrahend.
424/// \returns A 128-bit vector of [4 x float] containing the result.
425static __inline__ __m128 __DEFAULT_FN_ATTRS128
426_mm_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C)
427{
428 return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
429}
430
431/// Computes a multiply with alternating add/subtract of 128-bit vectors of
432/// [2 x double].
433/// \code
434/// result[63:0] = (__A[63:0] * __B[63:0]) - __C[63:0]
435/// result[127:64] = (__A[127:64] * __B[127:64]) + __C[127:64]
436/// \endcode
437///
438/// \headerfile <immintrin.h>
439///
440/// This intrinsic corresponds to the \c VFMADDSUB213PD instruction.
441///
442/// \param __A
443/// A 128-bit vector of [2 x double] containing the multiplicand.
444/// \param __B
445/// A 128-bit vector of [2 x double] containing the multiplier.
446/// \param __C
447/// A 128-bit vector of [2 x double] containing the addend/subtrahend.
448/// \returns A 128-bit vector of [2 x double] containing the result.
449static __inline__ __m128d __DEFAULT_FN_ATTRS128
450_mm_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C)
451{
452 return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
453}
454
455/// Computes a multiply with alternating add/subtract of 128-bit vectors of
456/// [4 x float].
457/// \code
458/// result[31:0] = (__A[31:0] * __B[31:0]) + __C[31:0]
459/// result[63:32] = (__A[63:32] * __B[63:32]) - __C[63:32]
460/// result[95:64] = (__A[95:64] * __B[95:64]) + __C[95:64]
461/// result[127:96 = (__A[127:96] * __B[127:96]) - __C[127:96]
462/// \endcode
463///
464/// \headerfile <immintrin.h>
465///
466/// This intrinsic corresponds to the \c VFMSUBADD213PS instruction.
467///
468/// \param __A
469/// A 128-bit vector of [4 x float] containing the multiplicand.
470/// \param __B
471/// A 128-bit vector of [4 x float] containing the multiplier.
472/// \param __C
473/// A 128-bit vector of [4 x float] containing the addend/subtrahend.
474/// \returns A 128-bit vector of [4 x float] containing the result.
475static __inline__ __m128 __DEFAULT_FN_ATTRS128
476_mm_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C)
477{
478 return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
479}
480
481/// Computes a multiply with alternating add/subtract of 128-bit vectors of
482/// [2 x double].
483/// \code
484/// result[63:0] = (__A[63:0] * __B[63:0]) + __C[63:0]
485/// result[127:64] = (__A[127:64] * __B[127:64]) - __C[127:64]
486/// \endcode
487///
488/// \headerfile <immintrin.h>
489///
490/// This intrinsic corresponds to the \c VFMADDSUB213PD instruction.
491///
492/// \param __A
493/// A 128-bit vector of [2 x double] containing the multiplicand.
494/// \param __B
495/// A 128-bit vector of [2 x double] containing the multiplier.
496/// \param __C
497/// A 128-bit vector of [2 x double] containing the addend/subtrahend.
498/// \returns A 128-bit vector of [2 x double] containing the result.
499static __inline__ __m128d __DEFAULT_FN_ATTRS128
500_mm_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C)
501{
502 return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
503}
504
505/// Computes a multiply-add of 256-bit vectors of [8 x float].
506/// For each element, computes <c> (__A * __B) + __C </c>.
507///
508/// \headerfile <immintrin.h>
509///
510/// This intrinsic corresponds to the \c VFMADD213PS instruction.
511///
512/// \param __A
513/// A 256-bit vector of [8 x float] containing the multiplicand.
514/// \param __B
515/// A 256-bit vector of [8 x float] containing the multiplier.
516/// \param __C
517/// A 256-bit vector of [8 x float] containing the addend.
518/// \returns A 256-bit vector of [8 x float] containing the result.
519static __inline__ __m256 __DEFAULT_FN_ATTRS256
520_mm256_fmadd_ps(__m256 __A, __m256 __B, __m256 __C)
521{
522 return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
523}
524
525/// Computes a multiply-add of 256-bit vectors of [4 x double].
526/// For each element, computes <c> (__A * __B) + __C </c>.
527///
528/// \headerfile <immintrin.h>
529///
530/// This intrinsic corresponds to the \c VFMADD213PD instruction.
531///
532/// \param __A
533/// A 256-bit vector of [4 x double] containing the multiplicand.
534/// \param __B
535/// A 256-bit vector of [4 x double] containing the multiplier.
536/// \param __C
537/// A 256-bit vector of [4 x double] containing the addend.
538/// \returns A 256-bit vector of [4 x double] containing the result.
539static __inline__ __m256d __DEFAULT_FN_ATTRS256
540_mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C)
541{
542 return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
543}
544
545/// Computes a multiply-subtract of 256-bit vectors of [8 x float].
546/// For each element, computes <c> (__A * __B) - __C </c>.
547///
548/// \headerfile <immintrin.h>
549///
550/// This intrinsic corresponds to the \c VFMSUB213PS instruction.
551///
552/// \param __A
553/// A 256-bit vector of [8 x float] containing the multiplicand.
554/// \param __B
555/// A 256-bit vector of [8 x float] containing the multiplier.
556/// \param __C
557/// A 256-bit vector of [8 x float] containing the subtrahend.
558/// \returns A 256-bit vector of [8 x float] containing the result.
559static __inline__ __m256 __DEFAULT_FN_ATTRS256
560_mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C)
561{
562 return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
563}
564
565/// Computes a multiply-subtract of 256-bit vectors of [4 x double].
566/// For each element, computes <c> (__A * __B) - __C </c>.
567///
568/// \headerfile <immintrin.h>
569///
570/// This intrinsic corresponds to the \c VFMSUB213PD instruction.
571///
572/// \param __A
573/// A 256-bit vector of [4 x double] containing the multiplicand.
574/// \param __B
575/// A 256-bit vector of [4 x double] containing the multiplier.
576/// \param __C
577/// A 256-bit vector of [4 x double] containing the subtrahend.
578/// \returns A 256-bit vector of [4 x double] containing the result.
579static __inline__ __m256d __DEFAULT_FN_ATTRS256
580_mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C)
581{
582 return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);
583}
584
585/// Computes a negated multiply-add of 256-bit vectors of [8 x float].
586/// For each element, computes <c> -(__A * __B) + __C </c>.
587///
588/// \headerfile <immintrin.h>
589///
590/// This intrinsic corresponds to the \c VFNMADD213PS instruction.
591///
592/// \param __A
593/// A 256-bit vector of [8 x float] containing the multiplicand.
594/// \param __B
595/// A 256-bit vector of [8 x float] containing the multiplier.
596/// \param __C
597/// A 256-bit vector of [8 x float] containing the addend.
598/// \returns A 256-bit vector of [8 x float] containing the result.
599static __inline__ __m256 __DEFAULT_FN_ATTRS256
600_mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C)
601{
602 return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
603}
604
605/// Computes a negated multiply-add of 256-bit vectors of [4 x double].
606/// For each element, computes <c> -(__A * __B) + __C </c>.
607///
608/// \headerfile <immintrin.h>
609///
610/// This intrinsic corresponds to the \c VFNMADD213PD instruction.
611///
612/// \param __A
613/// A 256-bit vector of [4 x double] containing the multiplicand.
614/// \param __B
615/// A 256-bit vector of [4 x double] containing the multiplier.
616/// \param __C
617/// A 256-bit vector of [4 x double] containing the addend.
618/// \returns A 256-bit vector of [4 x double] containing the result.
619static __inline__ __m256d __DEFAULT_FN_ATTRS256
620_mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C)
621{
622 return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, (__v4df)__C);
623}
624
625/// Computes a negated multiply-subtract of 256-bit vectors of [8 x float].
626/// For each element, computes <c> -(__A * __B) - __C </c>.
627///
628/// \headerfile <immintrin.h>
629///
630/// This intrinsic corresponds to the \c VFNMSUB213PS instruction.
631///
632/// \param __A
633/// A 256-bit vector of [8 x float] containing the multiplicand.
634/// \param __B
635/// A 256-bit vector of [8 x float] containing the multiplier.
636/// \param __C
637/// A 256-bit vector of [8 x float] containing the subtrahend.
638/// \returns A 256-bit vector of [8 x float] containing the result.
639static __inline__ __m256 __DEFAULT_FN_ATTRS256
640_mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C)
641{
642 return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
643}
644
645/// Computes a negated multiply-subtract of 256-bit vectors of [4 x double].
646/// For each element, computes <c> -(__A * __B) - __C </c>.
647///
648/// \headerfile <immintrin.h>
649///
650/// This intrinsic corresponds to the \c VFNMSUB213PD instruction.
651///
652/// \param __A
653/// A 256-bit vector of [4 x double] containing the multiplicand.
654/// \param __B
655/// A 256-bit vector of [4 x double] containing the multiplier.
656/// \param __C
657/// A 256-bit vector of [4 x double] containing the subtrahend.
658/// \returns A 256-bit vector of [4 x double] containing the result.
659static __inline__ __m256d __DEFAULT_FN_ATTRS256
660_mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C)
661{
662 return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, -(__v4df)__C);
663}
664
665/// Computes a multiply with alternating add/subtract of 256-bit vectors of
666/// [8 x float].
667/// \code
668/// result[31:0] = (__A[31:0] * __B[31:0]) - __C[31:0]
669/// result[63:32] = (__A[63:32] * __B[63:32]) + __C[63:32]
670/// result[95:64] = (__A[95:64] * __B[95:64]) - __C[95:64]
671/// result[127:96] = (__A[127:96] * __B[127:96]) + __C[127:96]
672/// result[159:128] = (__A[159:128] * __B[159:128]) - __C[159:128]
673/// result[191:160] = (__A[191:160] * __B[191:160]) + __C[191:160]
674/// result[223:192] = (__A[223:192] * __B[223:192]) - __C[223:192]
675/// result[255:224] = (__A[255:224] * __B[255:224]) + __C[255:224]
676/// \endcode
677///
678/// \headerfile <immintrin.h>
679///
680/// This intrinsic corresponds to the \c VFMADDSUB213PS instruction.
681///
682/// \param __A
683/// A 256-bit vector of [8 x float] containing the multiplicand.
684/// \param __B
685/// A 256-bit vector of [8 x float] containing the multiplier.
686/// \param __C
687/// A 256-bit vector of [8 x float] containing the addend/subtrahend.
688/// \returns A 256-bit vector of [8 x float] containing the result.
689static __inline__ __m256 __DEFAULT_FN_ATTRS256
690_mm256_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C)
691{
692 return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
693}
694
695/// Computes a multiply with alternating add/subtract of 256-bit vectors of
696/// [4 x double].
697/// \code
698/// result[63:0] = (__A[63:0] * __B[63:0]) - __C[63:0]
699/// result[127:64] = (__A[127:64] * __B[127:64]) + __C[127:64]
700/// result[191:128] = (__A[191:128] * __B[191:128]) - __C[191:128]
701/// result[255:192] = (__A[255:192] * __B[255:192]) + __C[255:192]
702/// \endcode
703///
704/// \headerfile <immintrin.h>
705///
706/// This intrinsic corresponds to the \c VFMADDSUB213PD instruction.
707///
708/// \param __A
709/// A 256-bit vector of [4 x double] containing the multiplicand.
710/// \param __B
711/// A 256-bit vector of [4 x double] containing the multiplier.
712/// \param __C
713/// A 256-bit vector of [4 x double] containing the addend/subtrahend.
714/// \returns A 256-bit vector of [4 x double] containing the result.
715static __inline__ __m256d __DEFAULT_FN_ATTRS256
716_mm256_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C)
717{
718 return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
719}
720
721/// Computes a vector multiply with alternating add/subtract of 256-bit
722/// vectors of [8 x float].
723/// \code
724/// result[31:0] = (__A[31:0] * __B[31:0]) + __C[31:0]
725/// result[63:32] = (__A[63:32] * __B[63:32]) - __C[63:32]
726/// result[95:64] = (__A[95:64] * __B[95:64]) + __C[95:64]
727/// result[127:96] = (__A[127:96] * __B[127:96]) - __C[127:96]
728/// result[159:128] = (__A[159:128] * __B[159:128]) + __C[159:128]
729/// result[191:160] = (__A[191:160] * __B[191:160]) - __C[191:160]
730/// result[223:192] = (__A[223:192] * __B[223:192]) + __C[223:192]
731/// result[255:224] = (__A[255:224] * __B[255:224]) - __C[255:224]
732/// \endcode
733///
734/// \headerfile <immintrin.h>
735///
736/// This intrinsic corresponds to the \c VFMSUBADD213PS instruction.
737///
738/// \param __A
739/// A 256-bit vector of [8 x float] containing the multiplicand.
740/// \param __B
741/// A 256-bit vector of [8 x float] containing the multiplier.
742/// \param __C
743/// A 256-bit vector of [8 x float] containing the addend/subtrahend.
744/// \returns A 256-bit vector of [8 x float] containing the result.
745static __inline__ __m256 __DEFAULT_FN_ATTRS256
746_mm256_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C)
747{
748 return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
749}
750
751/// Computes a vector multiply with alternating add/subtract of 256-bit
752/// vectors of [4 x double].
753/// \code
754/// result[63:0] = (__A[63:0] * __B[63:0]) + __C[63:0]
755/// result[127:64] = (__A[127:64] * __B[127:64]) - __C[127:64]
756/// result[191:128] = (__A[191:128] * __B[191:128]) + __C[191:128]
757/// result[255:192] = (__A[255:192] * __B[255:192]) - __C[255:192]
758/// \endcode
759///
760/// \headerfile <immintrin.h>
761///
762/// This intrinsic corresponds to the \c VFMSUBADD213PD instruction.
763///
764/// \param __A
765/// A 256-bit vector of [4 x double] containing the multiplicand.
766/// \param __B
767/// A 256-bit vector of [4 x double] containing the multiplier.
768/// \param __C
769/// A 256-bit vector of [4 x double] containing the addend/subtrahend.
770/// \returns A 256-bit vector of [4 x double] containing the result.
771static __inline__ __m256d __DEFAULT_FN_ATTRS256
772_mm256_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C)
773{
774 return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);
775}
776
777#undef __DEFAULT_FN_ATTRS128
778#undef __DEFAULT_FN_ATTRS256
779
780#endif /* __FMAINTRIN_H */
781

Warning: This file is not a C or C++ file. It does not have highlighting.

source code of clang/lib/Headers/fmaintrin.h