fmaintrin.h source code [clang/lib/Headers/fmaintrin.h]

Warning: This file is not a C or C++ file. It does not have highlighting.

1	/*===---- fmaintrin.h - FMA intrinsics -------------------------------------===
2	*
3	* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	* See https://llvm.org/LICENSE.txt for license information.
5	* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	*
7	*===-----------------------------------------------------------------------===
8	*/
9
10	#ifndef __IMMINTRIN_H
11	#error "Never use <fmaintrin.h> directly; include <immintrin.h> instead."
12	#endif
13
14	#ifndef __FMAINTRIN_H
15	#define __FMAINTRIN_H
16
17	/* Define the default attributes for the functions in this file: always inlined, no debug info, the "fma" target feature, and a minimum vector width of 128 or 256 bits. */
18	#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("fma"), __min_vector_width__(128)))
19	#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("fma"), __min_vector_width__(256)))
20
21	/// Computes a multiply-add of 128-bit vectors of [4 x float].
22	/// For each element, computes <c> (__A * __B) + __C </c>.
23	///
24	/// \headerfile <immintrin.h>
25	///
26	/// This intrinsic corresponds to the \c VFMADD213PS instruction.
27	///
28	/// \param __A
29	/// A 128-bit vector of [4 x float] containing the multiplicand.
30	/// \param __B
31	/// A 128-bit vector of [4 x float] containing the multiplier.
32	/// \param __C
33	/// A 128-bit vector of [4 x float] containing the addend.
34	/// \returns A 128-bit vector of [4 x float] containing the result.
35	static __inline__ __m128 __DEFAULT_FN_ATTRS128
36	_mm_fmadd_ps(__m128 __A, __m128 __B, __m128 __C)
37	{ /* Forwards __A, __B and __C unchanged to the packed single-precision multiply-add builtin. */
38	return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
39	}
40
41	/// Computes a multiply-add of 128-bit vectors of [2 x double].
42	/// For each element, computes <c> (__A * __B) + __C </c>.
43	///
44	/// \headerfile <immintrin.h>
45	///
46	/// This intrinsic corresponds to the \c VFMADD213PD instruction.
47	///
48	/// \param __A
49	/// A 128-bit vector of [2 x double] containing the multiplicand.
50	/// \param __B
51	/// A 128-bit vector of [2 x double] containing the multiplier.
52	/// \param __C
53	/// A 128-bit vector of [2 x double] containing the addend.
54	/// \returns A 128-bit vector of [2 x double] containing the result.
55	static __inline__ __m128d __DEFAULT_FN_ATTRS128
56	_mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C)
57	{ /* Forwards __A, __B and __C unchanged to the packed double-precision multiply-add builtin. */
58	return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
59	}
60
61	/// Computes a scalar multiply-add of the single-precision values in the
62	/// low 32 bits of 128-bit vectors of [4 x float].
63	/// \code
64	/// result[31:0] = (__A[31:0] * __B[31:0]) + __C[31:0]
65	/// result[127:32] = __A[127:32]
66	/// \endcode
67	///
68	/// \headerfile <immintrin.h>
69	///
70	/// This intrinsic corresponds to the \c VFMADD213SS instruction.
71	///
72	/// \param __A
73	/// A 128-bit vector of [4 x float] containing the multiplicand in the low
74	/// 32 bits.
75	/// \param __B
76	/// A 128-bit vector of [4 x float] containing the multiplier in the low
77	/// 32 bits.
78	/// \param __C
79	/// A 128-bit vector of [4 x float] containing the addend in the low
80	/// 32 bits.
81	/// \returns A 128-bit vector of [4 x float] containing the result in the low
82	/// 32 bits and a copy of \a __A[127:32] in the upper 96 bits.
83	static __inline__ __m128 __DEFAULT_FN_ATTRS128
84	_mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C)
85	{ /* Forwards __A, __B and __C unchanged to the scalar single-precision multiply-add builtin. */
86	return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
87	}
88
89	/// Computes a scalar multiply-add of the double-precision values in the
90	/// low 64 bits of 128-bit vectors of [2 x double].
91	/// \code
92	/// result[63:0] = (__A[63:0] * __B[63:0]) + __C[63:0]
93	/// result[127:64] = __A[127:64]
94	/// \endcode
95	///
96	/// \headerfile <immintrin.h>
97	///
98	/// This intrinsic corresponds to the \c VFMADD213SD instruction.
99	///
100	/// \param __A
101	/// A 128-bit vector of [2 x double] containing the multiplicand in the low
102	/// 64 bits.
103	/// \param __B
104	/// A 128-bit vector of [2 x double] containing the multiplier in the low
105	/// 64 bits.
106	/// \param __C
107	/// A 128-bit vector of [2 x double] containing the addend in the low
108	/// 64 bits.
109	/// \returns A 128-bit vector of [2 x double] containing the result in the low
110	/// 64 bits and a copy of \a __A[127:64] in the upper 64 bits.
111	static __inline__ __m128d __DEFAULT_FN_ATTRS128
112	_mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C)
113	{ /* Forwards __A, __B and __C unchanged to the scalar double-precision multiply-add builtin. */
114	return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, (__v2df)__C);
115	}
116
117	/// Computes a multiply-subtract of 128-bit vectors of [4 x float].
118	/// For each element, computes <c> (__A * __B) - __C </c>.
119	///
120	/// \headerfile <immintrin.h>
121	///
122	/// This intrinsic corresponds to the \c VFMSUB213PS instruction.
123	///
124	/// \param __A
125	/// A 128-bit vector of [4 x float] containing the multiplicand.
126	/// \param __B
127	/// A 128-bit vector of [4 x float] containing the multiplier.
128	/// \param __C
129	/// A 128-bit vector of [4 x float] containing the subtrahend.
130	/// \returns A 128-bit vector of [4 x float] containing the result.
131	static __inline__ __m128 __DEFAULT_FN_ATTRS128
132	_mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C)
133	{ /* (__A * __B) - __C is expressed as a multiply-add with __C negated. */
134	return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
135	}
136
137	/// Computes a multiply-subtract of 128-bit vectors of [2 x double].
138	/// For each element, computes <c> (__A * __B) - __C </c>.
139	///
140	/// \headerfile <immintrin.h>
141	///
142	/// This intrinsic corresponds to the \c VFMSUB213PD instruction.
143	///
144	/// \param __A
145	/// A 128-bit vector of [2 x double] containing the multiplicand.
146	/// \param __B
147	/// A 128-bit vector of [2 x double] containing the multiplier.
148	/// \param __C
149	/// A 128-bit vector of [2 x double] containing the subtrahend.
150	/// \returns A 128-bit vector of [2 x double] containing the result.
151	static __inline__ __m128d __DEFAULT_FN_ATTRS128
152	_mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C)
153	{ /* (__A * __B) - __C is expressed as a multiply-add with __C negated. */
154	return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
155	}
156
157	/// Computes a scalar multiply-subtract of the single-precision values in
158	/// the low 32 bits of 128-bit vectors of [4 x float].
159	/// \code
160	/// result[31:0] = (__A[31:0] * __B[31:0]) - __C[31:0]
161	/// result[127:32] = __A[127:32]
162	/// \endcode
163	///
164	/// \headerfile <immintrin.h>
165	///
166	/// This intrinsic corresponds to the \c VFMSUB213SS instruction.
167	///
168	/// \param __A
169	/// A 128-bit vector of [4 x float] containing the multiplicand in the low
170	/// 32 bits.
171	/// \param __B
172	/// A 128-bit vector of [4 x float] containing the multiplier in the low
173	/// 32 bits.
174	/// \param __C
175	/// A 128-bit vector of [4 x float] containing the subtrahend in the low
176	/// 32 bits.
177	/// \returns A 128-bit vector of [4 x float] containing the result in the low
178	/// 32 bits, and a copy of \a __A[127:32] in the upper 96 bits.
179	static __inline__ __m128 __DEFAULT_FN_ATTRS128
180	_mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C)
181	{ /* Scalar multiply-subtract expressed as the scalar multiply-add builtin with __C negated. */
182	return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
183	}
184
185	/// Computes a scalar multiply-subtract of the double-precision values in
186	/// the low 64 bits of 128-bit vectors of [2 x double].
187	/// \code
188	/// result[63:0] = (__A[63:0] * __B[63:0]) - __C[63:0]
189	/// result[127:64] = __A[127:64]
190	/// \endcode
191	///
192	/// \headerfile <immintrin.h>
193	///
194	/// This intrinsic corresponds to the \c VFMSUB213SD instruction.
195	///
196	/// \param __A
197	/// A 128-bit vector of [2 x double] containing the multiplicand in the low
198	/// 64 bits.
199	/// \param __B
200	/// A 128-bit vector of [2 x double] containing the multiplier in the low
201	/// 64 bits.
202	/// \param __C
203	/// A 128-bit vector of [2 x double] containing the subtrahend in the low
204	/// 64 bits.
205	/// \returns A 128-bit vector of [2 x double] containing the result in the low
206	/// 64 bits, and a copy of \a __A[127:64] in the upper 64 bits.
207	static __inline__ __m128d __DEFAULT_FN_ATTRS128
208	_mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C)
209	{ /* Scalar multiply-subtract expressed as the scalar multiply-add builtin with __C negated. */
210	return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, -(__v2df)__C);
211	}
212
213	/// Computes a negated multiply-add of 128-bit vectors of [4 x float].
214	/// For each element, computes <c> -(__A * __B) + __C </c>.
215	///
216	/// \headerfile <immintrin.h>
217	///
218	/// This intrinsic corresponds to the \c VFNMADD213PS instruction.
219	///
220	/// \param __A
221	/// A 128-bit vector of [4 x float] containing the multiplicand.
222	/// \param __B
223	/// A 128-bit vector of [4 x float] containing the multiplier.
224	/// \param __C
225	/// A 128-bit vector of [4 x float] containing the addend.
226	/// \returns A 128-bit vector of [4 x float] containing the result.
227	static __inline__ __m128 __DEFAULT_FN_ATTRS128
228	_mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C)
229	{ /* -(__A * __B) + __C: negating __A negates the product before __C is added. */
230	return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
231	}
232
233	/// Computes a negated multiply-add of 128-bit vectors of [2 x double].
234	/// For each element, computes <c> -(__A * __B) + __C </c>.
235	///
236	/// \headerfile <immintrin.h>
237	///
238	/// This intrinsic corresponds to the \c VFNMADD213PD instruction.
239	///
240	/// \param __A
241	/// A 128-bit vector of [2 x double] containing the multiplicand.
242	/// \param __B
243	/// A 128-bit vector of [2 x double] containing the multiplier.
244	/// \param __C
245	/// A 128-bit vector of [2 x double] containing the addend.
246	/// \returns A 128-bit vector of [2 x double] containing the result.
247	static __inline__ __m128d __DEFAULT_FN_ATTRS128
248	_mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C)
249	{ /* -(__A * __B) + __C: negating __A negates the product before __C is added. */
250	return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, (__v2df)__C);
251	}
252
253	/// Computes a scalar negated multiply-add of the single-precision values in
254	/// the low 32 bits of 128-bit vectors of [4 x float].
255	/// \code
256	/// result[31:0] = -(__A[31:0] * __B[31:0]) + __C[31:0]
257	/// result[127:32] = __A[127:32]
258	/// \endcode
259	///
260	/// \headerfile <immintrin.h>
261	///
262	/// This intrinsic corresponds to the \c VFNMADD213SS instruction.
263	///
264	/// \param __A
265	/// A 128-bit vector of [4 x float] containing the multiplicand in the low
266	/// 32 bits.
267	/// \param __B
268	/// A 128-bit vector of [4 x float] containing the multiplier in the low
269	/// 32 bits.
270	/// \param __C
271	/// A 128-bit vector of [4 x float] containing the addend in the low
272	/// 32 bits.
273	/// \returns A 128-bit vector of [4 x float] containing the result in the low
274	/// 32 bits, and a copy of \a __A[127:32] in the upper 96 bits.
275	static __inline__ __m128 __DEFAULT_FN_ATTRS128
276	_mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C)
277	{ /* __B (not __A) is negated: the result's upper elements are documented as copied from __A, and negating the whole __A vector would alter them. */
278	return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, (__v4sf)__C);
279	}
280
281	/// Computes a scalar negated multiply-add of the double-precision values
282	/// in the low 64 bits of 128-bit vectors of [2 x double].
283	/// \code
284	/// result[63:0] = -(__A[63:0] * __B[63:0]) + __C[63:0]
285	/// result[127:64] = __A[127:64]
286	/// \endcode
287	///
288	/// \headerfile <immintrin.h>
289	///
290	/// This intrinsic corresponds to the \c VFNMADD213SD instruction.
291	///
292	/// \param __A
293	/// A 128-bit vector of [2 x double] containing the multiplicand in the low
294	/// 64 bits.
295	/// \param __B
296	/// A 128-bit vector of [2 x double] containing the multiplier in the low
297	/// 64 bits.
298	/// \param __C
299	/// A 128-bit vector of [2 x double] containing the addend in the low
300	/// 64 bits.
301	/// \returns A 128-bit vector of [2 x double] containing the result in the low
302	/// 64 bits, and a copy of \a __A[127:64] in the upper 64 bits.
303	static __inline__ __m128d __DEFAULT_FN_ATTRS128
304	_mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C)
305	{ /* __B (not __A) is negated: the result's upper element is documented as copied from __A, and negating the whole __A vector would alter it. */
306	return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, (__v2df)__C);
307	}
308
309	/// Computes a negated multiply-subtract of 128-bit vectors of [4 x float].
310	/// For each element, computes <c> -(__A * __B) - __C </c>.
311	///
312	/// \headerfile <immintrin.h>
313	///
314	/// This intrinsic corresponds to the \c VFNMSUB213PS instruction.
315	///
316	/// \param __A
317	/// A 128-bit vector of [4 x float] containing the multiplicand.
318	/// \param __B
319	/// A 128-bit vector of [4 x float] containing the multiplier.
320	/// \param __C
321	/// A 128-bit vector of [4 x float] containing the subtrahend.
322	/// \returns A 128-bit vector of [4 x float] containing the result.
323	static __inline__ __m128 __DEFAULT_FN_ATTRS128
324	_mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C)
325	{ /* -(__A * __B) - __C: both __A and __C are negated before the multiply-add. */
326	return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
327	}
328
329	/// Computes a negated multiply-subtract of 128-bit vectors of [2 x double].
330	/// For each element, computes <c> -(__A * __B) - __C </c>.
331	///
332	/// \headerfile <immintrin.h>
333	///
334	/// This intrinsic corresponds to the \c VFNMSUB213PD instruction.
335	///
336	/// \param __A
337	/// A 128-bit vector of [2 x double] containing the multiplicand.
338	/// \param __B
339	/// A 128-bit vector of [2 x double] containing the multiplier.
340	/// \param __C
341	/// A 128-bit vector of [2 x double] containing the subtrahend.
342	/// \returns A 128-bit vector of [2 x double] containing the result.
343	static __inline__ __m128d __DEFAULT_FN_ATTRS128
344	_mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C)
345	{ /* -(__A * __B) - __C: both __A and __C are negated before the multiply-add. */
346	return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C);
347	}
348
349	/// Computes a scalar negated multiply-subtract of the single-precision
350	/// values in the low 32 bits of 128-bit vectors of [4 x float].
351	/// \code
352	/// result[31:0] = -(__A[31:0] * __B[31:0]) - __C[31:0]
353	/// result[127:32] = __A[127:32]
354	/// \endcode
355	///
356	/// \headerfile <immintrin.h>
357	///
358	/// This intrinsic corresponds to the \c VFNMSUB213SS instruction.
359	///
360	/// \param __A
361	/// A 128-bit vector of [4 x float] containing the multiplicand in the low
362	/// 32 bits.
363	/// \param __B
364	/// A 128-bit vector of [4 x float] containing the multiplier in the low
365	/// 32 bits.
366	/// \param __C
367	/// A 128-bit vector of [4 x float] containing the subtrahend in the low
368	/// 32 bits.
369	/// \returns A 128-bit vector of [4 x float] containing the result in the low
370	/// 32 bits, and a copy of \a __A[127:32] in the upper 96 bits.
371	static __inline__ __m128 __DEFAULT_FN_ATTRS128
372	_mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C)
373	{ /* __B and __C are negated; __A is left intact because the result's upper elements are documented as copied from __A. */
374	return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, -(__v4sf)__C);
375	}
376
377	/// Computes a scalar negated multiply-subtract of the double-precision
378	/// values in the low 64 bits of 128-bit vectors of [2 x double].
379	/// \code
380	/// result[63:0] = -(__A[63:0] * __B[63:0]) - __C[63:0]
381	/// result[127:64] = __A[127:64]
382	/// \endcode
383	///
384	/// \headerfile <immintrin.h>
385	///
386	/// This intrinsic corresponds to the \c VFNMSUB213SD instruction.
387	///
388	/// \param __A
389	/// A 128-bit vector of [2 x double] containing the multiplicand in the low
390	/// 64 bits.
391	/// \param __B
392	/// A 128-bit vector of [2 x double] containing the multiplier in the low
393	/// 64 bits.
394	/// \param __C
395	/// A 128-bit vector of [2 x double] containing the subtrahend in the low
396	/// 64 bits.
397	/// \returns A 128-bit vector of [2 x double] containing the result in the low
398	/// 64 bits, and a copy of \a __A[127:64] in the upper 64 bits.
399	static __inline__ __m128d __DEFAULT_FN_ATTRS128
400	_mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C)
401	{ /* __B and __C are negated; __A is left intact because the result's upper element is documented as copied from __A. */
402	return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, -(__v2df)__C);
403	}
404
405	/// Computes a multiply with alternating add/subtract of 128-bit vectors of
406	/// [4 x float].
407	/// \code
408	/// result[31:0] = (__A[31:0] * __B[31:0]) - __C[31:0]
409	/// result[63:32] = (__A[63:32] * __B[63:32]) + __C[63:32]
410	/// result[95:64] = (__A[95:64] * __B[95:64]) - __C[95:64]
411	/// result[127:96] = (__A[127:96] * __B[127:96]) + __C[127:96]
412	/// \endcode
413	///
414	/// \headerfile <immintrin.h>
415	///
416	/// This intrinsic corresponds to the \c VFMADDSUB213PS instruction.
417	///
418	/// \param __A
419	/// A 128-bit vector of [4 x float] containing the multiplicand.
420	/// \param __B
421	/// A 128-bit vector of [4 x float] containing the multiplier.
422	/// \param __C
423	/// A 128-bit vector of [4 x float] containing the addend/subtrahend.
424	/// \returns A 128-bit vector of [4 x float] containing the result.
425	static __inline__ __m128 __DEFAULT_FN_ATTRS128
426	_mm_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C)
427	{ /* Forwards __A, __B and __C unchanged to the alternating add/subtract builtin. */
428	return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
429	}
430
431	/// Computes a multiply with alternating add/subtract of 128-bit vectors of
432	/// [2 x double].
433	/// \code
434	/// result[63:0] = (__A[63:0] * __B[63:0]) - __C[63:0]
435	/// result[127:64] = (__A[127:64] * __B[127:64]) + __C[127:64]
436	/// \endcode
437	///
438	/// \headerfile <immintrin.h>
439	///
440	/// This intrinsic corresponds to the \c VFMADDSUB213PD instruction.
441	///
442	/// \param __A
443	/// A 128-bit vector of [2 x double] containing the multiplicand.
444	/// \param __B
445	/// A 128-bit vector of [2 x double] containing the multiplier.
446	/// \param __C
447	/// A 128-bit vector of [2 x double] containing the addend/subtrahend.
448	/// \returns A 128-bit vector of [2 x double] containing the result.
449	static __inline__ __m128d __DEFAULT_FN_ATTRS128
450	_mm_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C)
451	{ /* Forwards __A, __B and __C unchanged to the alternating add/subtract builtin. */
452	return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
453	}
454
455	/// Computes a multiply with alternating add/subtract of 128-bit vectors of
456	/// [4 x float].
457	/// \code
458	/// result[31:0] = (__A[31:0] * __B[31:0]) + __C[31:0]
459	/// result[63:32] = (__A[63:32] * __B[63:32]) - __C[63:32]
460	/// result[95:64] = (__A[95:64] * __B[95:64]) + __C[95:64]
461	/// result[127:96] = (__A[127:96] * __B[127:96]) - __C[127:96]
462	/// \endcode
463	///
464	/// \headerfile <immintrin.h>
465	///
466	/// This intrinsic corresponds to the \c VFMSUBADD213PS instruction.
467	///
468	/// \param __A
469	/// A 128-bit vector of [4 x float] containing the multiplicand.
470	/// \param __B
471	/// A 128-bit vector of [4 x float] containing the multiplier.
472	/// \param __C
473	/// A 128-bit vector of [4 x float] containing the addend/subtrahend.
474	/// \returns A 128-bit vector of [4 x float] containing the result.
475	static __inline__ __m128 __DEFAULT_FN_ATTRS128
476	_mm_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C)
477	{ /* Reuses the fmaddsub builtin: negating __C flips add and subtract in every lane. */
478	return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
479	}
480
481	/// Computes a multiply with alternating add/subtract of 128-bit vectors of
482	/// [2 x double].
483	/// \code
484	/// result[63:0] = (__A[63:0] * __B[63:0]) + __C[63:0]
485	/// result[127:64] = (__A[127:64] * __B[127:64]) - __C[127:64]
486	/// \endcode
487	///
488	/// \headerfile <immintrin.h>
489	///
490	/// This intrinsic corresponds to the \c VFMSUBADD213PD instruction.
491	///
492	/// \param __A
493	/// A 128-bit vector of [2 x double] containing the multiplicand.
494	/// \param __B
495	/// A 128-bit vector of [2 x double] containing the multiplier.
496	/// \param __C
497	/// A 128-bit vector of [2 x double] containing the addend/subtrahend.
498	/// \returns A 128-bit vector of [2 x double] containing the result.
499	static __inline__ __m128d __DEFAULT_FN_ATTRS128
500	_mm_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C)
501	{ /* Reuses the fmaddsub builtin: negating __C flips add and subtract in every lane. */
502	return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
503	}
504
505	/// Computes a multiply-add of 256-bit vectors of [8 x float].
506	/// For each element, computes <c> (__A * __B) + __C </c>.
507	///
508	/// \headerfile <immintrin.h>
509	///
510	/// This intrinsic corresponds to the \c VFMADD213PS instruction.
511	///
512	/// \param __A
513	/// A 256-bit vector of [8 x float] containing the multiplicand.
514	/// \param __B
515	/// A 256-bit vector of [8 x float] containing the multiplier.
516	/// \param __C
517	/// A 256-bit vector of [8 x float] containing the addend.
518	/// \returns A 256-bit vector of [8 x float] containing the result.
519	static __inline__ __m256 __DEFAULT_FN_ATTRS256
520	_mm256_fmadd_ps(__m256 __A, __m256 __B, __m256 __C)
521	{ /* Forwards __A, __B and __C unchanged to the 256-bit single-precision multiply-add builtin. */
522	return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
523	}
524
525	/// Computes a multiply-add of 256-bit vectors of [4 x double].
526	/// For each element, computes <c> (__A * __B) + __C </c>.
527	///
528	/// \headerfile <immintrin.h>
529	///
530	/// This intrinsic corresponds to the \c VFMADD213PD instruction.
531	///
532	/// \param __A
533	/// A 256-bit vector of [4 x double] containing the multiplicand.
534	/// \param __B
535	/// A 256-bit vector of [4 x double] containing the multiplier.
536	/// \param __C
537	/// A 256-bit vector of [4 x double] containing the addend.
538	/// \returns A 256-bit vector of [4 x double] containing the result.
539	static __inline__ __m256d __DEFAULT_FN_ATTRS256
540	_mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C)
541	{ /* Forwards __A, __B and __C unchanged to the 256-bit double-precision multiply-add builtin. */
542	return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
543	}
544
545	/// Computes a multiply-subtract of 256-bit vectors of [8 x float].
546	/// For each element, computes <c> (__A * __B) - __C </c>.
547	///
548	/// \headerfile <immintrin.h>
549	///
550	/// This intrinsic corresponds to the \c VFMSUB213PS instruction.
551	///
552	/// \param __A
553	/// A 256-bit vector of [8 x float] containing the multiplicand.
554	/// \param __B
555	/// A 256-bit vector of [8 x float] containing the multiplier.
556	/// \param __C
557	/// A 256-bit vector of [8 x float] containing the subtrahend.
558	/// \returns A 256-bit vector of [8 x float] containing the result.
559	static __inline__ __m256 __DEFAULT_FN_ATTRS256
560	_mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C)
561	{ /* (__A * __B) - __C is expressed as a multiply-add with __C negated. */
562	return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
563	}
564
565	/// Computes a multiply-subtract of 256-bit vectors of [4 x double].
566	/// For each element, computes <c> (__A * __B) - __C </c>.
567	///
568	/// \headerfile <immintrin.h>
569	///
570	/// This intrinsic corresponds to the \c VFMSUB213PD instruction.
571	///
572	/// \param __A
573	/// A 256-bit vector of [4 x double] containing the multiplicand.
574	/// \param __B
575	/// A 256-bit vector of [4 x double] containing the multiplier.
576	/// \param __C
577	/// A 256-bit vector of [4 x double] containing the subtrahend.
578	/// \returns A 256-bit vector of [4 x double] containing the result.
579	static __inline__ __m256d __DEFAULT_FN_ATTRS256
580	_mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C)
581	{ /* (__A * __B) - __C is expressed as a multiply-add with __C negated. */
582	return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);
583	}
584
585	/// Computes a negated multiply-add of 256-bit vectors of [8 x float].
586	/// For each element, computes <c> -(__A * __B) + __C </c>.
587	///
588	/// \headerfile <immintrin.h>
589	///
590	/// This intrinsic corresponds to the \c VFNMADD213PS instruction.
591	///
592	/// \param __A
593	/// A 256-bit vector of [8 x float] containing the multiplicand.
594	/// \param __B
595	/// A 256-bit vector of [8 x float] containing the multiplier.
596	/// \param __C
597	/// A 256-bit vector of [8 x float] containing the addend.
598	/// \returns A 256-bit vector of [8 x float] containing the result.
599	static __inline__ __m256 __DEFAULT_FN_ATTRS256
600	_mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C)
601	{ /* -(__A * __B) + __C: negating __A negates the product before __C is added. */
602	return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
603	}
604
605	/// Computes a negated multiply-add of 256-bit vectors of [4 x double].
606	/// For each element, computes <c> -(__A * __B) + __C </c>.
607	///
608	/// \headerfile <immintrin.h>
609	///
610	/// This intrinsic corresponds to the \c VFNMADD213PD instruction.
611	///
612	/// \param __A
613	/// A 256-bit vector of [4 x double] containing the multiplicand.
614	/// \param __B
615	/// A 256-bit vector of [4 x double] containing the multiplier.
616	/// \param __C
617	/// A 256-bit vector of [4 x double] containing the addend.
618	/// \returns A 256-bit vector of [4 x double] containing the result.
619	static __inline__ __m256d __DEFAULT_FN_ATTRS256
620	_mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C)
621	{ /* -(__A * __B) + __C: negating __A negates the product before __C is added. */
622	return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, (__v4df)__C);
623	}
624
625	/// Computes a negated multiply-subtract of 256-bit vectors of [8 x float].
626	/// For each element, computes <c> -(__A * __B) - __C </c>.
627	///
628	/// \headerfile <immintrin.h>
629	///
630	/// This intrinsic corresponds to the \c VFNMSUB213PS instruction.
631	///
632	/// \param __A
633	/// A 256-bit vector of [8 x float] containing the multiplicand.
634	/// \param __B
635	/// A 256-bit vector of [8 x float] containing the multiplier.
636	/// \param __C
637	/// A 256-bit vector of [8 x float] containing the subtrahend.
638	/// \returns A 256-bit vector of [8 x float] containing the result.
639	static __inline__ __m256 __DEFAULT_FN_ATTRS256
640	_mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C)
641	{ /* -(__A * __B) - __C: both __A and __C are negated before the multiply-add. */
642	return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
643	}
644
645	/// Computes a negated multiply-subtract of 256-bit vectors of [4 x double].
646	/// For each element, computes <c> -(__A * __B) - __C </c>.
647	///
648	/// \headerfile <immintrin.h>
649	///
650	/// This intrinsic corresponds to the \c VFNMSUB213PD instruction.
651	///
652	/// \param __A
653	/// A 256-bit vector of [4 x double] containing the multiplicand.
654	/// \param __B
655	/// A 256-bit vector of [4 x double] containing the multiplier.
656	/// \param __C
657	/// A 256-bit vector of [4 x double] containing the subtrahend.
658	/// \returns A 256-bit vector of [4 x double] containing the result.
659	static __inline__ __m256d __DEFAULT_FN_ATTRS256
660	_mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C)
661	{ /* -(__A * __B) - __C: both __A and __C are negated before the multiply-add. */
662	return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, -(__v4df)__C);
663	}
664
665	/// Computes a multiply with alternating add/subtract of 256-bit vectors of
666	/// [8 x float].
667	/// \code
668	/// result[31:0] = (__A[31:0] * __B[31:0]) - __C[31:0]
669	/// result[63:32] = (__A[63:32] * __B[63:32]) + __C[63:32]
670	/// result[95:64] = (__A[95:64] * __B[95:64]) - __C[95:64]
671	/// result[127:96] = (__A[127:96] * __B[127:96]) + __C[127:96]
672	/// result[159:128] = (__A[159:128] * __B[159:128]) - __C[159:128]
673	/// result[191:160] = (__A[191:160] * __B[191:160]) + __C[191:160]
674	/// result[223:192] = (__A[223:192] * __B[223:192]) - __C[223:192]
675	/// result[255:224] = (__A[255:224] * __B[255:224]) + __C[255:224]
676	/// \endcode
677	///
678	/// \headerfile <immintrin.h>
679	///
680	/// This intrinsic corresponds to the \c VFMADDSUB213PS instruction.
681	///
682	/// \param __A
683	/// A 256-bit vector of [8 x float] containing the multiplicand.
684	/// \param __B
685	/// A 256-bit vector of [8 x float] containing the multiplier.
686	/// \param __C
687	/// A 256-bit vector of [8 x float] containing the addend/subtrahend.
688	/// \returns A 256-bit vector of [8 x float] containing the result.
689	static __inline__ __m256 __DEFAULT_FN_ATTRS256
690	_mm256_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C)
691	{ /* Forwards __A, __B and __C unchanged to the 256-bit alternating add/subtract builtin. */
692	return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
693	}
694
695	/// Computes a multiply with alternating add/subtract of 256-bit vectors of
696	/// [4 x double].
697	/// \code
698	/// result[63:0] = (__A[63:0] * __B[63:0]) - __C[63:0]
699	/// result[127:64] = (__A[127:64] * __B[127:64]) + __C[127:64]
700	/// result[191:128] = (__A[191:128] * __B[191:128]) - __C[191:128]
701	/// result[255:192] = (__A[255:192] * __B[255:192]) + __C[255:192]
702	/// \endcode
703	///
704	/// \headerfile <immintrin.h>
705	///
706	/// This intrinsic corresponds to the \c VFMADDSUB213PD instruction.
707	///
708	/// \param __A
709	/// A 256-bit vector of [4 x double] containing the multiplicand.
710	/// \param __B
711	/// A 256-bit vector of [4 x double] containing the multiplier.
712	/// \param __C
713	/// A 256-bit vector of [4 x double] containing the addend/subtrahend.
714	/// \returns A 256-bit vector of [4 x double] containing the result.
715	static __inline__ __m256d __DEFAULT_FN_ATTRS256
716	_mm256_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C)
717	{ /* Forwards __A, __B and __C unchanged to the 256-bit alternating add/subtract builtin. */
718	return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
719	}
720
721	/// Computes a vector multiply with alternating add/subtract of 256-bit
722	/// vectors of [8 x float].
723	/// \code
724	/// result[31:0] = (__A[31:0] * __B[31:0]) + __C[31:0]
725	/// result[63:32] = (__A[63:32] * __B[63:32]) - __C[63:32]
726	/// result[95:64] = (__A[95:64] * __B[95:64]) + __C[95:64]
727	/// result[127:96] = (__A[127:96] * __B[127:96]) - __C[127:96]
728	/// result[159:128] = (__A[159:128] * __B[159:128]) + __C[159:128]
729	/// result[191:160] = (__A[191:160] * __B[191:160]) - __C[191:160]
730	/// result[223:192] = (__A[223:192] * __B[223:192]) + __C[223:192]
731	/// result[255:224] = (__A[255:224] * __B[255:224]) - __C[255:224]
732	/// \endcode
733	///
734	/// \headerfile <immintrin.h>
735	///
736	/// This intrinsic corresponds to the \c VFMSUBADD213PS instruction.
737	///
738	/// \param __A
739	/// A 256-bit vector of [8 x float] containing the multiplicand.
740	/// \param __B
741	/// A 256-bit vector of [8 x float] containing the multiplier.
742	/// \param __C
743	/// A 256-bit vector of [8 x float] containing the addend/subtrahend.
744	/// \returns A 256-bit vector of [8 x float] containing the result.
745	static __inline__ __m256 __DEFAULT_FN_ATTRS256
746	_mm256_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C)
747	{ /* Reuses the fmaddsub builtin: negating __C flips add and subtract in every lane. */
748	return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
749	}
750
751	/// Computes a vector multiply with alternating add/subtract of 256-bit
752	/// vectors of [4 x double].
753	/// \code
754	/// result[63:0] = (__A[63:0] * __B[63:0]) + __C[63:0]
755	/// result[127:64] = (__A[127:64] * __B[127:64]) - __C[127:64]
756	/// result[191:128] = (__A[191:128] * __B[191:128]) + __C[191:128]
757	/// result[255:192] = (__A[255:192] * __B[255:192]) - __C[255:192]
758	/// \endcode
759	///
760	/// \headerfile <immintrin.h>
761	///
762	/// This intrinsic corresponds to the \c VFMSUBADD213PD instruction.
763	///
764	/// \param __A
765	/// A 256-bit vector of [4 x double] containing the multiplicand.
766	/// \param __B
767	/// A 256-bit vector of [4 x double] containing the multiplier.
768	/// \param __C
769	/// A 256-bit vector of [4 x double] containing the addend/subtrahend.
770	/// \returns A 256-bit vector of [4 x double] containing the result.
771	static __inline__ __m256d __DEFAULT_FN_ATTRS256
772	_mm256_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C)
773	{ /* Reuses the fmaddsub builtin: negating __C flips add and subtract in every lane. */
774	return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);
775	}
776
777	#undef __DEFAULT_FN_ATTRS128 /* the attribute helper macros are removed again so they stay local to this header */
778	#undef __DEFAULT_FN_ATTRS256
779
780	#endif /* __FMAINTRIN_H */
781

Warning: This file is not a C or C++ file. It does not have highlighting.

source code of clang/lib/Headers/fmaintrin.h