pmmintrin.h source code [clang/lib/Headers/pmmintrin.h]

Warning: This file is not a C or C++ file. It does not have highlighting.

1	/*===---- pmmintrin.h - SSE3 intrinsics ------------------------------------===
2	*
3	* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	* See https://llvm.org/LICENSE.txt for license information.
5	* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	*
7	*===-----------------------------------------------------------------------===
8	*/
9
10	#ifndef __PMMINTRIN_H
11	#define __PMMINTRIN_H
12
/* These intrinsics exist only for x86 targets; reject every other one. */
#if !(defined(__i386__) || defined(__x86_64__))
#error "This header is only meant to be used on x86 and x64 architecture"
#endif
16
17	#include <emmintrin.h>
18
/* Attribute set applied to every inline function defined in this header:
 * force inlining, omit debug info, enable the SSE3 target feature for the
 * function body, and declare a minimum vector width of 128 bits.
 */
#define __DEFAULT_FN_ATTRS                                                     \
  __attribute__((__always_inline__, __nodebug__,                               \
                 __target__("sse3,no-evex512"), __min_vector_width__(128)))
23
24	/// Loads data from an unaligned memory location to elements in a 128-bit
25	/// vector.
26	///
27	/// If the address of the data is not 16-byte aligned, the instruction may
28	/// read two adjacent aligned blocks of memory to retrieve the requested
29	/// data.
30	///
31	/// \headerfile <x86intrin.h>
32	///
33	/// This intrinsic corresponds to the <c> VLDDQU </c> instruction.
34	///
35	/// \param __p
36	/// A pointer to a 128-bit integer vector containing integer values.
37	/// \returns A 128-bit vector containing the moved values.
38	static __inline__ __m128i __DEFAULT_FN_ATTRS
39	_mm_lddqu_si128(__m128i_u const *__p)
40	{
41	return (__m128i)__builtin_ia32_lddqu((char const *)__p);
42	}
43
44	/// Adds the even-indexed values and subtracts the odd-indexed values of
45	/// two 128-bit vectors of [4 x float].
46	///
47	/// \headerfile <x86intrin.h>
48	///
49	/// This intrinsic corresponds to the <c> VADDSUBPS </c> instruction.
50	///
51	/// \param __a
52	/// A 128-bit vector of [4 x float] containing the left source operand.
53	/// \param __b
54	/// A 128-bit vector of [4 x float] containing the right source operand.
55	/// \returns A 128-bit vector of [4 x float] containing the alternating sums and
56	/// differences of both operands.
57	static __inline__ __m128 __DEFAULT_FN_ATTRS
58	_mm_addsub_ps(__m128 __a, __m128 __b)
59	{
60	return __builtin_ia32_addsubps((__v4sf)__a, (__v4sf)__b);
61	}
62
63	/// Horizontally adds the adjacent pairs of values contained in two
64	/// 128-bit vectors of [4 x float].
65	///
66	/// \headerfile <x86intrin.h>
67	///
68	/// This intrinsic corresponds to the <c> VHADDPS </c> instruction.
69	///
70	/// \param __a
71	/// A 128-bit vector of [4 x float] containing one of the source operands.
72	/// The horizontal sums of the values are stored in the lower bits of the
73	/// destination.
74	/// \param __b
75	/// A 128-bit vector of [4 x float] containing one of the source operands.
76	/// The horizontal sums of the values are stored in the upper bits of the
77	/// destination.
78	/// \returns A 128-bit vector of [4 x float] containing the horizontal sums of
79	/// both operands.
80	static __inline__ __m128 __DEFAULT_FN_ATTRS
81	_mm_hadd_ps(__m128 __a, __m128 __b)
82	{
83	return __builtin_ia32_haddps((__v4sf)__a, (__v4sf)__b);
84	}
85
86	/// Horizontally subtracts the adjacent pairs of values contained in two
87	/// 128-bit vectors of [4 x float].
88	///
89	/// \headerfile <x86intrin.h>
90	///
91	/// This intrinsic corresponds to the <c> VHSUBPS </c> instruction.
92	///
93	/// \param __a
94	/// A 128-bit vector of [4 x float] containing one of the source operands.
95	/// The horizontal differences between the values are stored in the lower
96	/// bits of the destination.
97	/// \param __b
98	/// A 128-bit vector of [4 x float] containing one of the source operands.
99	/// The horizontal differences between the values are stored in the upper
100	/// bits of the destination.
101	/// \returns A 128-bit vector of [4 x float] containing the horizontal
102	/// differences of both operands.
103	static __inline__ __m128 __DEFAULT_FN_ATTRS
104	_mm_hsub_ps(__m128 __a, __m128 __b)
105	{
106	return __builtin_ia32_hsubps((__v4sf)__a, (__v4sf)__b);
107	}
108
109	/// Moves and duplicates odd-indexed values from a 128-bit vector
110	/// of [4 x float] to float values stored in a 128-bit vector of
111	/// [4 x float].
112	///
113	/// \headerfile <x86intrin.h>
114	///
115	/// This intrinsic corresponds to the <c> VMOVSHDUP </c> instruction.
116	///
117	/// \param __a
118	/// A 128-bit vector of [4 x float]. \n
119	/// Bits [127:96] of the source are written to bits [127:96] and [95:64] of
120	/// the destination. \n
121	/// Bits [63:32] of the source are written to bits [63:32] and [31:0] of the
122	/// destination.
123	/// \returns A 128-bit vector of [4 x float] containing the moved and duplicated
124	/// values.
125	static __inline__ __m128 __DEFAULT_FN_ATTRS
126	_mm_movehdup_ps(__m128 __a)
127	{
128	return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 1, 1, 3, 3);
129	}
130
131	/// Duplicates even-indexed values from a 128-bit vector of
132	/// [4 x float] to float values stored in a 128-bit vector of [4 x float].
133	///
134	/// \headerfile <x86intrin.h>
135	///
136	/// This intrinsic corresponds to the <c> VMOVSLDUP </c> instruction.
137	///
138	/// \param __a
139	/// A 128-bit vector of [4 x float] \n
140	/// Bits [95:64] of the source are written to bits [127:96] and [95:64] of
141	/// the destination. \n
142	/// Bits [31:0] of the source are written to bits [63:32] and [31:0] of the
143	/// destination.
144	/// \returns A 128-bit vector of [4 x float] containing the moved and duplicated
145	/// values.
146	static __inline__ __m128 __DEFAULT_FN_ATTRS
147	_mm_moveldup_ps(__m128 __a)
148	{
149	return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 0, 2, 2);
150	}
151
152	/// Adds the even-indexed values and subtracts the odd-indexed values of
153	/// two 128-bit vectors of [2 x double].
154	///
155	/// \headerfile <x86intrin.h>
156	///
157	/// This intrinsic corresponds to the <c> VADDSUBPD </c> instruction.
158	///
159	/// \param __a
160	/// A 128-bit vector of [2 x double] containing the left source operand.
161	/// \param __b
162	/// A 128-bit vector of [2 x double] containing the right source operand.
163	/// \returns A 128-bit vector of [2 x double] containing the alternating sums
164	/// and differences of both operands.
165	static __inline__ __m128d __DEFAULT_FN_ATTRS
166	_mm_addsub_pd(__m128d __a, __m128d __b)
167	{
168	return __builtin_ia32_addsubpd((__v2df)__a, (__v2df)__b);
169	}
170
171	/// Horizontally adds the pairs of values contained in two 128-bit
172	/// vectors of [2 x double].
173	///
174	/// \headerfile <x86intrin.h>
175	///
176	/// This intrinsic corresponds to the <c> VHADDPD </c> instruction.
177	///
178	/// \param __a
179	/// A 128-bit vector of [2 x double] containing one of the source operands.
180	/// The horizontal sum of the values is stored in the lower bits of the
181	/// destination.
182	/// \param __b
183	/// A 128-bit vector of [2 x double] containing one of the source operands.
184	/// The horizontal sum of the values is stored in the upper bits of the
185	/// destination.
186	/// \returns A 128-bit vector of [2 x double] containing the horizontal sums of
187	/// both operands.
188	static __inline__ __m128d __DEFAULT_FN_ATTRS
189	_mm_hadd_pd(__m128d __a, __m128d __b)
190	{
191	return __builtin_ia32_haddpd((__v2df)__a, (__v2df)__b);
192	}
193
194	/// Horizontally subtracts the pairs of values contained in two 128-bit
195	/// vectors of [2 x double].
196	///
197	/// \headerfile <x86intrin.h>
198	///
199	/// This intrinsic corresponds to the <c> VHSUBPD </c> instruction.
200	///
201	/// \param __a
202	/// A 128-bit vector of [2 x double] containing one of the source operands.
203	/// The horizontal difference of the values is stored in the lower bits of
204	/// the destination.
205	/// \param __b
206	/// A 128-bit vector of [2 x double] containing one of the source operands.
207	/// The horizontal difference of the values is stored in the upper bits of
208	/// the destination.
209	/// \returns A 128-bit vector of [2 x double] containing the horizontal
210	/// differences of both operands.
211	static __inline__ __m128d __DEFAULT_FN_ATTRS
212	_mm_hsub_pd(__m128d __a, __m128d __b)
213	{
214	return __builtin_ia32_hsubpd((__v2df)__a, (__v2df)__b);
215	}
216
/// Reads one double-precision value from memory and duplicates it into
///    the double-precision elements of a 128-bit vector of [2 x double].
///
///    This is a macro that expands to a call to \c _mm_load1_pd with the
///    same argument.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128d _mm_loaddup_pd(double const *dp);
/// \endcode
///
/// This intrinsic corresponds to the <c> VMOVDDUP </c> instruction.
///
/// \param dp
///    A pointer to the double-precision value to be moved and duplicated.
/// \returns A 128-bit vector of [2 x double] holding the moved and
///    duplicated values.
#define _mm_loaddup_pd(dp) _mm_load1_pd(dp)
233
234	/// Moves and duplicates the double-precision value in the lower bits of
235	/// a 128-bit vector of [2 x double] to double-precision values stored in a
236	/// 128-bit vector of [2 x double].
237	///
238	/// \headerfile <x86intrin.h>
239	///
240	/// This intrinsic corresponds to the <c> VMOVDDUP </c> instruction.
241	///
242	/// \param __a
243	/// A 128-bit vector of [2 x double]. Bits [63:0] are written to bits
244	/// [127:64] and [63:0] of the destination.
245	/// \returns A 128-bit vector of [2 x double] containing the moved and
246	/// duplicated values.
247	static __inline__ __m128d __DEFAULT_FN_ATTRS
248	_mm_movedup_pd(__m128d __a)
249	{
250	return __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0);
251	}
252
253	/// Establishes a linear address memory range to be monitored and puts
254	/// the processor in the monitor event pending state. Data stored in the
255	/// monitored address range causes the processor to exit the pending state.
256	///
257	/// The \c MONITOR instruction can be used in kernel mode, and in other modes
258	/// if MSR <c> C001_0015h[MonMwaitUserEn] </c> is set.
259	///
260	/// \headerfile <x86intrin.h>
261	///
262	/// This intrinsic corresponds to the \c MONITOR instruction.
263	///
264	/// \param __p
265	/// The memory range to be monitored. The size of the range is determined by
266	/// CPUID function 0000_0005h.
267	/// \param __extensions
268	/// Optional extensions for the monitoring state.
269	/// \param __hints
270	/// Optional hints for the monitoring state.
271	static __inline__ void __DEFAULT_FN_ATTRS
272	_mm_monitor(void const *__p, unsigned __extensions, unsigned __hints)
273	{
274	__builtin_ia32_monitor(__p, __extensions, __hints);
275	}
276
277	/// Used with the \c MONITOR instruction to wait while the processor is in
278	/// the monitor event pending state. Data stored in the monitored address
279	/// range, or an interrupt, causes the processor to exit the pending state.
280	///
281	/// The \c MWAIT instruction can be used in kernel mode, and in other modes if
282	/// MSR <c> C001_0015h[MonMwaitUserEn] </c> is set.
283	///
284	/// \headerfile <x86intrin.h>
285	///
286	/// This intrinsic corresponds to the \c MWAIT instruction.
287	///
288	/// \param __extensions
289	/// Optional extensions for the monitoring state, which can vary by
290	/// processor.
291	/// \param __hints
292	/// Optional hints for the monitoring state, which can vary by processor.
293	static __inline__ void __DEFAULT_FN_ATTRS
294	_mm_mwait(unsigned __extensions, unsigned __hints)
295	{
296	__builtin_ia32_mwait(__extensions, __hints);
297	}
298
299	#undef __DEFAULT_FN_ATTRS
300
301	#endif /* __PMMINTRIN_H */
302

Warning: This file is not a C or C++ file. It does not have highlighting.

source code of clang/lib/Headers/pmmintrin.h