Warning: This file is not a C or C++ file. It does not have highlighting.
1 | /*===---- pmmintrin.h - SSE3 intrinsics ------------------------------------=== |
---|---|
2 | * |
3 | * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | * See https://llvm.org/LICENSE.txt for license information. |
5 | * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | * |
7 | *===-----------------------------------------------------------------------=== |
8 | */ |
9 | |
10 | #ifndef __PMMINTRIN_H |
11 | #define __PMMINTRIN_H |
12 | |
/* Reject any compilation target that is neither 32-bit nor 64-bit x86. */
#if !(defined(__i386__) || defined(__x86_64__))
#error "This header is only meant to be used on x86 and x64 architecture"
#endif
16 | |
17 | #include <emmintrin.h> |
18 | |
/* Attribute set applied to the intrinsic functions defined in this header:
 * always-inline, no-debug, the "sse3,no-evex512" target feature string, and a
 * minimum vector width of 128. */
#define __DEFAULT_FN_ATTRS                                                     \
  __attribute__((__always_inline__, __nodebug__))                              \
  __attribute__((__target__("sse3,no-evex512"), __min_vector_width__(128)))
23 | |
24 | /// Loads data from an unaligned memory location to elements in a 128-bit |
25 | /// vector. |
26 | /// |
27 | /// If the address of the data is not 16-byte aligned, the instruction may |
28 | /// read two adjacent aligned blocks of memory to retrieve the requested |
29 | /// data. |
30 | /// |
31 | /// \headerfile <x86intrin.h> |
32 | /// |
33 | /// This intrinsic corresponds to the <c> VLDDQU </c> instruction. |
34 | /// |
35 | /// \param __p |
36 | /// A pointer to a 128-bit integer vector containing integer values. |
37 | /// \returns A 128-bit vector containing the moved values. |
38 | static __inline__ __m128i __DEFAULT_FN_ATTRS |
39 | _mm_lddqu_si128(__m128i_u const *__p) |
40 | { |
41 | return (__m128i)__builtin_ia32_lddqu((char const *)__p); |
42 | } |
43 | |
44 | /// Adds the even-indexed values and subtracts the odd-indexed values of |
45 | /// two 128-bit vectors of [4 x float]. |
46 | /// |
47 | /// \headerfile <x86intrin.h> |
48 | /// |
49 | /// This intrinsic corresponds to the <c> VADDSUBPS </c> instruction. |
50 | /// |
51 | /// \param __a |
52 | /// A 128-bit vector of [4 x float] containing the left source operand. |
53 | /// \param __b |
54 | /// A 128-bit vector of [4 x float] containing the right source operand. |
55 | /// \returns A 128-bit vector of [4 x float] containing the alternating sums and |
56 | /// differences of both operands. |
57 | static __inline__ __m128 __DEFAULT_FN_ATTRS |
58 | _mm_addsub_ps(__m128 __a, __m128 __b) |
59 | { |
60 | return __builtin_ia32_addsubps((__v4sf)__a, (__v4sf)__b); |
61 | } |
62 | |
63 | /// Horizontally adds the adjacent pairs of values contained in two |
64 | /// 128-bit vectors of [4 x float]. |
65 | /// |
66 | /// \headerfile <x86intrin.h> |
67 | /// |
68 | /// This intrinsic corresponds to the <c> VHADDPS </c> instruction. |
69 | /// |
70 | /// \param __a |
71 | /// A 128-bit vector of [4 x float] containing one of the source operands. |
72 | /// The horizontal sums of the values are stored in the lower bits of the |
73 | /// destination. |
74 | /// \param __b |
75 | /// A 128-bit vector of [4 x float] containing one of the source operands. |
76 | /// The horizontal sums of the values are stored in the upper bits of the |
77 | /// destination. |
78 | /// \returns A 128-bit vector of [4 x float] containing the horizontal sums of |
79 | /// both operands. |
80 | static __inline__ __m128 __DEFAULT_FN_ATTRS |
81 | _mm_hadd_ps(__m128 __a, __m128 __b) |
82 | { |
83 | return __builtin_ia32_haddps((__v4sf)__a, (__v4sf)__b); |
84 | } |
85 | |
86 | /// Horizontally subtracts the adjacent pairs of values contained in two |
87 | /// 128-bit vectors of [4 x float]. |
88 | /// |
89 | /// \headerfile <x86intrin.h> |
90 | /// |
91 | /// This intrinsic corresponds to the <c> VHSUBPS </c> instruction. |
92 | /// |
93 | /// \param __a |
94 | /// A 128-bit vector of [4 x float] containing one of the source operands. |
95 | /// The horizontal differences between the values are stored in the lower |
96 | /// bits of the destination. |
97 | /// \param __b |
98 | /// A 128-bit vector of [4 x float] containing one of the source operands. |
99 | /// The horizontal differences between the values are stored in the upper |
100 | /// bits of the destination. |
101 | /// \returns A 128-bit vector of [4 x float] containing the horizontal |
102 | /// differences of both operands. |
103 | static __inline__ __m128 __DEFAULT_FN_ATTRS |
104 | _mm_hsub_ps(__m128 __a, __m128 __b) |
105 | { |
106 | return __builtin_ia32_hsubps((__v4sf)__a, (__v4sf)__b); |
107 | } |
108 | |
109 | /// Moves and duplicates odd-indexed values from a 128-bit vector |
110 | /// of [4 x float] to float values stored in a 128-bit vector of |
111 | /// [4 x float]. |
112 | /// |
113 | /// \headerfile <x86intrin.h> |
114 | /// |
115 | /// This intrinsic corresponds to the <c> VMOVSHDUP </c> instruction. |
116 | /// |
117 | /// \param __a |
118 | /// A 128-bit vector of [4 x float]. \n |
119 | /// Bits [127:96] of the source are written to bits [127:96] and [95:64] of |
120 | /// the destination. \n |
121 | /// Bits [63:32] of the source are written to bits [63:32] and [31:0] of the |
122 | /// destination. |
123 | /// \returns A 128-bit vector of [4 x float] containing the moved and duplicated |
124 | /// values. |
125 | static __inline__ __m128 __DEFAULT_FN_ATTRS |
126 | _mm_movehdup_ps(__m128 __a) |
127 | { |
128 | return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 1, 1, 3, 3); |
129 | } |
130 | |
131 | /// Duplicates even-indexed values from a 128-bit vector of |
132 | /// [4 x float] to float values stored in a 128-bit vector of [4 x float]. |
133 | /// |
134 | /// \headerfile <x86intrin.h> |
135 | /// |
136 | /// This intrinsic corresponds to the <c> VMOVSLDUP </c> instruction. |
137 | /// |
138 | /// \param __a |
139 | /// A 128-bit vector of [4 x float] \n |
140 | /// Bits [95:64] of the source are written to bits [127:96] and [95:64] of |
141 | /// the destination. \n |
142 | /// Bits [31:0] of the source are written to bits [63:32] and [31:0] of the |
143 | /// destination. |
144 | /// \returns A 128-bit vector of [4 x float] containing the moved and duplicated |
145 | /// values. |
146 | static __inline__ __m128 __DEFAULT_FN_ATTRS |
147 | _mm_moveldup_ps(__m128 __a) |
148 | { |
149 | return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 0, 2, 2); |
150 | } |
151 | |
152 | /// Adds the even-indexed values and subtracts the odd-indexed values of |
153 | /// two 128-bit vectors of [2 x double]. |
154 | /// |
155 | /// \headerfile <x86intrin.h> |
156 | /// |
157 | /// This intrinsic corresponds to the <c> VADDSUBPD </c> instruction. |
158 | /// |
159 | /// \param __a |
160 | /// A 128-bit vector of [2 x double] containing the left source operand. |
161 | /// \param __b |
162 | /// A 128-bit vector of [2 x double] containing the right source operand. |
163 | /// \returns A 128-bit vector of [2 x double] containing the alternating sums |
164 | /// and differences of both operands. |
165 | static __inline__ __m128d __DEFAULT_FN_ATTRS |
166 | _mm_addsub_pd(__m128d __a, __m128d __b) |
167 | { |
168 | return __builtin_ia32_addsubpd((__v2df)__a, (__v2df)__b); |
169 | } |
170 | |
171 | /// Horizontally adds the pairs of values contained in two 128-bit |
172 | /// vectors of [2 x double]. |
173 | /// |
174 | /// \headerfile <x86intrin.h> |
175 | /// |
176 | /// This intrinsic corresponds to the <c> VHADDPD </c> instruction. |
177 | /// |
178 | /// \param __a |
179 | /// A 128-bit vector of [2 x double] containing one of the source operands. |
180 | /// The horizontal sum of the values is stored in the lower bits of the |
181 | /// destination. |
182 | /// \param __b |
183 | /// A 128-bit vector of [2 x double] containing one of the source operands. |
184 | /// The horizontal sum of the values is stored in the upper bits of the |
185 | /// destination. |
186 | /// \returns A 128-bit vector of [2 x double] containing the horizontal sums of |
187 | /// both operands. |
188 | static __inline__ __m128d __DEFAULT_FN_ATTRS |
189 | _mm_hadd_pd(__m128d __a, __m128d __b) |
190 | { |
191 | return __builtin_ia32_haddpd((__v2df)__a, (__v2df)__b); |
192 | } |
193 | |
194 | /// Horizontally subtracts the pairs of values contained in two 128-bit |
195 | /// vectors of [2 x double]. |
196 | /// |
197 | /// \headerfile <x86intrin.h> |
198 | /// |
199 | /// This intrinsic corresponds to the <c> VHSUBPD </c> instruction. |
200 | /// |
201 | /// \param __a |
202 | /// A 128-bit vector of [2 x double] containing one of the source operands. |
203 | /// The horizontal difference of the values is stored in the lower bits of |
204 | /// the destination. |
205 | /// \param __b |
206 | /// A 128-bit vector of [2 x double] containing one of the source operands. |
207 | /// The horizontal difference of the values is stored in the upper bits of |
208 | /// the destination. |
209 | /// \returns A 128-bit vector of [2 x double] containing the horizontal |
210 | /// differences of both operands. |
211 | static __inline__ __m128d __DEFAULT_FN_ATTRS |
212 | _mm_hsub_pd(__m128d __a, __m128d __b) |
213 | { |
214 | return __builtin_ia32_hsubpd((__v2df)__a, (__v2df)__b); |
215 | } |
216 | |
/// Loads one double-precision value from memory and duplicates it into the
///    double-precision elements of a 128-bit vector of [2 x double].
///
///    Implemented as an alias for \c _mm_load1_pd.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128d _mm_loaddup_pd(double const *dp);
/// \endcode
///
/// This intrinsic corresponds to the <c> VMOVDDUP </c> instruction.
///
/// \param dp
///    A pointer to the double-precision value to be moved and duplicated.
/// \returns A 128-bit vector of [2 x double] containing the moved and
///    duplicated values.
#define _mm_loaddup_pd(dp) _mm_load1_pd((dp))
233 | |
234 | /// Moves and duplicates the double-precision value in the lower bits of |
235 | /// a 128-bit vector of [2 x double] to double-precision values stored in a |
236 | /// 128-bit vector of [2 x double]. |
237 | /// |
238 | /// \headerfile <x86intrin.h> |
239 | /// |
240 | /// This intrinsic corresponds to the <c> VMOVDDUP </c> instruction. |
241 | /// |
242 | /// \param __a |
243 | /// A 128-bit vector of [2 x double]. Bits [63:0] are written to bits |
244 | /// [127:64] and [63:0] of the destination. |
245 | /// \returns A 128-bit vector of [2 x double] containing the moved and |
246 | /// duplicated values. |
247 | static __inline__ __m128d __DEFAULT_FN_ATTRS |
248 | _mm_movedup_pd(__m128d __a) |
249 | { |
250 | return __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0); |
251 | } |
252 | |
253 | /// Establishes a linear address memory range to be monitored and puts |
254 | /// the processor in the monitor event pending state. Data stored in the |
255 | /// monitored address range causes the processor to exit the pending state. |
256 | /// |
257 | /// The \c MONITOR instruction can be used in kernel mode, and in other modes |
258 | /// if MSR <c> C001_0015h[MonMwaitUserEn] </c> is set. |
259 | /// |
260 | /// \headerfile <x86intrin.h> |
261 | /// |
262 | /// This intrinsic corresponds to the \c MONITOR instruction. |
263 | /// |
264 | /// \param __p |
265 | /// The memory range to be monitored. The size of the range is determined by |
266 | /// CPUID function 0000_0005h. |
267 | /// \param __extensions |
268 | /// Optional extensions for the monitoring state. |
269 | /// \param __hints |
270 | /// Optional hints for the monitoring state. |
271 | static __inline__ void __DEFAULT_FN_ATTRS |
272 | _mm_monitor(void const *__p, unsigned __extensions, unsigned __hints) |
273 | { |
274 | __builtin_ia32_monitor(__p, __extensions, __hints); |
275 | } |
276 | |
277 | /// Used with the \c MONITOR instruction to wait while the processor is in |
278 | /// the monitor event pending state. Data stored in the monitored address |
279 | /// range, or an interrupt, causes the processor to exit the pending state. |
280 | /// |
281 | /// The \c MWAIT instruction can be used in kernel mode, and in other modes if |
282 | /// MSR <c> C001_0015h[MonMwaitUserEn] </c> is set. |
283 | /// |
284 | /// \headerfile <x86intrin.h> |
285 | /// |
286 | /// This intrinsic corresponds to the \c MWAIT instruction. |
287 | /// |
288 | /// \param __extensions |
289 | /// Optional extensions for the monitoring state, which can vary by |
290 | /// processor. |
291 | /// \param __hints |
292 | /// Optional hints for the monitoring state, which can vary by processor. |
293 | static __inline__ void __DEFAULT_FN_ATTRS |
294 | _mm_mwait(unsigned __extensions, unsigned __hints) |
295 | { |
296 | __builtin_ia32_mwait(__extensions, __hints); |
297 | } |
298 | |
299 | #undef __DEFAULT_FN_ATTRS |
300 | |
301 | #endif /* __PMMINTRIN_H */ |
302 |
Warning: This file is not a C or C++ file. It does not have highlighting.