Warning: This file is not a C or C++ file. It does not have highlighting.
1 | /*===---- mmintrin.h - MMX intrinsics --------------------------------------=== |
---|---|
2 | * |
3 | * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | * See https://llvm.org/LICENSE.txt for license information. |
5 | * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | * |
7 | *===-----------------------------------------------------------------------=== |
8 | */ |
9 | |
10 | #ifndef __MMINTRIN_H |
11 | #define __MMINTRIN_H |
12 | |
13 | #if !defined(__i386__) && !defined(__x86_64__) |
14 | #error "This header is only meant to be used on x86 and x64 architecture" |
15 | #endif |
16 | |
/* Public MMX vector type: 8 bytes wide with an explicit 8-byte alignment. */
typedef long long __m64 __attribute__((__vector_size__(8), __aligned__(8)));

/* Internal 8-byte lane views used for casts, narrowest lanes first. */
typedef char __v8qi __attribute__((__vector_size__(8)));
typedef short __v4hi __attribute__((__vector_size__(8)));
typedef int __v2si __attribute__((__vector_size__(8)));
typedef long long __v1di __attribute__((__vector_size__(8)));
23 | |
/* Attribute set shared by the intrinsics in this file: forced inlining, no
 * debug info, the "mmx,no-evex512" target string, and a minimum vector width
 * of 64 bits. */
#define __DEFAULT_FN_ATTRS                                                     \
  __attribute__((__always_inline__, __nodebug__,                               \
                 __target__("mmx,no-evex512"), __min_vector_width__(64)))
28 | |
29 | /// Clears the MMX state by setting the state of the x87 stack registers |
30 | /// to empty. |
31 | /// |
32 | /// \headerfile <x86intrin.h> |
33 | /// |
34 | /// This intrinsic corresponds to the <c> EMMS </c> instruction. |
35 | /// |
36 | static __inline__ void __attribute__((__always_inline__, __nodebug__, |
37 | __target__("mmx,no-evex512"))) |
38 | _mm_empty(void) { |
39 | __builtin_ia32_emms(); |
40 | } |
41 | |
42 | /// Constructs a 64-bit integer vector, setting the lower 32 bits to the |
43 | /// value of the 32-bit integer parameter and setting the upper 32 bits to 0. |
44 | /// |
45 | /// \headerfile <x86intrin.h> |
46 | /// |
47 | /// This intrinsic corresponds to the <c> MOVD </c> instruction. |
48 | /// |
49 | /// \param __i |
50 | /// A 32-bit integer value. |
51 | /// \returns A 64-bit integer vector. The lower 32 bits contain the value of the |
52 | /// parameter. The upper 32 bits are set to 0. |
53 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
54 | _mm_cvtsi32_si64(int __i) |
55 | { |
56 | return (__m64)__builtin_ia32_vec_init_v2si(__i, 0); |
57 | } |
58 | |
59 | /// Returns the lower 32 bits of a 64-bit integer vector as a 32-bit |
60 | /// signed integer. |
61 | /// |
62 | /// \headerfile <x86intrin.h> |
63 | /// |
64 | /// This intrinsic corresponds to the <c> MOVD </c> instruction. |
65 | /// |
66 | /// \param __m |
67 | /// A 64-bit integer vector. |
68 | /// \returns A 32-bit signed integer value containing the lower 32 bits of the |
69 | /// parameter. |
70 | static __inline__ int __DEFAULT_FN_ATTRS |
71 | _mm_cvtsi64_si32(__m64 __m) |
72 | { |
73 | return __builtin_ia32_vec_ext_v2si((__v2si)__m, 0); |
74 | } |
75 | |
76 | /// Casts a 64-bit signed integer value into a 64-bit integer vector. |
77 | /// |
78 | /// \headerfile <x86intrin.h> |
79 | /// |
80 | /// This intrinsic corresponds to the <c> MOVQ </c> instruction. |
81 | /// |
82 | /// \param __i |
83 | /// A 64-bit signed integer. |
84 | /// \returns A 64-bit integer vector containing the same bitwise pattern as the |
85 | /// parameter. |
86 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
87 | _mm_cvtsi64_m64(long long __i) |
88 | { |
89 | return (__m64)__i; |
90 | } |
91 | |
92 | /// Casts a 64-bit integer vector into a 64-bit signed integer value. |
93 | /// |
94 | /// \headerfile <x86intrin.h> |
95 | /// |
96 | /// This intrinsic corresponds to the <c> MOVQ </c> instruction. |
97 | /// |
98 | /// \param __m |
99 | /// A 64-bit integer vector. |
100 | /// \returns A 64-bit signed integer containing the same bitwise pattern as the |
101 | /// parameter. |
102 | static __inline__ long long __DEFAULT_FN_ATTRS |
103 | _mm_cvtm64_si64(__m64 __m) |
104 | { |
105 | return (long long)__m; |
106 | } |
107 | |
108 | /// Converts 16-bit signed integers from both 64-bit integer vector |
109 | /// parameters of [4 x i16] into 8-bit signed integer values, and constructs |
110 | /// a 64-bit integer vector of [8 x i8] as the result. Positive values |
111 | /// greater than 0x7F are saturated to 0x7F. Negative values less than 0x80 |
112 | /// are saturated to 0x80. |
113 | /// |
114 | /// \headerfile <x86intrin.h> |
115 | /// |
116 | /// This intrinsic corresponds to the <c> PACKSSWB </c> instruction. |
117 | /// |
118 | /// \param __m1 |
119 | /// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a |
120 | /// 16-bit signed integer and is converted to an 8-bit signed integer with |
121 | /// saturation. Positive values greater than 0x7F are saturated to 0x7F. |
122 | /// Negative values less than 0x80 are saturated to 0x80. The converted |
123 | /// [4 x i8] values are written to the lower 32 bits of the result. |
124 | /// \param __m2 |
125 | /// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a |
126 | /// 16-bit signed integer and is converted to an 8-bit signed integer with |
127 | /// saturation. Positive values greater than 0x7F are saturated to 0x7F. |
128 | /// Negative values less than 0x80 are saturated to 0x80. The converted |
129 | /// [4 x i8] values are written to the upper 32 bits of the result. |
130 | /// \returns A 64-bit integer vector of [8 x i8] containing the converted |
131 | /// values. |
132 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
133 | _mm_packs_pi16(__m64 __m1, __m64 __m2) |
134 | { |
135 | return (__m64)__builtin_ia32_packsswb((__v4hi)__m1, (__v4hi)__m2); |
136 | } |
137 | |
138 | /// Converts 32-bit signed integers from both 64-bit integer vector |
139 | /// parameters of [2 x i32] into 16-bit signed integer values, and constructs |
140 | /// a 64-bit integer vector of [4 x i16] as the result. Positive values |
141 | /// greater than 0x7FFF are saturated to 0x7FFF. Negative values less than |
142 | /// 0x8000 are saturated to 0x8000. |
143 | /// |
144 | /// \headerfile <x86intrin.h> |
145 | /// |
146 | /// This intrinsic corresponds to the <c> PACKSSDW </c> instruction. |
147 | /// |
148 | /// \param __m1 |
149 | /// A 64-bit integer vector of [2 x i32]. Each 32-bit element is treated as a |
150 | /// 32-bit signed integer and is converted to a 16-bit signed integer with |
151 | /// saturation. Positive values greater than 0x7FFF are saturated to 0x7FFF. |
152 | /// Negative values less than 0x8000 are saturated to 0x8000. The converted |
153 | /// [2 x i16] values are written to the lower 32 bits of the result. |
154 | /// \param __m2 |
155 | /// A 64-bit integer vector of [2 x i32]. Each 32-bit element is treated as a |
156 | /// 32-bit signed integer and is converted to a 16-bit signed integer with |
157 | /// saturation. Positive values greater than 0x7FFF are saturated to 0x7FFF. |
158 | /// Negative values less than 0x8000 are saturated to 0x8000. The converted |
159 | /// [2 x i16] values are written to the upper 32 bits of the result. |
160 | /// \returns A 64-bit integer vector of [4 x i16] containing the converted |
161 | /// values. |
162 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
163 | _mm_packs_pi32(__m64 __m1, __m64 __m2) |
164 | { |
165 | return (__m64)__builtin_ia32_packssdw((__v2si)__m1, (__v2si)__m2); |
166 | } |
167 | |
168 | /// Converts 16-bit signed integers from both 64-bit integer vector |
169 | /// parameters of [4 x i16] into 8-bit unsigned integer values, and |
170 | /// constructs a 64-bit integer vector of [8 x i8] as the result. Values |
171 | /// greater than 0xFF are saturated to 0xFF. Values less than 0 are saturated |
172 | /// to 0. |
173 | /// |
174 | /// \headerfile <x86intrin.h> |
175 | /// |
176 | /// This intrinsic corresponds to the <c> PACKUSWB </c> instruction. |
177 | /// |
178 | /// \param __m1 |
179 | /// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a |
180 | /// 16-bit signed integer and is converted to an 8-bit unsigned integer with |
181 | /// saturation. Values greater than 0xFF are saturated to 0xFF. Values less |
182 | /// than 0 are saturated to 0. The converted [4 x i8] values are written to |
183 | /// the lower 32 bits of the result. |
184 | /// \param __m2 |
185 | /// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a |
186 | /// 16-bit signed integer and is converted to an 8-bit unsigned integer with |
187 | /// saturation. Values greater than 0xFF are saturated to 0xFF. Values less |
188 | /// than 0 are saturated to 0. The converted [4 x i8] values are written to |
189 | /// the upper 32 bits of the result. |
190 | /// \returns A 64-bit integer vector of [8 x i8] containing the converted |
191 | /// values. |
192 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
193 | _mm_packs_pu16(__m64 __m1, __m64 __m2) |
194 | { |
195 | return (__m64)__builtin_ia32_packuswb((__v4hi)__m1, (__v4hi)__m2); |
196 | } |
197 | |
198 | /// Unpacks the upper 32 bits from two 64-bit integer vectors of [8 x i8] |
199 | /// and interleaves them into a 64-bit integer vector of [8 x i8]. |
200 | /// |
201 | /// \headerfile <x86intrin.h> |
202 | /// |
203 | /// This intrinsic corresponds to the <c> PUNPCKHBW </c> instruction. |
204 | /// |
205 | /// \param __m1 |
206 | /// A 64-bit integer vector of [8 x i8]. \n |
207 | /// Bits [39:32] are written to bits [7:0] of the result. \n |
208 | /// Bits [47:40] are written to bits [23:16] of the result. \n |
209 | /// Bits [55:48] are written to bits [39:32] of the result. \n |
210 | /// Bits [63:56] are written to bits [55:48] of the result. |
211 | /// \param __m2 |
212 | /// A 64-bit integer vector of [8 x i8]. |
213 | /// Bits [39:32] are written to bits [15:8] of the result. \n |
214 | /// Bits [47:40] are written to bits [31:24] of the result. \n |
215 | /// Bits [55:48] are written to bits [47:40] of the result. \n |
216 | /// Bits [63:56] are written to bits [63:56] of the result. |
217 | /// \returns A 64-bit integer vector of [8 x i8] containing the interleaved |
218 | /// values. |
219 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
220 | _mm_unpackhi_pi8(__m64 __m1, __m64 __m2) |
221 | { |
222 | return (__m64)__builtin_ia32_punpckhbw((__v8qi)__m1, (__v8qi)__m2); |
223 | } |
224 | |
225 | /// Unpacks the upper 32 bits from two 64-bit integer vectors of |
226 | /// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16]. |
227 | /// |
228 | /// \headerfile <x86intrin.h> |
229 | /// |
230 | /// This intrinsic corresponds to the <c> PUNPCKHWD </c> instruction. |
231 | /// |
232 | /// \param __m1 |
233 | /// A 64-bit integer vector of [4 x i16]. |
234 | /// Bits [47:32] are written to bits [15:0] of the result. \n |
235 | /// Bits [63:48] are written to bits [47:32] of the result. |
236 | /// \param __m2 |
237 | /// A 64-bit integer vector of [4 x i16]. |
238 | /// Bits [47:32] are written to bits [31:16] of the result. \n |
239 | /// Bits [63:48] are written to bits [63:48] of the result. |
240 | /// \returns A 64-bit integer vector of [4 x i16] containing the interleaved |
241 | /// values. |
242 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
243 | _mm_unpackhi_pi16(__m64 __m1, __m64 __m2) |
244 | { |
245 | return (__m64)__builtin_ia32_punpckhwd((__v4hi)__m1, (__v4hi)__m2); |
246 | } |
247 | |
248 | /// Unpacks the upper 32 bits from two 64-bit integer vectors of |
249 | /// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32]. |
250 | /// |
251 | /// \headerfile <x86intrin.h> |
252 | /// |
253 | /// This intrinsic corresponds to the <c> PUNPCKHDQ </c> instruction. |
254 | /// |
255 | /// \param __m1 |
256 | /// A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to |
257 | /// the lower 32 bits of the result. |
258 | /// \param __m2 |
259 | /// A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to |
260 | /// the upper 32 bits of the result. |
261 | /// \returns A 64-bit integer vector of [2 x i32] containing the interleaved |
262 | /// values. |
263 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
264 | _mm_unpackhi_pi32(__m64 __m1, __m64 __m2) |
265 | { |
266 | return (__m64)__builtin_ia32_punpckhdq((__v2si)__m1, (__v2si)__m2); |
267 | } |
268 | |
269 | /// Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8] |
270 | /// and interleaves them into a 64-bit integer vector of [8 x i8]. |
271 | /// |
272 | /// \headerfile <x86intrin.h> |
273 | /// |
274 | /// This intrinsic corresponds to the <c> PUNPCKLBW </c> instruction. |
275 | /// |
276 | /// \param __m1 |
277 | /// A 64-bit integer vector of [8 x i8]. |
278 | /// Bits [7:0] are written to bits [7:0] of the result. \n |
279 | /// Bits [15:8] are written to bits [23:16] of the result. \n |
280 | /// Bits [23:16] are written to bits [39:32] of the result. \n |
281 | /// Bits [31:24] are written to bits [55:48] of the result. |
282 | /// \param __m2 |
283 | /// A 64-bit integer vector of [8 x i8]. |
284 | /// Bits [7:0] are written to bits [15:8] of the result. \n |
285 | /// Bits [15:8] are written to bits [31:24] of the result. \n |
286 | /// Bits [23:16] are written to bits [47:40] of the result. \n |
287 | /// Bits [31:24] are written to bits [63:56] of the result. |
288 | /// \returns A 64-bit integer vector of [8 x i8] containing the interleaved |
289 | /// values. |
290 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
291 | _mm_unpacklo_pi8(__m64 __m1, __m64 __m2) |
292 | { |
293 | return (__m64)__builtin_ia32_punpcklbw((__v8qi)__m1, (__v8qi)__m2); |
294 | } |
295 | |
296 | /// Unpacks the lower 32 bits from two 64-bit integer vectors of |
297 | /// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16]. |
298 | /// |
299 | /// \headerfile <x86intrin.h> |
300 | /// |
301 | /// This intrinsic corresponds to the <c> PUNPCKLWD </c> instruction. |
302 | /// |
303 | /// \param __m1 |
304 | /// A 64-bit integer vector of [4 x i16]. |
305 | /// Bits [15:0] are written to bits [15:0] of the result. \n |
306 | /// Bits [31:16] are written to bits [47:32] of the result. |
307 | /// \param __m2 |
308 | /// A 64-bit integer vector of [4 x i16]. |
309 | /// Bits [15:0] are written to bits [31:16] of the result. \n |
310 | /// Bits [31:16] are written to bits [63:48] of the result. |
311 | /// \returns A 64-bit integer vector of [4 x i16] containing the interleaved |
312 | /// values. |
313 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
314 | _mm_unpacklo_pi16(__m64 __m1, __m64 __m2) |
315 | { |
316 | return (__m64)__builtin_ia32_punpcklwd((__v4hi)__m1, (__v4hi)__m2); |
317 | } |
318 | |
319 | /// Unpacks the lower 32 bits from two 64-bit integer vectors of |
320 | /// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32]. |
321 | /// |
322 | /// \headerfile <x86intrin.h> |
323 | /// |
324 | /// This intrinsic corresponds to the <c> PUNPCKLDQ </c> instruction. |
325 | /// |
326 | /// \param __m1 |
327 | /// A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to |
328 | /// the lower 32 bits of the result. |
329 | /// \param __m2 |
330 | /// A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to |
331 | /// the upper 32 bits of the result. |
332 | /// \returns A 64-bit integer vector of [2 x i32] containing the interleaved |
333 | /// values. |
334 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
335 | _mm_unpacklo_pi32(__m64 __m1, __m64 __m2) |
336 | { |
337 | return (__m64)__builtin_ia32_punpckldq((__v2si)__m1, (__v2si)__m2); |
338 | } |
339 | |
340 | /// Adds each 8-bit integer element of the first 64-bit integer vector |
341 | /// of [8 x i8] to the corresponding 8-bit integer element of the second |
342 | /// 64-bit integer vector of [8 x i8]. The lower 8 bits of the results are |
343 | /// packed into a 64-bit integer vector of [8 x i8]. |
344 | /// |
345 | /// \headerfile <x86intrin.h> |
346 | /// |
347 | /// This intrinsic corresponds to the <c> PADDB </c> instruction. |
348 | /// |
349 | /// \param __m1 |
350 | /// A 64-bit integer vector of [8 x i8]. |
351 | /// \param __m2 |
352 | /// A 64-bit integer vector of [8 x i8]. |
353 | /// \returns A 64-bit integer vector of [8 x i8] containing the sums of both |
354 | /// parameters. |
355 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
356 | _mm_add_pi8(__m64 __m1, __m64 __m2) |
357 | { |
358 | return (__m64)__builtin_ia32_paddb((__v8qi)__m1, (__v8qi)__m2); |
359 | } |
360 | |
361 | /// Adds each 16-bit integer element of the first 64-bit integer vector |
362 | /// of [4 x i16] to the corresponding 16-bit integer element of the second |
363 | /// 64-bit integer vector of [4 x i16]. The lower 16 bits of the results are |
364 | /// packed into a 64-bit integer vector of [4 x i16]. |
365 | /// |
366 | /// \headerfile <x86intrin.h> |
367 | /// |
368 | /// This intrinsic corresponds to the <c> PADDW </c> instruction. |
369 | /// |
370 | /// \param __m1 |
371 | /// A 64-bit integer vector of [4 x i16]. |
372 | /// \param __m2 |
373 | /// A 64-bit integer vector of [4 x i16]. |
374 | /// \returns A 64-bit integer vector of [4 x i16] containing the sums of both |
375 | /// parameters. |
376 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
377 | _mm_add_pi16(__m64 __m1, __m64 __m2) |
378 | { |
379 | return (__m64)__builtin_ia32_paddw((__v4hi)__m1, (__v4hi)__m2); |
380 | } |
381 | |
382 | /// Adds each 32-bit integer element of the first 64-bit integer vector |
383 | /// of [2 x i32] to the corresponding 32-bit integer element of the second |
384 | /// 64-bit integer vector of [2 x i32]. The lower 32 bits of the results are |
385 | /// packed into a 64-bit integer vector of [2 x i32]. |
386 | /// |
387 | /// \headerfile <x86intrin.h> |
388 | /// |
389 | /// This intrinsic corresponds to the <c> PADDD </c> instruction. |
390 | /// |
391 | /// \param __m1 |
392 | /// A 64-bit integer vector of [2 x i32]. |
393 | /// \param __m2 |
394 | /// A 64-bit integer vector of [2 x i32]. |
395 | /// \returns A 64-bit integer vector of [2 x i32] containing the sums of both |
396 | /// parameters. |
397 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
398 | _mm_add_pi32(__m64 __m1, __m64 __m2) |
399 | { |
400 | return (__m64)__builtin_ia32_paddd((__v2si)__m1, (__v2si)__m2); |
401 | } |
402 | |
403 | /// Adds each 8-bit signed integer element of the first 64-bit integer |
404 | /// vector of [8 x i8] to the corresponding 8-bit signed integer element of |
405 | /// the second 64-bit integer vector of [8 x i8]. Positive sums greater than |
406 | /// 0x7F are saturated to 0x7F. Negative sums less than 0x80 are saturated to |
407 | /// 0x80. The results are packed into a 64-bit integer vector of [8 x i8]. |
408 | /// |
409 | /// \headerfile <x86intrin.h> |
410 | /// |
411 | /// This intrinsic corresponds to the <c> PADDSB </c> instruction. |
412 | /// |
413 | /// \param __m1 |
414 | /// A 64-bit integer vector of [8 x i8]. |
415 | /// \param __m2 |
416 | /// A 64-bit integer vector of [8 x i8]. |
417 | /// \returns A 64-bit integer vector of [8 x i8] containing the saturated sums |
418 | /// of both parameters. |
419 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
420 | _mm_adds_pi8(__m64 __m1, __m64 __m2) |
421 | { |
422 | return (__m64)__builtin_ia32_paddsb((__v8qi)__m1, (__v8qi)__m2); |
423 | } |
424 | |
425 | /// Adds each 16-bit signed integer element of the first 64-bit integer |
426 | /// vector of [4 x i16] to the corresponding 16-bit signed integer element of |
427 | /// the second 64-bit integer vector of [4 x i16]. Positive sums greater than |
428 | /// 0x7FFF are saturated to 0x7FFF. Negative sums less than 0x8000 are |
429 | /// saturated to 0x8000. The results are packed into a 64-bit integer vector |
430 | /// of [4 x i16]. |
431 | /// |
432 | /// \headerfile <x86intrin.h> |
433 | /// |
434 | /// This intrinsic corresponds to the <c> PADDSW </c> instruction. |
435 | /// |
436 | /// \param __m1 |
437 | /// A 64-bit integer vector of [4 x i16]. |
438 | /// \param __m2 |
439 | /// A 64-bit integer vector of [4 x i16]. |
440 | /// \returns A 64-bit integer vector of [4 x i16] containing the saturated sums |
441 | /// of both parameters. |
442 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
443 | _mm_adds_pi16(__m64 __m1, __m64 __m2) |
444 | { |
445 | return (__m64)__builtin_ia32_paddsw((__v4hi)__m1, (__v4hi)__m2); |
446 | } |
447 | |
448 | /// Adds each 8-bit unsigned integer element of the first 64-bit integer |
449 | /// vector of [8 x i8] to the corresponding 8-bit unsigned integer element of |
450 | /// the second 64-bit integer vector of [8 x i8]. Sums greater than 0xFF are |
451 | /// saturated to 0xFF. The results are packed into a 64-bit integer vector of |
452 | /// [8 x i8]. |
453 | /// |
454 | /// \headerfile <x86intrin.h> |
455 | /// |
456 | /// This intrinsic corresponds to the <c> PADDUSB </c> instruction. |
457 | /// |
458 | /// \param __m1 |
459 | /// A 64-bit integer vector of [8 x i8]. |
460 | /// \param __m2 |
461 | /// A 64-bit integer vector of [8 x i8]. |
462 | /// \returns A 64-bit integer vector of [8 x i8] containing the saturated |
463 | /// unsigned sums of both parameters. |
464 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
465 | _mm_adds_pu8(__m64 __m1, __m64 __m2) |
466 | { |
467 | return (__m64)__builtin_ia32_paddusb((__v8qi)__m1, (__v8qi)__m2); |
468 | } |
469 | |
470 | /// Adds each 16-bit unsigned integer element of the first 64-bit integer |
471 | /// vector of [4 x i16] to the corresponding 16-bit unsigned integer element |
472 | /// of the second 64-bit integer vector of [4 x i16]. Sums greater than |
473 | /// 0xFFFF are saturated to 0xFFFF. The results are packed into a 64-bit |
474 | /// integer vector of [4 x i16]. |
475 | /// |
476 | /// \headerfile <x86intrin.h> |
477 | /// |
478 | /// This intrinsic corresponds to the <c> PADDUSW </c> instruction. |
479 | /// |
480 | /// \param __m1 |
481 | /// A 64-bit integer vector of [4 x i16]. |
482 | /// \param __m2 |
483 | /// A 64-bit integer vector of [4 x i16]. |
484 | /// \returns A 64-bit integer vector of [4 x i16] containing the saturated |
485 | /// unsigned sums of both parameters. |
486 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
487 | _mm_adds_pu16(__m64 __m1, __m64 __m2) |
488 | { |
489 | return (__m64)__builtin_ia32_paddusw((__v4hi)__m1, (__v4hi)__m2); |
490 | } |
491 | |
492 | /// Subtracts each 8-bit integer element of the second 64-bit integer |
493 | /// vector of [8 x i8] from the corresponding 8-bit integer element of the |
494 | /// first 64-bit integer vector of [8 x i8]. The lower 8 bits of the results |
495 | /// are packed into a 64-bit integer vector of [8 x i8]. |
496 | /// |
497 | /// \headerfile <x86intrin.h> |
498 | /// |
499 | /// This intrinsic corresponds to the <c> PSUBB </c> instruction. |
500 | /// |
501 | /// \param __m1 |
502 | /// A 64-bit integer vector of [8 x i8] containing the minuends. |
503 | /// \param __m2 |
504 | /// A 64-bit integer vector of [8 x i8] containing the subtrahends. |
505 | /// \returns A 64-bit integer vector of [8 x i8] containing the differences of |
506 | /// both parameters. |
507 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
508 | _mm_sub_pi8(__m64 __m1, __m64 __m2) |
509 | { |
510 | return (__m64)__builtin_ia32_psubb((__v8qi)__m1, (__v8qi)__m2); |
511 | } |
512 | |
513 | /// Subtracts each 16-bit integer element of the second 64-bit integer |
514 | /// vector of [4 x i16] from the corresponding 16-bit integer element of the |
515 | /// first 64-bit integer vector of [4 x i16]. The lower 16 bits of the |
516 | /// results are packed into a 64-bit integer vector of [4 x i16]. |
517 | /// |
518 | /// \headerfile <x86intrin.h> |
519 | /// |
520 | /// This intrinsic corresponds to the <c> PSUBW </c> instruction. |
521 | /// |
522 | /// \param __m1 |
523 | /// A 64-bit integer vector of [4 x i16] containing the minuends. |
524 | /// \param __m2 |
525 | /// A 64-bit integer vector of [4 x i16] containing the subtrahends. |
526 | /// \returns A 64-bit integer vector of [4 x i16] containing the differences of |
527 | /// both parameters. |
528 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
529 | _mm_sub_pi16(__m64 __m1, __m64 __m2) |
530 | { |
531 | return (__m64)__builtin_ia32_psubw((__v4hi)__m1, (__v4hi)__m2); |
532 | } |
533 | |
534 | /// Subtracts each 32-bit integer element of the second 64-bit integer |
535 | /// vector of [2 x i32] from the corresponding 32-bit integer element of the |
536 | /// first 64-bit integer vector of [2 x i32]. The lower 32 bits of the |
537 | /// results are packed into a 64-bit integer vector of [2 x i32]. |
538 | /// |
539 | /// \headerfile <x86intrin.h> |
540 | /// |
541 | /// This intrinsic corresponds to the <c> PSUBD </c> instruction. |
542 | /// |
543 | /// \param __m1 |
544 | /// A 64-bit integer vector of [2 x i32] containing the minuends. |
545 | /// \param __m2 |
546 | /// A 64-bit integer vector of [2 x i32] containing the subtrahends. |
547 | /// \returns A 64-bit integer vector of [2 x i32] containing the differences of |
548 | /// both parameters. |
549 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
550 | _mm_sub_pi32(__m64 __m1, __m64 __m2) |
551 | { |
552 | return (__m64)__builtin_ia32_psubd((__v2si)__m1, (__v2si)__m2); |
553 | } |
554 | |
555 | /// Subtracts each 8-bit signed integer element of the second 64-bit |
556 | /// integer vector of [8 x i8] from the corresponding 8-bit signed integer |
557 | /// element of the first 64-bit integer vector of [8 x i8]. Positive results |
558 | /// greater than 0x7F are saturated to 0x7F. Negative results less than 0x80 |
559 | /// are saturated to 0x80. The results are packed into a 64-bit integer |
560 | /// vector of [8 x i8]. |
561 | /// |
562 | /// \headerfile <x86intrin.h> |
563 | /// |
564 | /// This intrinsic corresponds to the <c> PSUBSB </c> instruction. |
565 | /// |
566 | /// \param __m1 |
567 | /// A 64-bit integer vector of [8 x i8] containing the minuends. |
568 | /// \param __m2 |
569 | /// A 64-bit integer vector of [8 x i8] containing the subtrahends. |
570 | /// \returns A 64-bit integer vector of [8 x i8] containing the saturated |
571 | /// differences of both parameters. |
572 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
573 | _mm_subs_pi8(__m64 __m1, __m64 __m2) |
574 | { |
575 | return (__m64)__builtin_ia32_psubsb((__v8qi)__m1, (__v8qi)__m2); |
576 | } |
577 | |
578 | /// Subtracts each 16-bit signed integer element of the second 64-bit |
579 | /// integer vector of [4 x i16] from the corresponding 16-bit signed integer |
580 | /// element of the first 64-bit integer vector of [4 x i16]. Positive results |
581 | /// greater than 0x7FFF are saturated to 0x7FFF. Negative results less than |
582 | /// 0x8000 are saturated to 0x8000. The results are packed into a 64-bit |
583 | /// integer vector of [4 x i16]. |
584 | /// |
585 | /// \headerfile <x86intrin.h> |
586 | /// |
587 | /// This intrinsic corresponds to the <c> PSUBSW </c> instruction. |
588 | /// |
589 | /// \param __m1 |
590 | /// A 64-bit integer vector of [4 x i16] containing the minuends. |
591 | /// \param __m2 |
592 | /// A 64-bit integer vector of [4 x i16] containing the subtrahends. |
593 | /// \returns A 64-bit integer vector of [4 x i16] containing the saturated |
594 | /// differences of both parameters. |
595 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
596 | _mm_subs_pi16(__m64 __m1, __m64 __m2) |
597 | { |
598 | return (__m64)__builtin_ia32_psubsw((__v4hi)__m1, (__v4hi)__m2); |
599 | } |
600 | |
601 | /// Subtracts each 8-bit unsigned integer element of the second 64-bit |
602 | /// integer vector of [8 x i8] from the corresponding 8-bit unsigned integer |
603 | /// element of the first 64-bit integer vector of [8 x i8]. |
604 | /// |
605 | /// If an element of the first vector is less than the corresponding element |
606 | /// of the second vector, the result is saturated to 0. The results are |
607 | /// packed into a 64-bit integer vector of [8 x i8]. |
608 | /// |
609 | /// \headerfile <x86intrin.h> |
610 | /// |
611 | /// This intrinsic corresponds to the <c> PSUBUSB </c> instruction. |
612 | /// |
613 | /// \param __m1 |
614 | /// A 64-bit integer vector of [8 x i8] containing the minuends. |
615 | /// \param __m2 |
616 | /// A 64-bit integer vector of [8 x i8] containing the subtrahends. |
617 | /// \returns A 64-bit integer vector of [8 x i8] containing the saturated |
618 | /// differences of both parameters. |
619 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
620 | _mm_subs_pu8(__m64 __m1, __m64 __m2) |
621 | { |
622 | return (__m64)__builtin_ia32_psubusb((__v8qi)__m1, (__v8qi)__m2); |
623 | } |
624 | |
625 | /// Subtracts each 16-bit unsigned integer element of the second 64-bit |
626 | /// integer vector of [4 x i16] from the corresponding 16-bit unsigned |
627 | /// integer element of the first 64-bit integer vector of [4 x i16]. |
628 | /// |
629 | /// If an element of the first vector is less than the corresponding element |
630 | /// of the second vector, the result is saturated to 0. The results are |
631 | /// packed into a 64-bit integer vector of [4 x i16]. |
632 | /// |
633 | /// \headerfile <x86intrin.h> |
634 | /// |
635 | /// This intrinsic corresponds to the <c> PSUBUSW </c> instruction. |
636 | /// |
637 | /// \param __m1 |
638 | /// A 64-bit integer vector of [4 x i16] containing the minuends. |
639 | /// \param __m2 |
640 | /// A 64-bit integer vector of [4 x i16] containing the subtrahends. |
641 | /// \returns A 64-bit integer vector of [4 x i16] containing the saturated |
642 | /// differences of both parameters. |
643 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
644 | _mm_subs_pu16(__m64 __m1, __m64 __m2) |
645 | { |
646 | return (__m64)__builtin_ia32_psubusw((__v4hi)__m1, (__v4hi)__m2); |
647 | } |
648 | |
649 | /// Multiplies each 16-bit signed integer element of the first 64-bit |
650 | /// integer vector of [4 x i16] by the corresponding 16-bit signed integer |
651 | /// element of the second 64-bit integer vector of [4 x i16] to produce four |
652 | /// 32-bit products. Adds adjacent pairs of products to get two 32-bit sums. |
653 | /// The lower 32 bits of these two sums are packed into a 64-bit integer |
654 | /// vector of [2 x i32]. |
655 | /// |
656 | /// For example, bits [15:0] of both parameters are multiplied, bits [31:16] |
657 | /// of both parameters are multiplied, and the sum of both results is written |
658 | /// to bits [31:0] of the result. |
659 | /// |
660 | /// \headerfile <x86intrin.h> |
661 | /// |
662 | /// This intrinsic corresponds to the <c> PMADDWD </c> instruction. |
663 | /// |
664 | /// \param __m1 |
665 | /// A 64-bit integer vector of [4 x i16]. |
666 | /// \param __m2 |
667 | /// A 64-bit integer vector of [4 x i16]. |
668 | /// \returns A 64-bit integer vector of [2 x i32] containing the sums of |
669 | /// products of both parameters. |
670 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
671 | _mm_madd_pi16(__m64 __m1, __m64 __m2) |
672 | { |
673 | return (__m64)__builtin_ia32_pmaddwd((__v4hi)__m1, (__v4hi)__m2); |
674 | } |
675 | |
676 | /// Multiplies each 16-bit signed integer element of the first 64-bit |
677 | /// integer vector of [4 x i16] by the corresponding 16-bit signed integer |
678 | /// element of the second 64-bit integer vector of [4 x i16]. Packs the upper |
679 | /// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16]. |
680 | /// |
681 | /// \headerfile <x86intrin.h> |
682 | /// |
683 | /// This intrinsic corresponds to the <c> PMULHW </c> instruction. |
684 | /// |
685 | /// \param __m1 |
686 | /// A 64-bit integer vector of [4 x i16]. |
687 | /// \param __m2 |
688 | /// A 64-bit integer vector of [4 x i16]. |
689 | /// \returns A 64-bit integer vector of [4 x i16] containing the upper 16 bits |
690 | /// of the products of both parameters. |
691 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
692 | _mm_mulhi_pi16(__m64 __m1, __m64 __m2) |
693 | { |
694 | return (__m64)__builtin_ia32_pmulhw((__v4hi)__m1, (__v4hi)__m2); |
695 | } |
696 | |
697 | /// Multiplies each 16-bit signed integer element of the first 64-bit |
698 | /// integer vector of [4 x i16] by the corresponding 16-bit signed integer |
699 | /// element of the second 64-bit integer vector of [4 x i16]. Packs the lower |
700 | /// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16]. |
701 | /// |
702 | /// \headerfile <x86intrin.h> |
703 | /// |
704 | /// This intrinsic corresponds to the <c> PMULLW </c> instruction. |
705 | /// |
706 | /// \param __m1 |
707 | /// A 64-bit integer vector of [4 x i16]. |
708 | /// \param __m2 |
709 | /// A 64-bit integer vector of [4 x i16]. |
710 | /// \returns A 64-bit integer vector of [4 x i16] containing the lower 16 bits |
711 | /// of the products of both parameters. |
712 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
713 | _mm_mullo_pi16(__m64 __m1, __m64 __m2) |
714 | { |
715 | return (__m64)__builtin_ia32_pmullw((__v4hi)__m1, (__v4hi)__m2); |
716 | } |
717 | |
718 | /// Left-shifts each 16-bit signed integer element of the first |
719 | /// parameter, which is a 64-bit integer vector of [4 x i16], by the number |
720 | /// of bits specified by the second parameter, which is a 64-bit integer. The |
721 | /// lower 16 bits of the results are packed into a 64-bit integer vector of |
722 | /// [4 x i16]. |
723 | /// |
724 | /// \headerfile <x86intrin.h> |
725 | /// |
726 | /// This intrinsic corresponds to the <c> PSLLW </c> instruction. |
727 | /// |
728 | /// \param __m |
729 | /// A 64-bit integer vector of [4 x i16]. |
730 | /// \param __count |
731 | /// A 64-bit integer vector interpreted as a single 64-bit integer. |
732 | /// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted |
733 | /// values. If \a __count is greater or equal to 16, the result is set to all |
734 | /// 0. |
735 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
736 | _mm_sll_pi16(__m64 __m, __m64 __count) |
737 | { |
738 | return (__m64)__builtin_ia32_psllw((__v4hi)__m, __count); |
739 | } |
740 | |
741 | /// Left-shifts each 16-bit signed integer element of a 64-bit integer |
742 | /// vector of [4 x i16] by the number of bits specified by a 32-bit integer. |
743 | /// The lower 16 bits of the results are packed into a 64-bit integer vector |
744 | /// of [4 x i16]. |
745 | /// |
746 | /// \headerfile <x86intrin.h> |
747 | /// |
748 | /// This intrinsic corresponds to the <c> PSLLW </c> instruction. |
749 | /// |
750 | /// \param __m |
751 | /// A 64-bit integer vector of [4 x i16]. |
752 | /// \param __count |
753 | /// A 32-bit integer value. |
754 | /// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted |
755 | /// values. If \a __count is greater or equal to 16, the result is set to all |
756 | /// 0. |
757 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
758 | _mm_slli_pi16(__m64 __m, int __count) |
759 | { |
760 | return (__m64)__builtin_ia32_psllwi((__v4hi)__m, __count); |
761 | } |
762 | |
763 | /// Left-shifts each 32-bit signed integer element of the first |
764 | /// parameter, which is a 64-bit integer vector of [2 x i32], by the number |
765 | /// of bits specified by the second parameter, which is a 64-bit integer. The |
766 | /// lower 32 bits of the results are packed into a 64-bit integer vector of |
767 | /// [2 x i32]. |
768 | /// |
769 | /// \headerfile <x86intrin.h> |
770 | /// |
771 | /// This intrinsic corresponds to the <c> PSLLD </c> instruction. |
772 | /// |
773 | /// \param __m |
774 | /// A 64-bit integer vector of [2 x i32]. |
775 | /// \param __count |
776 | /// A 64-bit integer vector interpreted as a single 64-bit integer. |
777 | /// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted |
778 | /// values. If \a __count is greater or equal to 32, the result is set to all |
779 | /// 0. |
780 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
781 | _mm_sll_pi32(__m64 __m, __m64 __count) |
782 | { |
783 | return (__m64)__builtin_ia32_pslld((__v2si)__m, __count); |
784 | } |
785 | |
786 | /// Left-shifts each 32-bit signed integer element of a 64-bit integer |
787 | /// vector of [2 x i32] by the number of bits specified by a 32-bit integer. |
788 | /// The lower 32 bits of the results are packed into a 64-bit integer vector |
789 | /// of [2 x i32]. |
790 | /// |
791 | /// \headerfile <x86intrin.h> |
792 | /// |
793 | /// This intrinsic corresponds to the <c> PSLLD </c> instruction. |
794 | /// |
795 | /// \param __m |
796 | /// A 64-bit integer vector of [2 x i32]. |
797 | /// \param __count |
798 | /// A 32-bit integer value. |
799 | /// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted |
800 | /// values. If \a __count is greater or equal to 32, the result is set to all |
801 | /// 0. |
802 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
803 | _mm_slli_pi32(__m64 __m, int __count) |
804 | { |
805 | return (__m64)__builtin_ia32_pslldi((__v2si)__m, __count); |
806 | } |
807 | |
808 | /// Left-shifts the first 64-bit integer parameter by the number of bits |
809 | /// specified by the second 64-bit integer parameter. The lower 64 bits of |
810 | /// result are returned. |
811 | /// |
812 | /// \headerfile <x86intrin.h> |
813 | /// |
814 | /// This intrinsic corresponds to the <c> PSLLQ </c> instruction. |
815 | /// |
816 | /// \param __m |
817 | /// A 64-bit integer vector interpreted as a single 64-bit integer. |
818 | /// \param __count |
819 | /// A 64-bit integer vector interpreted as a single 64-bit integer. |
820 | /// \returns A 64-bit integer vector containing the left-shifted value. If |
821 | /// \a __count is greater or equal to 64, the result is set to 0. |
822 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
823 | _mm_sll_si64(__m64 __m, __m64 __count) |
824 | { |
825 | return (__m64)__builtin_ia32_psllq((__v1di)__m, __count); |
826 | } |
827 | |
828 | /// Left-shifts the first parameter, which is a 64-bit integer, by the |
829 | /// number of bits specified by the second parameter, which is a 32-bit |
830 | /// integer. The lower 64 bits of result are returned. |
831 | /// |
832 | /// \headerfile <x86intrin.h> |
833 | /// |
834 | /// This intrinsic corresponds to the <c> PSLLQ </c> instruction. |
835 | /// |
836 | /// \param __m |
837 | /// A 64-bit integer vector interpreted as a single 64-bit integer. |
838 | /// \param __count |
839 | /// A 32-bit integer value. |
840 | /// \returns A 64-bit integer vector containing the left-shifted value. If |
841 | /// \a __count is greater or equal to 64, the result is set to 0. |
842 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
843 | _mm_slli_si64(__m64 __m, int __count) |
844 | { |
845 | return (__m64)__builtin_ia32_psllqi((__v1di)__m, __count); |
846 | } |
847 | |
848 | /// Right-shifts each 16-bit integer element of the first parameter, |
849 | /// which is a 64-bit integer vector of [4 x i16], by the number of bits |
850 | /// specified by the second parameter, which is a 64-bit integer. |
851 | /// |
852 | /// High-order bits are filled with the sign bit of the initial value of each |
853 | /// 16-bit element. The 16-bit results are packed into a 64-bit integer |
854 | /// vector of [4 x i16]. |
855 | /// |
856 | /// \headerfile <x86intrin.h> |
857 | /// |
858 | /// This intrinsic corresponds to the <c> PSRAW </c> instruction. |
859 | /// |
860 | /// \param __m |
861 | /// A 64-bit integer vector of [4 x i16]. |
862 | /// \param __count |
863 | /// A 64-bit integer vector interpreted as a single 64-bit integer. |
864 | /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted |
865 | /// values. |
866 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
867 | _mm_sra_pi16(__m64 __m, __m64 __count) |
868 | { |
869 | return (__m64)__builtin_ia32_psraw((__v4hi)__m, __count); |
870 | } |
871 | |
872 | /// Right-shifts each 16-bit integer element of a 64-bit integer vector |
873 | /// of [4 x i16] by the number of bits specified by a 32-bit integer. |
874 | /// |
875 | /// High-order bits are filled with the sign bit of the initial value of each |
876 | /// 16-bit element. The 16-bit results are packed into a 64-bit integer |
877 | /// vector of [4 x i16]. |
878 | /// |
879 | /// \headerfile <x86intrin.h> |
880 | /// |
881 | /// This intrinsic corresponds to the <c> PSRAW </c> instruction. |
882 | /// |
883 | /// \param __m |
884 | /// A 64-bit integer vector of [4 x i16]. |
885 | /// \param __count |
886 | /// A 32-bit integer value. |
887 | /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted |
888 | /// values. |
889 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
890 | _mm_srai_pi16(__m64 __m, int __count) |
891 | { |
892 | return (__m64)__builtin_ia32_psrawi((__v4hi)__m, __count); |
893 | } |
894 | |
895 | /// Right-shifts each 32-bit integer element of the first parameter, |
896 | /// which is a 64-bit integer vector of [2 x i32], by the number of bits |
897 | /// specified by the second parameter, which is a 64-bit integer. |
898 | /// |
899 | /// High-order bits are filled with the sign bit of the initial value of each |
900 | /// 32-bit element. The 32-bit results are packed into a 64-bit integer |
901 | /// vector of [2 x i32]. |
902 | /// |
903 | /// \headerfile <x86intrin.h> |
904 | /// |
905 | /// This intrinsic corresponds to the <c> PSRAD </c> instruction. |
906 | /// |
907 | /// \param __m |
908 | /// A 64-bit integer vector of [2 x i32]. |
909 | /// \param __count |
910 | /// A 64-bit integer vector interpreted as a single 64-bit integer. |
911 | /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted |
912 | /// values. |
913 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
914 | _mm_sra_pi32(__m64 __m, __m64 __count) |
915 | { |
916 | return (__m64)__builtin_ia32_psrad((__v2si)__m, __count); |
917 | } |
918 | |
919 | /// Right-shifts each 32-bit integer element of a 64-bit integer vector |
920 | /// of [2 x i32] by the number of bits specified by a 32-bit integer. |
921 | /// |
922 | /// High-order bits are filled with the sign bit of the initial value of each |
923 | /// 32-bit element. The 32-bit results are packed into a 64-bit integer |
924 | /// vector of [2 x i32]. |
925 | /// |
926 | /// \headerfile <x86intrin.h> |
927 | /// |
928 | /// This intrinsic corresponds to the <c> PSRAD </c> instruction. |
929 | /// |
930 | /// \param __m |
931 | /// A 64-bit integer vector of [2 x i32]. |
932 | /// \param __count |
933 | /// A 32-bit integer value. |
934 | /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted |
935 | /// values. |
936 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
937 | _mm_srai_pi32(__m64 __m, int __count) |
938 | { |
939 | return (__m64)__builtin_ia32_psradi((__v2si)__m, __count); |
940 | } |
941 | |
942 | /// Right-shifts each 16-bit integer element of the first parameter, |
943 | /// which is a 64-bit integer vector of [4 x i16], by the number of bits |
944 | /// specified by the second parameter, which is a 64-bit integer. |
945 | /// |
946 | /// High-order bits are cleared. The 16-bit results are packed into a 64-bit |
947 | /// integer vector of [4 x i16]. |
948 | /// |
949 | /// \headerfile <x86intrin.h> |
950 | /// |
951 | /// This intrinsic corresponds to the <c> PSRLW </c> instruction. |
952 | /// |
953 | /// \param __m |
954 | /// A 64-bit integer vector of [4 x i16]. |
955 | /// \param __count |
956 | /// A 64-bit integer vector interpreted as a single 64-bit integer. |
957 | /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted |
958 | /// values. |
959 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
960 | _mm_srl_pi16(__m64 __m, __m64 __count) |
961 | { |
962 | return (__m64)__builtin_ia32_psrlw((__v4hi)__m, __count); |
963 | } |
964 | |
965 | /// Right-shifts each 16-bit integer element of a 64-bit integer vector |
966 | /// of [4 x i16] by the number of bits specified by a 32-bit integer. |
967 | /// |
968 | /// High-order bits are cleared. The 16-bit results are packed into a 64-bit |
969 | /// integer vector of [4 x i16]. |
970 | /// |
971 | /// \headerfile <x86intrin.h> |
972 | /// |
973 | /// This intrinsic corresponds to the <c> PSRLW </c> instruction. |
974 | /// |
975 | /// \param __m |
976 | /// A 64-bit integer vector of [4 x i16]. |
977 | /// \param __count |
978 | /// A 32-bit integer value. |
979 | /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted |
980 | /// values. |
981 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
982 | _mm_srli_pi16(__m64 __m, int __count) |
983 | { |
984 | return (__m64)__builtin_ia32_psrlwi((__v4hi)__m, __count); |
985 | } |
986 | |
987 | /// Right-shifts each 32-bit integer element of the first parameter, |
988 | /// which is a 64-bit integer vector of [2 x i32], by the number of bits |
989 | /// specified by the second parameter, which is a 64-bit integer. |
990 | /// |
991 | /// High-order bits are cleared. The 32-bit results are packed into a 64-bit |
992 | /// integer vector of [2 x i32]. |
993 | /// |
994 | /// \headerfile <x86intrin.h> |
995 | /// |
996 | /// This intrinsic corresponds to the <c> PSRLD </c> instruction. |
997 | /// |
998 | /// \param __m |
999 | /// A 64-bit integer vector of [2 x i32]. |
1000 | /// \param __count |
1001 | /// A 64-bit integer vector interpreted as a single 64-bit integer. |
1002 | /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted |
1003 | /// values. |
1004 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
1005 | _mm_srl_pi32(__m64 __m, __m64 __count) |
1006 | { |
1007 | return (__m64)__builtin_ia32_psrld((__v2si)__m, __count); |
1008 | } |
1009 | |
1010 | /// Right-shifts each 32-bit integer element of a 64-bit integer vector |
1011 | /// of [2 x i32] by the number of bits specified by a 32-bit integer. |
1012 | /// |
1013 | /// High-order bits are cleared. The 32-bit results are packed into a 64-bit |
1014 | /// integer vector of [2 x i32]. |
1015 | /// |
1016 | /// \headerfile <x86intrin.h> |
1017 | /// |
1018 | /// This intrinsic corresponds to the <c> PSRLD </c> instruction. |
1019 | /// |
1020 | /// \param __m |
1021 | /// A 64-bit integer vector of [2 x i32]. |
1022 | /// \param __count |
1023 | /// A 32-bit integer value. |
1024 | /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted |
1025 | /// values. |
1026 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
1027 | _mm_srli_pi32(__m64 __m, int __count) |
1028 | { |
1029 | return (__m64)__builtin_ia32_psrldi((__v2si)__m, __count); |
1030 | } |
1031 | |
1032 | /// Right-shifts the first 64-bit integer parameter by the number of bits |
1033 | /// specified by the second 64-bit integer parameter. |
1034 | /// |
1035 | /// High-order bits are cleared. |
1036 | /// |
1037 | /// \headerfile <x86intrin.h> |
1038 | /// |
1039 | /// This intrinsic corresponds to the <c> PSRLQ </c> instruction. |
1040 | /// |
1041 | /// \param __m |
1042 | /// A 64-bit integer vector interpreted as a single 64-bit integer. |
1043 | /// \param __count |
1044 | /// A 64-bit integer vector interpreted as a single 64-bit integer. |
1045 | /// \returns A 64-bit integer vector containing the right-shifted value. |
1046 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
1047 | _mm_srl_si64(__m64 __m, __m64 __count) |
1048 | { |
1049 | return (__m64)__builtin_ia32_psrlq((__v1di)__m, __count); |
1050 | } |
1051 | |
1052 | /// Right-shifts the first parameter, which is a 64-bit integer, by the |
1053 | /// number of bits specified by the second parameter, which is a 32-bit |
1054 | /// integer. |
1055 | /// |
1056 | /// High-order bits are cleared. |
1057 | /// |
1058 | /// \headerfile <x86intrin.h> |
1059 | /// |
1060 | /// This intrinsic corresponds to the <c> PSRLQ </c> instruction. |
1061 | /// |
1062 | /// \param __m |
1063 | /// A 64-bit integer vector interpreted as a single 64-bit integer. |
1064 | /// \param __count |
1065 | /// A 32-bit integer value. |
1066 | /// \returns A 64-bit integer vector containing the right-shifted value. |
1067 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
1068 | _mm_srli_si64(__m64 __m, int __count) |
1069 | { |
1070 | return (__m64)__builtin_ia32_psrlqi((__v1di)__m, __count); |
1071 | } |
1072 | |
1073 | /// Performs a bitwise AND of two 64-bit integer vectors. |
1074 | /// |
1075 | /// \headerfile <x86intrin.h> |
1076 | /// |
1077 | /// This intrinsic corresponds to the <c> PAND </c> instruction. |
1078 | /// |
1079 | /// \param __m1 |
1080 | /// A 64-bit integer vector. |
1081 | /// \param __m2 |
1082 | /// A 64-bit integer vector. |
1083 | /// \returns A 64-bit integer vector containing the bitwise AND of both |
1084 | /// parameters. |
1085 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
1086 | _mm_and_si64(__m64 __m1, __m64 __m2) |
1087 | { |
1088 | return __builtin_ia32_pand((__v1di)__m1, (__v1di)__m2); |
1089 | } |
1090 | |
1091 | /// Performs a bitwise NOT of the first 64-bit integer vector, and then |
1092 | /// performs a bitwise AND of the intermediate result and the second 64-bit |
1093 | /// integer vector. |
1094 | /// |
1095 | /// \headerfile <x86intrin.h> |
1096 | /// |
1097 | /// This intrinsic corresponds to the <c> PANDN </c> instruction. |
1098 | /// |
1099 | /// \param __m1 |
1100 | /// A 64-bit integer vector. The one's complement of this parameter is used |
1101 | /// in the bitwise AND. |
1102 | /// \param __m2 |
1103 | /// A 64-bit integer vector. |
1104 | /// \returns A 64-bit integer vector containing the bitwise AND of the second |
1105 | /// parameter and the one's complement of the first parameter. |
1106 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
1107 | _mm_andnot_si64(__m64 __m1, __m64 __m2) |
1108 | { |
1109 | return __builtin_ia32_pandn((__v1di)__m1, (__v1di)__m2); |
1110 | } |
1111 | |
1112 | /// Performs a bitwise OR of two 64-bit integer vectors. |
1113 | /// |
1114 | /// \headerfile <x86intrin.h> |
1115 | /// |
1116 | /// This intrinsic corresponds to the <c> POR </c> instruction. |
1117 | /// |
1118 | /// \param __m1 |
1119 | /// A 64-bit integer vector. |
1120 | /// \param __m2 |
1121 | /// A 64-bit integer vector. |
1122 | /// \returns A 64-bit integer vector containing the bitwise OR of both |
1123 | /// parameters. |
1124 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
1125 | _mm_or_si64(__m64 __m1, __m64 __m2) |
1126 | { |
1127 | return __builtin_ia32_por((__v1di)__m1, (__v1di)__m2); |
1128 | } |
1129 | |
1130 | /// Performs a bitwise exclusive OR of two 64-bit integer vectors. |
1131 | /// |
1132 | /// \headerfile <x86intrin.h> |
1133 | /// |
1134 | /// This intrinsic corresponds to the <c> PXOR </c> instruction. |
1135 | /// |
1136 | /// \param __m1 |
1137 | /// A 64-bit integer vector. |
1138 | /// \param __m2 |
1139 | /// A 64-bit integer vector. |
1140 | /// \returns A 64-bit integer vector containing the bitwise exclusive OR of both |
1141 | /// parameters. |
1142 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
1143 | _mm_xor_si64(__m64 __m1, __m64 __m2) |
1144 | { |
1145 | return __builtin_ia32_pxor((__v1di)__m1, (__v1di)__m2); |
1146 | } |
1147 | |
1148 | /// Compares the 8-bit integer elements of two 64-bit integer vectors of |
1149 | /// [8 x i8] to determine if the element of the first vector is equal to the |
1150 | /// corresponding element of the second vector. |
1151 | /// |
1152 | /// The comparison yields 0 for false, 0xFF for true. |
1153 | /// |
1154 | /// \headerfile <x86intrin.h> |
1155 | /// |
1156 | /// This intrinsic corresponds to the <c> PCMPEQB </c> instruction. |
1157 | /// |
1158 | /// \param __m1 |
1159 | /// A 64-bit integer vector of [8 x i8]. |
1160 | /// \param __m2 |
1161 | /// A 64-bit integer vector of [8 x i8]. |
1162 | /// \returns A 64-bit integer vector of [8 x i8] containing the comparison |
1163 | /// results. |
1164 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
1165 | _mm_cmpeq_pi8(__m64 __m1, __m64 __m2) |
1166 | { |
1167 | return (__m64)__builtin_ia32_pcmpeqb((__v8qi)__m1, (__v8qi)__m2); |
1168 | } |
1169 | |
1170 | /// Compares the 16-bit integer elements of two 64-bit integer vectors of |
1171 | /// [4 x i16] to determine if the element of the first vector is equal to the |
1172 | /// corresponding element of the second vector. |
1173 | /// |
1174 | /// The comparison yields 0 for false, 0xFFFF for true. |
1175 | /// |
1176 | /// \headerfile <x86intrin.h> |
1177 | /// |
1178 | /// This intrinsic corresponds to the <c> PCMPEQW </c> instruction. |
1179 | /// |
1180 | /// \param __m1 |
1181 | /// A 64-bit integer vector of [4 x i16]. |
1182 | /// \param __m2 |
1183 | /// A 64-bit integer vector of [4 x i16]. |
1184 | /// \returns A 64-bit integer vector of [4 x i16] containing the comparison |
1185 | /// results. |
1186 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
1187 | _mm_cmpeq_pi16(__m64 __m1, __m64 __m2) |
1188 | { |
1189 | return (__m64)__builtin_ia32_pcmpeqw((__v4hi)__m1, (__v4hi)__m2); |
1190 | } |
1191 | |
1192 | /// Compares the 32-bit integer elements of two 64-bit integer vectors of |
1193 | /// [2 x i32] to determine if the element of the first vector is equal to the |
1194 | /// corresponding element of the second vector. |
1195 | /// |
1196 | /// The comparison yields 0 for false, 0xFFFFFFFF for true. |
1197 | /// |
1198 | /// \headerfile <x86intrin.h> |
1199 | /// |
1200 | /// This intrinsic corresponds to the <c> PCMPEQD </c> instruction. |
1201 | /// |
1202 | /// \param __m1 |
1203 | /// A 64-bit integer vector of [2 x i32]. |
1204 | /// \param __m2 |
1205 | /// A 64-bit integer vector of [2 x i32]. |
1206 | /// \returns A 64-bit integer vector of [2 x i32] containing the comparison |
1207 | /// results. |
1208 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
1209 | _mm_cmpeq_pi32(__m64 __m1, __m64 __m2) |
1210 | { |
1211 | return (__m64)__builtin_ia32_pcmpeqd((__v2si)__m1, (__v2si)__m2); |
1212 | } |
1213 | |
1214 | /// Compares the 8-bit integer elements of two 64-bit integer vectors of |
1215 | /// [8 x i8] to determine if the element of the first vector is greater than |
1216 | /// the corresponding element of the second vector. |
1217 | /// |
1218 | /// The comparison yields 0 for false, 0xFF for true. |
1219 | /// |
1220 | /// \headerfile <x86intrin.h> |
1221 | /// |
1222 | /// This intrinsic corresponds to the <c> PCMPGTB </c> instruction. |
1223 | /// |
1224 | /// \param __m1 |
1225 | /// A 64-bit integer vector of [8 x i8]. |
1226 | /// \param __m2 |
1227 | /// A 64-bit integer vector of [8 x i8]. |
1228 | /// \returns A 64-bit integer vector of [8 x i8] containing the comparison |
1229 | /// results. |
1230 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
1231 | _mm_cmpgt_pi8(__m64 __m1, __m64 __m2) |
1232 | { |
1233 | return (__m64)__builtin_ia32_pcmpgtb((__v8qi)__m1, (__v8qi)__m2); |
1234 | } |
1235 | |
1236 | /// Compares the 16-bit integer elements of two 64-bit integer vectors of |
1237 | /// [4 x i16] to determine if the element of the first vector is greater than |
1238 | /// the corresponding element of the second vector. |
1239 | /// |
1240 | /// The comparison yields 0 for false, 0xFFFF for true. |
1241 | /// |
1242 | /// \headerfile <x86intrin.h> |
1243 | /// |
1244 | /// This intrinsic corresponds to the <c> PCMPGTW </c> instruction. |
1245 | /// |
1246 | /// \param __m1 |
1247 | /// A 64-bit integer vector of [4 x i16]. |
1248 | /// \param __m2 |
1249 | /// A 64-bit integer vector of [4 x i16]. |
1250 | /// \returns A 64-bit integer vector of [4 x i16] containing the comparison |
1251 | /// results. |
1252 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
1253 | _mm_cmpgt_pi16(__m64 __m1, __m64 __m2) |
1254 | { |
1255 | return (__m64)__builtin_ia32_pcmpgtw((__v4hi)__m1, (__v4hi)__m2); |
1256 | } |
1257 | |
1258 | /// Compares the 32-bit integer elements of two 64-bit integer vectors of |
1259 | /// [2 x i32] to determine if the element of the first vector is greater than |
1260 | /// the corresponding element of the second vector. |
1261 | /// |
1262 | /// The comparison yields 0 for false, 0xFFFFFFFF for true. |
1263 | /// |
1264 | /// \headerfile <x86intrin.h> |
1265 | /// |
1266 | /// This intrinsic corresponds to the <c> PCMPGTD </c> instruction. |
1267 | /// |
1268 | /// \param __m1 |
1269 | /// A 64-bit integer vector of [2 x i32]. |
1270 | /// \param __m2 |
1271 | /// A 64-bit integer vector of [2 x i32]. |
1272 | /// \returns A 64-bit integer vector of [2 x i32] containing the comparison |
1273 | /// results. |
1274 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
1275 | _mm_cmpgt_pi32(__m64 __m1, __m64 __m2) |
1276 | { |
1277 | return (__m64)__builtin_ia32_pcmpgtd((__v2si)__m1, (__v2si)__m2); |
1278 | } |
1279 | |
1280 | /// Constructs a 64-bit integer vector initialized to zero. |
1281 | /// |
1282 | /// \headerfile <x86intrin.h> |
1283 | /// |
1284 | /// This intrinsic corresponds to the <c> PXOR </c> instruction. |
1285 | /// |
1286 | /// \returns An initialized 64-bit integer vector with all elements set to zero. |
1287 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
1288 | _mm_setzero_si64(void) |
1289 | { |
1290 | return __extension__ (__m64){ 0LL }; |
1291 | } |
1292 | |
1293 | /// Constructs a 64-bit integer vector initialized with the specified |
1294 | /// 32-bit integer values. |
1295 | /// |
1296 | /// \headerfile <x86intrin.h> |
1297 | /// |
1298 | /// This intrinsic is a utility function and does not correspond to a specific |
1299 | /// instruction. |
1300 | /// |
1301 | /// \param __i1 |
1302 | /// A 32-bit integer value used to initialize the upper 32 bits of the |
1303 | /// result. |
1304 | /// \param __i0 |
1305 | /// A 32-bit integer value used to initialize the lower 32 bits of the |
1306 | /// result. |
1307 | /// \returns An initialized 64-bit integer vector. |
1308 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
1309 | _mm_set_pi32(int __i1, int __i0) |
1310 | { |
1311 | return (__m64)__builtin_ia32_vec_init_v2si(__i0, __i1); |
1312 | } |
1313 | |
1314 | /// Constructs a 64-bit integer vector initialized with the specified |
1315 | /// 16-bit integer values. |
1316 | /// |
1317 | /// \headerfile <x86intrin.h> |
1318 | /// |
1319 | /// This intrinsic is a utility function and does not correspond to a specific |
1320 | /// instruction. |
1321 | /// |
1322 | /// \param __s3 |
1323 | /// A 16-bit integer value used to initialize bits [63:48] of the result. |
1324 | /// \param __s2 |
1325 | /// A 16-bit integer value used to initialize bits [47:32] of the result. |
1326 | /// \param __s1 |
1327 | /// A 16-bit integer value used to initialize bits [31:16] of the result. |
1328 | /// \param __s0 |
1329 | /// A 16-bit integer value used to initialize bits [15:0] of the result. |
1330 | /// \returns An initialized 64-bit integer vector. |
1331 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
1332 | _mm_set_pi16(short __s3, short __s2, short __s1, short __s0) |
1333 | { |
1334 | return (__m64)__builtin_ia32_vec_init_v4hi(__s0, __s1, __s2, __s3); |
1335 | } |
1336 | |
1337 | /// Constructs a 64-bit integer vector initialized with the specified |
1338 | /// 8-bit integer values. |
1339 | /// |
1340 | /// \headerfile <x86intrin.h> |
1341 | /// |
1342 | /// This intrinsic is a utility function and does not correspond to a specific |
1343 | /// instruction. |
1344 | /// |
1345 | /// \param __b7 |
1346 | /// An 8-bit integer value used to initialize bits [63:56] of the result. |
1347 | /// \param __b6 |
1348 | /// An 8-bit integer value used to initialize bits [55:48] of the result. |
1349 | /// \param __b5 |
1350 | /// An 8-bit integer value used to initialize bits [47:40] of the result. |
1351 | /// \param __b4 |
1352 | /// An 8-bit integer value used to initialize bits [39:32] of the result. |
1353 | /// \param __b3 |
1354 | /// An 8-bit integer value used to initialize bits [31:24] of the result. |
1355 | /// \param __b2 |
1356 | /// An 8-bit integer value used to initialize bits [23:16] of the result. |
1357 | /// \param __b1 |
1358 | /// An 8-bit integer value used to initialize bits [15:8] of the result. |
1359 | /// \param __b0 |
1360 | /// An 8-bit integer value used to initialize bits [7:0] of the result. |
1361 | /// \returns An initialized 64-bit integer vector. |
1362 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
1363 | _mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, |
1364 | char __b1, char __b0) |
1365 | { |
1366 | return (__m64)__builtin_ia32_vec_init_v8qi(__b0, __b1, __b2, __b3, |
1367 | __b4, __b5, __b6, __b7); |
1368 | } |
1369 | |
1370 | /// Constructs a 64-bit integer vector of [2 x i32], with each of the |
1371 | /// 32-bit integer vector elements set to the specified 32-bit integer |
1372 | /// value. |
1373 | /// |
1374 | /// \headerfile <x86intrin.h> |
1375 | /// |
1376 | /// This intrinsic is a utility function and does not correspond to a specific |
1377 | /// instruction. |
1378 | /// |
1379 | /// \param __i |
1380 | /// A 32-bit integer value used to initialize each vector element of the |
1381 | /// result. |
1382 | /// \returns An initialized 64-bit integer vector of [2 x i32]. |
1383 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
1384 | _mm_set1_pi32(int __i) |
1385 | { |
1386 | return _mm_set_pi32(__i, __i); |
1387 | } |
1388 | |
1389 | /// Constructs a 64-bit integer vector of [4 x i16], with each of the |
1390 | /// 16-bit integer vector elements set to the specified 16-bit integer |
1391 | /// value. |
1392 | /// |
1393 | /// \headerfile <x86intrin.h> |
1394 | /// |
1395 | /// This intrinsic is a utility function and does not correspond to a specific |
1396 | /// instruction. |
1397 | /// |
1398 | /// \param __w |
1399 | /// A 16-bit integer value used to initialize each vector element of the |
1400 | /// result. |
1401 | /// \returns An initialized 64-bit integer vector of [4 x i16]. |
1402 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
1403 | _mm_set1_pi16(short __w) |
1404 | { |
1405 | return _mm_set_pi16(__w, __w, __w, __w); |
1406 | } |
1407 | |
1408 | /// Constructs a 64-bit integer vector of [8 x i8], with each of the |
1409 | /// 8-bit integer vector elements set to the specified 8-bit integer value. |
1410 | /// |
1411 | /// \headerfile <x86intrin.h> |
1412 | /// |
1413 | /// This intrinsic is a utility function and does not correspond to a specific |
1414 | /// instruction. |
1415 | /// |
1416 | /// \param __b |
1417 | /// An 8-bit integer value used to initialize each vector element of the |
1418 | /// result. |
1419 | /// \returns An initialized 64-bit integer vector of [8 x i8]. |
1420 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
1421 | _mm_set1_pi8(char __b) |
1422 | { |
1423 | return _mm_set_pi8(__b, __b, __b, __b, __b, __b, __b, __b); |
1424 | } |
1425 | |
1426 | /// Constructs a 64-bit integer vector, initialized in reverse order with |
1427 | /// the specified 32-bit integer values. |
1428 | /// |
1429 | /// \headerfile <x86intrin.h> |
1430 | /// |
1431 | /// This intrinsic is a utility function and does not correspond to a specific |
1432 | /// instruction. |
1433 | /// |
1434 | /// \param __i0 |
1435 | /// A 32-bit integer value used to initialize the lower 32 bits of the |
1436 | /// result. |
1437 | /// \param __i1 |
1438 | /// A 32-bit integer value used to initialize the upper 32 bits of the |
1439 | /// result. |
1440 | /// \returns An initialized 64-bit integer vector. |
1441 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
1442 | _mm_setr_pi32(int __i0, int __i1) |
1443 | { |
1444 | return _mm_set_pi32(__i1, __i0); |
1445 | } |
1446 | |
1447 | /// Constructs a 64-bit integer vector, initialized in reverse order with |
1448 | /// the specified 16-bit integer values. |
1449 | /// |
1450 | /// \headerfile <x86intrin.h> |
1451 | /// |
1452 | /// This intrinsic is a utility function and does not correspond to a specific |
1453 | /// instruction. |
1454 | /// |
1455 | /// \param __w0 |
1456 | /// A 16-bit integer value used to initialize bits [15:0] of the result. |
1457 | /// \param __w1 |
1458 | /// A 16-bit integer value used to initialize bits [31:16] of the result. |
1459 | /// \param __w2 |
1460 | /// A 16-bit integer value used to initialize bits [47:32] of the result. |
1461 | /// \param __w3 |
1462 | /// A 16-bit integer value used to initialize bits [63:48] of the result. |
1463 | /// \returns An initialized 64-bit integer vector. |
1464 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
1465 | _mm_setr_pi16(short __w0, short __w1, short __w2, short __w3) |
1466 | { |
1467 | return _mm_set_pi16(__w3, __w2, __w1, __w0); |
1468 | } |
1469 | |
1470 | /// Constructs a 64-bit integer vector, initialized in reverse order with |
1471 | /// the specified 8-bit integer values. |
1472 | /// |
1473 | /// \headerfile <x86intrin.h> |
1474 | /// |
1475 | /// This intrinsic is a utility function and does not correspond to a specific |
1476 | /// instruction. |
1477 | /// |
1478 | /// \param __b0 |
1479 | /// An 8-bit integer value used to initialize bits [7:0] of the result. |
1480 | /// \param __b1 |
1481 | /// An 8-bit integer value used to initialize bits [15:8] of the result. |
1482 | /// \param __b2 |
1483 | /// An 8-bit integer value used to initialize bits [23:16] of the result. |
1484 | /// \param __b3 |
1485 | /// An 8-bit integer value used to initialize bits [31:24] of the result. |
1486 | /// \param __b4 |
1487 | /// An 8-bit integer value used to initialize bits [39:32] of the result. |
1488 | /// \param __b5 |
1489 | /// An 8-bit integer value used to initialize bits [47:40] of the result. |
1490 | /// \param __b6 |
1491 | /// An 8-bit integer value used to initialize bits [55:48] of the result. |
1492 | /// \param __b7 |
1493 | /// An 8-bit integer value used to initialize bits [63:56] of the result. |
1494 | /// \returns An initialized 64-bit integer vector. |
1495 | static __inline__ __m64 __DEFAULT_FN_ATTRS |
1496 | _mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5, |
1497 | char __b6, char __b7) |
1498 | { |
1499 | return _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0); |
1500 | } |
1501 | |
1502 | #undef __DEFAULT_FN_ATTRS |
1503 | |
1504 | /* Aliases for compatibility. */ |
1505 | #define _m_empty _mm_empty |
1506 | #define _m_from_int _mm_cvtsi32_si64 |
1507 | #define _m_from_int64 _mm_cvtsi64_m64 |
1508 | #define _m_to_int _mm_cvtsi64_si32 |
1509 | #define _m_to_int64 _mm_cvtm64_si64 |
1510 | #define _m_packsswb _mm_packs_pi16 |
1511 | #define _m_packssdw _mm_packs_pi32 |
1512 | #define _m_packuswb _mm_packs_pu16 |
1513 | #define _m_punpckhbw _mm_unpackhi_pi8 |
1514 | #define _m_punpckhwd _mm_unpackhi_pi16 |
1515 | #define _m_punpckhdq _mm_unpackhi_pi32 |
1516 | #define _m_punpcklbw _mm_unpacklo_pi8 |
1517 | #define _m_punpcklwd _mm_unpacklo_pi16 |
1518 | #define _m_punpckldq _mm_unpacklo_pi32 |
1519 | #define _m_paddb _mm_add_pi8 |
1520 | #define _m_paddw _mm_add_pi16 |
1521 | #define _m_paddd _mm_add_pi32 |
1522 | #define _m_paddsb _mm_adds_pi8 |
1523 | #define _m_paddsw _mm_adds_pi16 |
1524 | #define _m_paddusb _mm_adds_pu8 |
1525 | #define _m_paddusw _mm_adds_pu16 |
1526 | #define _m_psubb _mm_sub_pi8 |
1527 | #define _m_psubw _mm_sub_pi16 |
1528 | #define _m_psubd _mm_sub_pi32 |
1529 | #define _m_psubsb _mm_subs_pi8 |
1530 | #define _m_psubsw _mm_subs_pi16 |
1531 | #define _m_psubusb _mm_subs_pu8 |
1532 | #define _m_psubusw _mm_subs_pu16 |
1533 | #define _m_pmaddwd _mm_madd_pi16 |
1534 | #define _m_pmulhw _mm_mulhi_pi16 |
1535 | #define _m_pmullw _mm_mullo_pi16 |
1536 | #define _m_psllw _mm_sll_pi16 |
1537 | #define _m_psllwi _mm_slli_pi16 |
1538 | #define _m_pslld _mm_sll_pi32 |
1539 | #define _m_pslldi _mm_slli_pi32 |
1540 | #define _m_psllq _mm_sll_si64 |
1541 | #define _m_psllqi _mm_slli_si64 |
1542 | #define _m_psraw _mm_sra_pi16 |
1543 | #define _m_psrawi _mm_srai_pi16 |
1544 | #define _m_psrad _mm_sra_pi32 |
1545 | #define _m_psradi _mm_srai_pi32 |
1546 | #define _m_psrlw _mm_srl_pi16 |
1547 | #define _m_psrlwi _mm_srli_pi16 |
1548 | #define _m_psrld _mm_srl_pi32 |
1549 | #define _m_psrldi _mm_srli_pi32 |
1550 | #define _m_psrlq _mm_srl_si64 |
1551 | #define _m_psrlqi _mm_srli_si64 |
1552 | #define _m_pand _mm_and_si64 |
1553 | #define _m_pandn _mm_andnot_si64 |
1554 | #define _m_por _mm_or_si64 |
1555 | #define _m_pxor _mm_xor_si64 |
1556 | #define _m_pcmpeqb _mm_cmpeq_pi8 |
1557 | #define _m_pcmpeqw _mm_cmpeq_pi16 |
1558 | #define _m_pcmpeqd _mm_cmpeq_pi32 |
1559 | #define _m_pcmpgtb _mm_cmpgt_pi8 |
1560 | #define _m_pcmpgtw _mm_cmpgt_pi16 |
1561 | #define _m_pcmpgtd _mm_cmpgt_pi32 |
1562 | |
1563 | #endif /* __MMINTRIN_H */ |
1564 | |
1565 |
Warning: This file is not a C or C++ file. It does not have highlighting.