Warning: This file is not a C or C++ file. It does not have highlighting.

1/*===---- mmintrin.h - MMX intrinsics --------------------------------------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9
10#ifndef __MMINTRIN_H
11#define __MMINTRIN_H
12
/* The intrinsics below are x86-only; stop the build on any other target. */
#if !(defined(__i386__) || defined(__x86_64__))
#error "This header is only meant to be used on x86 and x64 architecture"
#endif
16
/* Public 64-bit vector type: 8 bytes wide with 8-byte alignment. */
typedef long long __m64 __attribute__((__aligned__(8), __vector_size__(8)));

/* Element-typed 8-byte vector views; the intrinsics below cast their __m64
 * arguments to these before handing them to the builtins. */
typedef long long __v1di __attribute__((__vector_size__(8))); /* long long lanes */
typedef int __v2si __attribute__((__vector_size__(8)));       /* int lanes */
typedef short __v4hi __attribute__((__vector_size__(8)));     /* short lanes */
typedef char __v8qi __attribute__((__vector_size__(8)));      /* char lanes */
23
/* Attribute set applied to the intrinsics in this file: always-inline,
 * nodebug, the "mmx,no-evex512" target features, and a minimum vector width
 * of 64. */
#define __DEFAULT_FN_ATTRS                                                     \
  __attribute__((__always_inline__,                                            \
                 __nodebug__,                                                  \
                 __target__("mmx,no-evex512"),                                 \
                 __min_vector_width__(64)))
28
29/// Clears the MMX state by setting the state of the x87 stack registers
30/// to empty.
31///
32/// \headerfile <x86intrin.h>
33///
34/// This intrinsic corresponds to the <c> EMMS </c> instruction.
35///
36static __inline__ void __attribute__((__always_inline__, __nodebug__,
37 __target__("mmx,no-evex512")))
38_mm_empty(void) {
39 __builtin_ia32_emms();
40}
41
42/// Constructs a 64-bit integer vector, setting the lower 32 bits to the
43/// value of the 32-bit integer parameter and setting the upper 32 bits to 0.
44///
45/// \headerfile <x86intrin.h>
46///
47/// This intrinsic corresponds to the <c> MOVD </c> instruction.
48///
49/// \param __i
50/// A 32-bit integer value.
51/// \returns A 64-bit integer vector. The lower 32 bits contain the value of the
52/// parameter. The upper 32 bits are set to 0.
53static __inline__ __m64 __DEFAULT_FN_ATTRS
54_mm_cvtsi32_si64(int __i)
55{
56 return (__m64)__builtin_ia32_vec_init_v2si(__i, 0);
57}
58
59/// Returns the lower 32 bits of a 64-bit integer vector as a 32-bit
60/// signed integer.
61///
62/// \headerfile <x86intrin.h>
63///
64/// This intrinsic corresponds to the <c> MOVD </c> instruction.
65///
66/// \param __m
67/// A 64-bit integer vector.
68/// \returns A 32-bit signed integer value containing the lower 32 bits of the
69/// parameter.
70static __inline__ int __DEFAULT_FN_ATTRS
71_mm_cvtsi64_si32(__m64 __m)
72{
73 return __builtin_ia32_vec_ext_v2si((__v2si)__m, 0);
74}
75
76/// Casts a 64-bit signed integer value into a 64-bit integer vector.
77///
78/// \headerfile <x86intrin.h>
79///
80/// This intrinsic corresponds to the <c> MOVQ </c> instruction.
81///
82/// \param __i
83/// A 64-bit signed integer.
84/// \returns A 64-bit integer vector containing the same bitwise pattern as the
85/// parameter.
86static __inline__ __m64 __DEFAULT_FN_ATTRS
87_mm_cvtsi64_m64(long long __i)
88{
89 return (__m64)__i;
90}
91
92/// Casts a 64-bit integer vector into a 64-bit signed integer value.
93///
94/// \headerfile <x86intrin.h>
95///
96/// This intrinsic corresponds to the <c> MOVQ </c> instruction.
97///
98/// \param __m
99/// A 64-bit integer vector.
100/// \returns A 64-bit signed integer containing the same bitwise pattern as the
101/// parameter.
102static __inline__ long long __DEFAULT_FN_ATTRS
103_mm_cvtm64_si64(__m64 __m)
104{
105 return (long long)__m;
106}
107
108/// Converts 16-bit signed integers from both 64-bit integer vector
109/// parameters of [4 x i16] into 8-bit signed integer values, and constructs
110/// a 64-bit integer vector of [8 x i8] as the result. Positive values
111/// greater than 0x7F are saturated to 0x7F. Negative values less than 0x80
112/// are saturated to 0x80.
113///
114/// \headerfile <x86intrin.h>
115///
116/// This intrinsic corresponds to the <c> PACKSSWB </c> instruction.
117///
118/// \param __m1
119/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a
120/// 16-bit signed integer and is converted to an 8-bit signed integer with
121/// saturation. Positive values greater than 0x7F are saturated to 0x7F.
122/// Negative values less than 0x80 are saturated to 0x80. The converted
123/// [4 x i8] values are written to the lower 32 bits of the result.
124/// \param __m2
125/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a
126/// 16-bit signed integer and is converted to an 8-bit signed integer with
127/// saturation. Positive values greater than 0x7F are saturated to 0x7F.
128/// Negative values less than 0x80 are saturated to 0x80. The converted
129/// [4 x i8] values are written to the upper 32 bits of the result.
130/// \returns A 64-bit integer vector of [8 x i8] containing the converted
131/// values.
132static __inline__ __m64 __DEFAULT_FN_ATTRS
133_mm_packs_pi16(__m64 __m1, __m64 __m2)
134{
135 return (__m64)__builtin_ia32_packsswb((__v4hi)__m1, (__v4hi)__m2);
136}
137
138/// Converts 32-bit signed integers from both 64-bit integer vector
139/// parameters of [2 x i32] into 16-bit signed integer values, and constructs
140/// a 64-bit integer vector of [4 x i16] as the result. Positive values
141/// greater than 0x7FFF are saturated to 0x7FFF. Negative values less than
142/// 0x8000 are saturated to 0x8000.
143///
144/// \headerfile <x86intrin.h>
145///
146/// This intrinsic corresponds to the <c> PACKSSDW </c> instruction.
147///
148/// \param __m1
149/// A 64-bit integer vector of [2 x i32]. Each 32-bit element is treated as a
150/// 32-bit signed integer and is converted to a 16-bit signed integer with
151/// saturation. Positive values greater than 0x7FFF are saturated to 0x7FFF.
152/// Negative values less than 0x8000 are saturated to 0x8000. The converted
153/// [2 x i16] values are written to the lower 32 bits of the result.
154/// \param __m2
155/// A 64-bit integer vector of [2 x i32]. Each 32-bit element is treated as a
156/// 32-bit signed integer and is converted to a 16-bit signed integer with
157/// saturation. Positive values greater than 0x7FFF are saturated to 0x7FFF.
158/// Negative values less than 0x8000 are saturated to 0x8000. The converted
159/// [2 x i16] values are written to the upper 32 bits of the result.
160/// \returns A 64-bit integer vector of [4 x i16] containing the converted
161/// values.
162static __inline__ __m64 __DEFAULT_FN_ATTRS
163_mm_packs_pi32(__m64 __m1, __m64 __m2)
164{
165 return (__m64)__builtin_ia32_packssdw((__v2si)__m1, (__v2si)__m2);
166}
167
168/// Converts 16-bit signed integers from both 64-bit integer vector
169/// parameters of [4 x i16] into 8-bit unsigned integer values, and
170/// constructs a 64-bit integer vector of [8 x i8] as the result. Values
171/// greater than 0xFF are saturated to 0xFF. Values less than 0 are saturated
172/// to 0.
173///
174/// \headerfile <x86intrin.h>
175///
176/// This intrinsic corresponds to the <c> PACKUSWB </c> instruction.
177///
178/// \param __m1
179/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a
180/// 16-bit signed integer and is converted to an 8-bit unsigned integer with
181/// saturation. Values greater than 0xFF are saturated to 0xFF. Values less
182/// than 0 are saturated to 0. The converted [4 x i8] values are written to
183/// the lower 32 bits of the result.
184/// \param __m2
185/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a
186/// 16-bit signed integer and is converted to an 8-bit unsigned integer with
187/// saturation. Values greater than 0xFF are saturated to 0xFF. Values less
188/// than 0 are saturated to 0. The converted [4 x i8] values are written to
189/// the upper 32 bits of the result.
190/// \returns A 64-bit integer vector of [8 x i8] containing the converted
191/// values.
192static __inline__ __m64 __DEFAULT_FN_ATTRS
193_mm_packs_pu16(__m64 __m1, __m64 __m2)
194{
195 return (__m64)__builtin_ia32_packuswb((__v4hi)__m1, (__v4hi)__m2);
196}
197
198/// Unpacks the upper 32 bits from two 64-bit integer vectors of [8 x i8]
199/// and interleaves them into a 64-bit integer vector of [8 x i8].
200///
201/// \headerfile <x86intrin.h>
202///
203/// This intrinsic corresponds to the <c> PUNPCKHBW </c> instruction.
204///
205/// \param __m1
206/// A 64-bit integer vector of [8 x i8]. \n
207/// Bits [39:32] are written to bits [7:0] of the result. \n
208/// Bits [47:40] are written to bits [23:16] of the result. \n
209/// Bits [55:48] are written to bits [39:32] of the result. \n
210/// Bits [63:56] are written to bits [55:48] of the result.
211/// \param __m2
212/// A 64-bit integer vector of [8 x i8].
213/// Bits [39:32] are written to bits [15:8] of the result. \n
214/// Bits [47:40] are written to bits [31:24] of the result. \n
215/// Bits [55:48] are written to bits [47:40] of the result. \n
216/// Bits [63:56] are written to bits [63:56] of the result.
217/// \returns A 64-bit integer vector of [8 x i8] containing the interleaved
218/// values.
219static __inline__ __m64 __DEFAULT_FN_ATTRS
220_mm_unpackhi_pi8(__m64 __m1, __m64 __m2)
221{
222 return (__m64)__builtin_ia32_punpckhbw((__v8qi)__m1, (__v8qi)__m2);
223}
224
225/// Unpacks the upper 32 bits from two 64-bit integer vectors of
226/// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16].
227///
228/// \headerfile <x86intrin.h>
229///
230/// This intrinsic corresponds to the <c> PUNPCKHWD </c> instruction.
231///
232/// \param __m1
233/// A 64-bit integer vector of [4 x i16].
234/// Bits [47:32] are written to bits [15:0] of the result. \n
235/// Bits [63:48] are written to bits [47:32] of the result.
236/// \param __m2
237/// A 64-bit integer vector of [4 x i16].
238/// Bits [47:32] are written to bits [31:16] of the result. \n
239/// Bits [63:48] are written to bits [63:48] of the result.
240/// \returns A 64-bit integer vector of [4 x i16] containing the interleaved
241/// values.
242static __inline__ __m64 __DEFAULT_FN_ATTRS
243_mm_unpackhi_pi16(__m64 __m1, __m64 __m2)
244{
245 return (__m64)__builtin_ia32_punpckhwd((__v4hi)__m1, (__v4hi)__m2);
246}
247
248/// Unpacks the upper 32 bits from two 64-bit integer vectors of
249/// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32].
250///
251/// \headerfile <x86intrin.h>
252///
253/// This intrinsic corresponds to the <c> PUNPCKHDQ </c> instruction.
254///
255/// \param __m1
256/// A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to
257/// the lower 32 bits of the result.
258/// \param __m2
259/// A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to
260/// the upper 32 bits of the result.
261/// \returns A 64-bit integer vector of [2 x i32] containing the interleaved
262/// values.
263static __inline__ __m64 __DEFAULT_FN_ATTRS
264_mm_unpackhi_pi32(__m64 __m1, __m64 __m2)
265{
266 return (__m64)__builtin_ia32_punpckhdq((__v2si)__m1, (__v2si)__m2);
267}
268
269/// Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8]
270/// and interleaves them into a 64-bit integer vector of [8 x i8].
271///
272/// \headerfile <x86intrin.h>
273///
274/// This intrinsic corresponds to the <c> PUNPCKLBW </c> instruction.
275///
276/// \param __m1
277/// A 64-bit integer vector of [8 x i8].
278/// Bits [7:0] are written to bits [7:0] of the result. \n
279/// Bits [15:8] are written to bits [23:16] of the result. \n
280/// Bits [23:16] are written to bits [39:32] of the result. \n
281/// Bits [31:24] are written to bits [55:48] of the result.
282/// \param __m2
283/// A 64-bit integer vector of [8 x i8].
284/// Bits [7:0] are written to bits [15:8] of the result. \n
285/// Bits [15:8] are written to bits [31:24] of the result. \n
286/// Bits [23:16] are written to bits [47:40] of the result. \n
287/// Bits [31:24] are written to bits [63:56] of the result.
288/// \returns A 64-bit integer vector of [8 x i8] containing the interleaved
289/// values.
290static __inline__ __m64 __DEFAULT_FN_ATTRS
291_mm_unpacklo_pi8(__m64 __m1, __m64 __m2)
292{
293 return (__m64)__builtin_ia32_punpcklbw((__v8qi)__m1, (__v8qi)__m2);
294}
295
296/// Unpacks the lower 32 bits from two 64-bit integer vectors of
297/// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16].
298///
299/// \headerfile <x86intrin.h>
300///
301/// This intrinsic corresponds to the <c> PUNPCKLWD </c> instruction.
302///
303/// \param __m1
304/// A 64-bit integer vector of [4 x i16].
305/// Bits [15:0] are written to bits [15:0] of the result. \n
306/// Bits [31:16] are written to bits [47:32] of the result.
307/// \param __m2
308/// A 64-bit integer vector of [4 x i16].
309/// Bits [15:0] are written to bits [31:16] of the result. \n
310/// Bits [31:16] are written to bits [63:48] of the result.
311/// \returns A 64-bit integer vector of [4 x i16] containing the interleaved
312/// values.
313static __inline__ __m64 __DEFAULT_FN_ATTRS
314_mm_unpacklo_pi16(__m64 __m1, __m64 __m2)
315{
316 return (__m64)__builtin_ia32_punpcklwd((__v4hi)__m1, (__v4hi)__m2);
317}
318
319/// Unpacks the lower 32 bits from two 64-bit integer vectors of
320/// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32].
321///
322/// \headerfile <x86intrin.h>
323///
324/// This intrinsic corresponds to the <c> PUNPCKLDQ </c> instruction.
325///
326/// \param __m1
327/// A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to
328/// the lower 32 bits of the result.
329/// \param __m2
330/// A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to
331/// the upper 32 bits of the result.
332/// \returns A 64-bit integer vector of [2 x i32] containing the interleaved
333/// values.
334static __inline__ __m64 __DEFAULT_FN_ATTRS
335_mm_unpacklo_pi32(__m64 __m1, __m64 __m2)
336{
337 return (__m64)__builtin_ia32_punpckldq((__v2si)__m1, (__v2si)__m2);
338}
339
340/// Adds each 8-bit integer element of the first 64-bit integer vector
341/// of [8 x i8] to the corresponding 8-bit integer element of the second
342/// 64-bit integer vector of [8 x i8]. The lower 8 bits of the results are
343/// packed into a 64-bit integer vector of [8 x i8].
344///
345/// \headerfile <x86intrin.h>
346///
347/// This intrinsic corresponds to the <c> PADDB </c> instruction.
348///
349/// \param __m1
350/// A 64-bit integer vector of [8 x i8].
351/// \param __m2
352/// A 64-bit integer vector of [8 x i8].
353/// \returns A 64-bit integer vector of [8 x i8] containing the sums of both
354/// parameters.
355static __inline__ __m64 __DEFAULT_FN_ATTRS
356_mm_add_pi8(__m64 __m1, __m64 __m2)
357{
358 return (__m64)__builtin_ia32_paddb((__v8qi)__m1, (__v8qi)__m2);
359}
360
361/// Adds each 16-bit integer element of the first 64-bit integer vector
362/// of [4 x i16] to the corresponding 16-bit integer element of the second
363/// 64-bit integer vector of [4 x i16]. The lower 16 bits of the results are
364/// packed into a 64-bit integer vector of [4 x i16].
365///
366/// \headerfile <x86intrin.h>
367///
368/// This intrinsic corresponds to the <c> PADDW </c> instruction.
369///
370/// \param __m1
371/// A 64-bit integer vector of [4 x i16].
372/// \param __m2
373/// A 64-bit integer vector of [4 x i16].
374/// \returns A 64-bit integer vector of [4 x i16] containing the sums of both
375/// parameters.
376static __inline__ __m64 __DEFAULT_FN_ATTRS
377_mm_add_pi16(__m64 __m1, __m64 __m2)
378{
379 return (__m64)__builtin_ia32_paddw((__v4hi)__m1, (__v4hi)__m2);
380}
381
382/// Adds each 32-bit integer element of the first 64-bit integer vector
383/// of [2 x i32] to the corresponding 32-bit integer element of the second
384/// 64-bit integer vector of [2 x i32]. The lower 32 bits of the results are
385/// packed into a 64-bit integer vector of [2 x i32].
386///
387/// \headerfile <x86intrin.h>
388///
389/// This intrinsic corresponds to the <c> PADDD </c> instruction.
390///
391/// \param __m1
392/// A 64-bit integer vector of [2 x i32].
393/// \param __m2
394/// A 64-bit integer vector of [2 x i32].
395/// \returns A 64-bit integer vector of [2 x i32] containing the sums of both
396/// parameters.
397static __inline__ __m64 __DEFAULT_FN_ATTRS
398_mm_add_pi32(__m64 __m1, __m64 __m2)
399{
400 return (__m64)__builtin_ia32_paddd((__v2si)__m1, (__v2si)__m2);
401}
402
403/// Adds each 8-bit signed integer element of the first 64-bit integer
404/// vector of [8 x i8] to the corresponding 8-bit signed integer element of
405/// the second 64-bit integer vector of [8 x i8]. Positive sums greater than
406/// 0x7F are saturated to 0x7F. Negative sums less than 0x80 are saturated to
407/// 0x80. The results are packed into a 64-bit integer vector of [8 x i8].
408///
409/// \headerfile <x86intrin.h>
410///
411/// This intrinsic corresponds to the <c> PADDSB </c> instruction.
412///
413/// \param __m1
414/// A 64-bit integer vector of [8 x i8].
415/// \param __m2
416/// A 64-bit integer vector of [8 x i8].
417/// \returns A 64-bit integer vector of [8 x i8] containing the saturated sums
418/// of both parameters.
419static __inline__ __m64 __DEFAULT_FN_ATTRS
420_mm_adds_pi8(__m64 __m1, __m64 __m2)
421{
422 return (__m64)__builtin_ia32_paddsb((__v8qi)__m1, (__v8qi)__m2);
423}
424
425/// Adds each 16-bit signed integer element of the first 64-bit integer
426/// vector of [4 x i16] to the corresponding 16-bit signed integer element of
427/// the second 64-bit integer vector of [4 x i16]. Positive sums greater than
428/// 0x7FFF are saturated to 0x7FFF. Negative sums less than 0x8000 are
429/// saturated to 0x8000. The results are packed into a 64-bit integer vector
430/// of [4 x i16].
431///
432/// \headerfile <x86intrin.h>
433///
434/// This intrinsic corresponds to the <c> PADDSW </c> instruction.
435///
436/// \param __m1
437/// A 64-bit integer vector of [4 x i16].
438/// \param __m2
439/// A 64-bit integer vector of [4 x i16].
440/// \returns A 64-bit integer vector of [4 x i16] containing the saturated sums
441/// of both parameters.
442static __inline__ __m64 __DEFAULT_FN_ATTRS
443_mm_adds_pi16(__m64 __m1, __m64 __m2)
444{
445 return (__m64)__builtin_ia32_paddsw((__v4hi)__m1, (__v4hi)__m2);
446}
447
448/// Adds each 8-bit unsigned integer element of the first 64-bit integer
449/// vector of [8 x i8] to the corresponding 8-bit unsigned integer element of
450/// the second 64-bit integer vector of [8 x i8]. Sums greater than 0xFF are
451/// saturated to 0xFF. The results are packed into a 64-bit integer vector of
452/// [8 x i8].
453///
454/// \headerfile <x86intrin.h>
455///
456/// This intrinsic corresponds to the <c> PADDUSB </c> instruction.
457///
458/// \param __m1
459/// A 64-bit integer vector of [8 x i8].
460/// \param __m2
461/// A 64-bit integer vector of [8 x i8].
462/// \returns A 64-bit integer vector of [8 x i8] containing the saturated
463/// unsigned sums of both parameters.
464static __inline__ __m64 __DEFAULT_FN_ATTRS
465_mm_adds_pu8(__m64 __m1, __m64 __m2)
466{
467 return (__m64)__builtin_ia32_paddusb((__v8qi)__m1, (__v8qi)__m2);
468}
469
470/// Adds each 16-bit unsigned integer element of the first 64-bit integer
471/// vector of [4 x i16] to the corresponding 16-bit unsigned integer element
472/// of the second 64-bit integer vector of [4 x i16]. Sums greater than
473/// 0xFFFF are saturated to 0xFFFF. The results are packed into a 64-bit
474/// integer vector of [4 x i16].
475///
476/// \headerfile <x86intrin.h>
477///
478/// This intrinsic corresponds to the <c> PADDUSW </c> instruction.
479///
480/// \param __m1
481/// A 64-bit integer vector of [4 x i16].
482/// \param __m2
483/// A 64-bit integer vector of [4 x i16].
484/// \returns A 64-bit integer vector of [4 x i16] containing the saturated
485/// unsigned sums of both parameters.
486static __inline__ __m64 __DEFAULT_FN_ATTRS
487_mm_adds_pu16(__m64 __m1, __m64 __m2)
488{
489 return (__m64)__builtin_ia32_paddusw((__v4hi)__m1, (__v4hi)__m2);
490}
491
492/// Subtracts each 8-bit integer element of the second 64-bit integer
493/// vector of [8 x i8] from the corresponding 8-bit integer element of the
494/// first 64-bit integer vector of [8 x i8]. The lower 8 bits of the results
495/// are packed into a 64-bit integer vector of [8 x i8].
496///
497/// \headerfile <x86intrin.h>
498///
499/// This intrinsic corresponds to the <c> PSUBB </c> instruction.
500///
501/// \param __m1
502/// A 64-bit integer vector of [8 x i8] containing the minuends.
503/// \param __m2
504/// A 64-bit integer vector of [8 x i8] containing the subtrahends.
505/// \returns A 64-bit integer vector of [8 x i8] containing the differences of
506/// both parameters.
507static __inline__ __m64 __DEFAULT_FN_ATTRS
508_mm_sub_pi8(__m64 __m1, __m64 __m2)
509{
510 return (__m64)__builtin_ia32_psubb((__v8qi)__m1, (__v8qi)__m2);
511}
512
513/// Subtracts each 16-bit integer element of the second 64-bit integer
514/// vector of [4 x i16] from the corresponding 16-bit integer element of the
515/// first 64-bit integer vector of [4 x i16]. The lower 16 bits of the
516/// results are packed into a 64-bit integer vector of [4 x i16].
517///
518/// \headerfile <x86intrin.h>
519///
520/// This intrinsic corresponds to the <c> PSUBW </c> instruction.
521///
522/// \param __m1
523/// A 64-bit integer vector of [4 x i16] containing the minuends.
524/// \param __m2
525/// A 64-bit integer vector of [4 x i16] containing the subtrahends.
526/// \returns A 64-bit integer vector of [4 x i16] containing the differences of
527/// both parameters.
528static __inline__ __m64 __DEFAULT_FN_ATTRS
529_mm_sub_pi16(__m64 __m1, __m64 __m2)
530{
531 return (__m64)__builtin_ia32_psubw((__v4hi)__m1, (__v4hi)__m2);
532}
533
534/// Subtracts each 32-bit integer element of the second 64-bit integer
535/// vector of [2 x i32] from the corresponding 32-bit integer element of the
536/// first 64-bit integer vector of [2 x i32]. The lower 32 bits of the
537/// results are packed into a 64-bit integer vector of [2 x i32].
538///
539/// \headerfile <x86intrin.h>
540///
541/// This intrinsic corresponds to the <c> PSUBD </c> instruction.
542///
543/// \param __m1
544/// A 64-bit integer vector of [2 x i32] containing the minuends.
545/// \param __m2
546/// A 64-bit integer vector of [2 x i32] containing the subtrahends.
547/// \returns A 64-bit integer vector of [2 x i32] containing the differences of
548/// both parameters.
549static __inline__ __m64 __DEFAULT_FN_ATTRS
550_mm_sub_pi32(__m64 __m1, __m64 __m2)
551{
552 return (__m64)__builtin_ia32_psubd((__v2si)__m1, (__v2si)__m2);
553}
554
555/// Subtracts each 8-bit signed integer element of the second 64-bit
556/// integer vector of [8 x i8] from the corresponding 8-bit signed integer
557/// element of the first 64-bit integer vector of [8 x i8]. Positive results
558/// greater than 0x7F are saturated to 0x7F. Negative results less than 0x80
559/// are saturated to 0x80. The results are packed into a 64-bit integer
560/// vector of [8 x i8].
561///
562/// \headerfile <x86intrin.h>
563///
564/// This intrinsic corresponds to the <c> PSUBSB </c> instruction.
565///
566/// \param __m1
567/// A 64-bit integer vector of [8 x i8] containing the minuends.
568/// \param __m2
569/// A 64-bit integer vector of [8 x i8] containing the subtrahends.
570/// \returns A 64-bit integer vector of [8 x i8] containing the saturated
571/// differences of both parameters.
572static __inline__ __m64 __DEFAULT_FN_ATTRS
573_mm_subs_pi8(__m64 __m1, __m64 __m2)
574{
575 return (__m64)__builtin_ia32_psubsb((__v8qi)__m1, (__v8qi)__m2);
576}
577
578/// Subtracts each 16-bit signed integer element of the second 64-bit
579/// integer vector of [4 x i16] from the corresponding 16-bit signed integer
580/// element of the first 64-bit integer vector of [4 x i16]. Positive results
581/// greater than 0x7FFF are saturated to 0x7FFF. Negative results less than
582/// 0x8000 are saturated to 0x8000. The results are packed into a 64-bit
583/// integer vector of [4 x i16].
584///
585/// \headerfile <x86intrin.h>
586///
587/// This intrinsic corresponds to the <c> PSUBSW </c> instruction.
588///
589/// \param __m1
590/// A 64-bit integer vector of [4 x i16] containing the minuends.
591/// \param __m2
592/// A 64-bit integer vector of [4 x i16] containing the subtrahends.
593/// \returns A 64-bit integer vector of [4 x i16] containing the saturated
594/// differences of both parameters.
595static __inline__ __m64 __DEFAULT_FN_ATTRS
596_mm_subs_pi16(__m64 __m1, __m64 __m2)
597{
598 return (__m64)__builtin_ia32_psubsw((__v4hi)__m1, (__v4hi)__m2);
599}
600
601/// Subtracts each 8-bit unsigned integer element of the second 64-bit
602/// integer vector of [8 x i8] from the corresponding 8-bit unsigned integer
603/// element of the first 64-bit integer vector of [8 x i8].
604///
605/// If an element of the first vector is less than the corresponding element
606/// of the second vector, the result is saturated to 0. The results are
607/// packed into a 64-bit integer vector of [8 x i8].
608///
609/// \headerfile <x86intrin.h>
610///
611/// This intrinsic corresponds to the <c> PSUBUSB </c> instruction.
612///
613/// \param __m1
614/// A 64-bit integer vector of [8 x i8] containing the minuends.
615/// \param __m2
616/// A 64-bit integer vector of [8 x i8] containing the subtrahends.
617/// \returns A 64-bit integer vector of [8 x i8] containing the saturated
618/// differences of both parameters.
619static __inline__ __m64 __DEFAULT_FN_ATTRS
620_mm_subs_pu8(__m64 __m1, __m64 __m2)
621{
622 return (__m64)__builtin_ia32_psubusb((__v8qi)__m1, (__v8qi)__m2);
623}
624
625/// Subtracts each 16-bit unsigned integer element of the second 64-bit
626/// integer vector of [4 x i16] from the corresponding 16-bit unsigned
627/// integer element of the first 64-bit integer vector of [4 x i16].
628///
629/// If an element of the first vector is less than the corresponding element
630/// of the second vector, the result is saturated to 0. The results are
631/// packed into a 64-bit integer vector of [4 x i16].
632///
633/// \headerfile <x86intrin.h>
634///
635/// This intrinsic corresponds to the <c> PSUBUSW </c> instruction.
636///
637/// \param __m1
638/// A 64-bit integer vector of [4 x i16] containing the minuends.
639/// \param __m2
640/// A 64-bit integer vector of [4 x i16] containing the subtrahends.
641/// \returns A 64-bit integer vector of [4 x i16] containing the saturated
642/// differences of both parameters.
643static __inline__ __m64 __DEFAULT_FN_ATTRS
644_mm_subs_pu16(__m64 __m1, __m64 __m2)
645{
646 return (__m64)__builtin_ia32_psubusw((__v4hi)__m1, (__v4hi)__m2);
647}
648
649/// Multiplies each 16-bit signed integer element of the first 64-bit
650/// integer vector of [4 x i16] by the corresponding 16-bit signed integer
651/// element of the second 64-bit integer vector of [4 x i16] and get four
652/// 32-bit products. Adds adjacent pairs of products to get two 32-bit sums.
653/// The lower 32 bits of these two sums are packed into a 64-bit integer
654/// vector of [2 x i32].
655///
656/// For example, bits [15:0] of both parameters are multiplied, bits [31:16]
657/// of both parameters are multiplied, and the sum of both results is written
658/// to bits [31:0] of the result.
659///
660/// \headerfile <x86intrin.h>
661///
662/// This intrinsic corresponds to the <c> PMADDWD </c> instruction.
663///
664/// \param __m1
665/// A 64-bit integer vector of [4 x i16].
666/// \param __m2
667/// A 64-bit integer vector of [4 x i16].
668/// \returns A 64-bit integer vector of [2 x i32] containing the sums of
669/// products of both parameters.
670static __inline__ __m64 __DEFAULT_FN_ATTRS
671_mm_madd_pi16(__m64 __m1, __m64 __m2)
672{
673 return (__m64)__builtin_ia32_pmaddwd((__v4hi)__m1, (__v4hi)__m2);
674}
675
676/// Multiplies each 16-bit signed integer element of the first 64-bit
677/// integer vector of [4 x i16] by the corresponding 16-bit signed integer
678/// element of the second 64-bit integer vector of [4 x i16]. Packs the upper
679/// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16].
680///
681/// \headerfile <x86intrin.h>
682///
683/// This intrinsic corresponds to the <c> PMULHW </c> instruction.
684///
685/// \param __m1
686/// A 64-bit integer vector of [4 x i16].
687/// \param __m2
688/// A 64-bit integer vector of [4 x i16].
689/// \returns A 64-bit integer vector of [4 x i16] containing the upper 16 bits
690/// of the products of both parameters.
691static __inline__ __m64 __DEFAULT_FN_ATTRS
692_mm_mulhi_pi16(__m64 __m1, __m64 __m2)
693{
694 return (__m64)__builtin_ia32_pmulhw((__v4hi)__m1, (__v4hi)__m2);
695}
696
697/// Multiplies each 16-bit signed integer element of the first 64-bit
698/// integer vector of [4 x i16] by the corresponding 16-bit signed integer
699/// element of the second 64-bit integer vector of [4 x i16]. Packs the lower
700/// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16].
701///
702/// \headerfile <x86intrin.h>
703///
704/// This intrinsic corresponds to the <c> PMULLW </c> instruction.
705///
706/// \param __m1
707/// A 64-bit integer vector of [4 x i16].
708/// \param __m2
709/// A 64-bit integer vector of [4 x i16].
710/// \returns A 64-bit integer vector of [4 x i16] containing the lower 16 bits
711/// of the products of both parameters.
712static __inline__ __m64 __DEFAULT_FN_ATTRS
713_mm_mullo_pi16(__m64 __m1, __m64 __m2)
714{
715 return (__m64)__builtin_ia32_pmullw((__v4hi)__m1, (__v4hi)__m2);
716}
717
718/// Left-shifts each 16-bit signed integer element of the first
719/// parameter, which is a 64-bit integer vector of [4 x i16], by the number
720/// of bits specified by the second parameter, which is a 64-bit integer. The
721/// lower 16 bits of the results are packed into a 64-bit integer vector of
722/// [4 x i16].
723///
724/// \headerfile <x86intrin.h>
725///
726/// This intrinsic corresponds to the <c> PSLLW </c> instruction.
727///
728/// \param __m
729/// A 64-bit integer vector of [4 x i16].
730/// \param __count
731/// A 64-bit integer vector interpreted as a single 64-bit integer.
732/// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted
733/// values. If \a __count is greater or equal to 16, the result is set to all
734/// 0.
735static __inline__ __m64 __DEFAULT_FN_ATTRS
736_mm_sll_pi16(__m64 __m, __m64 __count)
737{
738 return (__m64)__builtin_ia32_psllw((__v4hi)__m, __count);
739}
740
741/// Left-shifts each 16-bit signed integer element of a 64-bit integer
742/// vector of [4 x i16] by the number of bits specified by a 32-bit integer.
743/// The lower 16 bits of the results are packed into a 64-bit integer vector
744/// of [4 x i16].
745///
746/// \headerfile <x86intrin.h>
747///
748/// This intrinsic corresponds to the <c> PSLLW </c> instruction.
749///
750/// \param __m
751/// A 64-bit integer vector of [4 x i16].
752/// \param __count
753/// A 32-bit integer value.
754/// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted
755/// values. If \a __count is greater or equal to 16, the result is set to all
756/// 0.
757static __inline__ __m64 __DEFAULT_FN_ATTRS
758_mm_slli_pi16(__m64 __m, int __count)
759{
760 return (__m64)__builtin_ia32_psllwi((__v4hi)__m, __count);
761}
762
763/// Left-shifts each 32-bit signed integer element of the first
764/// parameter, which is a 64-bit integer vector of [2 x i32], by the number
765/// of bits specified by the second parameter, which is a 64-bit integer. The
766/// lower 32 bits of the results are packed into a 64-bit integer vector of
767/// [2 x i32].
768///
769/// \headerfile <x86intrin.h>
770///
771/// This intrinsic corresponds to the <c> PSLLD </c> instruction.
772///
773/// \param __m
774/// A 64-bit integer vector of [2 x i32].
775/// \param __count
776/// A 64-bit integer vector interpreted as a single 64-bit integer.
777/// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted
778/// values. If \a __count is greater or equal to 32, the result is set to all
779/// 0.
780static __inline__ __m64 __DEFAULT_FN_ATTRS
781_mm_sll_pi32(__m64 __m, __m64 __count)
782{
783 return (__m64)__builtin_ia32_pslld((__v2si)__m, __count);
784}
785
786/// Left-shifts each 32-bit signed integer element of a 64-bit integer
787/// vector of [2 x i32] by the number of bits specified by a 32-bit integer.
788/// The lower 32 bits of the results are packed into a 64-bit integer vector
789/// of [2 x i32].
790///
791/// \headerfile <x86intrin.h>
792///
793/// This intrinsic corresponds to the <c> PSLLD </c> instruction.
794///
795/// \param __m
796/// A 64-bit integer vector of [2 x i32].
797/// \param __count
798/// A 32-bit integer value.
799/// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted
800/// values. If \a __count is greater or equal to 32, the result is set to all
801/// 0.
802static __inline__ __m64 __DEFAULT_FN_ATTRS
803_mm_slli_pi32(__m64 __m, int __count)
804{
805 return (__m64)__builtin_ia32_pslldi((__v2si)__m, __count);
806}
807
808/// Left-shifts the first 64-bit integer parameter by the number of bits
809/// specified by the second 64-bit integer parameter. The lower 64 bits of
810/// result are returned.
811///
812/// \headerfile <x86intrin.h>
813///
814/// This intrinsic corresponds to the <c> PSLLQ </c> instruction.
815///
816/// \param __m
817/// A 64-bit integer vector interpreted as a single 64-bit integer.
818/// \param __count
819/// A 64-bit integer vector interpreted as a single 64-bit integer.
820/// \returns A 64-bit integer vector containing the left-shifted value. If
821/// \a __count is greater or equal to 64, the result is set to 0.
822static __inline__ __m64 __DEFAULT_FN_ATTRS
823_mm_sll_si64(__m64 __m, __m64 __count)
824{
825 return (__m64)__builtin_ia32_psllq((__v1di)__m, __count);
826}
827
828/// Left-shifts the first parameter, which is a 64-bit integer, by the
829/// number of bits specified by the second parameter, which is a 32-bit
830/// integer. The lower 64 bits of result are returned.
831///
832/// \headerfile <x86intrin.h>
833///
834/// This intrinsic corresponds to the <c> PSLLQ </c> instruction.
835///
836/// \param __m
837/// A 64-bit integer vector interpreted as a single 64-bit integer.
838/// \param __count
839/// A 32-bit integer value.
840/// \returns A 64-bit integer vector containing the left-shifted value. If
841/// \a __count is greater or equal to 64, the result is set to 0.
842static __inline__ __m64 __DEFAULT_FN_ATTRS
843_mm_slli_si64(__m64 __m, int __count)
844{
845 return (__m64)__builtin_ia32_psllqi((__v1di)__m, __count);
846}
847
848/// Right-shifts each 16-bit integer element of the first parameter,
849/// which is a 64-bit integer vector of [4 x i16], by the number of bits
850/// specified by the second parameter, which is a 64-bit integer.
851///
852/// High-order bits are filled with the sign bit of the initial value of each
853/// 16-bit element. The 16-bit results are packed into a 64-bit integer
854/// vector of [4 x i16].
855///
856/// \headerfile <x86intrin.h>
857///
858/// This intrinsic corresponds to the <c> PSRAW </c> instruction.
859///
860/// \param __m
861/// A 64-bit integer vector of [4 x i16].
862/// \param __count
863/// A 64-bit integer vector interpreted as a single 64-bit integer.
864/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
865/// values.
866static __inline__ __m64 __DEFAULT_FN_ATTRS
867_mm_sra_pi16(__m64 __m, __m64 __count)
868{
869 return (__m64)__builtin_ia32_psraw((__v4hi)__m, __count);
870}
871
872/// Right-shifts each 16-bit integer element of a 64-bit integer vector
873/// of [4 x i16] by the number of bits specified by a 32-bit integer.
874///
875/// High-order bits are filled with the sign bit of the initial value of each
876/// 16-bit element. The 16-bit results are packed into a 64-bit integer
877/// vector of [4 x i16].
878///
879/// \headerfile <x86intrin.h>
880///
881/// This intrinsic corresponds to the <c> PSRAW </c> instruction.
882///
883/// \param __m
884/// A 64-bit integer vector of [4 x i16].
885/// \param __count
886/// A 32-bit integer value.
887/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
888/// values.
889static __inline__ __m64 __DEFAULT_FN_ATTRS
890_mm_srai_pi16(__m64 __m, int __count)
891{
892 return (__m64)__builtin_ia32_psrawi((__v4hi)__m, __count);
893}
894
895/// Right-shifts each 32-bit integer element of the first parameter,
896/// which is a 64-bit integer vector of [2 x i32], by the number of bits
897/// specified by the second parameter, which is a 64-bit integer.
898///
899/// High-order bits are filled with the sign bit of the initial value of each
900/// 32-bit element. The 32-bit results are packed into a 64-bit integer
901/// vector of [2 x i32].
902///
903/// \headerfile <x86intrin.h>
904///
905/// This intrinsic corresponds to the <c> PSRAD </c> instruction.
906///
907/// \param __m
908/// A 64-bit integer vector of [2 x i32].
909/// \param __count
910/// A 64-bit integer vector interpreted as a single 64-bit integer.
911/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
912/// values.
913static __inline__ __m64 __DEFAULT_FN_ATTRS
914_mm_sra_pi32(__m64 __m, __m64 __count)
915{
916 return (__m64)__builtin_ia32_psrad((__v2si)__m, __count);
917}
918
919/// Right-shifts each 32-bit integer element of a 64-bit integer vector
920/// of [2 x i32] by the number of bits specified by a 32-bit integer.
921///
922/// High-order bits are filled with the sign bit of the initial value of each
923/// 32-bit element. The 32-bit results are packed into a 64-bit integer
924/// vector of [2 x i32].
925///
926/// \headerfile <x86intrin.h>
927///
928/// This intrinsic corresponds to the <c> PSRAD </c> instruction.
929///
930/// \param __m
931/// A 64-bit integer vector of [2 x i32].
932/// \param __count
933/// A 32-bit integer value.
934/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
935/// values.
936static __inline__ __m64 __DEFAULT_FN_ATTRS
937_mm_srai_pi32(__m64 __m, int __count)
938{
939 return (__m64)__builtin_ia32_psradi((__v2si)__m, __count);
940}
941
942/// Right-shifts each 16-bit integer element of the first parameter,
943/// which is a 64-bit integer vector of [4 x i16], by the number of bits
944/// specified by the second parameter, which is a 64-bit integer.
945///
946/// High-order bits are cleared. The 16-bit results are packed into a 64-bit
947/// integer vector of [4 x i16].
948///
949/// \headerfile <x86intrin.h>
950///
951/// This intrinsic corresponds to the <c> PSRLW </c> instruction.
952///
953/// \param __m
954/// A 64-bit integer vector of [4 x i16].
955/// \param __count
956/// A 64-bit integer vector interpreted as a single 64-bit integer.
957/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
958/// values.
959static __inline__ __m64 __DEFAULT_FN_ATTRS
960_mm_srl_pi16(__m64 __m, __m64 __count)
961{
962 return (__m64)__builtin_ia32_psrlw((__v4hi)__m, __count);
963}
964
965/// Right-shifts each 16-bit integer element of a 64-bit integer vector
966/// of [4 x i16] by the number of bits specified by a 32-bit integer.
967///
968/// High-order bits are cleared. The 16-bit results are packed into a 64-bit
969/// integer vector of [4 x i16].
970///
971/// \headerfile <x86intrin.h>
972///
973/// This intrinsic corresponds to the <c> PSRLW </c> instruction.
974///
975/// \param __m
976/// A 64-bit integer vector of [4 x i16].
977/// \param __count
978/// A 32-bit integer value.
979/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
980/// values.
981static __inline__ __m64 __DEFAULT_FN_ATTRS
982_mm_srli_pi16(__m64 __m, int __count)
983{
984 return (__m64)__builtin_ia32_psrlwi((__v4hi)__m, __count);
985}
986
987/// Right-shifts each 32-bit integer element of the first parameter,
988/// which is a 64-bit integer vector of [2 x i32], by the number of bits
989/// specified by the second parameter, which is a 64-bit integer.
990///
991/// High-order bits are cleared. The 32-bit results are packed into a 64-bit
992/// integer vector of [2 x i32].
993///
994/// \headerfile <x86intrin.h>
995///
996/// This intrinsic corresponds to the <c> PSRLD </c> instruction.
997///
998/// \param __m
999/// A 64-bit integer vector of [2 x i32].
1000/// \param __count
1001/// A 64-bit integer vector interpreted as a single 64-bit integer.
1002/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
1003/// values.
1004static __inline__ __m64 __DEFAULT_FN_ATTRS
1005_mm_srl_pi32(__m64 __m, __m64 __count)
1006{
1007 return (__m64)__builtin_ia32_psrld((__v2si)__m, __count);
1008}
1009
1010/// Right-shifts each 32-bit integer element of a 64-bit integer vector
1011/// of [2 x i32] by the number of bits specified by a 32-bit integer.
1012///
1013/// High-order bits are cleared. The 32-bit results are packed into a 64-bit
1014/// integer vector of [2 x i32].
1015///
1016/// \headerfile <x86intrin.h>
1017///
1018/// This intrinsic corresponds to the <c> PSRLD </c> instruction.
1019///
1020/// \param __m
1021/// A 64-bit integer vector of [2 x i32].
1022/// \param __count
1023/// A 32-bit integer value.
1024/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
1025/// values.
1026static __inline__ __m64 __DEFAULT_FN_ATTRS
1027_mm_srli_pi32(__m64 __m, int __count)
1028{
1029 return (__m64)__builtin_ia32_psrldi((__v2si)__m, __count);
1030}
1031
1032/// Right-shifts the first 64-bit integer parameter by the number of bits
1033/// specified by the second 64-bit integer parameter.
1034///
1035/// High-order bits are cleared.
1036///
1037/// \headerfile <x86intrin.h>
1038///
1039/// This intrinsic corresponds to the <c> PSRLQ </c> instruction.
1040///
1041/// \param __m
1042/// A 64-bit integer vector interpreted as a single 64-bit integer.
1043/// \param __count
1044/// A 64-bit integer vector interpreted as a single 64-bit integer.
1045/// \returns A 64-bit integer vector containing the right-shifted value.
1046static __inline__ __m64 __DEFAULT_FN_ATTRS
1047_mm_srl_si64(__m64 __m, __m64 __count)
1048{
1049 return (__m64)__builtin_ia32_psrlq((__v1di)__m, __count);
1050}
1051
1052/// Right-shifts the first parameter, which is a 64-bit integer, by the
1053/// number of bits specified by the second parameter, which is a 32-bit
1054/// integer.
1055///
1056/// High-order bits are cleared.
1057///
1058/// \headerfile <x86intrin.h>
1059///
1060/// This intrinsic corresponds to the <c> PSRLQ </c> instruction.
1061///
1062/// \param __m
1063/// A 64-bit integer vector interpreted as a single 64-bit integer.
1064/// \param __count
1065/// A 32-bit integer value.
1066/// \returns A 64-bit integer vector containing the right-shifted value.
1067static __inline__ __m64 __DEFAULT_FN_ATTRS
1068_mm_srli_si64(__m64 __m, int __count)
1069{
1070 return (__m64)__builtin_ia32_psrlqi((__v1di)__m, __count);
1071}
1072
1073/// Performs a bitwise AND of two 64-bit integer vectors.
1074///
1075/// \headerfile <x86intrin.h>
1076///
1077/// This intrinsic corresponds to the <c> PAND </c> instruction.
1078///
1079/// \param __m1
1080/// A 64-bit integer vector.
1081/// \param __m2
1082/// A 64-bit integer vector.
1083/// \returns A 64-bit integer vector containing the bitwise AND of both
1084/// parameters.
1085static __inline__ __m64 __DEFAULT_FN_ATTRS
1086_mm_and_si64(__m64 __m1, __m64 __m2)
1087{
1088 return __builtin_ia32_pand((__v1di)__m1, (__v1di)__m2);
1089}
1090
1091/// Performs a bitwise NOT of the first 64-bit integer vector, and then
1092/// performs a bitwise AND of the intermediate result and the second 64-bit
1093/// integer vector.
1094///
1095/// \headerfile <x86intrin.h>
1096///
1097/// This intrinsic corresponds to the <c> PANDN </c> instruction.
1098///
1099/// \param __m1
1100/// A 64-bit integer vector. The one's complement of this parameter is used
1101/// in the bitwise AND.
1102/// \param __m2
1103/// A 64-bit integer vector.
1104/// \returns A 64-bit integer vector containing the bitwise AND of the second
1105/// parameter and the one's complement of the first parameter.
1106static __inline__ __m64 __DEFAULT_FN_ATTRS
1107_mm_andnot_si64(__m64 __m1, __m64 __m2)
1108{
1109 return __builtin_ia32_pandn((__v1di)__m1, (__v1di)__m2);
1110}
1111
1112/// Performs a bitwise OR of two 64-bit integer vectors.
1113///
1114/// \headerfile <x86intrin.h>
1115///
1116/// This intrinsic corresponds to the <c> POR </c> instruction.
1117///
1118/// \param __m1
1119/// A 64-bit integer vector.
1120/// \param __m2
1121/// A 64-bit integer vector.
1122/// \returns A 64-bit integer vector containing the bitwise OR of both
1123/// parameters.
1124static __inline__ __m64 __DEFAULT_FN_ATTRS
1125_mm_or_si64(__m64 __m1, __m64 __m2)
1126{
1127 return __builtin_ia32_por((__v1di)__m1, (__v1di)__m2);
1128}
1129
1130/// Performs a bitwise exclusive OR of two 64-bit integer vectors.
1131///
1132/// \headerfile <x86intrin.h>
1133///
1134/// This intrinsic corresponds to the <c> PXOR </c> instruction.
1135///
1136/// \param __m1
1137/// A 64-bit integer vector.
1138/// \param __m2
1139/// A 64-bit integer vector.
1140/// \returns A 64-bit integer vector containing the bitwise exclusive OR of both
1141/// parameters.
1142static __inline__ __m64 __DEFAULT_FN_ATTRS
1143_mm_xor_si64(__m64 __m1, __m64 __m2)
1144{
1145 return __builtin_ia32_pxor((__v1di)__m1, (__v1di)__m2);
1146}
1147
1148/// Compares the 8-bit integer elements of two 64-bit integer vectors of
1149/// [8 x i8] to determine if the element of the first vector is equal to the
1150/// corresponding element of the second vector.
1151///
1152/// The comparison yields 0 for false, 0xFF for true.
1153///
1154/// \headerfile <x86intrin.h>
1155///
1156/// This intrinsic corresponds to the <c> PCMPEQB </c> instruction.
1157///
1158/// \param __m1
1159/// A 64-bit integer vector of [8 x i8].
1160/// \param __m2
1161/// A 64-bit integer vector of [8 x i8].
1162/// \returns A 64-bit integer vector of [8 x i8] containing the comparison
1163/// results.
1164static __inline__ __m64 __DEFAULT_FN_ATTRS
1165_mm_cmpeq_pi8(__m64 __m1, __m64 __m2)
1166{
1167 return (__m64)__builtin_ia32_pcmpeqb((__v8qi)__m1, (__v8qi)__m2);
1168}
1169
1170/// Compares the 16-bit integer elements of two 64-bit integer vectors of
1171/// [4 x i16] to determine if the element of the first vector is equal to the
1172/// corresponding element of the second vector.
1173///
1174/// The comparison yields 0 for false, 0xFFFF for true.
1175///
1176/// \headerfile <x86intrin.h>
1177///
1178/// This intrinsic corresponds to the <c> PCMPEQW </c> instruction.
1179///
1180/// \param __m1
1181/// A 64-bit integer vector of [4 x i16].
1182/// \param __m2
1183/// A 64-bit integer vector of [4 x i16].
1184/// \returns A 64-bit integer vector of [4 x i16] containing the comparison
1185/// results.
1186static __inline__ __m64 __DEFAULT_FN_ATTRS
1187_mm_cmpeq_pi16(__m64 __m1, __m64 __m2)
1188{
1189 return (__m64)__builtin_ia32_pcmpeqw((__v4hi)__m1, (__v4hi)__m2);
1190}
1191
1192/// Compares the 32-bit integer elements of two 64-bit integer vectors of
1193/// [2 x i32] to determine if the element of the first vector is equal to the
1194/// corresponding element of the second vector.
1195///
1196/// The comparison yields 0 for false, 0xFFFFFFFF for true.
1197///
1198/// \headerfile <x86intrin.h>
1199///
1200/// This intrinsic corresponds to the <c> PCMPEQD </c> instruction.
1201///
1202/// \param __m1
1203/// A 64-bit integer vector of [2 x i32].
1204/// \param __m2
1205/// A 64-bit integer vector of [2 x i32].
1206/// \returns A 64-bit integer vector of [2 x i32] containing the comparison
1207/// results.
1208static __inline__ __m64 __DEFAULT_FN_ATTRS
1209_mm_cmpeq_pi32(__m64 __m1, __m64 __m2)
1210{
1211 return (__m64)__builtin_ia32_pcmpeqd((__v2si)__m1, (__v2si)__m2);
1212}
1213
1214/// Compares the 8-bit integer elements of two 64-bit integer vectors of
1215/// [8 x i8] to determine if the element of the first vector is greater than
1216/// the corresponding element of the second vector.
1217///
1218/// The comparison yields 0 for false, 0xFF for true.
1219///
1220/// \headerfile <x86intrin.h>
1221///
1222/// This intrinsic corresponds to the <c> PCMPGTB </c> instruction.
1223///
1224/// \param __m1
1225/// A 64-bit integer vector of [8 x i8].
1226/// \param __m2
1227/// A 64-bit integer vector of [8 x i8].
1228/// \returns A 64-bit integer vector of [8 x i8] containing the comparison
1229/// results.
1230static __inline__ __m64 __DEFAULT_FN_ATTRS
1231_mm_cmpgt_pi8(__m64 __m1, __m64 __m2)
1232{
1233 return (__m64)__builtin_ia32_pcmpgtb((__v8qi)__m1, (__v8qi)__m2);
1234}
1235
1236/// Compares the 16-bit integer elements of two 64-bit integer vectors of
1237/// [4 x i16] to determine if the element of the first vector is greater than
1238/// the corresponding element of the second vector.
1239///
1240/// The comparison yields 0 for false, 0xFFFF for true.
1241///
1242/// \headerfile <x86intrin.h>
1243///
1244/// This intrinsic corresponds to the <c> PCMPGTW </c> instruction.
1245///
1246/// \param __m1
1247/// A 64-bit integer vector of [4 x i16].
1248/// \param __m2
1249/// A 64-bit integer vector of [4 x i16].
1250/// \returns A 64-bit integer vector of [4 x i16] containing the comparison
1251/// results.
1252static __inline__ __m64 __DEFAULT_FN_ATTRS
1253_mm_cmpgt_pi16(__m64 __m1, __m64 __m2)
1254{
1255 return (__m64)__builtin_ia32_pcmpgtw((__v4hi)__m1, (__v4hi)__m2);
1256}
1257
1258/// Compares the 32-bit integer elements of two 64-bit integer vectors of
1259/// [2 x i32] to determine if the element of the first vector is greater than
1260/// the corresponding element of the second vector.
1261///
1262/// The comparison yields 0 for false, 0xFFFFFFFF for true.
1263///
1264/// \headerfile <x86intrin.h>
1265///
1266/// This intrinsic corresponds to the <c> PCMPGTD </c> instruction.
1267///
1268/// \param __m1
1269/// A 64-bit integer vector of [2 x i32].
1270/// \param __m2
1271/// A 64-bit integer vector of [2 x i32].
1272/// \returns A 64-bit integer vector of [2 x i32] containing the comparison
1273/// results.
1274static __inline__ __m64 __DEFAULT_FN_ATTRS
1275_mm_cmpgt_pi32(__m64 __m1, __m64 __m2)
1276{
1277 return (__m64)__builtin_ia32_pcmpgtd((__v2si)__m1, (__v2si)__m2);
1278}
1279
1280/// Constructs a 64-bit integer vector initialized to zero.
1281///
1282/// \headerfile <x86intrin.h>
1283///
1284/// This intrinsic corresponds to the <c> PXOR </c> instruction.
1285///
1286/// \returns An initialized 64-bit integer vector with all elements set to zero.
1287static __inline__ __m64 __DEFAULT_FN_ATTRS
1288_mm_setzero_si64(void)
1289{
1290 return __extension__ (__m64){ 0LL };
1291}
1292
1293/// Constructs a 64-bit integer vector initialized with the specified
1294/// 32-bit integer values.
1295///
1296/// \headerfile <x86intrin.h>
1297///
1298/// This intrinsic is a utility function and does not correspond to a specific
1299/// instruction.
1300///
1301/// \param __i1
1302/// A 32-bit integer value used to initialize the upper 32 bits of the
1303/// result.
1304/// \param __i0
1305/// A 32-bit integer value used to initialize the lower 32 bits of the
1306/// result.
1307/// \returns An initialized 64-bit integer vector.
1308static __inline__ __m64 __DEFAULT_FN_ATTRS
1309_mm_set_pi32(int __i1, int __i0)
1310{
1311 return (__m64)__builtin_ia32_vec_init_v2si(__i0, __i1);
1312}
1313
1314/// Constructs a 64-bit integer vector initialized with the specified
1315/// 16-bit integer values.
1316///
1317/// \headerfile <x86intrin.h>
1318///
1319/// This intrinsic is a utility function and does not correspond to a specific
1320/// instruction.
1321///
1322/// \param __s3
1323/// A 16-bit integer value used to initialize bits [63:48] of the result.
1324/// \param __s2
1325/// A 16-bit integer value used to initialize bits [47:32] of the result.
1326/// \param __s1
1327/// A 16-bit integer value used to initialize bits [31:16] of the result.
1328/// \param __s0
1329/// A 16-bit integer value used to initialize bits [15:0] of the result.
1330/// \returns An initialized 64-bit integer vector.
1331static __inline__ __m64 __DEFAULT_FN_ATTRS
1332_mm_set_pi16(short __s3, short __s2, short __s1, short __s0)
1333{
1334 return (__m64)__builtin_ia32_vec_init_v4hi(__s0, __s1, __s2, __s3);
1335}
1336
1337/// Constructs a 64-bit integer vector initialized with the specified
1338/// 8-bit integer values.
1339///
1340/// \headerfile <x86intrin.h>
1341///
1342/// This intrinsic is a utility function and does not correspond to a specific
1343/// instruction.
1344///
1345/// \param __b7
1346/// An 8-bit integer value used to initialize bits [63:56] of the result.
1347/// \param __b6
1348/// An 8-bit integer value used to initialize bits [55:48] of the result.
1349/// \param __b5
1350/// An 8-bit integer value used to initialize bits [47:40] of the result.
1351/// \param __b4
1352/// An 8-bit integer value used to initialize bits [39:32] of the result.
1353/// \param __b3
1354/// An 8-bit integer value used to initialize bits [31:24] of the result.
1355/// \param __b2
1356/// An 8-bit integer value used to initialize bits [23:16] of the result.
1357/// \param __b1
1358/// An 8-bit integer value used to initialize bits [15:8] of the result.
1359/// \param __b0
1360/// An 8-bit integer value used to initialize bits [7:0] of the result.
1361/// \returns An initialized 64-bit integer vector.
1362static __inline__ __m64 __DEFAULT_FN_ATTRS
1363_mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2,
1364 char __b1, char __b0)
1365{
1366 return (__m64)__builtin_ia32_vec_init_v8qi(__b0, __b1, __b2, __b3,
1367 __b4, __b5, __b6, __b7);
1368}
1369
1370/// Constructs a 64-bit integer vector of [2 x i32], with each of the
1371/// 32-bit integer vector elements set to the specified 32-bit integer
1372/// value.
1373///
1374/// \headerfile <x86intrin.h>
1375///
1376/// This intrinsic is a utility function and does not correspond to a specific
1377/// instruction.
1378///
1379/// \param __i
1380/// A 32-bit integer value used to initialize each vector element of the
1381/// result.
1382/// \returns An initialized 64-bit integer vector of [2 x i32].
1383static __inline__ __m64 __DEFAULT_FN_ATTRS
1384_mm_set1_pi32(int __i)
1385{
1386 return _mm_set_pi32(__i, __i);
1387}
1388
1389/// Constructs a 64-bit integer vector of [4 x i16], with each of the
1390/// 16-bit integer vector elements set to the specified 16-bit integer
1391/// value.
1392///
1393/// \headerfile <x86intrin.h>
1394///
1395/// This intrinsic is a utility function and does not correspond to a specific
1396/// instruction.
1397///
1398/// \param __w
1399/// A 16-bit integer value used to initialize each vector element of the
1400/// result.
1401/// \returns An initialized 64-bit integer vector of [4 x i16].
1402static __inline__ __m64 __DEFAULT_FN_ATTRS
1403_mm_set1_pi16(short __w)
1404{
1405 return _mm_set_pi16(__w, __w, __w, __w);
1406}
1407
1408/// Constructs a 64-bit integer vector of [8 x i8], with each of the
1409/// 8-bit integer vector elements set to the specified 8-bit integer value.
1410///
1411/// \headerfile <x86intrin.h>
1412///
1413/// This intrinsic is a utility function and does not correspond to a specific
1414/// instruction.
1415///
1416/// \param __b
1417/// An 8-bit integer value used to initialize each vector element of the
1418/// result.
1419/// \returns An initialized 64-bit integer vector of [8 x i8].
1420static __inline__ __m64 __DEFAULT_FN_ATTRS
1421_mm_set1_pi8(char __b)
1422{
1423 return _mm_set_pi8(__b, __b, __b, __b, __b, __b, __b, __b);
1424}
1425
1426/// Constructs a 64-bit integer vector, initialized in reverse order with
1427/// the specified 32-bit integer values.
1428///
1429/// \headerfile <x86intrin.h>
1430///
1431/// This intrinsic is a utility function and does not correspond to a specific
1432/// instruction.
1433///
1434/// \param __i0
1435/// A 32-bit integer value used to initialize the lower 32 bits of the
1436/// result.
1437/// \param __i1
1438/// A 32-bit integer value used to initialize the upper 32 bits of the
1439/// result.
1440/// \returns An initialized 64-bit integer vector.
1441static __inline__ __m64 __DEFAULT_FN_ATTRS
1442_mm_setr_pi32(int __i0, int __i1)
1443{
1444 return _mm_set_pi32(__i1, __i0);
1445}
1446
1447/// Constructs a 64-bit integer vector, initialized in reverse order with
1448/// the specified 16-bit integer values.
1449///
1450/// \headerfile <x86intrin.h>
1451///
1452/// This intrinsic is a utility function and does not correspond to a specific
1453/// instruction.
1454///
1455/// \param __w0
1456/// A 16-bit integer value used to initialize bits [15:0] of the result.
1457/// \param __w1
1458/// A 16-bit integer value used to initialize bits [31:16] of the result.
1459/// \param __w2
1460/// A 16-bit integer value used to initialize bits [47:32] of the result.
1461/// \param __w3
1462/// A 16-bit integer value used to initialize bits [63:48] of the result.
1463/// \returns An initialized 64-bit integer vector.
1464static __inline__ __m64 __DEFAULT_FN_ATTRS
1465_mm_setr_pi16(short __w0, short __w1, short __w2, short __w3)
1466{
1467 return _mm_set_pi16(__w3, __w2, __w1, __w0);
1468}
1469
1470/// Constructs a 64-bit integer vector, initialized in reverse order with
1471/// the specified 8-bit integer values.
1472///
1473/// \headerfile <x86intrin.h>
1474///
1475/// This intrinsic is a utility function and does not correspond to a specific
1476/// instruction.
1477///
1478/// \param __b0
1479/// An 8-bit integer value used to initialize bits [7:0] of the result.
1480/// \param __b1
1481/// An 8-bit integer value used to initialize bits [15:8] of the result.
1482/// \param __b2
1483/// An 8-bit integer value used to initialize bits [23:16] of the result.
1484/// \param __b3
1485/// An 8-bit integer value used to initialize bits [31:24] of the result.
1486/// \param __b4
1487/// An 8-bit integer value used to initialize bits [39:32] of the result.
1488/// \param __b5
1489/// An 8-bit integer value used to initialize bits [47:40] of the result.
1490/// \param __b6
1491/// An 8-bit integer value used to initialize bits [55:48] of the result.
1492/// \param __b7
1493/// An 8-bit integer value used to initialize bits [63:56] of the result.
1494/// \returns An initialized 64-bit integer vector.
1495static __inline__ __m64 __DEFAULT_FN_ATTRS
1496_mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5,
1497 char __b6, char __b7)
1498{
1499 return _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
1500}
1501
/* The function-attribute helper macro is only needed while defining the
   intrinsics above; remove it so it is not visible to includers. */
#undef __DEFAULT_FN_ATTRS

/* Aliases for compatibility: each _m_* name expands to the matching _mm_*
   intrinsic. */

/* State reset and scalar conversions. */
#define _m_empty      _mm_empty
#define _m_from_int   _mm_cvtsi32_si64
#define _m_from_int64 _mm_cvtsi64_m64
#define _m_to_int     _mm_cvtsi64_si32
#define _m_to_int64   _mm_cvtm64_si64

/* Pack and unpack. */
#define _m_packsswb   _mm_packs_pi16
#define _m_packssdw   _mm_packs_pi32
#define _m_packuswb   _mm_packs_pu16
#define _m_punpckhbw  _mm_unpackhi_pi8
#define _m_punpckhwd  _mm_unpackhi_pi16
#define _m_punpckhdq  _mm_unpackhi_pi32
#define _m_punpcklbw  _mm_unpacklo_pi8
#define _m_punpcklwd  _mm_unpacklo_pi16
#define _m_punpckldq  _mm_unpacklo_pi32

/* Addition. */
#define _m_paddb      _mm_add_pi8
#define _m_paddw      _mm_add_pi16
#define _m_paddd      _mm_add_pi32
#define _m_paddsb     _mm_adds_pi8
#define _m_paddsw     _mm_adds_pi16
#define _m_paddusb    _mm_adds_pu8
#define _m_paddusw    _mm_adds_pu16

/* Subtraction. */
#define _m_psubb      _mm_sub_pi8
#define _m_psubw      _mm_sub_pi16
#define _m_psubd      _mm_sub_pi32
#define _m_psubsb     _mm_subs_pi8
#define _m_psubsw     _mm_subs_pi16
#define _m_psubusb    _mm_subs_pu8
#define _m_psubusw    _mm_subs_pu16

/* Multiplication. */
#define _m_pmaddwd    _mm_madd_pi16
#define _m_pmulhw     _mm_mulhi_pi16
#define _m_pmullw     _mm_mullo_pi16

/* Left shifts. */
#define _m_psllw      _mm_sll_pi16
#define _m_psllwi     _mm_slli_pi16
#define _m_pslld      _mm_sll_pi32
#define _m_pslldi     _mm_slli_pi32
#define _m_psllq      _mm_sll_si64
#define _m_psllqi     _mm_slli_si64

/* Right shifts that fill with the sign bit. */
#define _m_psraw      _mm_sra_pi16
#define _m_psrawi     _mm_srai_pi16
#define _m_psrad      _mm_sra_pi32
#define _m_psradi     _mm_srai_pi32

/* Right shifts that clear the high-order bits. */
#define _m_psrlw      _mm_srl_pi16
#define _m_psrlwi     _mm_srli_pi16
#define _m_psrld      _mm_srl_pi32
#define _m_psrldi     _mm_srli_pi32
#define _m_psrlq      _mm_srl_si64
#define _m_psrlqi     _mm_srli_si64

/* Bitwise logic. */
#define _m_pand       _mm_and_si64
#define _m_pandn      _mm_andnot_si64
#define _m_por        _mm_or_si64
#define _m_pxor       _mm_xor_si64

/* Element-wise comparisons. */
#define _m_pcmpeqb    _mm_cmpeq_pi8
#define _m_pcmpeqw    _mm_cmpeq_pi16
#define _m_pcmpeqd    _mm_cmpeq_pi32
#define _m_pcmpgtb    _mm_cmpgt_pi8
#define _m_pcmpgtw    _mm_cmpgt_pi16
#define _m_pcmpgtd    _mm_cmpgt_pi32
1562
1563#endif /* __MMINTRIN_H */
1564
1565

Warning: This file is not a C or C++ file. It does not have highlighting.

source code of clang/lib/Headers/mmintrin.h