mmintrin.h source code [clang/lib/Headers/mmintrin.h]

Warning: This file is not a C or C++ file. It does not have highlighting.

1	/*===---- mmintrin.h - MMX intrinsics --------------------------------------===
2	*
3	* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	* See https://llvm.org/LICENSE.txt for license information.
5	* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	*
7	*===-----------------------------------------------------------------------===
8	*/
9
10	#ifndef __MMINTRIN_H
11	#define __MMINTRIN_H
12
13	#if !defined(__i386__) && !defined(__x86_64__)
14	#error "This header is only meant to be used on x86 and x64 architecture"
15	#endif
16
17	typedef long long __m64 __attribute__((__vector_size__(8), __aligned__(8))); /* 8-byte, 8-byte-aligned vector; the parameter/return type of the intrinsics below */
18
19	typedef long long __v1di __attribute__((__vector_size__(8))); /* 8 bytes viewed as [1 x i64] */
20	typedef int __v2si __attribute__((__vector_size__(8))); /* 8 bytes viewed as [2 x i32] */
21	typedef short __v4hi __attribute__((__vector_size__(8))); /* 8 bytes viewed as [4 x i16] */
22	typedef char __v8qi __attribute__((__vector_size__(8))); /* 8 bytes viewed as [8 x i8] */
23
24	/* Define the default attributes for the functions in this file: always inlined, no debug info, compiled for the "mmx,no-evex512" target, minimum vector width of 64 bits. */
25	#define __DEFAULT_FN_ATTRS \
26	__attribute__((__always_inline__, __nodebug__, __target__("mmx,no-evex512"), \
27	__min_vector_width__(64)))
28
29	/// Clears the MMX state by setting the state of the x87 stack registers
30	/// to empty.
31	///
32	/// \headerfile <x86intrin.h>
33	///
34	/// This intrinsic corresponds to the <c> EMMS </c> instruction.
35	///
36	static __inline__ void __attribute__((__always_inline__, __nodebug__,
37	__target__("mmx,no-evex512"))) /* same list as __DEFAULT_FN_ATTRS, minus __min_vector_width__(64) */
38	_mm_empty(void) {
39	__builtin_ia32_emms(); /* takes no operands and produces no value */
40	}
41
42	/// Constructs a 64-bit integer vector, setting the lower 32 bits to the
43	/// value of the 32-bit integer parameter and setting the upper 32 bits to 0.
44	///
45	/// \headerfile <x86intrin.h>
46	///
47	/// This intrinsic corresponds to the <c> MOVD </c> instruction.
48	///
49	/// \param __i
50	/// A 32-bit integer value.
51	/// \returns A 64-bit integer vector. The lower 32 bits contain the value of the
52	/// parameter. The upper 32 bits are set to 0.
53	static __inline__ __m64 __DEFAULT_FN_ATTRS
54	_mm_cvtsi32_si64(int __i)
55	{
56	return (__m64)__builtin_ia32_vec_init_v2si(__i, 0); /* builds a [2 x i32]: low element = __i, high element = 0 */
57	}
58
59	/// Returns the lower 32 bits of a 64-bit integer vector as a 32-bit
60	/// signed integer.
61	///
62	/// \headerfile <x86intrin.h>
63	///
64	/// This intrinsic corresponds to the <c> MOVD </c> instruction.
65	///
66	/// \param __m
67	/// A 64-bit integer vector.
68	/// \returns A 32-bit signed integer value containing the lower 32 bits of the
69	/// parameter.
70	static __inline__ int __DEFAULT_FN_ATTRS
71	_mm_cvtsi64_si32(__m64 __m)
72	{
73	return __builtin_ia32_vec_ext_v2si((__v2si)__m, 0); /* view __m as [2 x i32] and extract element 0 (the lower 32 bits) */
74	}
75
76	/// Casts a 64-bit signed integer value into a 64-bit integer vector.
77	///
78	/// \headerfile <x86intrin.h>
79	///
80	/// This intrinsic corresponds to the <c> MOVQ </c> instruction.
81	///
82	/// \param __i
83	/// A 64-bit signed integer.
84	/// \returns A 64-bit integer vector containing the same bitwise pattern as the
85	/// parameter.
86	static __inline__ __m64 __DEFAULT_FN_ATTRS
87	_mm_cvtsi64_m64(long long __i)
88	{
89	return (__m64)__i; /* same-size scalar-to-vector cast: the bit pattern is kept, no value conversion */
90	}
91
92	/// Casts a 64-bit integer vector into a 64-bit signed integer value.
93	///
94	/// \headerfile <x86intrin.h>
95	///
96	/// This intrinsic corresponds to the <c> MOVQ </c> instruction.
97	///
98	/// \param __m
99	/// A 64-bit integer vector.
100	/// \returns A 64-bit signed integer containing the same bitwise pattern as the
101	/// parameter.
102	static __inline__ long long __DEFAULT_FN_ATTRS
103	_mm_cvtm64_si64(__m64 __m)
104	{
105	return (long long)__m; /* same-size vector-to-scalar cast: the bit pattern is kept, no value conversion */
106	}
107
108	/// Converts 16-bit signed integers from both 64-bit integer vector
109	/// parameters of [4 x i16] into 8-bit signed integer values, and constructs
110	/// a 64-bit integer vector of [8 x i8] as the result. Positive values
111	/// greater than 0x7F are saturated to 0x7F. Negative values less than 0x80
112	/// are saturated to 0x80.
113	///
114	/// \headerfile <x86intrin.h>
115	///
116	/// This intrinsic corresponds to the <c> PACKSSWB </c> instruction.
117	///
118	/// \param __m1
119	/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a
120	/// 16-bit signed integer and is converted to an 8-bit signed integer with
121	/// saturation. Positive values greater than 0x7F are saturated to 0x7F.
122	/// Negative values less than 0x80 are saturated to 0x80. The converted
123	/// [4 x i8] values are written to the lower 32 bits of the result.
124	/// \param __m2
125	/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a
126	/// 16-bit signed integer and is converted to an 8-bit signed integer with
127	/// saturation. Positive values greater than 0x7F are saturated to 0x7F.
128	/// Negative values less than 0x80 are saturated to 0x80. The converted
129	/// [4 x i8] values are written to the upper 32 bits of the result.
130	/// \returns A 64-bit integer vector of [8 x i8] containing the converted
131	/// values.
132	static __inline__ __m64 __DEFAULT_FN_ATTRS
133	_mm_packs_pi16(__m64 __m1, __m64 __m2)
134	{
135	return (__m64)__builtin_ia32_packsswb((__v4hi)__m1, (__v4hi)__m2); /* __m1 fills result bits [31:0], __m2 fills bits [63:32] */
136	}
137
138	/// Converts 32-bit signed integers from both 64-bit integer vector
139	/// parameters of [2 x i32] into 16-bit signed integer values, and constructs
140	/// a 64-bit integer vector of [4 x i16] as the result. Positive values
141	/// greater than 0x7FFF are saturated to 0x7FFF. Negative values less than
142	/// 0x8000 are saturated to 0x8000.
143	///
144	/// \headerfile <x86intrin.h>
145	///
146	/// This intrinsic corresponds to the <c> PACKSSDW </c> instruction.
147	///
148	/// \param __m1
149	/// A 64-bit integer vector of [2 x i32]. Each 32-bit element is treated as a
150	/// 32-bit signed integer and is converted to a 16-bit signed integer with
151	/// saturation. Positive values greater than 0x7FFF are saturated to 0x7FFF.
152	/// Negative values less than 0x8000 are saturated to 0x8000. The converted
153	/// [2 x i16] values are written to the lower 32 bits of the result.
154	/// \param __m2
155	/// A 64-bit integer vector of [2 x i32]. Each 32-bit element is treated as a
156	/// 32-bit signed integer and is converted to a 16-bit signed integer with
157	/// saturation. Positive values greater than 0x7FFF are saturated to 0x7FFF.
158	/// Negative values less than 0x8000 are saturated to 0x8000. The converted
159	/// [2 x i16] values are written to the upper 32 bits of the result.
160	/// \returns A 64-bit integer vector of [4 x i16] containing the converted
161	/// values.
162	static __inline__ __m64 __DEFAULT_FN_ATTRS
163	_mm_packs_pi32(__m64 __m1, __m64 __m2)
164	{
165	return (__m64)__builtin_ia32_packssdw((__v2si)__m1, (__v2si)__m2); /* __m1 fills result bits [31:0], __m2 fills bits [63:32] */
166	}
167
168	/// Converts 16-bit signed integers from both 64-bit integer vector
169	/// parameters of [4 x i16] into 8-bit unsigned integer values, and
170	/// constructs a 64-bit integer vector of [8 x i8] as the result. Values
171	/// greater than 0xFF are saturated to 0xFF. Values less than 0 are saturated
172	/// to 0.
173	///
174	/// \headerfile <x86intrin.h>
175	///
176	/// This intrinsic corresponds to the <c> PACKUSWB </c> instruction.
177	///
178	/// \param __m1
179	/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a
180	/// 16-bit signed integer and is converted to an 8-bit unsigned integer with
181	/// saturation. Values greater than 0xFF are saturated to 0xFF. Values less
182	/// than 0 are saturated to 0. The converted [4 x i8] values are written to
183	/// the lower 32 bits of the result.
184	/// \param __m2
185	/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a
186	/// 16-bit signed integer and is converted to an 8-bit unsigned integer with
187	/// saturation. Values greater than 0xFF are saturated to 0xFF. Values less
188	/// than 0 are saturated to 0. The converted [4 x i8] values are written to
189	/// the upper 32 bits of the result.
190	/// \returns A 64-bit integer vector of [8 x i8] containing the converted
191	/// values.
192	static __inline__ __m64 __DEFAULT_FN_ATTRS
193	_mm_packs_pu16(__m64 __m1, __m64 __m2)
194	{
195	return (__m64)__builtin_ia32_packuswb((__v4hi)__m1, (__v4hi)__m2); /* inputs are read as signed i16; __m1 fills result bits [31:0], __m2 fills bits [63:32] */
196	}
197
198	/// Unpacks the upper 32 bits from two 64-bit integer vectors of [8 x i8]
199	/// and interleaves them into a 64-bit integer vector of [8 x i8].
200	///
201	/// \headerfile <x86intrin.h>
202	///
203	/// This intrinsic corresponds to the <c> PUNPCKHBW </c> instruction.
204	///
205	/// \param __m1
206	/// A 64-bit integer vector of [8 x i8]. \n
207	/// Bits [39:32] are written to bits [7:0] of the result. \n
208	/// Bits [47:40] are written to bits [23:16] of the result. \n
209	/// Bits [55:48] are written to bits [39:32] of the result. \n
210	/// Bits [63:56] are written to bits [55:48] of the result.
211	/// \param __m2
212	/// A 64-bit integer vector of [8 x i8]. \n
213	/// Bits [39:32] are written to bits [15:8] of the result. \n
214	/// Bits [47:40] are written to bits [31:24] of the result. \n
215	/// Bits [55:48] are written to bits [47:40] of the result. \n
216	/// Bits [63:56] are written to bits [63:56] of the result.
217	/// \returns A 64-bit integer vector of [8 x i8] containing the interleaved
218	/// values.
219	static __inline__ __m64 __DEFAULT_FN_ATTRS
220	_mm_unpackhi_pi8(__m64 __m1, __m64 __m2)
221	{
222	return (__m64)__builtin_ia32_punpckhbw((__v8qi)__m1, (__v8qi)__m2); /* result bytes, low to high: m1[4], m2[4], m1[5], m2[5], m1[6], m2[6], m1[7], m2[7] */
223	}
224
225	/// Unpacks the upper 32 bits from two 64-bit integer vectors of
226	/// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16].
227	///
228	/// \headerfile <x86intrin.h>
229	///
230	/// This intrinsic corresponds to the <c> PUNPCKHWD </c> instruction.
231	///
232	/// \param __m1
233	/// A 64-bit integer vector of [4 x i16]. \n
234	/// Bits [47:32] are written to bits [15:0] of the result. \n
235	/// Bits [63:48] are written to bits [47:32] of the result.
236	/// \param __m2
237	/// A 64-bit integer vector of [4 x i16]. \n
238	/// Bits [47:32] are written to bits [31:16] of the result. \n
239	/// Bits [63:48] are written to bits [63:48] of the result.
240	/// \returns A 64-bit integer vector of [4 x i16] containing the interleaved
241	/// values.
242	static __inline__ __m64 __DEFAULT_FN_ATTRS
243	_mm_unpackhi_pi16(__m64 __m1, __m64 __m2)
244	{
245	return (__m64)__builtin_ia32_punpckhwd((__v4hi)__m1, (__v4hi)__m2); /* result words, low to high: m1[2], m2[2], m1[3], m2[3] */
246	}
247
248	/// Unpacks the upper 32 bits from two 64-bit integer vectors of
249	/// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32].
250	///
251	/// \headerfile <x86intrin.h>
252	///
253	/// This intrinsic corresponds to the <c> PUNPCKHDQ </c> instruction.
254	///
255	/// \param __m1
256	/// A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to
257	/// the lower 32 bits of the result.
258	/// \param __m2
259	/// A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to
260	/// the upper 32 bits of the result.
261	/// \returns A 64-bit integer vector of [2 x i32] containing the interleaved
262	/// values.
263	static __inline__ __m64 __DEFAULT_FN_ATTRS
264	_mm_unpackhi_pi32(__m64 __m1, __m64 __m2)
265	{
266	return (__m64)__builtin_ia32_punpckhdq((__v2si)__m1, (__v2si)__m2); /* result dwords, low to high: m1[1], m2[1] */
267	}
268
269	/// Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8]
270	/// and interleaves them into a 64-bit integer vector of [8 x i8].
271	///
272	/// \headerfile <x86intrin.h>
273	///
274	/// This intrinsic corresponds to the <c> PUNPCKLBW </c> instruction.
275	///
276	/// \param __m1
277	/// A 64-bit integer vector of [8 x i8]. \n
278	/// Bits [7:0] are written to bits [7:0] of the result. \n
279	/// Bits [15:8] are written to bits [23:16] of the result. \n
280	/// Bits [23:16] are written to bits [39:32] of the result. \n
281	/// Bits [31:24] are written to bits [55:48] of the result.
282	/// \param __m2
283	/// A 64-bit integer vector of [8 x i8]. \n
284	/// Bits [7:0] are written to bits [15:8] of the result. \n
285	/// Bits [15:8] are written to bits [31:24] of the result. \n
286	/// Bits [23:16] are written to bits [47:40] of the result. \n
287	/// Bits [31:24] are written to bits [63:56] of the result.
288	/// \returns A 64-bit integer vector of [8 x i8] containing the interleaved
289	/// values.
290	static __inline__ __m64 __DEFAULT_FN_ATTRS
291	_mm_unpacklo_pi8(__m64 __m1, __m64 __m2)
292	{
293	return (__m64)__builtin_ia32_punpcklbw((__v8qi)__m1, (__v8qi)__m2); /* result bytes, low to high: m1[0], m2[0], m1[1], m2[1], m1[2], m2[2], m1[3], m2[3] */
294	}
295
296	/// Unpacks the lower 32 bits from two 64-bit integer vectors of
297	/// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16].
298	///
299	/// \headerfile <x86intrin.h>
300	///
301	/// This intrinsic corresponds to the <c> PUNPCKLWD </c> instruction.
302	///
303	/// \param __m1
304	/// A 64-bit integer vector of [4 x i16]. \n
305	/// Bits [15:0] are written to bits [15:0] of the result. \n
306	/// Bits [31:16] are written to bits [47:32] of the result.
307	/// \param __m2
308	/// A 64-bit integer vector of [4 x i16]. \n
309	/// Bits [15:0] are written to bits [31:16] of the result. \n
310	/// Bits [31:16] are written to bits [63:48] of the result.
311	/// \returns A 64-bit integer vector of [4 x i16] containing the interleaved
312	/// values.
313	static __inline__ __m64 __DEFAULT_FN_ATTRS
314	_mm_unpacklo_pi16(__m64 __m1, __m64 __m2)
315	{
316	return (__m64)__builtin_ia32_punpcklwd((__v4hi)__m1, (__v4hi)__m2); /* result words, low to high: m1[0], m2[0], m1[1], m2[1] */
317	}
318
319	/// Unpacks the lower 32 bits from two 64-bit integer vectors of
320	/// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32].
321	///
322	/// \headerfile <x86intrin.h>
323	///
324	/// This intrinsic corresponds to the <c> PUNPCKLDQ </c> instruction.
325	///
326	/// \param __m1
327	/// A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to
328	/// the lower 32 bits of the result.
329	/// \param __m2
330	/// A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to
331	/// the upper 32 bits of the result.
332	/// \returns A 64-bit integer vector of [2 x i32] containing the interleaved
333	/// values.
334	static __inline__ __m64 __DEFAULT_FN_ATTRS
335	_mm_unpacklo_pi32(__m64 __m1, __m64 __m2)
336	{
337	return (__m64)__builtin_ia32_punpckldq((__v2si)__m1, (__v2si)__m2); /* result dwords, low to high: m1[0], m2[0] */
338	}
339
340	/// Adds each 8-bit integer element of the first 64-bit integer vector
341	/// of [8 x i8] to the corresponding 8-bit integer element of the second
342	/// 64-bit integer vector of [8 x i8]. The lower 8 bits of the results are
343	/// packed into a 64-bit integer vector of [8 x i8].
344	///
345	/// \headerfile <x86intrin.h>
346	///
347	/// This intrinsic corresponds to the <c> PADDB </c> instruction.
348	///
349	/// \param __m1
350	/// A 64-bit integer vector of [8 x i8].
351	/// \param __m2
352	/// A 64-bit integer vector of [8 x i8].
353	/// \returns A 64-bit integer vector of [8 x i8] containing the sums of both
354	/// parameters.
355	static __inline__ __m64 __DEFAULT_FN_ATTRS
356	_mm_add_pi8(__m64 __m1, __m64 __m2)
357	{
358	return (__m64)__builtin_ia32_paddb((__v8qi)__m1, (__v8qi)__m2); /* per-byte add; only the low 8 bits of each sum are kept (no saturation) */
359	}
360
361	/// Adds each 16-bit integer element of the first 64-bit integer vector
362	/// of [4 x i16] to the corresponding 16-bit integer element of the second
363	/// 64-bit integer vector of [4 x i16]. The lower 16 bits of the results are
364	/// packed into a 64-bit integer vector of [4 x i16].
365	///
366	/// \headerfile <x86intrin.h>
367	///
368	/// This intrinsic corresponds to the <c> PADDW </c> instruction.
369	///
370	/// \param __m1
371	/// A 64-bit integer vector of [4 x i16].
372	/// \param __m2
373	/// A 64-bit integer vector of [4 x i16].
374	/// \returns A 64-bit integer vector of [4 x i16] containing the sums of both
375	/// parameters.
376	static __inline__ __m64 __DEFAULT_FN_ATTRS
377	_mm_add_pi16(__m64 __m1, __m64 __m2)
378	{
379	return (__m64)__builtin_ia32_paddw((__v4hi)__m1, (__v4hi)__m2); /* per-word add; only the low 16 bits of each sum are kept (no saturation) */
380	}
381
382	/// Adds each 32-bit integer element of the first 64-bit integer vector
383	/// of [2 x i32] to the corresponding 32-bit integer element of the second
384	/// 64-bit integer vector of [2 x i32]. The lower 32 bits of the results are
385	/// packed into a 64-bit integer vector of [2 x i32].
386	///
387	/// \headerfile <x86intrin.h>
388	///
389	/// This intrinsic corresponds to the <c> PADDD </c> instruction.
390	///
391	/// \param __m1
392	/// A 64-bit integer vector of [2 x i32].
393	/// \param __m2
394	/// A 64-bit integer vector of [2 x i32].
395	/// \returns A 64-bit integer vector of [2 x i32] containing the sums of both
396	/// parameters.
397	static __inline__ __m64 __DEFAULT_FN_ATTRS
398	_mm_add_pi32(__m64 __m1, __m64 __m2)
399	{
400	return (__m64)__builtin_ia32_paddd((__v2si)__m1, (__v2si)__m2); /* per-dword add; only the low 32 bits of each sum are kept (no saturation) */
401	}
402
403	/// Adds each 8-bit signed integer element of the first 64-bit integer
404	/// vector of [8 x i8] to the corresponding 8-bit signed integer element of
405	/// the second 64-bit integer vector of [8 x i8]. Positive sums greater than
406	/// 0x7F are saturated to 0x7F. Negative sums less than 0x80 are saturated to
407	/// 0x80. The results are packed into a 64-bit integer vector of [8 x i8].
408	///
409	/// \headerfile <x86intrin.h>
410	///
411	/// This intrinsic corresponds to the <c> PADDSB </c> instruction.
412	///
413	/// \param __m1
414	/// A 64-bit integer vector of [8 x i8].
415	/// \param __m2
416	/// A 64-bit integer vector of [8 x i8].
417	/// \returns A 64-bit integer vector of [8 x i8] containing the saturated sums
418	/// of both parameters.
419	static __inline__ __m64 __DEFAULT_FN_ATTRS
420	_mm_adds_pi8(__m64 __m1, __m64 __m2)
421	{
422	return (__m64)__builtin_ia32_paddsb((__v8qi)__m1, (__v8qi)__m2); /* per-byte signed add with saturation instead of wrap-around */
423	}
424
425	/// Adds each 16-bit signed integer element of the first 64-bit integer
426	/// vector of [4 x i16] to the corresponding 16-bit signed integer element of
427	/// the second 64-bit integer vector of [4 x i16]. Positive sums greater than
428	/// 0x7FFF are saturated to 0x7FFF. Negative sums less than 0x8000 are
429	/// saturated to 0x8000. The results are packed into a 64-bit integer vector
430	/// of [4 x i16].
431	///
432	/// \headerfile <x86intrin.h>
433	///
434	/// This intrinsic corresponds to the <c> PADDSW </c> instruction.
435	///
436	/// \param __m1
437	/// A 64-bit integer vector of [4 x i16].
438	/// \param __m2
439	/// A 64-bit integer vector of [4 x i16].
440	/// \returns A 64-bit integer vector of [4 x i16] containing the saturated sums
441	/// of both parameters.
442	static __inline__ __m64 __DEFAULT_FN_ATTRS
443	_mm_adds_pi16(__m64 __m1, __m64 __m2)
444	{
445	return (__m64)__builtin_ia32_paddsw((__v4hi)__m1, (__v4hi)__m2); /* per-word signed add with saturation instead of wrap-around */
446	}
447
448	/// Adds each 8-bit unsigned integer element of the first 64-bit integer
449	/// vector of [8 x i8] to the corresponding 8-bit unsigned integer element of
450	/// the second 64-bit integer vector of [8 x i8]. Sums greater than 0xFF are
451	/// saturated to 0xFF. The results are packed into a 64-bit integer vector of
452	/// [8 x i8].
453	///
454	/// \headerfile <x86intrin.h>
455	///
456	/// This intrinsic corresponds to the <c> PADDUSB </c> instruction.
457	///
458	/// \param __m1
459	/// A 64-bit integer vector of [8 x i8].
460	/// \param __m2
461	/// A 64-bit integer vector of [8 x i8].
462	/// \returns A 64-bit integer vector of [8 x i8] containing the saturated
463	/// unsigned sums of both parameters.
464	static __inline__ __m64 __DEFAULT_FN_ATTRS
465	_mm_adds_pu8(__m64 __m1, __m64 __m2)
466	{
467	return (__m64)__builtin_ia32_paddusb((__v8qi)__m1, (__v8qi)__m2); /* per-byte unsigned add, capped at 0xFF; the __v8qi cast only selects the byte layout */
468	}
469
470	/// Adds each 16-bit unsigned integer element of the first 64-bit integer
471	/// vector of [4 x i16] to the corresponding 16-bit unsigned integer element
472	/// of the second 64-bit integer vector of [4 x i16]. Sums greater than
473	/// 0xFFFF are saturated to 0xFFFF. The results are packed into a 64-bit
474	/// integer vector of [4 x i16].
475	///
476	/// \headerfile <x86intrin.h>
477	///
478	/// This intrinsic corresponds to the <c> PADDUSW </c> instruction.
479	///
480	/// \param __m1
481	/// A 64-bit integer vector of [4 x i16].
482	/// \param __m2
483	/// A 64-bit integer vector of [4 x i16].
484	/// \returns A 64-bit integer vector of [4 x i16] containing the saturated
485	/// unsigned sums of both parameters.
486	static __inline__ __m64 __DEFAULT_FN_ATTRS
487	_mm_adds_pu16(__m64 __m1, __m64 __m2)
488	{
489	return (__m64)__builtin_ia32_paddusw((__v4hi)__m1, (__v4hi)__m2); /* per-word unsigned add, capped at 0xFFFF; the __v4hi cast only selects the word layout */
490	}
491
492	/// Subtracts each 8-bit integer element of the second 64-bit integer
493	/// vector of [8 x i8] from the corresponding 8-bit integer element of the
494	/// first 64-bit integer vector of [8 x i8]. The lower 8 bits of the results
495	/// are packed into a 64-bit integer vector of [8 x i8].
496	///
497	/// \headerfile <x86intrin.h>
498	///
499	/// This intrinsic corresponds to the <c> PSUBB </c> instruction.
500	///
501	/// \param __m1
502	/// A 64-bit integer vector of [8 x i8] containing the minuends.
503	/// \param __m2
504	/// A 64-bit integer vector of [8 x i8] containing the subtrahends.
505	/// \returns A 64-bit integer vector of [8 x i8] containing the differences of
506	/// both parameters.
507	static __inline__ __m64 __DEFAULT_FN_ATTRS
508	_mm_sub_pi8(__m64 __m1, __m64 __m2)
509	{
510	return (__m64)__builtin_ia32_psubb((__v8qi)__m1, (__v8qi)__m2); /* per-byte __m1 - __m2; only the low 8 bits of each difference are kept */
511	}
512
513	/// Subtracts each 16-bit integer element of the second 64-bit integer
514	/// vector of [4 x i16] from the corresponding 16-bit integer element of the
515	/// first 64-bit integer vector of [4 x i16]. The lower 16 bits of the
516	/// results are packed into a 64-bit integer vector of [4 x i16].
517	///
518	/// \headerfile <x86intrin.h>
519	///
520	/// This intrinsic corresponds to the <c> PSUBW </c> instruction.
521	///
522	/// \param __m1
523	/// A 64-bit integer vector of [4 x i16] containing the minuends.
524	/// \param __m2
525	/// A 64-bit integer vector of [4 x i16] containing the subtrahends.
526	/// \returns A 64-bit integer vector of [4 x i16] containing the differences of
527	/// both parameters.
528	static __inline__ __m64 __DEFAULT_FN_ATTRS
529	_mm_sub_pi16(__m64 __m1, __m64 __m2)
530	{
531	return (__m64)__builtin_ia32_psubw((__v4hi)__m1, (__v4hi)__m2); /* per-word __m1 - __m2; only the low 16 bits of each difference are kept */
532	}
533
534	/// Subtracts each 32-bit integer element of the second 64-bit integer
535	/// vector of [2 x i32] from the corresponding 32-bit integer element of the
536	/// first 64-bit integer vector of [2 x i32]. The lower 32 bits of the
537	/// results are packed into a 64-bit integer vector of [2 x i32].
538	///
539	/// \headerfile <x86intrin.h>
540	///
541	/// This intrinsic corresponds to the <c> PSUBD </c> instruction.
542	///
543	/// \param __m1
544	/// A 64-bit integer vector of [2 x i32] containing the minuends.
545	/// \param __m2
546	/// A 64-bit integer vector of [2 x i32] containing the subtrahends.
547	/// \returns A 64-bit integer vector of [2 x i32] containing the differences of
548	/// both parameters.
549	static __inline__ __m64 __DEFAULT_FN_ATTRS
550	_mm_sub_pi32(__m64 __m1, __m64 __m2)
551	{
552	return (__m64)__builtin_ia32_psubd((__v2si)__m1, (__v2si)__m2); /* per-dword __m1 - __m2; only the low 32 bits of each difference are kept */
553	}
554
555	/// Subtracts each 8-bit signed integer element of the second 64-bit
556	/// integer vector of [8 x i8] from the corresponding 8-bit signed integer
557	/// element of the first 64-bit integer vector of [8 x i8]. Positive results
558	/// greater than 0x7F are saturated to 0x7F. Negative results less than 0x80
559	/// are saturated to 0x80. The results are packed into a 64-bit integer
560	/// vector of [8 x i8].
561	///
562	/// \headerfile <x86intrin.h>
563	///
564	/// This intrinsic corresponds to the <c> PSUBSB </c> instruction.
565	///
566	/// \param __m1
567	/// A 64-bit integer vector of [8 x i8] containing the minuends.
568	/// \param __m2
569	/// A 64-bit integer vector of [8 x i8] containing the subtrahends.
570	/// \returns A 64-bit integer vector of [8 x i8] containing the saturated
571	/// differences of both parameters.
572	static __inline__ __m64 __DEFAULT_FN_ATTRS
573	_mm_subs_pi8(__m64 __m1, __m64 __m2)
574	{
575	return (__m64)__builtin_ia32_psubsb((__v8qi)__m1, (__v8qi)__m2); /* per-byte signed __m1 - __m2 with saturation instead of wrap-around */
576	}
577
578	/// Subtracts each 16-bit signed integer element of the second 64-bit
579	/// integer vector of [4 x i16] from the corresponding 16-bit signed integer
580	/// element of the first 64-bit integer vector of [4 x i16]. Positive results
581	/// greater than 0x7FFF are saturated to 0x7FFF. Negative results less than
582	/// 0x8000 are saturated to 0x8000. The results are packed into a 64-bit
583	/// integer vector of [4 x i16].
584	///
585	/// \headerfile <x86intrin.h>
586	///
587	/// This intrinsic corresponds to the <c> PSUBSW </c> instruction.
588	///
589	/// \param __m1
590	/// A 64-bit integer vector of [4 x i16] containing the minuends.
591	/// \param __m2
592	/// A 64-bit integer vector of [4 x i16] containing the subtrahends.
593	/// \returns A 64-bit integer vector of [4 x i16] containing the saturated
594	/// differences of both parameters.
595	static __inline__ __m64 __DEFAULT_FN_ATTRS
596	_mm_subs_pi16(__m64 __m1, __m64 __m2)
597	{
598	return (__m64)__builtin_ia32_psubsw((__v4hi)__m1, (__v4hi)__m2); /* per-word signed __m1 - __m2 with saturation instead of wrap-around */
599	}
600
601	/// Subtracts each 8-bit unsigned integer element of the second 64-bit
602	/// integer vector of [8 x i8] from the corresponding 8-bit unsigned integer
603	/// element of the first 64-bit integer vector of [8 x i8].
604	///
605	/// If an element of the first vector is less than the corresponding element
606	/// of the second vector, the result is saturated to 0. The results are
607	/// packed into a 64-bit integer vector of [8 x i8].
608	///
609	/// \headerfile <x86intrin.h>
610	///
611	/// This intrinsic corresponds to the <c> PSUBUSB </c> instruction.
612	///
613	/// \param __m1
614	/// A 64-bit integer vector of [8 x i8] containing the minuends.
615	/// \param __m2
616	/// A 64-bit integer vector of [8 x i8] containing the subtrahends.
617	/// \returns A 64-bit integer vector of [8 x i8] containing the saturated
618	/// differences of both parameters.
619	static __inline__ __m64 __DEFAULT_FN_ATTRS
620	_mm_subs_pu8(__m64 __m1, __m64 __m2)
621	{
622	return (__m64)__builtin_ia32_psubusb((__v8qi)__m1, (__v8qi)__m2); /* per-byte unsigned __m1 - __m2, floored at 0; the __v8qi cast only selects the byte layout */
623	}
624
625	/// Subtracts each 16-bit unsigned integer element of the second 64-bit
626	/// integer vector of [4 x i16] from the corresponding 16-bit unsigned
627	/// integer element of the first 64-bit integer vector of [4 x i16].
628	///
629	/// If an element of the first vector is less than the corresponding element
630	/// of the second vector, the result is saturated to 0. The results are
631	/// packed into a 64-bit integer vector of [4 x i16].
632	///
633	/// \headerfile <x86intrin.h>
634	///
635	/// This intrinsic corresponds to the <c> PSUBUSW </c> instruction.
636	///
637	/// \param __m1
638	/// A 64-bit integer vector of [4 x i16] containing the minuends.
639	/// \param __m2
640	/// A 64-bit integer vector of [4 x i16] containing the subtrahends.
641	/// \returns A 64-bit integer vector of [4 x i16] containing the saturated
642	/// differences of both parameters.
643	static __inline__ __m64 __DEFAULT_FN_ATTRS
644	_mm_subs_pu16(__m64 __m1, __m64 __m2)
645	{
646	return (__m64)__builtin_ia32_psubusw((__v4hi)__m1, (__v4hi)__m2); /* per-word unsigned __m1 - __m2, floored at 0; the __v4hi cast only selects the word layout */
647	}
648
649	/// Multiplies each 16-bit signed integer element of the first 64-bit
650	/// integer vector of [4 x i16] by the corresponding 16-bit signed integer
651	/// element of the second 64-bit integer vector of [4 x i16] to get four
652	/// 32-bit products. Adds adjacent pairs of products to get two 32-bit sums.
653	/// The lower 32 bits of these two sums are packed into a 64-bit integer
654	/// vector of [2 x i32].
655	///
656	/// For example, bits [15:0] of both parameters are multiplied, bits [31:16]
657	/// of both parameters are multiplied, and the sum of both results is written
658	/// to bits [31:0] of the result.
659	///
660	/// \headerfile <x86intrin.h>
661	///
662	/// This intrinsic corresponds to the <c> PMADDWD </c> instruction.
663	///
664	/// \param __m1
665	/// A 64-bit integer vector of [4 x i16].
666	/// \param __m2
667	/// A 64-bit integer vector of [4 x i16].
668	/// \returns A 64-bit integer vector of [2 x i32] containing the sums of
669	/// products of both parameters.
670	static __inline__ __m64 __DEFAULT_FN_ATTRS
671	_mm_madd_pi16(__m64 __m1, __m64 __m2)
672	{
673	return (__m64)__builtin_ia32_pmaddwd((__v4hi)__m1, (__v4hi)__m2); /* result dword k = m1[2k] * m2[2k] + m1[2k+1] * m2[2k+1], for k = 0, 1 */
674	}
675
676	/// Multiplies each 16-bit signed integer element of the first 64-bit
677	/// integer vector of [4 x i16] by the corresponding 16-bit signed integer
678	/// element of the second 64-bit integer vector of [4 x i16]. Packs the upper
679	/// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16].
680	///
681	/// \headerfile <x86intrin.h>
682	///
683	/// This intrinsic corresponds to the <c> PMULHW </c> instruction.
684	///
685	/// \param __m1
686	/// A 64-bit integer vector of [4 x i16].
687	/// \param __m2
688	/// A 64-bit integer vector of [4 x i16].
689	/// \returns A 64-bit integer vector of [4 x i16] containing the upper 16 bits
690	/// of the products of both parameters.
691	static __inline__ __m64 __DEFAULT_FN_ATTRS
692	_mm_mulhi_pi16(__m64 __m1, __m64 __m2)
693	{
694	return (__m64)__builtin_ia32_pmulhw((__v4hi)__m1, (__v4hi)__m2); /* per-word signed multiply; keeps bits [31:16] of each 32-bit product */
695	}
696
697	/// Multiplies each 16-bit signed integer element of the first 64-bit
698	/// integer vector of [4 x i16] by the corresponding 16-bit signed integer
699	/// element of the second 64-bit integer vector of [4 x i16]. Packs the lower
700	/// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16].
701	///
702	/// \headerfile <x86intrin.h>
703	///
704	/// This intrinsic corresponds to the <c> PMULLW </c> instruction.
705	///
706	/// \param __m1
707	/// A 64-bit integer vector of [4 x i16].
708	/// \param __m2
709	/// A 64-bit integer vector of [4 x i16].
710	/// \returns A 64-bit integer vector of [4 x i16] containing the lower 16 bits
711	/// of the products of both parameters.
712	static __inline__ __m64 __DEFAULT_FN_ATTRS
713	_mm_mullo_pi16(__m64 __m1, __m64 __m2)
714	{
715	return (__m64)__builtin_ia32_pmullw((__v4hi)__m1, (__v4hi)__m2); /* per-word multiply; keeps bits [15:0] of each 32-bit product */
716	}
717
718	/// Left-shifts each 16-bit signed integer element of the first
719	/// parameter, which is a 64-bit integer vector of [4 x i16], by the number
720	/// of bits specified by the second parameter, which is a 64-bit integer. The
721	/// lower 16 bits of the results are packed into a 64-bit integer vector of
722	/// [4 x i16].
723	///
724	/// \headerfile <x86intrin.h>
725	///
726	/// This intrinsic corresponds to the <c> PSLLW </c> instruction.
727	///
728	/// \param __m
729	/// A 64-bit integer vector of [4 x i16].
730	/// \param __count
731	/// A 64-bit integer vector interpreted as a single 64-bit integer.
732	/// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted
733	/// values. If \a __count is greater than or equal to 16, the result is set to all
734	/// 0.
735	static __inline__ __m64 __DEFAULT_FN_ATTRS
736	_mm_sll_pi16(__m64 __m, __m64 __count)
737	{
738	return (__m64)__builtin_ia32_psllw((__v4hi)__m, __count); /* __count is passed uncast: it is one 64-bit shift amount applied to all four words */
739	}
740
741	/// Left-shifts each 16-bit signed integer element of a 64-bit integer
742	/// vector of [4 x i16] by the number of bits specified by a 32-bit integer.
743	/// The lower 16 bits of the results are packed into a 64-bit integer vector
744	/// of [4 x i16].
745	///
746	/// \headerfile <x86intrin.h>
747	///
748	/// This intrinsic corresponds to the <c> PSLLW </c> instruction.
749	///
750	/// \param __m
751	/// A 64-bit integer vector of [4 x i16].
752	/// \param __count
753	/// A 32-bit integer value.
754	/// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted
755	/// values. If \a __count is greater than or equal to 16, the result is set to all
756	/// 0.
757	static __inline__ __m64 __DEFAULT_FN_ATTRS
758	_mm_slli_pi16(__m64 __m, int __count)
759	{
760	return (__m64)__builtin_ia32_psllwi((__v4hi)__m, __count); /* "i" builtin: the shift amount is a plain int rather than an __m64 */
761	}
762
763	/// Left-shifts each 32-bit signed integer element of the first
764	/// parameter, which is a 64-bit integer vector of [2 x i32], by the number
765	/// of bits specified by the second parameter, which is a 64-bit integer. The
766	/// lower 32 bits of the results are packed into a 64-bit integer vector of
767	/// [2 x i32].
768	///
769	/// \headerfile <x86intrin.h>
770	///
771	/// This intrinsic corresponds to the <c> PSLLD </c> instruction.
772	///
773	/// \param __m
774	/// A 64-bit integer vector of [2 x i32].
775	/// \param __count
776	/// A 64-bit integer vector interpreted as a single 64-bit integer.
777	/// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted
778	/// values. If \a __count is greater or equal to 32, the result is set to all
779	/// 0.
780	static __inline__ __m64 __DEFAULT_FN_ATTRS
781	_mm_sll_pi32(__m64 __m, __m64 __count)
782	{
783	return (__m64)__builtin_ia32_pslld((__v2si)__m, __count);
784	}
785
786	/// Left-shifts each 32-bit signed integer element of a 64-bit integer
787	/// vector of [2 x i32] by the number of bits specified by a 32-bit integer.
788	/// The lower 32 bits of the results are packed into a 64-bit integer vector
789	/// of [2 x i32].
790	///
791	/// \headerfile <x86intrin.h>
792	///
793	/// This intrinsic corresponds to the <c> PSLLD </c> instruction.
794	///
795	/// \param __m
796	/// A 64-bit integer vector of [2 x i32].
797	/// \param __count
798	/// A 32-bit integer value.
799	/// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted
800	/// values. If \a __count is greater or equal to 32, the result is set to all
801	/// 0.
802	static __inline__ __m64 __DEFAULT_FN_ATTRS
803	_mm_slli_pi32(__m64 __m, int __count)
804	{
805	return (__m64)__builtin_ia32_pslldi((__v2si)__m, __count);
806	}
807
808	/// Left-shifts the first 64-bit integer parameter by the number of bits
809	/// specified by the second 64-bit integer parameter. The lower 64 bits of
810	/// result are returned.
811	///
812	/// \headerfile <x86intrin.h>
813	///
814	/// This intrinsic corresponds to the <c> PSLLQ </c> instruction.
815	///
816	/// \param __m
817	/// A 64-bit integer vector interpreted as a single 64-bit integer.
818	/// \param __count
819	/// A 64-bit integer vector interpreted as a single 64-bit integer.
820	/// \returns A 64-bit integer vector containing the left-shifted value. If
821	/// \a __count is greater or equal to 64, the result is set to 0.
822	static __inline__ __m64 __DEFAULT_FN_ATTRS
823	_mm_sll_si64(__m64 __m, __m64 __count)
824	{
825	return (__m64)__builtin_ia32_psllq((__v1di)__m, __count);
826	}
827
828	/// Left-shifts the first parameter, which is a 64-bit integer, by the
829	/// number of bits specified by the second parameter, which is a 32-bit
830	/// integer. The lower 64 bits of result are returned.
831	///
832	/// \headerfile <x86intrin.h>
833	///
834	/// This intrinsic corresponds to the <c> PSLLQ </c> instruction.
835	///
836	/// \param __m
837	/// A 64-bit integer vector interpreted as a single 64-bit integer.
838	/// \param __count
839	/// A 32-bit integer value.
840	/// \returns A 64-bit integer vector containing the left-shifted value. If
841	/// \a __count is greater or equal to 64, the result is set to 0.
842	static __inline__ __m64 __DEFAULT_FN_ATTRS
843	_mm_slli_si64(__m64 __m, int __count)
844	{
845	return (__m64)__builtin_ia32_psllqi((__v1di)__m, __count);
846	}
847
848	/// Right-shifts each 16-bit integer element of the first parameter,
849	/// which is a 64-bit integer vector of [4 x i16], by the number of bits
850	/// specified by the second parameter, which is a 64-bit integer.
851	///
852	/// High-order bits are filled with the sign bit of the initial value of each
853	/// 16-bit element. The 16-bit results are packed into a 64-bit integer
854	/// vector of [4 x i16].
855	///
856	/// \headerfile <x86intrin.h>
857	///
858	/// This intrinsic corresponds to the <c> PSRAW </c> instruction.
859	///
860	/// \param __m
861	/// A 64-bit integer vector of [4 x i16].
862	/// \param __count
863	/// A 64-bit integer vector interpreted as a single 64-bit integer.
864	/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
865	/// values.
866	static __inline__ __m64 __DEFAULT_FN_ATTRS
867	_mm_sra_pi16(__m64 __m, __m64 __count)
868	{
869	return (__m64)__builtin_ia32_psraw((__v4hi)__m, __count);
870	}
871
872	/// Right-shifts each 16-bit integer element of a 64-bit integer vector
873	/// of [4 x i16] by the number of bits specified by a 32-bit integer.
874	///
875	/// High-order bits are filled with the sign bit of the initial value of each
876	/// 16-bit element. The 16-bit results are packed into a 64-bit integer
877	/// vector of [4 x i16].
878	///
879	/// \headerfile <x86intrin.h>
880	///
881	/// This intrinsic corresponds to the <c> PSRAW </c> instruction.
882	///
883	/// \param __m
884	/// A 64-bit integer vector of [4 x i16].
885	/// \param __count
886	/// A 32-bit integer value.
887	/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
888	/// values.
889	static __inline__ __m64 __DEFAULT_FN_ATTRS
890	_mm_srai_pi16(__m64 __m, int __count)
891	{
892	return (__m64)__builtin_ia32_psrawi((__v4hi)__m, __count);
893	}
894
895	/// Right-shifts each 32-bit integer element of the first parameter,
896	/// which is a 64-bit integer vector of [2 x i32], by the number of bits
897	/// specified by the second parameter, which is a 64-bit integer.
898	///
899	/// High-order bits are filled with the sign bit of the initial value of each
900	/// 32-bit element. The 32-bit results are packed into a 64-bit integer
901	/// vector of [2 x i32].
902	///
903	/// \headerfile <x86intrin.h>
904	///
905	/// This intrinsic corresponds to the <c> PSRAD </c> instruction.
906	///
907	/// \param __m
908	/// A 64-bit integer vector of [2 x i32].
909	/// \param __count
910	/// A 64-bit integer vector interpreted as a single 64-bit integer.
911	/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
912	/// values.
913	static __inline__ __m64 __DEFAULT_FN_ATTRS
914	_mm_sra_pi32(__m64 __m, __m64 __count)
915	{
916	return (__m64)__builtin_ia32_psrad((__v2si)__m, __count);
917	}
918
919	/// Right-shifts each 32-bit integer element of a 64-bit integer vector
920	/// of [2 x i32] by the number of bits specified by a 32-bit integer.
921	///
922	/// High-order bits are filled with the sign bit of the initial value of each
923	/// 32-bit element. The 32-bit results are packed into a 64-bit integer
924	/// vector of [2 x i32].
925	///
926	/// \headerfile <x86intrin.h>
927	///
928	/// This intrinsic corresponds to the <c> PSRAD </c> instruction.
929	///
930	/// \param __m
931	/// A 64-bit integer vector of [2 x i32].
932	/// \param __count
933	/// A 32-bit integer value.
934	/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
935	/// values.
936	static __inline__ __m64 __DEFAULT_FN_ATTRS
937	_mm_srai_pi32(__m64 __m, int __count)
938	{
939	return (__m64)__builtin_ia32_psradi((__v2si)__m, __count);
940	}
941
942	/// Right-shifts each 16-bit integer element of the first parameter,
943	/// which is a 64-bit integer vector of [4 x i16], by the number of bits
944	/// specified by the second parameter, which is a 64-bit integer.
945	///
946	/// High-order bits are cleared. The 16-bit results are packed into a 64-bit
947	/// integer vector of [4 x i16].
948	///
949	/// \headerfile <x86intrin.h>
950	///
951	/// This intrinsic corresponds to the <c> PSRLW </c> instruction.
952	///
953	/// \param __m
954	/// A 64-bit integer vector of [4 x i16].
955	/// \param __count
956	/// A 64-bit integer vector interpreted as a single 64-bit integer.
957	/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
958	/// values.
959	static __inline__ __m64 __DEFAULT_FN_ATTRS
960	_mm_srl_pi16(__m64 __m, __m64 __count)
961	{
962	return (__m64)__builtin_ia32_psrlw((__v4hi)__m, __count);
963	}
964
965	/// Right-shifts each 16-bit integer element of a 64-bit integer vector
966	/// of [4 x i16] by the number of bits specified by a 32-bit integer.
967	///
968	/// High-order bits are cleared. The 16-bit results are packed into a 64-bit
969	/// integer vector of [4 x i16].
970	///
971	/// \headerfile <x86intrin.h>
972	///
973	/// This intrinsic corresponds to the <c> PSRLW </c> instruction.
974	///
975	/// \param __m
976	/// A 64-bit integer vector of [4 x i16].
977	/// \param __count
978	/// A 32-bit integer value.
979	/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
980	/// values.
981	static __inline__ __m64 __DEFAULT_FN_ATTRS
982	_mm_srli_pi16(__m64 __m, int __count)
983	{
984	return (__m64)__builtin_ia32_psrlwi((__v4hi)__m, __count);
985	}
986
987	/// Right-shifts each 32-bit integer element of the first parameter,
988	/// which is a 64-bit integer vector of [2 x i32], by the number of bits
989	/// specified by the second parameter, which is a 64-bit integer.
990	///
991	/// High-order bits are cleared. The 32-bit results are packed into a 64-bit
992	/// integer vector of [2 x i32].
993	///
994	/// \headerfile <x86intrin.h>
995	///
996	/// This intrinsic corresponds to the <c> PSRLD </c> instruction.
997	///
998	/// \param __m
999	/// A 64-bit integer vector of [2 x i32].
1000	/// \param __count
1001	/// A 64-bit integer vector interpreted as a single 64-bit integer.
1002	/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
1003	/// values.
1004	static __inline__ __m64 __DEFAULT_FN_ATTRS
1005	_mm_srl_pi32(__m64 __m, __m64 __count)
1006	{
1007	return (__m64)__builtin_ia32_psrld((__v2si)__m, __count);
1008	}
1009
1010	/// Right-shifts each 32-bit integer element of a 64-bit integer vector
1011	/// of [2 x i32] by the number of bits specified by a 32-bit integer.
1012	///
1013	/// High-order bits are cleared. The 32-bit results are packed into a 64-bit
1014	/// integer vector of [2 x i32].
1015	///
1016	/// \headerfile <x86intrin.h>
1017	///
1018	/// This intrinsic corresponds to the <c> PSRLD </c> instruction.
1019	///
1020	/// \param __m
1021	/// A 64-bit integer vector of [2 x i32].
1022	/// \param __count
1023	/// A 32-bit integer value.
1024	/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
1025	/// values.
1026	static __inline__ __m64 __DEFAULT_FN_ATTRS
1027	_mm_srli_pi32(__m64 __m, int __count)
1028	{
1029	return (__m64)__builtin_ia32_psrldi((__v2si)__m, __count);
1030	}
1031
1032	/// Right-shifts the first 64-bit integer parameter by the number of bits
1033	/// specified by the second 64-bit integer parameter.
1034	///
1035	/// High-order bits are cleared.
1036	///
1037	/// \headerfile <x86intrin.h>
1038	///
1039	/// This intrinsic corresponds to the <c> PSRLQ </c> instruction.
1040	///
1041	/// \param __m
1042	/// A 64-bit integer vector interpreted as a single 64-bit integer.
1043	/// \param __count
1044	/// A 64-bit integer vector interpreted as a single 64-bit integer.
1045	/// \returns A 64-bit integer vector containing the right-shifted value.
1046	static __inline__ __m64 __DEFAULT_FN_ATTRS
1047	_mm_srl_si64(__m64 __m, __m64 __count)
1048	{
1049	return (__m64)__builtin_ia32_psrlq((__v1di)__m, __count);
1050	}
1051
1052	/// Right-shifts the first parameter, which is a 64-bit integer, by the
1053	/// number of bits specified by the second parameter, which is a 32-bit
1054	/// integer.
1055	///
1056	/// High-order bits are cleared.
1057	///
1058	/// \headerfile <x86intrin.h>
1059	///
1060	/// This intrinsic corresponds to the <c> PSRLQ </c> instruction.
1061	///
1062	/// \param __m
1063	/// A 64-bit integer vector interpreted as a single 64-bit integer.
1064	/// \param __count
1065	/// A 32-bit integer value.
1066	/// \returns A 64-bit integer vector containing the right-shifted value.
1067	static __inline__ __m64 __DEFAULT_FN_ATTRS
1068	_mm_srli_si64(__m64 __m, int __count)
1069	{
1070	return (__m64)__builtin_ia32_psrlqi((__v1di)__m, __count);
1071	}
1072
1073	/// Performs a bitwise AND of two 64-bit integer vectors.
1074	///
1075	/// \headerfile <x86intrin.h>
1076	///
1077	/// This intrinsic corresponds to the <c> PAND </c> instruction.
1078	///
1079	/// \param __m1
1080	/// A 64-bit integer vector.
1081	/// \param __m2
1082	/// A 64-bit integer vector.
1083	/// \returns A 64-bit integer vector containing the bitwise AND of both
1084	/// parameters.
1085	static __inline__ __m64 __DEFAULT_FN_ATTRS
1086	_mm_and_si64(__m64 __m1, __m64 __m2)
1087	{
1088	return __builtin_ia32_pand((__v1di)__m1, (__v1di)__m2);
1089	}
1090
1091	/// Performs a bitwise NOT of the first 64-bit integer vector, and then
1092	/// performs a bitwise AND of the intermediate result and the second 64-bit
1093	/// integer vector.
1094	///
1095	/// \headerfile <x86intrin.h>
1096	///
1097	/// This intrinsic corresponds to the <c> PANDN </c> instruction.
1098	///
1099	/// \param __m1
1100	/// A 64-bit integer vector. The one's complement of this parameter is used
1101	/// in the bitwise AND.
1102	/// \param __m2
1103	/// A 64-bit integer vector.
1104	/// \returns A 64-bit integer vector containing the bitwise AND of the second
1105	/// parameter and the one's complement of the first parameter.
1106	static __inline__ __m64 __DEFAULT_FN_ATTRS
1107	_mm_andnot_si64(__m64 __m1, __m64 __m2)
1108	{
1109	return __builtin_ia32_pandn((__v1di)__m1, (__v1di)__m2);
1110	}
1111
1112	/// Performs a bitwise OR of two 64-bit integer vectors.
1113	///
1114	/// \headerfile <x86intrin.h>
1115	///
1116	/// This intrinsic corresponds to the <c> POR </c> instruction.
1117	///
1118	/// \param __m1
1119	/// A 64-bit integer vector.
1120	/// \param __m2
1121	/// A 64-bit integer vector.
1122	/// \returns A 64-bit integer vector containing the bitwise OR of both
1123	/// parameters.
1124	static __inline__ __m64 __DEFAULT_FN_ATTRS
1125	_mm_or_si64(__m64 __m1, __m64 __m2)
1126	{
1127	return __builtin_ia32_por((__v1di)__m1, (__v1di)__m2);
1128	}
1129
1130	/// Performs a bitwise exclusive OR of two 64-bit integer vectors.
1131	///
1132	/// \headerfile <x86intrin.h>
1133	///
1134	/// This intrinsic corresponds to the <c> PXOR </c> instruction.
1135	///
1136	/// \param __m1
1137	/// A 64-bit integer vector.
1138	/// \param __m2
1139	/// A 64-bit integer vector.
1140	/// \returns A 64-bit integer vector containing the bitwise exclusive OR of both
1141	/// parameters.
1142	static __inline__ __m64 __DEFAULT_FN_ATTRS
1143	_mm_xor_si64(__m64 __m1, __m64 __m2)
1144	{
1145	return __builtin_ia32_pxor((__v1di)__m1, (__v1di)__m2);
1146	}
1147
1148	/// Compares the 8-bit integer elements of two 64-bit integer vectors of
1149	/// [8 x i8] to determine if the element of the first vector is equal to the
1150	/// corresponding element of the second vector.
1151	///
1152	/// The comparison yields 0 for false, 0xFF for true.
1153	///
1154	/// \headerfile <x86intrin.h>
1155	///
1156	/// This intrinsic corresponds to the <c> PCMPEQB </c> instruction.
1157	///
1158	/// \param __m1
1159	/// A 64-bit integer vector of [8 x i8].
1160	/// \param __m2
1161	/// A 64-bit integer vector of [8 x i8].
1162	/// \returns A 64-bit integer vector of [8 x i8] containing the comparison
1163	/// results.
1164	static __inline__ __m64 __DEFAULT_FN_ATTRS
1165	_mm_cmpeq_pi8(__m64 __m1, __m64 __m2)
1166	{
1167	return (__m64)__builtin_ia32_pcmpeqb((__v8qi)__m1, (__v8qi)__m2);
1168	}
1169
1170	/// Compares the 16-bit integer elements of two 64-bit integer vectors of
1171	/// [4 x i16] to determine if the element of the first vector is equal to the
1172	/// corresponding element of the second vector.
1173	///
1174	/// The comparison yields 0 for false, 0xFFFF for true.
1175	///
1176	/// \headerfile <x86intrin.h>
1177	///
1178	/// This intrinsic corresponds to the <c> PCMPEQW </c> instruction.
1179	///
1180	/// \param __m1
1181	/// A 64-bit integer vector of [4 x i16].
1182	/// \param __m2
1183	/// A 64-bit integer vector of [4 x i16].
1184	/// \returns A 64-bit integer vector of [4 x i16] containing the comparison
1185	/// results.
1186	static __inline__ __m64 __DEFAULT_FN_ATTRS
1187	_mm_cmpeq_pi16(__m64 __m1, __m64 __m2)
1188	{
1189	return (__m64)__builtin_ia32_pcmpeqw((__v4hi)__m1, (__v4hi)__m2);
1190	}
1191
1192	/// Compares the 32-bit integer elements of two 64-bit integer vectors of
1193	/// [2 x i32] to determine if the element of the first vector is equal to the
1194	/// corresponding element of the second vector.
1195	///
1196	/// The comparison yields 0 for false, 0xFFFFFFFF for true.
1197	///
1198	/// \headerfile <x86intrin.h>
1199	///
1200	/// This intrinsic corresponds to the <c> PCMPEQD </c> instruction.
1201	///
1202	/// \param __m1
1203	/// A 64-bit integer vector of [2 x i32].
1204	/// \param __m2
1205	/// A 64-bit integer vector of [2 x i32].
1206	/// \returns A 64-bit integer vector of [2 x i32] containing the comparison
1207	/// results.
1208	static __inline__ __m64 __DEFAULT_FN_ATTRS
1209	_mm_cmpeq_pi32(__m64 __m1, __m64 __m2)
1210	{
1211	return (__m64)__builtin_ia32_pcmpeqd((__v2si)__m1, (__v2si)__m2);
1212	}
1213
1214	/// Compares the 8-bit integer elements of two 64-bit integer vectors of
1215	/// [8 x i8] to determine if the element of the first vector is greater than
1216	/// the corresponding element of the second vector.
1217	///
1218	/// The comparison yields 0 for false, 0xFF for true.
1219	///
1220	/// \headerfile <x86intrin.h>
1221	///
1222	/// This intrinsic corresponds to the <c> PCMPGTB </c> instruction.
1223	///
1224	/// \param __m1
1225	/// A 64-bit integer vector of [8 x i8].
1226	/// \param __m2
1227	/// A 64-bit integer vector of [8 x i8].
1228	/// \returns A 64-bit integer vector of [8 x i8] containing the comparison
1229	/// results.
1230	static __inline__ __m64 __DEFAULT_FN_ATTRS
1231	_mm_cmpgt_pi8(__m64 __m1, __m64 __m2)
1232	{
1233	return (__m64)__builtin_ia32_pcmpgtb((__v8qi)__m1, (__v8qi)__m2);
1234	}
1235
1236	/// Compares the 16-bit integer elements of two 64-bit integer vectors of
1237	/// [4 x i16] to determine if the element of the first vector is greater than
1238	/// the corresponding element of the second vector.
1239	///
1240	/// The comparison yields 0 for false, 0xFFFF for true.
1241	///
1242	/// \headerfile <x86intrin.h>
1243	///
1244	/// This intrinsic corresponds to the <c> PCMPGTW </c> instruction.
1245	///
1246	/// \param __m1
1247	/// A 64-bit integer vector of [4 x i16].
1248	/// \param __m2
1249	/// A 64-bit integer vector of [4 x i16].
1250	/// \returns A 64-bit integer vector of [4 x i16] containing the comparison
1251	/// results.
1252	static __inline__ __m64 __DEFAULT_FN_ATTRS
1253	_mm_cmpgt_pi16(__m64 __m1, __m64 __m2)
1254	{
1255	return (__m64)__builtin_ia32_pcmpgtw((__v4hi)__m1, (__v4hi)__m2);
1256	}
1257
1258	/// Compares the 32-bit integer elements of two 64-bit integer vectors of
1259	/// [2 x i32] to determine if the element of the first vector is greater than
1260	/// the corresponding element of the second vector.
1261	///
1262	/// The comparison yields 0 for false, 0xFFFFFFFF for true.
1263	///
1264	/// \headerfile <x86intrin.h>
1265	///
1266	/// This intrinsic corresponds to the <c> PCMPGTD </c> instruction.
1267	///
1268	/// \param __m1
1269	/// A 64-bit integer vector of [2 x i32].
1270	/// \param __m2
1271	/// A 64-bit integer vector of [2 x i32].
1272	/// \returns A 64-bit integer vector of [2 x i32] containing the comparison
1273	/// results.
1274	static __inline__ __m64 __DEFAULT_FN_ATTRS
1275	_mm_cmpgt_pi32(__m64 __m1, __m64 __m2)
1276	{
1277	return (__m64)__builtin_ia32_pcmpgtd((__v2si)__m1, (__v2si)__m2);
1278	}
1279
1280	/// Constructs a 64-bit integer vector initialized to zero.
1281	///
1282	/// \headerfile <x86intrin.h>
1283	///
1284	/// This intrinsic corresponds to the <c> PXOR </c> instruction.
1285	///
1286	/// \returns An initialized 64-bit integer vector with all elements set to zero.
1287	static __inline__ __m64 __DEFAULT_FN_ATTRS
1288	_mm_setzero_si64(void)
1289	{
1290	return __extension__ (__m64){ 0LL };
1291	}
1292
1293	/// Constructs a 64-bit integer vector initialized with the specified
1294	/// 32-bit integer values.
1295	///
1296	/// \headerfile <x86intrin.h>
1297	///
1298	/// This intrinsic is a utility function and does not correspond to a specific
1299	/// instruction.
1300	///
1301	/// \param __i1
1302	/// A 32-bit integer value used to initialize the upper 32 bits of the
1303	/// result.
1304	/// \param __i0
1305	/// A 32-bit integer value used to initialize the lower 32 bits of the
1306	/// result.
1307	/// \returns An initialized 64-bit integer vector.
1308	static __inline__ __m64 __DEFAULT_FN_ATTRS
1309	_mm_set_pi32(int __i1, int __i0)
1310	{
1311	return (__m64)__builtin_ia32_vec_init_v2si(__i0, __i1);
1312	}
1313
1314	/// Constructs a 64-bit integer vector initialized with the specified
1315	/// 16-bit integer values.
1316	///
1317	/// \headerfile <x86intrin.h>
1318	///
1319	/// This intrinsic is a utility function and does not correspond to a specific
1320	/// instruction.
1321	///
1322	/// \param __s3
1323	/// A 16-bit integer value used to initialize bits [63:48] of the result.
1324	/// \param __s2
1325	/// A 16-bit integer value used to initialize bits [47:32] of the result.
1326	/// \param __s1
1327	/// A 16-bit integer value used to initialize bits [31:16] of the result.
1328	/// \param __s0
1329	/// A 16-bit integer value used to initialize bits [15:0] of the result.
1330	/// \returns An initialized 64-bit integer vector.
1331	static __inline__ __m64 __DEFAULT_FN_ATTRS
1332	_mm_set_pi16(short __s3, short __s2, short __s1, short __s0)
1333	{
1334	return (__m64)__builtin_ia32_vec_init_v4hi(__s0, __s1, __s2, __s3);
1335	}
1336
1337	/// Constructs a 64-bit integer vector initialized with the specified
1338	/// 8-bit integer values.
1339	///
1340	/// \headerfile <x86intrin.h>
1341	///
1342	/// This intrinsic is a utility function and does not correspond to a specific
1343	/// instruction.
1344	///
1345	/// \param __b7
1346	/// An 8-bit integer value used to initialize bits [63:56] of the result.
1347	/// \param __b6
1348	/// An 8-bit integer value used to initialize bits [55:48] of the result.
1349	/// \param __b5
1350	/// An 8-bit integer value used to initialize bits [47:40] of the result.
1351	/// \param __b4
1352	/// An 8-bit integer value used to initialize bits [39:32] of the result.
1353	/// \param __b3
1354	/// An 8-bit integer value used to initialize bits [31:24] of the result.
1355	/// \param __b2
1356	/// An 8-bit integer value used to initialize bits [23:16] of the result.
1357	/// \param __b1
1358	/// An 8-bit integer value used to initialize bits [15:8] of the result.
1359	/// \param __b0
1360	/// An 8-bit integer value used to initialize bits [7:0] of the result.
1361	/// \returns An initialized 64-bit integer vector.
1362	static __inline__ __m64 __DEFAULT_FN_ATTRS
1363	_mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2,
1364	char __b1, char __b0)
1365	{
1366	return (__m64)__builtin_ia32_vec_init_v8qi(__b0, __b1, __b2, __b3,
1367	__b4, __b5, __b6, __b7);
1368	}
1369
1370	/// Constructs a 64-bit integer vector of [2 x i32], with each of the
1371	/// 32-bit integer vector elements set to the specified 32-bit integer
1372	/// value.
1373	///
1374	/// \headerfile <x86intrin.h>
1375	///
1376	/// This intrinsic is a utility function and does not correspond to a specific
1377	/// instruction.
1378	///
1379	/// \param __i
1380	/// A 32-bit integer value used to initialize each vector element of the
1381	/// result.
1382	/// \returns An initialized 64-bit integer vector of [2 x i32].
1383	static __inline__ __m64 __DEFAULT_FN_ATTRS
1384	_mm_set1_pi32(int __i)
1385	{
1386	return _mm_set_pi32(__i, __i);
1387	}
1388
1389	/// Constructs a 64-bit integer vector of [4 x i16], with each of the
1390	/// 16-bit integer vector elements set to the specified 16-bit integer
1391	/// value.
1392	///
1393	/// \headerfile <x86intrin.h>
1394	///
1395	/// This intrinsic is a utility function and does not correspond to a specific
1396	/// instruction.
1397	///
1398	/// \param __w
1399	/// A 16-bit integer value used to initialize each vector element of the
1400	/// result.
1401	/// \returns An initialized 64-bit integer vector of [4 x i16].
1402	static __inline__ __m64 __DEFAULT_FN_ATTRS
1403	_mm_set1_pi16(short __w)
1404	{
1405	return _mm_set_pi16(__w, __w, __w, __w);
1406	}
1407
1408	/// Constructs a 64-bit integer vector of [8 x i8], with each of the
1409	/// 8-bit integer vector elements set to the specified 8-bit integer value.
1410	///
1411	/// \headerfile <x86intrin.h>
1412	///
1413	/// This intrinsic is a utility function and does not correspond to a specific
1414	/// instruction.
1415	///
1416	/// \param __b
1417	/// An 8-bit integer value used to initialize each vector element of the
1418	/// result.
1419	/// \returns An initialized 64-bit integer vector of [8 x i8].
1420	static __inline__ __m64 __DEFAULT_FN_ATTRS
1421	_mm_set1_pi8(char __b)
1422	{
1423	return _mm_set_pi8(__b, __b, __b, __b, __b, __b, __b, __b);
1424	}
1425
1426	/// Constructs a 64-bit integer vector, initialized in reverse order with
1427	/// the specified 32-bit integer values.
1428	///
1429	/// \headerfile <x86intrin.h>
1430	///
1431	/// This intrinsic is a utility function and does not correspond to a specific
1432	/// instruction.
1433	///
1434	/// \param __i0
1435	/// A 32-bit integer value used to initialize the lower 32 bits of the
1436	/// result.
1437	/// \param __i1
1438	/// A 32-bit integer value used to initialize the upper 32 bits of the
1439	/// result.
1440	/// \returns An initialized 64-bit integer vector.
1441	static __inline__ __m64 __DEFAULT_FN_ATTRS
1442	_mm_setr_pi32(int __i0, int __i1)
1443	{
1444	return _mm_set_pi32(__i1, __i0);
1445	}
1446
1447	/// Constructs a 64-bit integer vector, initialized in reverse order with
1448	/// the specified 16-bit integer values.
1449	///
1450	/// \headerfile <x86intrin.h>
1451	///
1452	/// This intrinsic is a utility function and does not correspond to a specific
1453	/// instruction.
1454	///
1455	/// \param __w0
1456	/// A 16-bit integer value used to initialize bits [15:0] of the result.
1457	/// \param __w1
1458	/// A 16-bit integer value used to initialize bits [31:16] of the result.
1459	/// \param __w2
1460	/// A 16-bit integer value used to initialize bits [47:32] of the result.
1461	/// \param __w3
1462	/// A 16-bit integer value used to initialize bits [63:48] of the result.
1463	/// \returns An initialized 64-bit integer vector.
1464	static __inline__ __m64 __DEFAULT_FN_ATTRS
1465	_mm_setr_pi16(short __w0, short __w1, short __w2, short __w3)
1466	{
1467	return _mm_set_pi16(__w3, __w2, __w1, __w0);
1468	}
1469
1470	/// Constructs a 64-bit integer vector, initialized in reverse order with
1471	/// the specified 8-bit integer values.
1472	///
1473	/// \headerfile <x86intrin.h>
1474	///
1475	/// This intrinsic is a utility function and does not correspond to a specific
1476	/// instruction.
1477	///
1478	/// \param __b0
1479	/// An 8-bit integer value used to initialize bits [7:0] of the result.
1480	/// \param __b1
1481	/// An 8-bit integer value used to initialize bits [15:8] of the result.
1482	/// \param __b2
1483	/// An 8-bit integer value used to initialize bits [23:16] of the result.
1484	/// \param __b3
1485	/// An 8-bit integer value used to initialize bits [31:24] of the result.
1486	/// \param __b4
1487	/// An 8-bit integer value used to initialize bits [39:32] of the result.
1488	/// \param __b5
1489	/// An 8-bit integer value used to initialize bits [47:40] of the result.
1490	/// \param __b6
1491	/// An 8-bit integer value used to initialize bits [55:48] of the result.
1492	/// \param __b7
1493	/// An 8-bit integer value used to initialize bits [63:56] of the result.
1494	/// \returns An initialized 64-bit integer vector.
1495	static __inline__ __m64 __DEFAULT_FN_ATTRS
1496	_mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5,
1497	char __b6, char __b7)
1498	{
1499	return _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
1500	}
1501
1502	#undef __DEFAULT_FN_ATTRS
1503
/* Aliases for compatibility.
 *
 * Each _m_* name below is an object-like macro that expands to the _mm_*
 * intrinsic named on the same line, so both spellings call the same function.
 * For the intrinsics documented in this header, the alias suffix matches the
 * instruction named in that intrinsic's documentation (for example
 * _m_psllw -> _mm_sll_pi16 -> PSLLW). The shift aliases ending in "i" map to
 * the variants that take the count as a 32-bit integer (for example
 * _m_psllwi -> _mm_slli_pi16).
 */
#define _m_empty _mm_empty
#define _m_from_int _mm_cvtsi32_si64
#define _m_from_int64 _mm_cvtsi64_m64
#define _m_to_int _mm_cvtsi64_si32
#define _m_to_int64 _mm_cvtm64_si64
#define _m_packsswb _mm_packs_pi16
#define _m_packssdw _mm_packs_pi32
#define _m_packuswb _mm_packs_pu16
#define _m_punpckhbw _mm_unpackhi_pi8
#define _m_punpckhwd _mm_unpackhi_pi16
#define _m_punpckhdq _mm_unpackhi_pi32
#define _m_punpcklbw _mm_unpacklo_pi8
#define _m_punpcklwd _mm_unpacklo_pi16
#define _m_punpckldq _mm_unpacklo_pi32
#define _m_paddb _mm_add_pi8
#define _m_paddw _mm_add_pi16
#define _m_paddd _mm_add_pi32
#define _m_paddsb _mm_adds_pi8
#define _m_paddsw _mm_adds_pi16
#define _m_paddusb _mm_adds_pu8
#define _m_paddusw _mm_adds_pu16
#define _m_psubb _mm_sub_pi8
#define _m_psubw _mm_sub_pi16
#define _m_psubd _mm_sub_pi32
#define _m_psubsb _mm_subs_pi8
#define _m_psubsw _mm_subs_pi16
#define _m_psubusb _mm_subs_pu8
#define _m_psubusw _mm_subs_pu16
#define _m_pmaddwd _mm_madd_pi16
#define _m_pmulhw _mm_mulhi_pi16
#define _m_pmullw _mm_mullo_pi16
#define _m_psllw _mm_sll_pi16
#define _m_psllwi _mm_slli_pi16
#define _m_pslld _mm_sll_pi32
#define _m_pslldi _mm_slli_pi32
#define _m_psllq _mm_sll_si64
#define _m_psllqi _mm_slli_si64
#define _m_psraw _mm_sra_pi16
#define _m_psrawi _mm_srai_pi16
#define _m_psrad _mm_sra_pi32
#define _m_psradi _mm_srai_pi32
#define _m_psrlw _mm_srl_pi16
#define _m_psrlwi _mm_srli_pi16
#define _m_psrld _mm_srl_pi32
#define _m_psrldi _mm_srli_pi32
#define _m_psrlq _mm_srl_si64
#define _m_psrlqi _mm_srli_si64
#define _m_pand _mm_and_si64
#define _m_pandn _mm_andnot_si64
#define _m_por _mm_or_si64
#define _m_pxor _mm_xor_si64
#define _m_pcmpeqb _mm_cmpeq_pi8
#define _m_pcmpeqw _mm_cmpeq_pi16
#define _m_pcmpeqd _mm_cmpeq_pi32
#define _m_pcmpgtb _mm_cmpgt_pi8
#define _m_pcmpgtw _mm_cmpgt_pi16
#define _m_pcmpgtd _mm_cmpgt_pi32
1562
1563	#endif /* __MMINTRIN_H */
1564
1565

Warning: This file is not a C or C++ file. It does not have highlighting.

source code of clang/lib/Headers/mmintrin.h