tmmintrin.h source code [clang/lib/Headers/tmmintrin.h]

Warning: This file is not a C or C++ file. It does not have highlighting.

1	/*===---- tmmintrin.h - SSSE3 intrinsics -----------------------------------===
2	*
3	* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	* See https://llvm.org/LICENSE.txt for license information.
5	* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	*
7	*===-----------------------------------------------------------------------===
8	*/
9
10	#ifndef __TMMINTRIN_H
11	#define __TMMINTRIN_H
12
13	#if !defined(__i386__) && !defined(__x86_64__)
14	#error "This header is only meant to be used on x86 and x64 architecture"
15	#endif
16
17	#include <pmmintrin.h>
18
/* Default attribute sets applied to the functions defined in this file.
 * Both carry always_inline, nodebug, the "ssse3,no-evex512" target features
 * and a minimum vector width of 64; the _MMX variant adds "mmx" to the
 * target feature list. */
#define __DEFAULT_FN_ATTRS                                                     \
  __attribute__((__always_inline__,                                            \
                 __nodebug__,                                                  \
                 __target__("ssse3,no-evex512"),                               \
                 __min_vector_width__(64)))
#define __DEFAULT_FN_ATTRS_MMX                                                 \
  __attribute__((__always_inline__,                                            \
                 __nodebug__,                                                  \
                 __target__("mmx,ssse3,no-evex512"),                           \
                 __min_vector_width__(64)))
27
28	/// Computes the absolute value of each of the packed 8-bit signed
29	/// integers in the source operand and stores the 8-bit unsigned integer
30	/// results in the destination.
31	///
32	/// \headerfile <x86intrin.h>
33	///
34	/// This intrinsic corresponds to the \c PABSB instruction.
35	///
36	/// \param __a
37	/// A 64-bit vector of [8 x i8].
38	/// \returns A 64-bit integer vector containing the absolute values of the
39	/// elements in the operand.
40	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
41	_mm_abs_pi8(__m64 __a)
42	{
43	return (__m64)__builtin_ia32_pabsb((__v8qi)__a);
44	}
45
46	/// Computes the absolute value of each of the packed 8-bit signed
47	/// integers in the source operand and stores the 8-bit unsigned integer
48	/// results in the destination.
49	///
50	/// \headerfile <x86intrin.h>
51	///
52	/// This intrinsic corresponds to the \c VPABSB instruction.
53	///
54	/// \param __a
55	/// A 128-bit vector of [16 x i8].
56	/// \returns A 128-bit integer vector containing the absolute values of the
57	/// elements in the operand.
58	static __inline__ __m128i __DEFAULT_FN_ATTRS
59	_mm_abs_epi8(__m128i __a)
60	{
61	return (__m128i)__builtin_elementwise_abs((__v16qs)__a);
62	}
63
64	/// Computes the absolute value of each of the packed 16-bit signed
65	/// integers in the source operand and stores the 16-bit unsigned integer
66	/// results in the destination.
67	///
68	/// \headerfile <x86intrin.h>
69	///
70	/// This intrinsic corresponds to the \c PABSW instruction.
71	///
72	/// \param __a
73	/// A 64-bit vector of [4 x i16].
74	/// \returns A 64-bit integer vector containing the absolute values of the
75	/// elements in the operand.
76	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
77	_mm_abs_pi16(__m64 __a)
78	{
79	return (__m64)__builtin_ia32_pabsw((__v4hi)__a);
80	}
81
82	/// Computes the absolute value of each of the packed 16-bit signed
83	/// integers in the source operand and stores the 16-bit unsigned integer
84	/// results in the destination.
85	///
86	/// \headerfile <x86intrin.h>
87	///
88	/// This intrinsic corresponds to the \c VPABSW instruction.
89	///
90	/// \param __a
91	/// A 128-bit vector of [8 x i16].
92	/// \returns A 128-bit integer vector containing the absolute values of the
93	/// elements in the operand.
94	static __inline__ __m128i __DEFAULT_FN_ATTRS
95	_mm_abs_epi16(__m128i __a)
96	{
97	return (__m128i)__builtin_elementwise_abs((__v8hi)__a);
98	}
99
100	/// Computes the absolute value of each of the packed 32-bit signed
101	/// integers in the source operand and stores the 32-bit unsigned integer
102	/// results in the destination.
103	///
104	/// \headerfile <x86intrin.h>
105	///
106	/// This intrinsic corresponds to the \c PABSD instruction.
107	///
108	/// \param __a
109	/// A 64-bit vector of [2 x i32].
110	/// \returns A 64-bit integer vector containing the absolute values of the
111	/// elements in the operand.
112	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
113	_mm_abs_pi32(__m64 __a)
114	{
115	return (__m64)__builtin_ia32_pabsd((__v2si)__a);
116	}
117
118	/// Computes the absolute value of each of the packed 32-bit signed
119	/// integers in the source operand and stores the 32-bit unsigned integer
120	/// results in the destination.
121	///
122	/// \headerfile <x86intrin.h>
123	///
124	/// This intrinsic corresponds to the \c VPABSD instruction.
125	///
126	/// \param __a
127	/// A 128-bit vector of [4 x i32].
128	/// \returns A 128-bit integer vector containing the absolute values of the
129	/// elements in the operand.
130	static __inline__ __m128i __DEFAULT_FN_ATTRS
131	_mm_abs_epi32(__m128i __a)
132	{
133	return (__m128i)__builtin_elementwise_abs((__v4si)__a);
134	}
135
136	/// Concatenates the two 128-bit integer vector operands, and
137	/// right-shifts the result by the number of bytes specified in the immediate
138	/// operand.
139	///
140	/// \headerfile <x86intrin.h>
141	///
142	/// \code
143	/// __m128i _mm_alignr_epi8(__m128i a, __m128i b, const int n);
144	/// \endcode
145	///
146	/// This intrinsic corresponds to the \c PALIGNR instruction.
147	///
148	/// \param a
149	/// A 128-bit vector of [16 x i8] containing one of the source operands.
150	/// \param b
151	/// A 128-bit vector of [16 x i8] containing one of the source operands.
152	/// \param n
153	/// An immediate operand specifying how many bytes to right-shift the result.
154	/// \returns A 128-bit integer vector containing the concatenated right-shifted
155	/// value.
/* Kept as a macro rather than an inline function, presumably so that n
   reaches the builtin as a compile-time immediate. */
#define _mm_alignr_epi8(a, b, n)                                               \
  ((__m128i)__builtin_ia32_palignr128(                                         \
      (__v16qi)(__m128i)(a),                                                   \
      (__v16qi)(__m128i)(b),                                                   \
      (n)))
159
160	/// Concatenates the two 64-bit integer vector operands, and right-shifts
161	/// the result by the number of bytes specified in the immediate operand.
162	///
163	/// \headerfile <x86intrin.h>
164	///
165	/// \code
166	/// __m64 _mm_alignr_pi8(__m64 a, __m64 b, const int n);
167	/// \endcode
168	///
169	/// This intrinsic corresponds to the \c PALIGNR instruction.
170	///
171	/// \param a
172	/// A 64-bit vector of [8 x i8] containing one of the source operands.
173	/// \param b
174	/// A 64-bit vector of [8 x i8] containing one of the source operands.
175	/// \param n
176	/// An immediate operand specifying how many bytes to right-shift the result.
177	/// \returns A 64-bit integer vector containing the concatenated right-shifted
178	/// value.
/* Kept as a macro rather than an inline function, presumably so that n
   reaches the builtin as a compile-time immediate. */
#define _mm_alignr_pi8(a, b, n)                                                \
  ((__m64)__builtin_ia32_palignr(                                              \
      (__v8qi)(__m64)(a),                                                      \
      (__v8qi)(__m64)(b),                                                      \
      (n)))
181
182	/// Horizontally adds the adjacent pairs of values contained in 2 packed
183	/// 128-bit vectors of [8 x i16].
184	///
185	/// \headerfile <x86intrin.h>
186	///
187	/// This intrinsic corresponds to the \c VPHADDW instruction.
188	///
189	/// \param __a
190	/// A 128-bit vector of [8 x i16] containing one of the source operands. The
191	/// horizontal sums of the values are stored in the lower bits of the
192	/// destination.
193	/// \param __b
194	/// A 128-bit vector of [8 x i16] containing one of the source operands. The
195	/// horizontal sums of the values are stored in the upper bits of the
196	/// destination.
197	/// \returns A 128-bit vector of [8 x i16] containing the horizontal sums of
198	/// both operands.
199	static __inline__ __m128i __DEFAULT_FN_ATTRS
200	_mm_hadd_epi16(__m128i __a, __m128i __b)
201	{
202	return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b);
203	}
204
205	/// Horizontally adds the adjacent pairs of values contained in 2 packed
206	/// 128-bit vectors of [4 x i32].
207	///
208	/// \headerfile <x86intrin.h>
209	///
210	/// This intrinsic corresponds to the \c VPHADDD instruction.
211	///
212	/// \param __a
213	/// A 128-bit vector of [4 x i32] containing one of the source operands. The
214	/// horizontal sums of the values are stored in the lower bits of the
215	/// destination.
216	/// \param __b
217	/// A 128-bit vector of [4 x i32] containing one of the source operands. The
218	/// horizontal sums of the values are stored in the upper bits of the
219	/// destination.
220	/// \returns A 128-bit vector of [4 x i32] containing the horizontal sums of
221	/// both operands.
222	static __inline__ __m128i __DEFAULT_FN_ATTRS
223	_mm_hadd_epi32(__m128i __a, __m128i __b)
224	{
225	return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b);
226	}
227
228	/// Horizontally adds the adjacent pairs of values contained in 2 packed
229	/// 64-bit vectors of [4 x i16].
230	///
231	/// \headerfile <x86intrin.h>
232	///
233	/// This intrinsic corresponds to the \c PHADDW instruction.
234	///
235	/// \param __a
236	/// A 64-bit vector of [4 x i16] containing one of the source operands. The
237	/// horizontal sums of the values are stored in the lower bits of the
238	/// destination.
239	/// \param __b
240	/// A 64-bit vector of [4 x i16] containing one of the source operands. The
241	/// horizontal sums of the values are stored in the upper bits of the
242	/// destination.
243	/// \returns A 64-bit vector of [4 x i16] containing the horizontal sums of both
244	/// operands.
245	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
246	_mm_hadd_pi16(__m64 __a, __m64 __b)
247	{
248	return (__m64)__builtin_ia32_phaddw((__v4hi)__a, (__v4hi)__b);
249	}
250
251	/// Horizontally adds the adjacent pairs of values contained in 2 packed
252	/// 64-bit vectors of [2 x i32].
253	///
254	/// \headerfile <x86intrin.h>
255	///
256	/// This intrinsic corresponds to the \c PHADDD instruction.
257	///
258	/// \param __a
259	/// A 64-bit vector of [2 x i32] containing one of the source operands. The
260	/// horizontal sums of the values are stored in the lower bits of the
261	/// destination.
262	/// \param __b
263	/// A 64-bit vector of [2 x i32] containing one of the source operands. The
264	/// horizontal sums of the values are stored in the upper bits of the
265	/// destination.
266	/// \returns A 64-bit vector of [2 x i32] containing the horizontal sums of both
267	/// operands.
268	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
269	_mm_hadd_pi32(__m64 __a, __m64 __b)
270	{
271	return (__m64)__builtin_ia32_phaddd((__v2si)__a, (__v2si)__b);
272	}
273
274	/// Horizontally adds the adjacent pairs of values contained in 2 packed
275	/// 128-bit vectors of [8 x i16]. Positive sums greater than 0x7FFF are
/// saturated to 0x7FFF. Negative sums less than 0x8000 (-32768 as a signed
/// 16-bit value) are saturated to 0x8000.
278	///
279	/// \headerfile <x86intrin.h>
280	///
281	/// This intrinsic corresponds to the \c VPHADDSW instruction.
282	///
283	/// \param __a
284	/// A 128-bit vector of [8 x i16] containing one of the source operands. The
285	/// horizontal sums of the values are stored in the lower bits of the
286	/// destination.
287	/// \param __b
288	/// A 128-bit vector of [8 x i16] containing one of the source operands. The
289	/// horizontal sums of the values are stored in the upper bits of the
290	/// destination.
291	/// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated
292	/// sums of both operands.
293	static __inline__ __m128i __DEFAULT_FN_ATTRS
294	_mm_hadds_epi16(__m128i __a, __m128i __b)
295	{
296	return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b);
297	}
298
299	/// Horizontally adds the adjacent pairs of values contained in 2 packed
300	/// 64-bit vectors of [4 x i16]. Positive sums greater than 0x7FFF are
/// saturated to 0x7FFF. Negative sums less than 0x8000 (-32768 as a signed
/// 16-bit value) are saturated to 0x8000.
303	///
304	/// \headerfile <x86intrin.h>
305	///
306	/// This intrinsic corresponds to the \c PHADDSW instruction.
307	///
308	/// \param __a
309	/// A 64-bit vector of [4 x i16] containing one of the source operands. The
310	/// horizontal sums of the values are stored in the lower bits of the
311	/// destination.
312	/// \param __b
313	/// A 64-bit vector of [4 x i16] containing one of the source operands. The
314	/// horizontal sums of the values are stored in the upper bits of the
315	/// destination.
316	/// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
317	/// sums of both operands.
318	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
319	_mm_hadds_pi16(__m64 __a, __m64 __b)
320	{
321	return (__m64)__builtin_ia32_phaddsw((__v4hi)__a, (__v4hi)__b);
322	}
323
324	/// Horizontally subtracts the adjacent pairs of values contained in 2
325	/// packed 128-bit vectors of [8 x i16].
326	///
327	/// \headerfile <x86intrin.h>
328	///
329	/// This intrinsic corresponds to the \c VPHSUBW instruction.
330	///
331	/// \param __a
332	/// A 128-bit vector of [8 x i16] containing one of the source operands. The
333	/// horizontal differences between the values are stored in the lower bits of
334	/// the destination.
335	/// \param __b
336	/// A 128-bit vector of [8 x i16] containing one of the source operands. The
337	/// horizontal differences between the values are stored in the upper bits of
338	/// the destination.
339	/// \returns A 128-bit vector of [8 x i16] containing the horizontal differences
340	/// of both operands.
341	static __inline__ __m128i __DEFAULT_FN_ATTRS
342	_mm_hsub_epi16(__m128i __a, __m128i __b)
343	{
344	return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b);
345	}
346
347	/// Horizontally subtracts the adjacent pairs of values contained in 2
348	/// packed 128-bit vectors of [4 x i32].
349	///
350	/// \headerfile <x86intrin.h>
351	///
352	/// This intrinsic corresponds to the \c VPHSUBD instruction.
353	///
354	/// \param __a
355	/// A 128-bit vector of [4 x i32] containing one of the source operands. The
356	/// horizontal differences between the values are stored in the lower bits of
357	/// the destination.
358	/// \param __b
359	/// A 128-bit vector of [4 x i32] containing one of the source operands. The
360	/// horizontal differences between the values are stored in the upper bits of
361	/// the destination.
362	/// \returns A 128-bit vector of [4 x i32] containing the horizontal differences
363	/// of both operands.
364	static __inline__ __m128i __DEFAULT_FN_ATTRS
365	_mm_hsub_epi32(__m128i __a, __m128i __b)
366	{
367	return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b);
368	}
369
370	/// Horizontally subtracts the adjacent pairs of values contained in 2
371	/// packed 64-bit vectors of [4 x i16].
372	///
373	/// \headerfile <x86intrin.h>
374	///
375	/// This intrinsic corresponds to the \c PHSUBW instruction.
376	///
377	/// \param __a
378	/// A 64-bit vector of [4 x i16] containing one of the source operands. The
379	/// horizontal differences between the values are stored in the lower bits of
380	/// the destination.
381	/// \param __b
382	/// A 64-bit vector of [4 x i16] containing one of the source operands. The
383	/// horizontal differences between the values are stored in the upper bits of
384	/// the destination.
385	/// \returns A 64-bit vector of [4 x i16] containing the horizontal differences
386	/// of both operands.
387	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
388	_mm_hsub_pi16(__m64 __a, __m64 __b)
389	{
390	return (__m64)__builtin_ia32_phsubw((__v4hi)__a, (__v4hi)__b);
391	}
392
393	/// Horizontally subtracts the adjacent pairs of values contained in 2
394	/// packed 64-bit vectors of [2 x i32].
395	///
396	/// \headerfile <x86intrin.h>
397	///
398	/// This intrinsic corresponds to the \c PHSUBD instruction.
399	///
400	/// \param __a
401	/// A 64-bit vector of [2 x i32] containing one of the source operands. The
402	/// horizontal differences between the values are stored in the lower bits of
403	/// the destination.
404	/// \param __b
405	/// A 64-bit vector of [2 x i32] containing one of the source operands. The
406	/// horizontal differences between the values are stored in the upper bits of
407	/// the destination.
408	/// \returns A 64-bit vector of [2 x i32] containing the horizontal differences
409	/// of both operands.
410	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
411	_mm_hsub_pi32(__m64 __a, __m64 __b)
412	{
413	return (__m64)__builtin_ia32_phsubd((__v2si)__a, (__v2si)__b);
414	}
415
416	/// Horizontally subtracts the adjacent pairs of values contained in 2
417	/// packed 128-bit vectors of [8 x i16]. Positive differences greater than
/// 0x7FFF are saturated to 0x7FFF. Negative differences less than 0x8000
/// (-32768 as a signed 16-bit value) are saturated to 0x8000.
420	///
421	/// \headerfile <x86intrin.h>
422	///
423	/// This intrinsic corresponds to the \c VPHSUBSW instruction.
424	///
425	/// \param __a
426	/// A 128-bit vector of [8 x i16] containing one of the source operands. The
427	/// horizontal differences between the values are stored in the lower bits of
428	/// the destination.
429	/// \param __b
430	/// A 128-bit vector of [8 x i16] containing one of the source operands. The
431	/// horizontal differences between the values are stored in the upper bits of
432	/// the destination.
433	/// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated
434	/// differences of both operands.
435	static __inline__ __m128i __DEFAULT_FN_ATTRS
436	_mm_hsubs_epi16(__m128i __a, __m128i __b)
437	{
438	return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b);
439	}
440
441	/// Horizontally subtracts the adjacent pairs of values contained in 2
442	/// packed 64-bit vectors of [4 x i16]. Positive differences greater than
/// 0x7FFF are saturated to 0x7FFF. Negative differences less than 0x8000
/// (-32768 as a signed 16-bit value) are saturated to 0x8000.
445	///
446	/// \headerfile <x86intrin.h>
447	///
448	/// This intrinsic corresponds to the \c PHSUBSW instruction.
449	///
450	/// \param __a
451	/// A 64-bit vector of [4 x i16] containing one of the source operands. The
452	/// horizontal differences between the values are stored in the lower bits of
453	/// the destination.
454	/// \param __b
455	/// A 64-bit vector of [4 x i16] containing one of the source operands. The
456	/// horizontal differences between the values are stored in the upper bits of
457	/// the destination.
458	/// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
459	/// differences of both operands.
460	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
461	_mm_hsubs_pi16(__m64 __a, __m64 __b)
462	{
463	return (__m64)__builtin_ia32_phsubsw((__v4hi)__a, (__v4hi)__b);
464	}
465
466	/// Multiplies corresponding pairs of packed 8-bit unsigned integer
467	/// values contained in the first source operand and packed 8-bit signed
468	/// integer values contained in the second source operand, adds pairs of
469	/// contiguous products with signed saturation, and writes the 16-bit sums to
470	/// the corresponding bits in the destination.
471	///
472	/// For example, bits [7:0] of both operands are multiplied, bits [15:8] of
473	/// both operands are multiplied, and the sum of both results is written to
474	/// bits [15:0] of the destination.
475	///
476	/// \headerfile <x86intrin.h>
477	///
478	/// This intrinsic corresponds to the \c VPMADDUBSW instruction.
479	///
480	/// \param __a
481	/// A 128-bit integer vector containing the first source operand.
482	/// \param __b
483	/// A 128-bit integer vector containing the second source operand.
484	/// \returns A 128-bit integer vector containing the sums of products of both
485	/// operands: \n
486	/// \a R0 := (\a __a0 * \a __b0) + (\a __a1 * \a __b1) \n
487	/// \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n
488	/// \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n
489	/// \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7) \n
490	/// \a R4 := (\a __a8 * \a __b8) + (\a __a9 * \a __b9) \n
491	/// \a R5 := (\a __a10 * \a __b10) + (\a __a11 * \a __b11) \n
492	/// \a R6 := (\a __a12 * \a __b12) + (\a __a13 * \a __b13) \n
493	/// \a R7 := (\a __a14 * \a __b14) + (\a __a15 * \a __b15)
494	static __inline__ __m128i __DEFAULT_FN_ATTRS
495	_mm_maddubs_epi16(__m128i __a, __m128i __b)
496	{
497	return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b);
498	}
499
500	/// Multiplies corresponding pairs of packed 8-bit unsigned integer
501	/// values contained in the first source operand and packed 8-bit signed
502	/// integer values contained in the second source operand, adds pairs of
503	/// contiguous products with signed saturation, and writes the 16-bit sums to
504	/// the corresponding bits in the destination.
505	///
506	/// For example, bits [7:0] of both operands are multiplied, bits [15:8] of
507	/// both operands are multiplied, and the sum of both results is written to
508	/// bits [15:0] of the destination.
509	///
510	/// \headerfile <x86intrin.h>
511	///
512	/// This intrinsic corresponds to the \c PMADDUBSW instruction.
513	///
514	/// \param __a
515	/// A 64-bit integer vector containing the first source operand.
516	/// \param __b
517	/// A 64-bit integer vector containing the second source operand.
518	/// \returns A 64-bit integer vector containing the sums of products of both
519	/// operands: \n
520	/// \a R0 := (\a __a0 * \a __b0) + (\a __a1 * \a __b1) \n
521	/// \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n
522	/// \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n
523	/// \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7)
524	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
525	_mm_maddubs_pi16(__m64 __a, __m64 __b)
526	{
527	return (__m64)__builtin_ia32_pmaddubsw((__v8qi)__a, (__v8qi)__b);
528	}
529
530	/// Multiplies packed 16-bit signed integer values, truncates the 32-bit
531	/// products to the 18 most significant bits by right-shifting, rounds the
532	/// truncated value by adding 1, and writes bits [16:1] to the destination.
533	///
534	/// \headerfile <x86intrin.h>
535	///
536	/// This intrinsic corresponds to the \c VPMULHRSW instruction.
537	///
538	/// \param __a
539	/// A 128-bit vector of [8 x i16] containing one of the source operands.
540	/// \param __b
541	/// A 128-bit vector of [8 x i16] containing one of the source operands.
542	/// \returns A 128-bit vector of [8 x i16] containing the rounded and scaled
543	/// products of both operands.
544	static __inline__ __m128i __DEFAULT_FN_ATTRS
545	_mm_mulhrs_epi16(__m128i __a, __m128i __b)
546	{
547	return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b);
548	}
549
550	/// Multiplies packed 16-bit signed integer values, truncates the 32-bit
551	/// products to the 18 most significant bits by right-shifting, rounds the
552	/// truncated value by adding 1, and writes bits [16:1] to the destination.
553	///
554	/// \headerfile <x86intrin.h>
555	///
556	/// This intrinsic corresponds to the \c PMULHRSW instruction.
557	///
558	/// \param __a
559	/// A 64-bit vector of [4 x i16] containing one of the source operands.
560	/// \param __b
561	/// A 64-bit vector of [4 x i16] containing one of the source operands.
562	/// \returns A 64-bit vector of [4 x i16] containing the rounded and scaled
563	/// products of both operands.
564	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
565	_mm_mulhrs_pi16(__m64 __a, __m64 __b)
566	{
567	return (__m64)__builtin_ia32_pmulhrsw((__v4hi)__a, (__v4hi)__b);
568	}
569
570	/// Copies the 8-bit integers from a 128-bit integer vector to the
571	/// destination or clears 8-bit values in the destination, as specified by
572	/// the second source operand.
573	///
574	/// \headerfile <x86intrin.h>
575	///
576	/// This intrinsic corresponds to the \c VPSHUFB instruction.
577	///
578	/// \param __a
579	/// A 128-bit integer vector containing the values to be copied.
580	/// \param __b
581	/// A 128-bit integer vector containing control bytes corresponding to
582	/// positions in the destination:
583	/// Bit 7: \n
584	/// 1: Clear the corresponding byte in the destination. \n
585	/// 0: Copy the selected source byte to the corresponding byte in the
586	/// destination. \n
587	/// Bits [6:4] Reserved. \n
588	/// Bits [3:0] select the source byte to be copied.
589	/// \returns A 128-bit integer vector containing the copied or cleared values.
590	static __inline__ __m128i __DEFAULT_FN_ATTRS
591	_mm_shuffle_epi8(__m128i __a, __m128i __b)
592	{
593	return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b);
594	}
595
596	/// Copies the 8-bit integers from a 64-bit integer vector to the
597	/// destination or clears 8-bit values in the destination, as specified by
598	/// the second source operand.
599	///
600	/// \headerfile <x86intrin.h>
601	///
602	/// This intrinsic corresponds to the \c PSHUFB instruction.
603	///
604	/// \param __a
605	/// A 64-bit integer vector containing the values to be copied.
606	/// \param __b
607	/// A 64-bit integer vector containing control bytes corresponding to
608	/// positions in the destination:
609	/// Bit 7: \n
610	/// 1: Clear the corresponding byte in the destination. \n
611	/// 0: Copy the selected source byte to the corresponding byte in the
612	/// destination. \n
/// Bits [2:0] select the source byte to be copied.
614	/// \returns A 64-bit integer vector containing the copied or cleared values.
615	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
616	_mm_shuffle_pi8(__m64 __a, __m64 __b)
617	{
618	return (__m64)__builtin_ia32_pshufb((__v8qi)__a, (__v8qi)__b);
619	}
620
621	/// For each 8-bit integer in the first source operand, perform one of
622	/// the following actions as specified by the second source operand.
623	///
624	/// If the byte in the second source is negative, calculate the two's
625	/// complement of the corresponding byte in the first source, and write that
626	/// value to the destination. If the byte in the second source is positive,
627	/// copy the corresponding byte from the first source to the destination. If
628	/// the byte in the second source is zero, clear the corresponding byte in
629	/// the destination.
630	///
631	/// \headerfile <x86intrin.h>
632	///
633	/// This intrinsic corresponds to the \c VPSIGNB instruction.
634	///
635	/// \param __a
636	/// A 128-bit integer vector containing the values to be copied.
637	/// \param __b
638	/// A 128-bit integer vector containing control bytes corresponding to
639	/// positions in the destination.
640	/// \returns A 128-bit integer vector containing the resultant values.
641	static __inline__ __m128i __DEFAULT_FN_ATTRS
642	_mm_sign_epi8(__m128i __a, __m128i __b)
643	{
644	return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b);
645	}
646
647	/// For each 16-bit integer in the first source operand, perform one of
648	/// the following actions as specified by the second source operand.
649	///
650	/// If the word in the second source is negative, calculate the two's
651	/// complement of the corresponding word in the first source, and write that
652	/// value to the destination. If the word in the second source is positive,
653	/// copy the corresponding word from the first source to the destination. If
654	/// the word in the second source is zero, clear the corresponding word in
655	/// the destination.
656	///
657	/// \headerfile <x86intrin.h>
658	///
659	/// This intrinsic corresponds to the \c VPSIGNW instruction.
660	///
661	/// \param __a
662	/// A 128-bit integer vector containing the values to be copied.
663	/// \param __b
664	/// A 128-bit integer vector containing control words corresponding to
665	/// positions in the destination.
666	/// \returns A 128-bit integer vector containing the resultant values.
667	static __inline__ __m128i __DEFAULT_FN_ATTRS
668	_mm_sign_epi16(__m128i __a, __m128i __b)
669	{
670	return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b);
671	}
672
673	/// For each 32-bit integer in the first source operand, perform one of
674	/// the following actions as specified by the second source operand.
675	///
/// If the doubleword in the second source is negative, calculate the two's
/// complement of the corresponding doubleword in the first source, and
/// write that value to the destination. If the doubleword in the second
/// source is positive, copy the corresponding doubleword from the first
/// source to the destination. If the doubleword in the second source is
/// zero, clear the corresponding doubleword in the destination.
682	///
683	/// \headerfile <x86intrin.h>
684	///
685	/// This intrinsic corresponds to the \c VPSIGND instruction.
686	///
687	/// \param __a
688	/// A 128-bit integer vector containing the values to be copied.
689	/// \param __b
690	/// A 128-bit integer vector containing control doublewords corresponding to
691	/// positions in the destination.
692	/// \returns A 128-bit integer vector containing the resultant values.
693	static __inline__ __m128i __DEFAULT_FN_ATTRS
694	_mm_sign_epi32(__m128i __a, __m128i __b)
695	{
696	return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b);
697	}
698
699	/// For each 8-bit integer in the first source operand, perform one of
700	/// the following actions as specified by the second source operand.
701	///
702	/// If the byte in the second source is negative, calculate the two's
703	/// complement of the corresponding byte in the first source, and write that
704	/// value to the destination. If the byte in the second source is positive,
705	/// copy the corresponding byte from the first source to the destination. If
706	/// the byte in the second source is zero, clear the corresponding byte in
707	/// the destination.
708	///
709	/// \headerfile <x86intrin.h>
710	///
711	/// This intrinsic corresponds to the \c PSIGNB instruction.
712	///
713	/// \param __a
714	/// A 64-bit integer vector containing the values to be copied.
715	/// \param __b
716	/// A 64-bit integer vector containing control bytes corresponding to
717	/// positions in the destination.
718	/// \returns A 64-bit integer vector containing the resultant values.
719	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
720	_mm_sign_pi8(__m64 __a, __m64 __b)
721	{
722	return (__m64)__builtin_ia32_psignb((__v8qi)__a, (__v8qi)__b);
723	}
724
725	/// For each 16-bit integer in the first source operand, perform one of
726	/// the following actions as specified by the second source operand.
727	///
728	/// If the word in the second source is negative, calculate the two's
729	/// complement of the corresponding word in the first source, and write that
730	/// value to the destination. If the word in the second source is positive,
731	/// copy the corresponding word from the first source to the destination. If
732	/// the word in the second source is zero, clear the corresponding word in
733	/// the destination.
734	///
735	/// \headerfile <x86intrin.h>
736	///
737	/// This intrinsic corresponds to the \c PSIGNW instruction.
738	///
739	/// \param __a
740	/// A 64-bit integer vector containing the values to be copied.
741	/// \param __b
742	/// A 64-bit integer vector containing control words corresponding to
743	/// positions in the destination.
744	/// \returns A 64-bit integer vector containing the resultant values.
745	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
746	_mm_sign_pi16(__m64 __a, __m64 __b)
747	{
748	return (__m64)__builtin_ia32_psignw((__v4hi)__a, (__v4hi)__b);
749	}
750
751	/// For each 32-bit integer in the first source operand, perform one of
752	/// the following actions as specified by the second source operand.
753	///
754	/// If the doubleword in the second source is negative, calculate the two's
755	/// complement of the corresponding doubleword in the first source, and
756	/// write that value to the destination. If the doubleword in the second
757	/// source is positive, copy the corresponding doubleword from the first
758	/// source to the destination. If the doubleword in the second source is
759	/// zero, clear the corresponding doubleword in the destination.
760	///
761	/// \headerfile <x86intrin.h>
762	///
763	/// This intrinsic corresponds to the \c PSIGND instruction.
764	///
765	/// \param __a
766	/// A 64-bit integer vector containing the values to be copied.
767	/// \param __b
768	/// A 64-bit integer vector containing two control doublewords corresponding
769	/// to positions in the destination.
770	/// \returns A 64-bit integer vector containing the resultant values.
771	static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
772	_mm_sign_pi32(__m64 __a, __m64 __b)
773	{
774	return (__m64)__builtin_ia32_psignd((__v2si)__a, (__v2si)__b);
775	}
776
777	#undef __DEFAULT_FN_ATTRS
778	#undef __DEFAULT_FN_ATTRS_MMX
779
780	#endif /* __TMMINTRIN_H */
781

Warning: This file is not a C or C++ file. It does not have highlighting.

source code of clang/lib/Headers/tmmintrin.h