Warning: This file is not a C or C++ file. It does not have highlighting.

1/*===---- tmmintrin.h - SSSE3 intrinsics -----------------------------------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9
10#ifndef __TMMINTRIN_H
11#define __TMMINTRIN_H
12
13#if !defined(__i386__) && !defined(__x86_64__)
14#error "This header is only meant to be used on x86 and x64 architecture"
15#endif
16
17#include <pmmintrin.h>
18
/* Attribute sets shared by the intrinsics defined in this header.
 *
 *   __DEFAULT_FN_ATTRS      applied to the intrinsics operating on __m128i.
 *   __DEFAULT_FN_ATTRS_MMX  applied to the intrinsics operating on __m64; it
 *                           additionally lists "mmx" in the target string.
 */
#define __DEFAULT_FN_ATTRS                                                     \
  __attribute__((__always_inline__,                                            \
                 __nodebug__,                                                  \
                 __target__("ssse3,no-evex512"),                               \
                 __min_vector_width__(64)))
#define __DEFAULT_FN_ATTRS_MMX                                                 \
  __attribute__((__always_inline__,                                            \
                 __nodebug__,                                                  \
                 __target__("mmx,ssse3,no-evex512"),                           \
                 __min_vector_width__(64)))
27
28/// Computes the absolute value of each of the packed 8-bit signed
29/// integers in the source operand and stores the 8-bit unsigned integer
30/// results in the destination.
31///
32/// \headerfile <x86intrin.h>
33///
34/// This intrinsic corresponds to the \c PABSB instruction.
35///
36/// \param __a
37/// A 64-bit vector of [8 x i8].
38/// \returns A 64-bit integer vector containing the absolute values of the
39/// elements in the operand.
40static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
41_mm_abs_pi8(__m64 __a)
42{
43 return (__m64)__builtin_ia32_pabsb((__v8qi)__a);
44}
45
46/// Computes the absolute value of each of the packed 8-bit signed
47/// integers in the source operand and stores the 8-bit unsigned integer
48/// results in the destination.
49///
50/// \headerfile <x86intrin.h>
51///
52/// This intrinsic corresponds to the \c VPABSB instruction.
53///
54/// \param __a
55/// A 128-bit vector of [16 x i8].
56/// \returns A 128-bit integer vector containing the absolute values of the
57/// elements in the operand.
58static __inline__ __m128i __DEFAULT_FN_ATTRS
59_mm_abs_epi8(__m128i __a)
60{
61 return (__m128i)__builtin_elementwise_abs((__v16qs)__a);
62}
63
64/// Computes the absolute value of each of the packed 16-bit signed
65/// integers in the source operand and stores the 16-bit unsigned integer
66/// results in the destination.
67///
68/// \headerfile <x86intrin.h>
69///
70/// This intrinsic corresponds to the \c PABSW instruction.
71///
72/// \param __a
73/// A 64-bit vector of [4 x i16].
74/// \returns A 64-bit integer vector containing the absolute values of the
75/// elements in the operand.
76static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
77_mm_abs_pi16(__m64 __a)
78{
79 return (__m64)__builtin_ia32_pabsw((__v4hi)__a);
80}
81
82/// Computes the absolute value of each of the packed 16-bit signed
83/// integers in the source operand and stores the 16-bit unsigned integer
84/// results in the destination.
85///
86/// \headerfile <x86intrin.h>
87///
88/// This intrinsic corresponds to the \c VPABSW instruction.
89///
90/// \param __a
91/// A 128-bit vector of [8 x i16].
92/// \returns A 128-bit integer vector containing the absolute values of the
93/// elements in the operand.
94static __inline__ __m128i __DEFAULT_FN_ATTRS
95_mm_abs_epi16(__m128i __a)
96{
97 return (__m128i)__builtin_elementwise_abs((__v8hi)__a);
98}
99
100/// Computes the absolute value of each of the packed 32-bit signed
101/// integers in the source operand and stores the 32-bit unsigned integer
102/// results in the destination.
103///
104/// \headerfile <x86intrin.h>
105///
106/// This intrinsic corresponds to the \c PABSD instruction.
107///
108/// \param __a
109/// A 64-bit vector of [2 x i32].
110/// \returns A 64-bit integer vector containing the absolute values of the
111/// elements in the operand.
112static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
113_mm_abs_pi32(__m64 __a)
114{
115 return (__m64)__builtin_ia32_pabsd((__v2si)__a);
116}
117
118/// Computes the absolute value of each of the packed 32-bit signed
119/// integers in the source operand and stores the 32-bit unsigned integer
120/// results in the destination.
121///
122/// \headerfile <x86intrin.h>
123///
124/// This intrinsic corresponds to the \c VPABSD instruction.
125///
126/// \param __a
127/// A 128-bit vector of [4 x i32].
128/// \returns A 128-bit integer vector containing the absolute values of the
129/// elements in the operand.
130static __inline__ __m128i __DEFAULT_FN_ATTRS
131_mm_abs_epi32(__m128i __a)
132{
133 return (__m128i)__builtin_elementwise_abs((__v4si)__a);
134}
135
/// Joins the two 128-bit integer vector operands into one value, shifts that
///    value right by the byte count given in the immediate operand, and
///    returns a 128-bit result.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_alignr_epi8(__m128i a, __m128i b, const int n);
/// \endcode
///
/// This intrinsic corresponds to the \c PALIGNR instruction.
///
/// \param a
///    A 128-bit vector of [16 x i8]; one of the two source operands.
/// \param b
///    A 128-bit vector of [16 x i8]; the other source operand.
/// \param n
///    An immediate giving the number of bytes to shift the concatenation
///    right by.
/// \returns A 128-bit integer vector containing the concatenated,
///    right-shifted value.
#define _mm_alignr_epi8(a, b, n)                                               \
  ((__m128i)__builtin_ia32_palignr128(                                         \
      (__v16qi)(__m128i)(a),                                                   \
      (__v16qi)(__m128i)(b),                                                   \
      (n)))
159
/// Joins the two 64-bit integer vector operands into one value, shifts that
///    value right by the byte count given in the immediate operand, and
///    returns a 64-bit result.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m64 _mm_alignr_pi8(__m64 a, __m64 b, const int n);
/// \endcode
///
/// This intrinsic corresponds to the \c PALIGNR instruction.
///
/// \param a
///    A 64-bit vector of [8 x i8]; one of the two source operands.
/// \param b
///    A 64-bit vector of [8 x i8]; the other source operand.
/// \param n
///    An immediate giving the number of bytes to shift the concatenation
///    right by.
/// \returns A 64-bit integer vector containing the concatenated,
///    right-shifted value.
#define _mm_alignr_pi8(a, b, n)                                                \
  ((__m64)__builtin_ia32_palignr(                                              \
      (__v8qi)(__m64)(a),                                                      \
      (__v8qi)(__m64)(b),                                                      \
      (n)))
181
182/// Horizontally adds the adjacent pairs of values contained in 2 packed
183/// 128-bit vectors of [8 x i16].
184///
185/// \headerfile <x86intrin.h>
186///
187/// This intrinsic corresponds to the \c VPHADDW instruction.
188///
189/// \param __a
190/// A 128-bit vector of [8 x i16] containing one of the source operands. The
191/// horizontal sums of the values are stored in the lower bits of the
192/// destination.
193/// \param __b
194/// A 128-bit vector of [8 x i16] containing one of the source operands. The
195/// horizontal sums of the values are stored in the upper bits of the
196/// destination.
197/// \returns A 128-bit vector of [8 x i16] containing the horizontal sums of
198/// both operands.
199static __inline__ __m128i __DEFAULT_FN_ATTRS
200_mm_hadd_epi16(__m128i __a, __m128i __b)
201{
202 return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b);
203}
204
205/// Horizontally adds the adjacent pairs of values contained in 2 packed
206/// 128-bit vectors of [4 x i32].
207///
208/// \headerfile <x86intrin.h>
209///
210/// This intrinsic corresponds to the \c VPHADDD instruction.
211///
212/// \param __a
213/// A 128-bit vector of [4 x i32] containing one of the source operands. The
214/// horizontal sums of the values are stored in the lower bits of the
215/// destination.
216/// \param __b
217/// A 128-bit vector of [4 x i32] containing one of the source operands. The
218/// horizontal sums of the values are stored in the upper bits of the
219/// destination.
220/// \returns A 128-bit vector of [4 x i32] containing the horizontal sums of
221/// both operands.
222static __inline__ __m128i __DEFAULT_FN_ATTRS
223_mm_hadd_epi32(__m128i __a, __m128i __b)
224{
225 return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b);
226}
227
228/// Horizontally adds the adjacent pairs of values contained in 2 packed
229/// 64-bit vectors of [4 x i16].
230///
231/// \headerfile <x86intrin.h>
232///
233/// This intrinsic corresponds to the \c PHADDW instruction.
234///
235/// \param __a
236/// A 64-bit vector of [4 x i16] containing one of the source operands. The
237/// horizontal sums of the values are stored in the lower bits of the
238/// destination.
239/// \param __b
240/// A 64-bit vector of [4 x i16] containing one of the source operands. The
241/// horizontal sums of the values are stored in the upper bits of the
242/// destination.
243/// \returns A 64-bit vector of [4 x i16] containing the horizontal sums of both
244/// operands.
245static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
246_mm_hadd_pi16(__m64 __a, __m64 __b)
247{
248 return (__m64)__builtin_ia32_phaddw((__v4hi)__a, (__v4hi)__b);
249}
250
251/// Horizontally adds the adjacent pairs of values contained in 2 packed
252/// 64-bit vectors of [2 x i32].
253///
254/// \headerfile <x86intrin.h>
255///
256/// This intrinsic corresponds to the \c PHADDD instruction.
257///
258/// \param __a
259/// A 64-bit vector of [2 x i32] containing one of the source operands. The
260/// horizontal sums of the values are stored in the lower bits of the
261/// destination.
262/// \param __b
263/// A 64-bit vector of [2 x i32] containing one of the source operands. The
264/// horizontal sums of the values are stored in the upper bits of the
265/// destination.
266/// \returns A 64-bit vector of [2 x i32] containing the horizontal sums of both
267/// operands.
268static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
269_mm_hadd_pi32(__m64 __a, __m64 __b)
270{
271 return (__m64)__builtin_ia32_phaddd((__v2si)__a, (__v2si)__b);
272}
273
274/// Horizontally adds the adjacent pairs of values contained in 2 packed
275/// 128-bit vectors of [8 x i16]. Positive sums greater than 0x7FFF are
276/// saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to
277/// 0x8000.
278///
279/// \headerfile <x86intrin.h>
280///
281/// This intrinsic corresponds to the \c VPHADDSW instruction.
282///
283/// \param __a
284/// A 128-bit vector of [8 x i16] containing one of the source operands. The
285/// horizontal sums of the values are stored in the lower bits of the
286/// destination.
287/// \param __b
288/// A 128-bit vector of [8 x i16] containing one of the source operands. The
289/// horizontal sums of the values are stored in the upper bits of the
290/// destination.
291/// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated
292/// sums of both operands.
293static __inline__ __m128i __DEFAULT_FN_ATTRS
294_mm_hadds_epi16(__m128i __a, __m128i __b)
295{
296 return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b);
297}
298
299/// Horizontally adds the adjacent pairs of values contained in 2 packed
300/// 64-bit vectors of [4 x i16]. Positive sums greater than 0x7FFF are
301/// saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to
302/// 0x8000.
303///
304/// \headerfile <x86intrin.h>
305///
306/// This intrinsic corresponds to the \c PHADDSW instruction.
307///
308/// \param __a
309/// A 64-bit vector of [4 x i16] containing one of the source operands. The
310/// horizontal sums of the values are stored in the lower bits of the
311/// destination.
312/// \param __b
313/// A 64-bit vector of [4 x i16] containing one of the source operands. The
314/// horizontal sums of the values are stored in the upper bits of the
315/// destination.
316/// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
317/// sums of both operands.
318static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
319_mm_hadds_pi16(__m64 __a, __m64 __b)
320{
321 return (__m64)__builtin_ia32_phaddsw((__v4hi)__a, (__v4hi)__b);
322}
323
324/// Horizontally subtracts the adjacent pairs of values contained in 2
325/// packed 128-bit vectors of [8 x i16].
326///
327/// \headerfile <x86intrin.h>
328///
329/// This intrinsic corresponds to the \c VPHSUBW instruction.
330///
331/// \param __a
332/// A 128-bit vector of [8 x i16] containing one of the source operands. The
333/// horizontal differences between the values are stored in the lower bits of
334/// the destination.
335/// \param __b
336/// A 128-bit vector of [8 x i16] containing one of the source operands. The
337/// horizontal differences between the values are stored in the upper bits of
338/// the destination.
339/// \returns A 128-bit vector of [8 x i16] containing the horizontal differences
340/// of both operands.
341static __inline__ __m128i __DEFAULT_FN_ATTRS
342_mm_hsub_epi16(__m128i __a, __m128i __b)
343{
344 return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b);
345}
346
347/// Horizontally subtracts the adjacent pairs of values contained in 2
348/// packed 128-bit vectors of [4 x i32].
349///
350/// \headerfile <x86intrin.h>
351///
352/// This intrinsic corresponds to the \c VPHSUBD instruction.
353///
354/// \param __a
355/// A 128-bit vector of [4 x i32] containing one of the source operands. The
356/// horizontal differences between the values are stored in the lower bits of
357/// the destination.
358/// \param __b
359/// A 128-bit vector of [4 x i32] containing one of the source operands. The
360/// horizontal differences between the values are stored in the upper bits of
361/// the destination.
362/// \returns A 128-bit vector of [4 x i32] containing the horizontal differences
363/// of both operands.
364static __inline__ __m128i __DEFAULT_FN_ATTRS
365_mm_hsub_epi32(__m128i __a, __m128i __b)
366{
367 return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b);
368}
369
370/// Horizontally subtracts the adjacent pairs of values contained in 2
371/// packed 64-bit vectors of [4 x i16].
372///
373/// \headerfile <x86intrin.h>
374///
375/// This intrinsic corresponds to the \c PHSUBW instruction.
376///
377/// \param __a
378/// A 64-bit vector of [4 x i16] containing one of the source operands. The
379/// horizontal differences between the values are stored in the lower bits of
380/// the destination.
381/// \param __b
382/// A 64-bit vector of [4 x i16] containing one of the source operands. The
383/// horizontal differences between the values are stored in the upper bits of
384/// the destination.
385/// \returns A 64-bit vector of [4 x i16] containing the horizontal differences
386/// of both operands.
387static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
388_mm_hsub_pi16(__m64 __a, __m64 __b)
389{
390 return (__m64)__builtin_ia32_phsubw((__v4hi)__a, (__v4hi)__b);
391}
392
393/// Horizontally subtracts the adjacent pairs of values contained in 2
394/// packed 64-bit vectors of [2 x i32].
395///
396/// \headerfile <x86intrin.h>
397///
398/// This intrinsic corresponds to the \c PHSUBD instruction.
399///
400/// \param __a
401/// A 64-bit vector of [2 x i32] containing one of the source operands. The
402/// horizontal differences between the values are stored in the lower bits of
403/// the destination.
404/// \param __b
405/// A 64-bit vector of [2 x i32] containing one of the source operands. The
406/// horizontal differences between the values are stored in the upper bits of
407/// the destination.
408/// \returns A 64-bit vector of [2 x i32] containing the horizontal differences
409/// of both operands.
410static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
411_mm_hsub_pi32(__m64 __a, __m64 __b)
412{
413 return (__m64)__builtin_ia32_phsubd((__v2si)__a, (__v2si)__b);
414}
415
416/// Horizontally subtracts the adjacent pairs of values contained in 2
417/// packed 128-bit vectors of [8 x i16]. Positive differences greater than
418/// 0x7FFF are saturated to 0x7FFF. Negative differences less than 0x8000 are
419/// saturated to 0x8000.
420///
421/// \headerfile <x86intrin.h>
422///
423/// This intrinsic corresponds to the \c VPHSUBSW instruction.
424///
425/// \param __a
426/// A 128-bit vector of [8 x i16] containing one of the source operands. The
427/// horizontal differences between the values are stored in the lower bits of
428/// the destination.
429/// \param __b
430/// A 128-bit vector of [8 x i16] containing one of the source operands. The
431/// horizontal differences between the values are stored in the upper bits of
432/// the destination.
433/// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated
434/// differences of both operands.
435static __inline__ __m128i __DEFAULT_FN_ATTRS
436_mm_hsubs_epi16(__m128i __a, __m128i __b)
437{
438 return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b);
439}
440
441/// Horizontally subtracts the adjacent pairs of values contained in 2
442/// packed 64-bit vectors of [4 x i16]. Positive differences greater than
443/// 0x7FFF are saturated to 0x7FFF. Negative differences less than 0x8000 are
444/// saturated to 0x8000.
445///
446/// \headerfile <x86intrin.h>
447///
448/// This intrinsic corresponds to the \c PHSUBSW instruction.
449///
450/// \param __a
451/// A 64-bit vector of [4 x i16] containing one of the source operands. The
452/// horizontal differences between the values are stored in the lower bits of
453/// the destination.
454/// \param __b
455/// A 64-bit vector of [4 x i16] containing one of the source operands. The
456/// horizontal differences between the values are stored in the upper bits of
457/// the destination.
458/// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
459/// differences of both operands.
460static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
461_mm_hsubs_pi16(__m64 __a, __m64 __b)
462{
463 return (__m64)__builtin_ia32_phsubsw((__v4hi)__a, (__v4hi)__b);
464}
465
466/// Multiplies corresponding pairs of packed 8-bit unsigned integer
467/// values contained in the first source operand and packed 8-bit signed
468/// integer values contained in the second source operand, adds pairs of
469/// contiguous products with signed saturation, and writes the 16-bit sums to
470/// the corresponding bits in the destination.
471///
472/// For example, bits [7:0] of both operands are multiplied, bits [15:8] of
473/// both operands are multiplied, and the sum of both results is written to
474/// bits [15:0] of the destination.
475///
476/// \headerfile <x86intrin.h>
477///
478/// This intrinsic corresponds to the \c VPMADDUBSW instruction.
479///
480/// \param __a
481/// A 128-bit integer vector containing the first source operand.
482/// \param __b
483/// A 128-bit integer vector containing the second source operand.
484/// \returns A 128-bit integer vector containing the sums of products of both
485/// operands: \n
486/// \a R0 := (\a __a0 * \a __b0) + (\a __a1 * \a __b1) \n
487/// \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n
488/// \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n
489/// \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7) \n
490/// \a R4 := (\a __a8 * \a __b8) + (\a __a9 * \a __b9) \n
491/// \a R5 := (\a __a10 * \a __b10) + (\a __a11 * \a __b11) \n
492/// \a R6 := (\a __a12 * \a __b12) + (\a __a13 * \a __b13) \n
493/// \a R7 := (\a __a14 * \a __b14) + (\a __a15 * \a __b15)
494static __inline__ __m128i __DEFAULT_FN_ATTRS
495_mm_maddubs_epi16(__m128i __a, __m128i __b)
496{
497 return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b);
498}
499
500/// Multiplies corresponding pairs of packed 8-bit unsigned integer
501/// values contained in the first source operand and packed 8-bit signed
502/// integer values contained in the second source operand, adds pairs of
503/// contiguous products with signed saturation, and writes the 16-bit sums to
504/// the corresponding bits in the destination.
505///
506/// For example, bits [7:0] of both operands are multiplied, bits [15:8] of
507/// both operands are multiplied, and the sum of both results is written to
508/// bits [15:0] of the destination.
509///
510/// \headerfile <x86intrin.h>
511///
512/// This intrinsic corresponds to the \c PMADDUBSW instruction.
513///
514/// \param __a
515/// A 64-bit integer vector containing the first source operand.
516/// \param __b
517/// A 64-bit integer vector containing the second source operand.
518/// \returns A 64-bit integer vector containing the sums of products of both
519/// operands: \n
520/// \a R0 := (\a __a0 * \a __b0) + (\a __a1 * \a __b1) \n
521/// \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n
522/// \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n
523/// \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7)
524static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
525_mm_maddubs_pi16(__m64 __a, __m64 __b)
526{
527 return (__m64)__builtin_ia32_pmaddubsw((__v8qi)__a, (__v8qi)__b);
528}
529
530/// Multiplies packed 16-bit signed integer values, truncates the 32-bit
531/// products to the 18 most significant bits by right-shifting, rounds the
532/// truncated value by adding 1, and writes bits [16:1] to the destination.
533///
534/// \headerfile <x86intrin.h>
535///
536/// This intrinsic corresponds to the \c VPMULHRSW instruction.
537///
538/// \param __a
539/// A 128-bit vector of [8 x i16] containing one of the source operands.
540/// \param __b
541/// A 128-bit vector of [8 x i16] containing one of the source operands.
542/// \returns A 128-bit vector of [8 x i16] containing the rounded and scaled
543/// products of both operands.
544static __inline__ __m128i __DEFAULT_FN_ATTRS
545_mm_mulhrs_epi16(__m128i __a, __m128i __b)
546{
547 return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b);
548}
549
550/// Multiplies packed 16-bit signed integer values, truncates the 32-bit
551/// products to the 18 most significant bits by right-shifting, rounds the
552/// truncated value by adding 1, and writes bits [16:1] to the destination.
553///
554/// \headerfile <x86intrin.h>
555///
556/// This intrinsic corresponds to the \c PMULHRSW instruction.
557///
558/// \param __a
559/// A 64-bit vector of [4 x i16] containing one of the source operands.
560/// \param __b
561/// A 64-bit vector of [4 x i16] containing one of the source operands.
562/// \returns A 64-bit vector of [4 x i16] containing the rounded and scaled
563/// products of both operands.
564static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
565_mm_mulhrs_pi16(__m64 __a, __m64 __b)
566{
567 return (__m64)__builtin_ia32_pmulhrsw((__v4hi)__a, (__v4hi)__b);
568}
569
570/// Copies the 8-bit integers from a 128-bit integer vector to the
571/// destination or clears 8-bit values in the destination, as specified by
572/// the second source operand.
573///
574/// \headerfile <x86intrin.h>
575///
576/// This intrinsic corresponds to the \c VPSHUFB instruction.
577///
578/// \param __a
579/// A 128-bit integer vector containing the values to be copied.
580/// \param __b
581/// A 128-bit integer vector containing control bytes corresponding to
582/// positions in the destination:
583/// Bit 7: \n
584/// 1: Clear the corresponding byte in the destination. \n
585/// 0: Copy the selected source byte to the corresponding byte in the
586/// destination. \n
587/// Bits [6:4] Reserved. \n
588/// Bits [3:0] select the source byte to be copied.
589/// \returns A 128-bit integer vector containing the copied or cleared values.
590static __inline__ __m128i __DEFAULT_FN_ATTRS
591_mm_shuffle_epi8(__m128i __a, __m128i __b)
592{
593 return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b);
594}
595
596/// Copies the 8-bit integers from a 64-bit integer vector to the
597/// destination or clears 8-bit values in the destination, as specified by
598/// the second source operand.
599///
600/// \headerfile <x86intrin.h>
601///
602/// This intrinsic corresponds to the \c PSHUFB instruction.
603///
604/// \param __a
605/// A 64-bit integer vector containing the values to be copied.
606/// \param __b
607/// A 64-bit integer vector containing control bytes corresponding to
608/// positions in the destination:
609/// Bit 7: \n
610/// 1: Clear the corresponding byte in the destination. \n
611/// 0: Copy the selected source byte to the corresponding byte in the
612/// destination. \n
613/// Bits [3:0] select the source byte to be copied.
614/// \returns A 64-bit integer vector containing the copied or cleared values.
615static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
616_mm_shuffle_pi8(__m64 __a, __m64 __b)
617{
618 return (__m64)__builtin_ia32_pshufb((__v8qi)__a, (__v8qi)__b);
619}
620
621/// For each 8-bit integer in the first source operand, perform one of
622/// the following actions as specified by the second source operand.
623///
624/// If the byte in the second source is negative, calculate the two's
625/// complement of the corresponding byte in the first source, and write that
626/// value to the destination. If the byte in the second source is positive,
627/// copy the corresponding byte from the first source to the destination. If
628/// the byte in the second source is zero, clear the corresponding byte in
629/// the destination.
630///
631/// \headerfile <x86intrin.h>
632///
633/// This intrinsic corresponds to the \c VPSIGNB instruction.
634///
635/// \param __a
636/// A 128-bit integer vector containing the values to be copied.
637/// \param __b
638/// A 128-bit integer vector containing control bytes corresponding to
639/// positions in the destination.
640/// \returns A 128-bit integer vector containing the resultant values.
641static __inline__ __m128i __DEFAULT_FN_ATTRS
642_mm_sign_epi8(__m128i __a, __m128i __b)
643{
644 return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b);
645}
646
647/// For each 16-bit integer in the first source operand, perform one of
648/// the following actions as specified by the second source operand.
649///
650/// If the word in the second source is negative, calculate the two's
651/// complement of the corresponding word in the first source, and write that
652/// value to the destination. If the word in the second source is positive,
653/// copy the corresponding word from the first source to the destination. If
654/// the word in the second source is zero, clear the corresponding word in
655/// the destination.
656///
657/// \headerfile <x86intrin.h>
658///
659/// This intrinsic corresponds to the \c VPSIGNW instruction.
660///
661/// \param __a
662/// A 128-bit integer vector containing the values to be copied.
663/// \param __b
664/// A 128-bit integer vector containing control words corresponding to
665/// positions in the destination.
666/// \returns A 128-bit integer vector containing the resultant values.
667static __inline__ __m128i __DEFAULT_FN_ATTRS
668_mm_sign_epi16(__m128i __a, __m128i __b)
669{
670 return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b);
671}
672
673/// For each 32-bit integer in the first source operand, perform one of
674/// the following actions as specified by the second source operand.
675///
676/// If the doubleword in the second source is negative, calculate the two's
677/// complement of the corresponding word in the first source, and write that
678/// value to the destination. If the doubleword in the second source is
679/// positive, copy the corresponding word from the first source to the
680/// destination. If the doubleword in the second source is zero, clear the
681/// corresponding word in the destination.
682///
683/// \headerfile <x86intrin.h>
684///
685/// This intrinsic corresponds to the \c VPSIGND instruction.
686///
687/// \param __a
688/// A 128-bit integer vector containing the values to be copied.
689/// \param __b
690/// A 128-bit integer vector containing control doublewords corresponding to
691/// positions in the destination.
692/// \returns A 128-bit integer vector containing the resultant values.
693static __inline__ __m128i __DEFAULT_FN_ATTRS
694_mm_sign_epi32(__m128i __a, __m128i __b)
695{
696 return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b);
697}
698
699/// For each 8-bit integer in the first source operand, perform one of
700/// the following actions as specified by the second source operand.
701///
702/// If the byte in the second source is negative, calculate the two's
703/// complement of the corresponding byte in the first source, and write that
704/// value to the destination. If the byte in the second source is positive,
705/// copy the corresponding byte from the first source to the destination. If
706/// the byte in the second source is zero, clear the corresponding byte in
707/// the destination.
708///
709/// \headerfile <x86intrin.h>
710///
711/// This intrinsic corresponds to the \c PSIGNB instruction.
712///
713/// \param __a
714/// A 64-bit integer vector containing the values to be copied.
715/// \param __b
716/// A 64-bit integer vector containing control bytes corresponding to
717/// positions in the destination.
718/// \returns A 64-bit integer vector containing the resultant values.
719static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
720_mm_sign_pi8(__m64 __a, __m64 __b)
721{
722 return (__m64)__builtin_ia32_psignb((__v8qi)__a, (__v8qi)__b);
723}
724
725/// For each 16-bit integer in the first source operand, perform one of
726/// the following actions as specified by the second source operand.
727///
728/// If the word in the second source is negative, calculate the two's
729/// complement of the corresponding word in the first source, and write that
730/// value to the destination. If the word in the second source is positive,
731/// copy the corresponding word from the first source to the destination. If
732/// the word in the second source is zero, clear the corresponding word in
733/// the destination.
734///
735/// \headerfile <x86intrin.h>
736///
737/// This intrinsic corresponds to the \c PSIGNW instruction.
738///
739/// \param __a
740/// A 64-bit integer vector containing the values to be copied.
741/// \param __b
742/// A 64-bit integer vector containing control words corresponding to
743/// positions in the destination.
744/// \returns A 64-bit integer vector containing the resultant values.
745static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
746_mm_sign_pi16(__m64 __a, __m64 __b)
747{
748 return (__m64)__builtin_ia32_psignw((__v4hi)__a, (__v4hi)__b);
749}
750
751/// For each 32-bit integer in the first source operand, perform one of
752/// the following actions as specified by the second source operand.
753///
754/// If the doubleword in the second source is negative, calculate the two's
755/// complement of the corresponding doubleword in the first source, and
756/// write that value to the destination. If the doubleword in the second
757/// source is positive, copy the corresponding doubleword from the first
758/// source to the destination. If the doubleword in the second source is
759/// zero, clear the corresponding doubleword in the destination.
760///
761/// \headerfile <x86intrin.h>
762///
763/// This intrinsic corresponds to the \c PSIGND instruction.
764///
765/// \param __a
766/// A 64-bit integer vector containing the values to be copied.
767/// \param __b
768/// A 64-bit integer vector containing two control doublewords corresponding
769/// to positions in the destination.
770/// \returns A 64-bit integer vector containing the resultant values.
771static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
772_mm_sign_pi32(__m64 __a, __m64 __b)
773{
774 return (__m64)__builtin_ia32_psignd((__v2si)__a, (__v2si)__b);
775}
776
777#undef __DEFAULT_FN_ATTRS
778#undef __DEFAULT_FN_ATTRS_MMX
779
780#endif /* __TMMINTRIN_H */
781

Warning: This file is not a C or C++ file. It does not have highlighting.

source code of clang/lib/Headers/tmmintrin.h