/*===------------- avx512vlvbmi2intrin.h - VBMI2 intrinsics -----------------===
 *
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
10 | #ifndef __IMMINTRIN_H |
11 | #error "Never use <avx512vlvbmi2intrin.h> directly; include <immintrin.h> instead." |
12 | #endif |
13 | |
14 | #ifndef __AVX512VLVBMI2INTRIN_H |
15 | #define __AVX512VLVBMI2INTRIN_H |
16 | |
17 | /* Define the default attributes for the functions in this file. */ |
18 | #define __DEFAULT_FN_ATTRS128 \ |
19 | __attribute__((__always_inline__, __nodebug__, \ |
20 | __target__("avx512vl,avx512vbmi2,no-evex512"), \ |
21 | __min_vector_width__(128))) |
22 | #define __DEFAULT_FN_ATTRS256 \ |
23 | __attribute__((__always_inline__, __nodebug__, \ |
24 | __target__("avx512vl,avx512vbmi2,no-evex512"), \ |
25 | __min_vector_width__(256))) |
26 | |
27 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
28 | _mm_mask_compress_epi16(__m128i __S, __mmask8 __U, __m128i __D) |
29 | { |
30 | return (__m128i) __builtin_ia32_compresshi128_mask ((__v8hi) __D, |
31 | (__v8hi) __S, |
32 | __U); |
33 | } |
34 | |
35 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
36 | _mm_maskz_compress_epi16(__mmask8 __U, __m128i __D) |
37 | { |
38 | return (__m128i) __builtin_ia32_compresshi128_mask ((__v8hi) __D, |
39 | (__v8hi) _mm_setzero_si128(), |
40 | __U); |
41 | } |
42 | |
43 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
44 | _mm_mask_compress_epi8(__m128i __S, __mmask16 __U, __m128i __D) |
45 | { |
46 | return (__m128i) __builtin_ia32_compressqi128_mask ((__v16qi) __D, |
47 | (__v16qi) __S, |
48 | __U); |
49 | } |
50 | |
51 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
52 | _mm_maskz_compress_epi8(__mmask16 __U, __m128i __D) |
53 | { |
54 | return (__m128i) __builtin_ia32_compressqi128_mask ((__v16qi) __D, |
55 | (__v16qi) _mm_setzero_si128(), |
56 | __U); |
57 | } |
58 | |
59 | static __inline__ void __DEFAULT_FN_ATTRS128 |
60 | _mm_mask_compressstoreu_epi16(void *__P, __mmask8 __U, __m128i __D) |
61 | { |
62 | __builtin_ia32_compressstorehi128_mask ((__v8hi *) __P, (__v8hi) __D, |
63 | __U); |
64 | } |
65 | |
66 | static __inline__ void __DEFAULT_FN_ATTRS128 |
67 | _mm_mask_compressstoreu_epi8(void *__P, __mmask16 __U, __m128i __D) |
68 | { |
69 | __builtin_ia32_compressstoreqi128_mask ((__v16qi *) __P, (__v16qi) __D, |
70 | __U); |
71 | } |
72 | |
73 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
74 | _mm_mask_expand_epi16(__m128i __S, __mmask8 __U, __m128i __D) |
75 | { |
76 | return (__m128i) __builtin_ia32_expandhi128_mask ((__v8hi) __D, |
77 | (__v8hi) __S, |
78 | __U); |
79 | } |
80 | |
81 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
82 | _mm_maskz_expand_epi16(__mmask8 __U, __m128i __D) |
83 | { |
84 | return (__m128i) __builtin_ia32_expandhi128_mask ((__v8hi) __D, |
85 | (__v8hi) _mm_setzero_si128(), |
86 | __U); |
87 | } |
88 | |
89 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
90 | _mm_mask_expand_epi8(__m128i __S, __mmask16 __U, __m128i __D) |
91 | { |
92 | return (__m128i) __builtin_ia32_expandqi128_mask ((__v16qi) __D, |
93 | (__v16qi) __S, |
94 | __U); |
95 | } |
96 | |
97 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
98 | _mm_maskz_expand_epi8(__mmask16 __U, __m128i __D) |
99 | { |
100 | return (__m128i) __builtin_ia32_expandqi128_mask ((__v16qi) __D, |
101 | (__v16qi) _mm_setzero_si128(), |
102 | __U); |
103 | } |
104 | |
105 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
106 | _mm_mask_expandloadu_epi16(__m128i __S, __mmask8 __U, void const *__P) |
107 | { |
108 | return (__m128i) __builtin_ia32_expandloadhi128_mask ((const __v8hi *)__P, |
109 | (__v8hi) __S, |
110 | __U); |
111 | } |
112 | |
113 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
114 | _mm_maskz_expandloadu_epi16(__mmask8 __U, void const *__P) |
115 | { |
116 | return (__m128i) __builtin_ia32_expandloadhi128_mask ((const __v8hi *)__P, |
117 | (__v8hi) _mm_setzero_si128(), |
118 | __U); |
119 | } |
120 | |
121 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
122 | _mm_mask_expandloadu_epi8(__m128i __S, __mmask16 __U, void const *__P) |
123 | { |
124 | return (__m128i) __builtin_ia32_expandloadqi128_mask ((const __v16qi *)__P, |
125 | (__v16qi) __S, |
126 | __U); |
127 | } |
128 | |
129 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
130 | _mm_maskz_expandloadu_epi8(__mmask16 __U, void const *__P) |
131 | { |
132 | return (__m128i) __builtin_ia32_expandloadqi128_mask ((const __v16qi *)__P, |
133 | (__v16qi) _mm_setzero_si128(), |
134 | __U); |
135 | } |
136 | |
137 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
138 | _mm256_mask_compress_epi16(__m256i __S, __mmask16 __U, __m256i __D) |
139 | { |
140 | return (__m256i) __builtin_ia32_compresshi256_mask ((__v16hi) __D, |
141 | (__v16hi) __S, |
142 | __U); |
143 | } |
144 | |
145 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
146 | _mm256_maskz_compress_epi16(__mmask16 __U, __m256i __D) |
147 | { |
148 | return (__m256i) __builtin_ia32_compresshi256_mask ((__v16hi) __D, |
149 | (__v16hi) _mm256_setzero_si256(), |
150 | __U); |
151 | } |
152 | |
153 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
154 | _mm256_mask_compress_epi8(__m256i __S, __mmask32 __U, __m256i __D) |
155 | { |
156 | return (__m256i) __builtin_ia32_compressqi256_mask ((__v32qi) __D, |
157 | (__v32qi) __S, |
158 | __U); |
159 | } |
160 | |
161 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
162 | _mm256_maskz_compress_epi8(__mmask32 __U, __m256i __D) |
163 | { |
164 | return (__m256i) __builtin_ia32_compressqi256_mask ((__v32qi) __D, |
165 | (__v32qi) _mm256_setzero_si256(), |
166 | __U); |
167 | } |
168 | |
169 | static __inline__ void __DEFAULT_FN_ATTRS256 |
170 | _mm256_mask_compressstoreu_epi16(void *__P, __mmask16 __U, __m256i __D) |
171 | { |
172 | __builtin_ia32_compressstorehi256_mask ((__v16hi *) __P, (__v16hi) __D, |
173 | __U); |
174 | } |
175 | |
176 | static __inline__ void __DEFAULT_FN_ATTRS256 |
177 | _mm256_mask_compressstoreu_epi8(void *__P, __mmask32 __U, __m256i __D) |
178 | { |
179 | __builtin_ia32_compressstoreqi256_mask ((__v32qi *) __P, (__v32qi) __D, |
180 | __U); |
181 | } |
182 | |
183 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
184 | _mm256_mask_expand_epi16(__m256i __S, __mmask16 __U, __m256i __D) |
185 | { |
186 | return (__m256i) __builtin_ia32_expandhi256_mask ((__v16hi) __D, |
187 | (__v16hi) __S, |
188 | __U); |
189 | } |
190 | |
191 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
192 | _mm256_maskz_expand_epi16(__mmask16 __U, __m256i __D) |
193 | { |
194 | return (__m256i) __builtin_ia32_expandhi256_mask ((__v16hi) __D, |
195 | (__v16hi) _mm256_setzero_si256(), |
196 | __U); |
197 | } |
198 | |
199 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
200 | _mm256_mask_expand_epi8(__m256i __S, __mmask32 __U, __m256i __D) |
201 | { |
202 | return (__m256i) __builtin_ia32_expandqi256_mask ((__v32qi) __D, |
203 | (__v32qi) __S, |
204 | __U); |
205 | } |
206 | |
207 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
208 | _mm256_maskz_expand_epi8(__mmask32 __U, __m256i __D) |
209 | { |
210 | return (__m256i) __builtin_ia32_expandqi256_mask ((__v32qi) __D, |
211 | (__v32qi) _mm256_setzero_si256(), |
212 | __U); |
213 | } |
214 | |
215 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
216 | _mm256_mask_expandloadu_epi16(__m256i __S, __mmask16 __U, void const *__P) |
217 | { |
218 | return (__m256i) __builtin_ia32_expandloadhi256_mask ((const __v16hi *)__P, |
219 | (__v16hi) __S, |
220 | __U); |
221 | } |
222 | |
223 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
224 | _mm256_maskz_expandloadu_epi16(__mmask16 __U, void const *__P) |
225 | { |
226 | return (__m256i) __builtin_ia32_expandloadhi256_mask ((const __v16hi *)__P, |
227 | (__v16hi) _mm256_setzero_si256(), |
228 | __U); |
229 | } |
230 | |
231 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
232 | _mm256_mask_expandloadu_epi8(__m256i __S, __mmask32 __U, void const *__P) |
233 | { |
234 | return (__m256i) __builtin_ia32_expandloadqi256_mask ((const __v32qi *)__P, |
235 | (__v32qi) __S, |
236 | __U); |
237 | } |
238 | |
239 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
240 | _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P) |
241 | { |
242 | return (__m256i) __builtin_ia32_expandloadqi256_mask ((const __v32qi *)__P, |
243 | (__v32qi) _mm256_setzero_si256(), |
244 | __U); |
245 | } |
246 | |
/* VPSHLDQ/VPSHLDD/VPSHLDW: concatenate each element of A (high part) with the
   corresponding element of B (low part) and shift left by the immediate I,
   keeping the upper half.  These must be macros because I is required to be a
   compile-time constant.  The mask/maskz variants select between the shifted
   result and S / zero per the mask U. */
#define _mm256_shldi_epi64(A, B, I) \
  ((__m256i)__builtin_ia32_vpshldq256((__v4di)(__m256i)(A), \
                                      (__v4di)(__m256i)(B), (int)(I)))

#define _mm256_mask_shldi_epi64(S, U, A, B, I) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                      (__v4di)_mm256_shldi_epi64((A), (B), (I)), \
                                      (__v4di)(__m256i)(S)))

#define _mm256_maskz_shldi_epi64(U, A, B, I) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                      (__v4di)_mm256_shldi_epi64((A), (B), (I)), \
                                      (__v4di)_mm256_setzero_si256()))

#define _mm_shldi_epi64(A, B, I) \
  ((__m128i)__builtin_ia32_vpshldq128((__v2di)(__m128i)(A), \
                                      (__v2di)(__m128i)(B), (int)(I)))

#define _mm_mask_shldi_epi64(S, U, A, B, I) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
                                      (__v2di)_mm_shldi_epi64((A), (B), (I)), \
                                      (__v2di)(__m128i)(S)))

#define _mm_maskz_shldi_epi64(U, A, B, I) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
                                      (__v2di)_mm_shldi_epi64((A), (B), (I)), \
                                      (__v2di)_mm_setzero_si128()))

#define _mm256_shldi_epi32(A, B, I) \
  ((__m256i)__builtin_ia32_vpshldd256((__v8si)(__m256i)(A), \
                                      (__v8si)(__m256i)(B), (int)(I)))

#define _mm256_mask_shldi_epi32(S, U, A, B, I) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                      (__v8si)_mm256_shldi_epi32((A), (B), (I)), \
                                      (__v8si)(__m256i)(S)))

#define _mm256_maskz_shldi_epi32(U, A, B, I) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                      (__v8si)_mm256_shldi_epi32((A), (B), (I)), \
                                      (__v8si)_mm256_setzero_si256()))

#define _mm_shldi_epi32(A, B, I) \
  ((__m128i)__builtin_ia32_vpshldd128((__v4si)(__m128i)(A), \
                                      (__v4si)(__m128i)(B), (int)(I)))

#define _mm_mask_shldi_epi32(S, U, A, B, I) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                      (__v4si)_mm_shldi_epi32((A), (B), (I)), \
                                      (__v4si)(__m128i)(S)))

#define _mm_maskz_shldi_epi32(U, A, B, I) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                      (__v4si)_mm_shldi_epi32((A), (B), (I)), \
                                      (__v4si)_mm_setzero_si128()))

#define _mm256_shldi_epi16(A, B, I) \
  ((__m256i)__builtin_ia32_vpshldw256((__v16hi)(__m256i)(A), \
                                      (__v16hi)(__m256i)(B), (int)(I)))

#define _mm256_mask_shldi_epi16(S, U, A, B, I) \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
                                      (__v16hi)_mm256_shldi_epi16((A), (B), (I)), \
                                      (__v16hi)(__m256i)(S)))

#define _mm256_maskz_shldi_epi16(U, A, B, I) \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
                                      (__v16hi)_mm256_shldi_epi16((A), (B), (I)), \
                                      (__v16hi)_mm256_setzero_si256()))

#define _mm_shldi_epi16(A, B, I) \
  ((__m128i)__builtin_ia32_vpshldw128((__v8hi)(__m128i)(A), \
                                      (__v8hi)(__m128i)(B), (int)(I)))

#define _mm_mask_shldi_epi16(S, U, A, B, I) \
  ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
                                      (__v8hi)_mm_shldi_epi16((A), (B), (I)), \
                                      (__v8hi)(__m128i)(S)))

#define _mm_maskz_shldi_epi16(U, A, B, I) \
  ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
                                      (__v8hi)_mm_shldi_epi16((A), (B), (I)), \
                                      (__v8hi)_mm_setzero_si128()))
330 | |
/* VPSHRDQ/VPSHRDD/VPSHRDW: concatenate each element of B (high part) with the
   corresponding element of A (low part) and shift right by the immediate I,
   keeping the lower half.  Macros because I must be a compile-time constant;
   mask/maskz variants merge with S / zero per the mask U. */
#define _mm256_shrdi_epi64(A, B, I) \
  ((__m256i)__builtin_ia32_vpshrdq256((__v4di)(__m256i)(A), \
                                      (__v4di)(__m256i)(B), (int)(I)))

#define _mm256_mask_shrdi_epi64(S, U, A, B, I) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                      (__v4di)_mm256_shrdi_epi64((A), (B), (I)), \
                                      (__v4di)(__m256i)(S)))

#define _mm256_maskz_shrdi_epi64(U, A, B, I) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                      (__v4di)_mm256_shrdi_epi64((A), (B), (I)), \
                                      (__v4di)_mm256_setzero_si256()))

#define _mm_shrdi_epi64(A, B, I) \
  ((__m128i)__builtin_ia32_vpshrdq128((__v2di)(__m128i)(A), \
                                      (__v2di)(__m128i)(B), (int)(I)))

#define _mm_mask_shrdi_epi64(S, U, A, B, I) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
                                      (__v2di)_mm_shrdi_epi64((A), (B), (I)), \
                                      (__v2di)(__m128i)(S)))

#define _mm_maskz_shrdi_epi64(U, A, B, I) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
                                      (__v2di)_mm_shrdi_epi64((A), (B), (I)), \
                                      (__v2di)_mm_setzero_si128()))

#define _mm256_shrdi_epi32(A, B, I) \
  ((__m256i)__builtin_ia32_vpshrdd256((__v8si)(__m256i)(A), \
                                      (__v8si)(__m256i)(B), (int)(I)))

#define _mm256_mask_shrdi_epi32(S, U, A, B, I) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                      (__v8si)_mm256_shrdi_epi32((A), (B), (I)), \
                                      (__v8si)(__m256i)(S)))

#define _mm256_maskz_shrdi_epi32(U, A, B, I) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                      (__v8si)_mm256_shrdi_epi32((A), (B), (I)), \
                                      (__v8si)_mm256_setzero_si256()))

#define _mm_shrdi_epi32(A, B, I) \
  ((__m128i)__builtin_ia32_vpshrdd128((__v4si)(__m128i)(A), \
                                      (__v4si)(__m128i)(B), (int)(I)))

#define _mm_mask_shrdi_epi32(S, U, A, B, I) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                      (__v4si)_mm_shrdi_epi32((A), (B), (I)), \
                                      (__v4si)(__m128i)(S)))

#define _mm_maskz_shrdi_epi32(U, A, B, I) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                      (__v4si)_mm_shrdi_epi32((A), (B), (I)), \
                                      (__v4si)_mm_setzero_si128()))

#define _mm256_shrdi_epi16(A, B, I) \
  ((__m256i)__builtin_ia32_vpshrdw256((__v16hi)(__m256i)(A), \
                                      (__v16hi)(__m256i)(B), (int)(I)))

#define _mm256_mask_shrdi_epi16(S, U, A, B, I) \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
                                      (__v16hi)_mm256_shrdi_epi16((A), (B), (I)), \
                                      (__v16hi)(__m256i)(S)))

#define _mm256_maskz_shrdi_epi16(U, A, B, I) \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
                                      (__v16hi)_mm256_shrdi_epi16((A), (B), (I)), \
                                      (__v16hi)_mm256_setzero_si256()))

#define _mm_shrdi_epi16(A, B, I) \
  ((__m128i)__builtin_ia32_vpshrdw128((__v8hi)(__m128i)(A), \
                                      (__v8hi)(__m128i)(B), (int)(I)))

#define _mm_mask_shrdi_epi16(S, U, A, B, I) \
  ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
                                      (__v8hi)_mm_shrdi_epi16((A), (B), (I)), \
                                      (__v8hi)(__m128i)(S)))

#define _mm_maskz_shrdi_epi16(U, A, B, I) \
  ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
                                      (__v8hi)_mm_shrdi_epi16((A), (B), (I)), \
                                      (__v8hi)_mm_setzero_si128()))
414 | |
415 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
416 | _mm256_shldv_epi64(__m256i __A, __m256i __B, __m256i __C) |
417 | { |
418 | return (__m256i)__builtin_ia32_vpshldvq256((__v4di)__A, (__v4di)__B, |
419 | (__v4di)__C); |
420 | } |
421 | |
422 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
423 | _mm256_mask_shldv_epi64(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) |
424 | { |
425 | return (__m256i)__builtin_ia32_selectq_256(__U, |
426 | (__v4di)_mm256_shldv_epi64(__A, __B, __C), |
427 | (__v4di)__A); |
428 | } |
429 | |
430 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
431 | _mm256_maskz_shldv_epi64(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) |
432 | { |
433 | return (__m256i)__builtin_ia32_selectq_256(__U, |
434 | (__v4di)_mm256_shldv_epi64(__A, __B, __C), |
435 | (__v4di)_mm256_setzero_si256()); |
436 | } |
437 | |
438 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
439 | _mm_shldv_epi64(__m128i __A, __m128i __B, __m128i __C) |
440 | { |
441 | return (__m128i)__builtin_ia32_vpshldvq128((__v2di)__A, (__v2di)__B, |
442 | (__v2di)__C); |
443 | } |
444 | |
445 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
446 | _mm_mask_shldv_epi64(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) |
447 | { |
448 | return (__m128i)__builtin_ia32_selectq_128(__U, |
449 | (__v2di)_mm_shldv_epi64(__A, __B, __C), |
450 | (__v2di)__A); |
451 | } |
452 | |
453 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
454 | _mm_maskz_shldv_epi64(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) |
455 | { |
456 | return (__m128i)__builtin_ia32_selectq_128(__U, |
457 | (__v2di)_mm_shldv_epi64(__A, __B, __C), |
458 | (__v2di)_mm_setzero_si128()); |
459 | } |
460 | |
461 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
462 | _mm256_shldv_epi32(__m256i __A, __m256i __B, __m256i __C) |
463 | { |
464 | return (__m256i)__builtin_ia32_vpshldvd256((__v8si)__A, (__v8si)__B, |
465 | (__v8si)__C); |
466 | } |
467 | |
468 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
469 | _mm256_mask_shldv_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) |
470 | { |
471 | return (__m256i)__builtin_ia32_selectd_256(__U, |
472 | (__v8si)_mm256_shldv_epi32(__A, __B, __C), |
473 | (__v8si)__A); |
474 | } |
475 | |
476 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
477 | _mm256_maskz_shldv_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) |
478 | { |
479 | return (__m256i)__builtin_ia32_selectd_256(__U, |
480 | (__v8si)_mm256_shldv_epi32(__A, __B, __C), |
481 | (__v8si)_mm256_setzero_si256()); |
482 | } |
483 | |
484 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
485 | _mm_shldv_epi32(__m128i __A, __m128i __B, __m128i __C) |
486 | { |
487 | return (__m128i)__builtin_ia32_vpshldvd128((__v4si)__A, (__v4si)__B, |
488 | (__v4si)__C); |
489 | } |
490 | |
491 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
492 | _mm_mask_shldv_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) |
493 | { |
494 | return (__m128i)__builtin_ia32_selectd_128(__U, |
495 | (__v4si)_mm_shldv_epi32(__A, __B, __C), |
496 | (__v4si)__A); |
497 | } |
498 | |
499 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
500 | _mm_maskz_shldv_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) |
501 | { |
502 | return (__m128i)__builtin_ia32_selectd_128(__U, |
503 | (__v4si)_mm_shldv_epi32(__A, __B, __C), |
504 | (__v4si)_mm_setzero_si128()); |
505 | } |
506 | |
507 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
508 | _mm256_shldv_epi16(__m256i __A, __m256i __B, __m256i __C) |
509 | { |
510 | return (__m256i)__builtin_ia32_vpshldvw256((__v16hi)__A, (__v16hi)__B, |
511 | (__v16hi)__C); |
512 | } |
513 | |
514 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
515 | _mm256_mask_shldv_epi16(__m256i __A, __mmask16 __U, __m256i __B, __m256i __C) |
516 | { |
517 | return (__m256i)__builtin_ia32_selectw_256(__U, |
518 | (__v16hi)_mm256_shldv_epi16(__A, __B, __C), |
519 | (__v16hi)__A); |
520 | } |
521 | |
522 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
523 | _mm256_maskz_shldv_epi16(__mmask16 __U, __m256i __A, __m256i __B, __m256i __C) |
524 | { |
525 | return (__m256i)__builtin_ia32_selectw_256(__U, |
526 | (__v16hi)_mm256_shldv_epi16(__A, __B, __C), |
527 | (__v16hi)_mm256_setzero_si256()); |
528 | } |
529 | |
530 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
531 | _mm_shldv_epi16(__m128i __A, __m128i __B, __m128i __C) |
532 | { |
533 | return (__m128i)__builtin_ia32_vpshldvw128((__v8hi)__A, (__v8hi)__B, |
534 | (__v8hi)__C); |
535 | } |
536 | |
537 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
538 | _mm_mask_shldv_epi16(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) |
539 | { |
540 | return (__m128i)__builtin_ia32_selectw_128(__U, |
541 | (__v8hi)_mm_shldv_epi16(__A, __B, __C), |
542 | (__v8hi)__A); |
543 | } |
544 | |
545 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
546 | _mm_maskz_shldv_epi16(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) |
547 | { |
548 | return (__m128i)__builtin_ia32_selectw_128(__U, |
549 | (__v8hi)_mm_shldv_epi16(__A, __B, __C), |
550 | (__v8hi)_mm_setzero_si128()); |
551 | } |
552 | |
553 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
554 | _mm256_shrdv_epi64(__m256i __A, __m256i __B, __m256i __C) |
555 | { |
556 | return (__m256i)__builtin_ia32_vpshrdvq256((__v4di)__A, (__v4di)__B, |
557 | (__v4di)__C); |
558 | } |
559 | |
560 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
561 | _mm256_mask_shrdv_epi64(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) |
562 | { |
563 | return (__m256i)__builtin_ia32_selectq_256(__U, |
564 | (__v4di)_mm256_shrdv_epi64(__A, __B, __C), |
565 | (__v4di)__A); |
566 | } |
567 | |
568 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
569 | _mm256_maskz_shrdv_epi64(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) |
570 | { |
571 | return (__m256i)__builtin_ia32_selectq_256(__U, |
572 | (__v4di)_mm256_shrdv_epi64(__A, __B, __C), |
573 | (__v4di)_mm256_setzero_si256()); |
574 | } |
575 | |
576 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
577 | _mm_shrdv_epi64(__m128i __A, __m128i __B, __m128i __C) |
578 | { |
579 | return (__m128i)__builtin_ia32_vpshrdvq128((__v2di)__A, (__v2di)__B, |
580 | (__v2di)__C); |
581 | } |
582 | |
583 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
584 | _mm_mask_shrdv_epi64(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) |
585 | { |
586 | return (__m128i)__builtin_ia32_selectq_128(__U, |
587 | (__v2di)_mm_shrdv_epi64(__A, __B, __C), |
588 | (__v2di)__A); |
589 | } |
590 | |
591 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
592 | _mm_maskz_shrdv_epi64(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) |
593 | { |
594 | return (__m128i)__builtin_ia32_selectq_128(__U, |
595 | (__v2di)_mm_shrdv_epi64(__A, __B, __C), |
596 | (__v2di)_mm_setzero_si128()); |
597 | } |
598 | |
599 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
600 | _mm256_shrdv_epi32(__m256i __A, __m256i __B, __m256i __C) |
601 | { |
602 | return (__m256i)__builtin_ia32_vpshrdvd256((__v8si)__A, (__v8si)__B, |
603 | (__v8si)__C); |
604 | } |
605 | |
606 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
607 | _mm256_mask_shrdv_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) |
608 | { |
609 | return (__m256i)__builtin_ia32_selectd_256(__U, |
610 | (__v8si)_mm256_shrdv_epi32(__A, __B, __C), |
611 | (__v8si)__A); |
612 | } |
613 | |
614 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
615 | _mm256_maskz_shrdv_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) |
616 | { |
617 | return (__m256i)__builtin_ia32_selectd_256(__U, |
618 | (__v8si)_mm256_shrdv_epi32(__A, __B, __C), |
619 | (__v8si)_mm256_setzero_si256()); |
620 | } |
621 | |
622 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
623 | _mm_shrdv_epi32(__m128i __A, __m128i __B, __m128i __C) |
624 | { |
625 | return (__m128i)__builtin_ia32_vpshrdvd128((__v4si)__A, (__v4si)__B, |
626 | (__v4si)__C); |
627 | } |
628 | |
629 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
630 | _mm_mask_shrdv_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) |
631 | { |
632 | return (__m128i)__builtin_ia32_selectd_128(__U, |
633 | (__v4si)_mm_shrdv_epi32(__A, __B, __C), |
634 | (__v4si)__A); |
635 | } |
636 | |
637 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
638 | _mm_maskz_shrdv_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) |
639 | { |
640 | return (__m128i)__builtin_ia32_selectd_128(__U, |
641 | (__v4si)_mm_shrdv_epi32(__A, __B, __C), |
642 | (__v4si)_mm_setzero_si128()); |
643 | } |
644 | |
645 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
646 | _mm256_shrdv_epi16(__m256i __A, __m256i __B, __m256i __C) |
647 | { |
648 | return (__m256i)__builtin_ia32_vpshrdvw256((__v16hi)__A, (__v16hi)__B, |
649 | (__v16hi)__C); |
650 | } |
651 | |
652 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
653 | _mm256_mask_shrdv_epi16(__m256i __A, __mmask16 __U, __m256i __B, __m256i __C) |
654 | { |
655 | return (__m256i)__builtin_ia32_selectw_256(__U, |
656 | (__v16hi)_mm256_shrdv_epi16(__A, __B, __C), |
657 | (__v16hi)__A); |
658 | } |
659 | |
660 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
661 | _mm256_maskz_shrdv_epi16(__mmask16 __U, __m256i __A, __m256i __B, __m256i __C) |
662 | { |
663 | return (__m256i)__builtin_ia32_selectw_256(__U, |
664 | (__v16hi)_mm256_shrdv_epi16(__A, __B, __C), |
665 | (__v16hi)_mm256_setzero_si256()); |
666 | } |
667 | |
668 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
669 | _mm_shrdv_epi16(__m128i __A, __m128i __B, __m128i __C) |
670 | { |
671 | return (__m128i)__builtin_ia32_vpshrdvw128((__v8hi)__A, (__v8hi)__B, |
672 | (__v8hi)__C); |
673 | } |
674 | |
675 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
676 | _mm_mask_shrdv_epi16(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) |
677 | { |
678 | return (__m128i)__builtin_ia32_selectw_128(__U, |
679 | (__v8hi)_mm_shrdv_epi16(__A, __B, __C), |
680 | (__v8hi)__A); |
681 | } |
682 | |
683 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
684 | _mm_maskz_shrdv_epi16(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) |
685 | { |
686 | return (__m128i)__builtin_ia32_selectw_128(__U, |
687 | (__v8hi)_mm_shrdv_epi16(__A, __B, __C), |
688 | (__v8hi)_mm_setzero_si128()); |
689 | } |
690 | |
691 | |
692 | #undef __DEFAULT_FN_ATTRS128 |
693 | #undef __DEFAULT_FN_ATTRS256 |
694 | |
695 | #endif |
696 |