Warning: This file is not a C or C++ file. It does not have highlighting.
/*===------------- avx512vbmi2intrin.h - VBMI2 intrinsics ------------------===
 *
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
10 | #ifndef __IMMINTRIN_H |
11 | #error "Never use <avx512vbmi2intrin.h> directly; include <immintrin.h> instead." |
12 | #endif |
13 | |
14 | #ifndef __AVX512VBMI2INTRIN_H |
15 | #define __AVX512VBMI2INTRIN_H |
16 | |
/* Attribute set applied to every inline function defined in this header:
 * always inlined, no debug info, compiled for the "avx512vbmi2,evex512"
 * target features, and a minimum vector width of 512. The macro is removed
 * again with #undef at the end of the header. */
#define __DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, __nodebug__, \
                 __target__("avx512vbmi2,evex512"), \
                 __min_vector_width__(512)))
19 | |
20 | |
21 | static __inline__ __m512i __DEFAULT_FN_ATTRS |
22 | _mm512_mask_compress_epi16(__m512i __S, __mmask32 __U, __m512i __D) |
23 | { |
24 | return (__m512i) __builtin_ia32_compresshi512_mask ((__v32hi) __D, |
25 | (__v32hi) __S, |
26 | __U); |
27 | } |
28 | |
29 | static __inline__ __m512i __DEFAULT_FN_ATTRS |
30 | _mm512_maskz_compress_epi16(__mmask32 __U, __m512i __D) |
31 | { |
32 | return (__m512i) __builtin_ia32_compresshi512_mask ((__v32hi) __D, |
33 | (__v32hi) _mm512_setzero_si512(), |
34 | __U); |
35 | } |
36 | |
37 | static __inline__ __m512i __DEFAULT_FN_ATTRS |
38 | _mm512_mask_compress_epi8(__m512i __S, __mmask64 __U, __m512i __D) |
39 | { |
40 | return (__m512i) __builtin_ia32_compressqi512_mask ((__v64qi) __D, |
41 | (__v64qi) __S, |
42 | __U); |
43 | } |
44 | |
45 | static __inline__ __m512i __DEFAULT_FN_ATTRS |
46 | _mm512_maskz_compress_epi8(__mmask64 __U, __m512i __D) |
47 | { |
48 | return (__m512i) __builtin_ia32_compressqi512_mask ((__v64qi) __D, |
49 | (__v64qi) _mm512_setzero_si512(), |
50 | __U); |
51 | } |
52 | |
53 | static __inline__ void __DEFAULT_FN_ATTRS |
54 | _mm512_mask_compressstoreu_epi16(void *__P, __mmask32 __U, __m512i __D) |
55 | { |
56 | __builtin_ia32_compressstorehi512_mask ((__v32hi *) __P, (__v32hi) __D, |
57 | __U); |
58 | } |
59 | |
60 | static __inline__ void __DEFAULT_FN_ATTRS |
61 | _mm512_mask_compressstoreu_epi8(void *__P, __mmask64 __U, __m512i __D) |
62 | { |
63 | __builtin_ia32_compressstoreqi512_mask ((__v64qi *) __P, (__v64qi) __D, |
64 | __U); |
65 | } |
66 | |
67 | static __inline__ __m512i __DEFAULT_FN_ATTRS |
68 | _mm512_mask_expand_epi16(__m512i __S, __mmask32 __U, __m512i __D) |
69 | { |
70 | return (__m512i) __builtin_ia32_expandhi512_mask ((__v32hi) __D, |
71 | (__v32hi) __S, |
72 | __U); |
73 | } |
74 | |
75 | static __inline__ __m512i __DEFAULT_FN_ATTRS |
76 | _mm512_maskz_expand_epi16(__mmask32 __U, __m512i __D) |
77 | { |
78 | return (__m512i) __builtin_ia32_expandhi512_mask ((__v32hi) __D, |
79 | (__v32hi) _mm512_setzero_si512(), |
80 | __U); |
81 | } |
82 | |
83 | static __inline__ __m512i __DEFAULT_FN_ATTRS |
84 | _mm512_mask_expand_epi8(__m512i __S, __mmask64 __U, __m512i __D) |
85 | { |
86 | return (__m512i) __builtin_ia32_expandqi512_mask ((__v64qi) __D, |
87 | (__v64qi) __S, |
88 | __U); |
89 | } |
90 | |
91 | static __inline__ __m512i __DEFAULT_FN_ATTRS |
92 | _mm512_maskz_expand_epi8(__mmask64 __U, __m512i __D) |
93 | { |
94 | return (__m512i) __builtin_ia32_expandqi512_mask ((__v64qi) __D, |
95 | (__v64qi) _mm512_setzero_si512(), |
96 | __U); |
97 | } |
98 | |
99 | static __inline__ __m512i __DEFAULT_FN_ATTRS |
100 | _mm512_mask_expandloadu_epi16(__m512i __S, __mmask32 __U, void const *__P) |
101 | { |
102 | return (__m512i) __builtin_ia32_expandloadhi512_mask ((const __v32hi *)__P, |
103 | (__v32hi) __S, |
104 | __U); |
105 | } |
106 | |
107 | static __inline__ __m512i __DEFAULT_FN_ATTRS |
108 | _mm512_maskz_expandloadu_epi16(__mmask32 __U, void const *__P) |
109 | { |
110 | return (__m512i) __builtin_ia32_expandloadhi512_mask ((const __v32hi *)__P, |
111 | (__v32hi) _mm512_setzero_si512(), |
112 | __U); |
113 | } |
114 | |
115 | static __inline__ __m512i __DEFAULT_FN_ATTRS |
116 | _mm512_mask_expandloadu_epi8(__m512i __S, __mmask64 __U, void const *__P) |
117 | { |
118 | return (__m512i) __builtin_ia32_expandloadqi512_mask ((const __v64qi *)__P, |
119 | (__v64qi) __S, |
120 | __U); |
121 | } |
122 | |
123 | static __inline__ __m512i __DEFAULT_FN_ATTRS |
124 | _mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P) |
125 | { |
126 | return (__m512i) __builtin_ia32_expandloadqi512_mask ((const __v64qi *)__P, |
127 | (__v64qi) _mm512_setzero_si512(), |
128 | __U); |
129 | } |
130 | |
/* Expands to __builtin_ia32_vpshldq512 applied to A and B (each converted to
 * __m512i, then viewed as __v8di) with I converted to int; the result is cast
 * to __m512i.
 * NOTE(review): kept as a macro, presumably because I must be a compile-time
 * constant for the builtin — confirm. */
#define _mm512_shldi_epi64(A, B, I) \
  ((__m512i)__builtin_ia32_vpshldq512( \
      (__v8di)(__m512i)(A), \
      (__v8di)(__m512i)(B), \
      (int)(I)))
134 | |
/* Masked form of _mm512_shldi_epi64: expands to __builtin_ia32_selectq_512 with
 * U (as __mmask8), the unmasked result for (A, B, I), and S (as __v8di) as the
 * third operand. */
#define _mm512_mask_shldi_epi64(S, U, A, B, I) \
  ((__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), \
      (__v8di)_mm512_shldi_epi64((A), (B), (I)), \
      (__v8di)(__m512i)(S)))
139 | |
/* Zero-fallback form of _mm512_shldi_epi64: expands to
 * __builtin_ia32_selectq_512 with U (as __mmask8), the unmasked result for
 * (A, B, I), and _mm512_setzero_si512() as the third operand. */
#define _mm512_maskz_shldi_epi64(U, A, B, I) \
  ((__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), \
      (__v8di)_mm512_shldi_epi64((A), (B), (I)), \
      (__v8di)_mm512_setzero_si512()))
144 | |
/* Expands to __builtin_ia32_vpshldd512 applied to A and B (each converted to
 * __m512i, then viewed as __v16si) with I converted to int; the result is cast
 * to __m512i.
 * NOTE(review): kept as a macro, presumably because I must be a compile-time
 * constant for the builtin — confirm. */
#define _mm512_shldi_epi32(A, B, I) \
  ((__m512i)__builtin_ia32_vpshldd512( \
      (__v16si)(__m512i)(A), \
      (__v16si)(__m512i)(B), \
      (int)(I)))
148 | |
/* Masked form of _mm512_shldi_epi32: expands to __builtin_ia32_selectd_512 with
 * U (as __mmask16), the unmasked result for (A, B, I), and S (as __v16si) as
 * the third operand. */
#define _mm512_mask_shldi_epi32(S, U, A, B, I) \
  ((__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(U), \
      (__v16si)_mm512_shldi_epi32((A), (B), (I)), \
      (__v16si)(__m512i)(S)))
153 | |
/* Zero-fallback form of _mm512_shldi_epi32: expands to
 * __builtin_ia32_selectd_512 with U (as __mmask16), the unmasked result for
 * (A, B, I), and _mm512_setzero_si512() as the third operand. */
#define _mm512_maskz_shldi_epi32(U, A, B, I) \
  ((__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(U), \
      (__v16si)_mm512_shldi_epi32((A), (B), (I)), \
      (__v16si)_mm512_setzero_si512()))
158 | |
/* Expands to __builtin_ia32_vpshldw512 applied to A and B (each converted to
 * __m512i, then viewed as __v32hi) with I converted to int; the result is cast
 * to __m512i.
 * NOTE(review): kept as a macro, presumably because I must be a compile-time
 * constant for the builtin — confirm. */
#define _mm512_shldi_epi16(A, B, I) \
  ((__m512i)__builtin_ia32_vpshldw512( \
      (__v32hi)(__m512i)(A), \
      (__v32hi)(__m512i)(B), \
      (int)(I)))
162 | |
/* Masked form of _mm512_shldi_epi16: expands to __builtin_ia32_selectw_512 with
 * U (as __mmask32), the unmasked result for (A, B, I), and S (as __v32hi) as
 * the third operand. */
#define _mm512_mask_shldi_epi16(S, U, A, B, I) \
  ((__m512i)__builtin_ia32_selectw_512( \
      (__mmask32)(U), \
      (__v32hi)_mm512_shldi_epi16((A), (B), (I)), \
      (__v32hi)(__m512i)(S)))
167 | |
/* Zero-fallback form of _mm512_shldi_epi16: expands to
 * __builtin_ia32_selectw_512 with U (as __mmask32), the unmasked result for
 * (A, B, I), and _mm512_setzero_si512() as the third operand. */
#define _mm512_maskz_shldi_epi16(U, A, B, I) \
  ((__m512i)__builtin_ia32_selectw_512( \
      (__mmask32)(U), \
      (__v32hi)_mm512_shldi_epi16((A), (B), (I)), \
      (__v32hi)_mm512_setzero_si512()))
172 | |
/* Expands to __builtin_ia32_vpshrdq512 applied to A and B (each converted to
 * __m512i, then viewed as __v8di) with I converted to int; the result is cast
 * to __m512i.
 * NOTE(review): kept as a macro, presumably because I must be a compile-time
 * constant for the builtin — confirm. */
#define _mm512_shrdi_epi64(A, B, I) \
  ((__m512i)__builtin_ia32_vpshrdq512( \
      (__v8di)(__m512i)(A), \
      (__v8di)(__m512i)(B), \
      (int)(I)))
176 | |
/* Masked form of _mm512_shrdi_epi64: expands to __builtin_ia32_selectq_512 with
 * U (as __mmask8), the unmasked result for (A, B, I), and S (as __v8di) as the
 * third operand. */
#define _mm512_mask_shrdi_epi64(S, U, A, B, I) \
  ((__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), \
      (__v8di)_mm512_shrdi_epi64((A), (B), (I)), \
      (__v8di)(__m512i)(S)))
181 | |
/* Zero-fallback form of _mm512_shrdi_epi64: expands to
 * __builtin_ia32_selectq_512 with U (as __mmask8), the unmasked result for
 * (A, B, I), and _mm512_setzero_si512() as the third operand. */
#define _mm512_maskz_shrdi_epi64(U, A, B, I) \
  ((__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), \
      (__v8di)_mm512_shrdi_epi64((A), (B), (I)), \
      (__v8di)_mm512_setzero_si512()))
186 | |
/* Expands to __builtin_ia32_vpshrdd512 applied to A and B (each converted to
 * __m512i, then viewed as __v16si) with I converted to int; the result is cast
 * to __m512i.
 * NOTE(review): kept as a macro, presumably because I must be a compile-time
 * constant for the builtin — confirm. */
#define _mm512_shrdi_epi32(A, B, I) \
  ((__m512i)__builtin_ia32_vpshrdd512( \
      (__v16si)(__m512i)(A), \
      (__v16si)(__m512i)(B), \
      (int)(I)))
190 | |
/* Masked form of _mm512_shrdi_epi32: expands to __builtin_ia32_selectd_512 with
 * U (as __mmask16), the unmasked result for (A, B, I), and S (as __v16si) as
 * the third operand. */
#define _mm512_mask_shrdi_epi32(S, U, A, B, I) \
  ((__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(U), \
      (__v16si)_mm512_shrdi_epi32((A), (B), (I)), \
      (__v16si)(__m512i)(S)))
195 | |
/* Zero-fallback form of _mm512_shrdi_epi32: expands to
 * __builtin_ia32_selectd_512 with U (as __mmask16), the unmasked result for
 * (A, B, I), and _mm512_setzero_si512() as the third operand. */
#define _mm512_maskz_shrdi_epi32(U, A, B, I) \
  ((__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(U), \
      (__v16si)_mm512_shrdi_epi32((A), (B), (I)), \
      (__v16si)_mm512_setzero_si512()))
200 | |
/* Expands to __builtin_ia32_vpshrdw512 applied to A and B (each converted to
 * __m512i, then viewed as __v32hi) with I converted to int; the result is cast
 * to __m512i.
 * NOTE(review): kept as a macro, presumably because I must be a compile-time
 * constant for the builtin — confirm. */
#define _mm512_shrdi_epi16(A, B, I) \
  ((__m512i)__builtin_ia32_vpshrdw512( \
      (__v32hi)(__m512i)(A), \
      (__v32hi)(__m512i)(B), \
      (int)(I)))
204 | |
/* Masked form of _mm512_shrdi_epi16: expands to __builtin_ia32_selectw_512 with
 * U (as __mmask32), the unmasked result for (A, B, I), and S (as __v32hi) as
 * the third operand. */
#define _mm512_mask_shrdi_epi16(S, U, A, B, I) \
  ((__m512i)__builtin_ia32_selectw_512( \
      (__mmask32)(U), \
      (__v32hi)_mm512_shrdi_epi16((A), (B), (I)), \
      (__v32hi)(__m512i)(S)))
209 | |
/* Zero-fallback form of _mm512_shrdi_epi16: expands to
 * __builtin_ia32_selectw_512 with U (as __mmask32), the unmasked result for
 * (A, B, I), and _mm512_setzero_si512() as the third operand. */
#define _mm512_maskz_shrdi_epi16(U, A, B, I) \
  ((__m512i)__builtin_ia32_selectw_512( \
      (__mmask32)(U), \
      (__v32hi)_mm512_shrdi_epi16((A), (B), (I)), \
      (__v32hi)_mm512_setzero_si512()))
214 | |
215 | static __inline__ __m512i __DEFAULT_FN_ATTRS |
216 | _mm512_shldv_epi64(__m512i __A, __m512i __B, __m512i __C) |
217 | { |
218 | return (__m512i)__builtin_ia32_vpshldvq512((__v8di)__A, (__v8di)__B, |
219 | (__v8di)__C); |
220 | } |
221 | |
222 | static __inline__ __m512i __DEFAULT_FN_ATTRS |
223 | _mm512_mask_shldv_epi64(__m512i __A, __mmask8 __U, __m512i __B, __m512i __C) |
224 | { |
225 | return (__m512i)__builtin_ia32_selectq_512(__U, |
226 | (__v8di)_mm512_shldv_epi64(__A, __B, __C), |
227 | (__v8di)__A); |
228 | } |
229 | |
230 | static __inline__ __m512i __DEFAULT_FN_ATTRS |
231 | _mm512_maskz_shldv_epi64(__mmask8 __U, __m512i __A, __m512i __B, __m512i __C) |
232 | { |
233 | return (__m512i)__builtin_ia32_selectq_512(__U, |
234 | (__v8di)_mm512_shldv_epi64(__A, __B, __C), |
235 | (__v8di)_mm512_setzero_si512()); |
236 | } |
237 | |
238 | static __inline__ __m512i __DEFAULT_FN_ATTRS |
239 | _mm512_shldv_epi32(__m512i __A, __m512i __B, __m512i __C) |
240 | { |
241 | return (__m512i)__builtin_ia32_vpshldvd512((__v16si)__A, (__v16si)__B, |
242 | (__v16si)__C); |
243 | } |
244 | |
245 | static __inline__ __m512i __DEFAULT_FN_ATTRS |
246 | _mm512_mask_shldv_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C) |
247 | { |
248 | return (__m512i)__builtin_ia32_selectd_512(__U, |
249 | (__v16si)_mm512_shldv_epi32(__A, __B, __C), |
250 | (__v16si)__A); |
251 | } |
252 | |
253 | static __inline__ __m512i __DEFAULT_FN_ATTRS |
254 | _mm512_maskz_shldv_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C) |
255 | { |
256 | return (__m512i)__builtin_ia32_selectd_512(__U, |
257 | (__v16si)_mm512_shldv_epi32(__A, __B, __C), |
258 | (__v16si)_mm512_setzero_si512()); |
259 | } |
260 | |
261 | static __inline__ __m512i __DEFAULT_FN_ATTRS |
262 | _mm512_shldv_epi16(__m512i __A, __m512i __B, __m512i __C) |
263 | { |
264 | return (__m512i)__builtin_ia32_vpshldvw512((__v32hi)__A, (__v32hi)__B, |
265 | (__v32hi)__C); |
266 | } |
267 | |
268 | static __inline__ __m512i __DEFAULT_FN_ATTRS |
269 | _mm512_mask_shldv_epi16(__m512i __A, __mmask32 __U, __m512i __B, __m512i __C) |
270 | { |
271 | return (__m512i)__builtin_ia32_selectw_512(__U, |
272 | (__v32hi)_mm512_shldv_epi16(__A, __B, __C), |
273 | (__v32hi)__A); |
274 | } |
275 | |
276 | static __inline__ __m512i __DEFAULT_FN_ATTRS |
277 | _mm512_maskz_shldv_epi16(__mmask32 __U, __m512i __A, __m512i __B, __m512i __C) |
278 | { |
279 | return (__m512i)__builtin_ia32_selectw_512(__U, |
280 | (__v32hi)_mm512_shldv_epi16(__A, __B, __C), |
281 | (__v32hi)_mm512_setzero_si512()); |
282 | } |
283 | |
284 | static __inline__ __m512i __DEFAULT_FN_ATTRS |
285 | _mm512_shrdv_epi64(__m512i __A, __m512i __B, __m512i __C) |
286 | { |
287 | return (__m512i)__builtin_ia32_vpshrdvq512((__v8di)__A, (__v8di)__B, |
288 | (__v8di)__C); |
289 | } |
290 | |
291 | static __inline__ __m512i __DEFAULT_FN_ATTRS |
292 | _mm512_mask_shrdv_epi64(__m512i __A, __mmask8 __U, __m512i __B, __m512i __C) |
293 | { |
294 | return (__m512i)__builtin_ia32_selectq_512(__U, |
295 | (__v8di)_mm512_shrdv_epi64(__A, __B, __C), |
296 | (__v8di)__A); |
297 | } |
298 | |
299 | static __inline__ __m512i __DEFAULT_FN_ATTRS |
300 | _mm512_maskz_shrdv_epi64(__mmask8 __U, __m512i __A, __m512i __B, __m512i __C) |
301 | { |
302 | return (__m512i)__builtin_ia32_selectq_512(__U, |
303 | (__v8di)_mm512_shrdv_epi64(__A, __B, __C), |
304 | (__v8di)_mm512_setzero_si512()); |
305 | } |
306 | |
307 | static __inline__ __m512i __DEFAULT_FN_ATTRS |
308 | _mm512_shrdv_epi32(__m512i __A, __m512i __B, __m512i __C) |
309 | { |
310 | return (__m512i)__builtin_ia32_vpshrdvd512((__v16si)__A, (__v16si)__B, |
311 | (__v16si)__C); |
312 | } |
313 | |
314 | static __inline__ __m512i __DEFAULT_FN_ATTRS |
315 | _mm512_mask_shrdv_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C) |
316 | { |
317 | return (__m512i) __builtin_ia32_selectd_512(__U, |
318 | (__v16si)_mm512_shrdv_epi32(__A, __B, __C), |
319 | (__v16si)__A); |
320 | } |
321 | |
322 | static __inline__ __m512i __DEFAULT_FN_ATTRS |
323 | _mm512_maskz_shrdv_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C) |
324 | { |
325 | return (__m512i) __builtin_ia32_selectd_512(__U, |
326 | (__v16si)_mm512_shrdv_epi32(__A, __B, __C), |
327 | (__v16si)_mm512_setzero_si512()); |
328 | } |
329 | |
330 | static __inline__ __m512i __DEFAULT_FN_ATTRS |
331 | _mm512_shrdv_epi16(__m512i __A, __m512i __B, __m512i __C) |
332 | { |
333 | return (__m512i)__builtin_ia32_vpshrdvw512((__v32hi)__A, (__v32hi)__B, |
334 | (__v32hi)__C); |
335 | } |
336 | |
337 | static __inline__ __m512i __DEFAULT_FN_ATTRS |
338 | _mm512_mask_shrdv_epi16(__m512i __A, __mmask32 __U, __m512i __B, __m512i __C) |
339 | { |
340 | return (__m512i)__builtin_ia32_selectw_512(__U, |
341 | (__v32hi)_mm512_shrdv_epi16(__A, __B, __C), |
342 | (__v32hi)__A); |
343 | } |
344 | |
345 | static __inline__ __m512i __DEFAULT_FN_ATTRS |
346 | _mm512_maskz_shrdv_epi16(__mmask32 __U, __m512i __A, __m512i __B, __m512i __C) |
347 | { |
348 | return (__m512i)__builtin_ia32_selectw_512(__U, |
349 | (__v32hi)_mm512_shrdv_epi16(__A, __B, __C), |
350 | (__v32hi)_mm512_setzero_si512()); |
351 | } |
352 | |
353 | |
354 | #undef __DEFAULT_FN_ATTRS |
355 | |
356 | #endif |
357 | |
358 |
Warning: This file is not a C or C++ file. It does not have highlighting.