Warning: This file is not a C or C++ file. It does not have highlighting.
1 | /*===---- immintrin.h - Intel intrinsics -----------------------------------=== |
---|---|
2 | * |
3 | * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | * See https://llvm.org/LICENSE.txt for license information. |
5 | * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | * |
7 | *===-----------------------------------------------------------------------=== |
8 | */ |
9 | |
10 | #ifndef __IMMINTRIN_H |
11 | #define __IMMINTRIN_H |
12 | |
13 | #if !defined(__i386__) && !defined(__x86_64__) |
14 | #error "This header is only meant to be used on x86 and x64 architecture" |
15 | #endif |
16 | |
17 | #include <x86gprintrin.h> |
18 | |
19 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
20 | defined(__MMX__) |
21 | #include <mmintrin.h> |
22 | #endif |
23 | |
24 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
25 | defined(__SSE__) |
26 | #include <xmmintrin.h> |
27 | #endif |
28 | |
29 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
30 | defined(__SSE2__) |
31 | #include <emmintrin.h> |
32 | #endif |
33 | |
34 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
35 | defined(__SSE3__) |
36 | #include <pmmintrin.h> |
37 | #endif |
38 | |
39 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
40 | defined(__SSSE3__) |
41 | #include <tmmintrin.h> |
42 | #endif |
43 | |
44 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
45 | (defined(__SSE4_2__) || defined(__SSE4_1__)) |
46 | #include <smmintrin.h> |
47 | #endif |
48 | |
49 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
50 | (defined(__AES__) || defined(__PCLMUL__)) |
51 | #include <wmmintrin.h> |
52 | #endif |
53 | |
54 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
55 | defined(__CLFLUSHOPT__) |
56 | #include <clflushoptintrin.h> |
57 | #endif |
58 | |
59 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
60 | defined(__CLWB__) |
61 | #include <clwbintrin.h> |
62 | #endif |
63 | |
64 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
65 | defined(__AVX__) |
66 | #include <avxintrin.h> |
67 | #endif |
68 | |
69 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
70 | defined(__AVX2__) |
71 | #include <avx2intrin.h> |
72 | #endif |
73 | |
74 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
75 | defined(__F16C__) |
76 | #include <f16cintrin.h> |
77 | #endif |
78 | |
79 | /* No feature check desired due to internal checks */ |
80 | #include <bmiintrin.h> |
81 | |
82 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
83 | defined(__BMI2__) |
84 | #include <bmi2intrin.h> |
85 | #endif |
86 | |
87 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
88 | defined(__LZCNT__) |
89 | #include <lzcntintrin.h> |
90 | #endif |
91 | |
92 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
93 | defined(__POPCNT__) |
94 | #include <popcntintrin.h> |
95 | #endif |
96 | |
97 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
98 | defined(__FMA__) |
99 | #include <fmaintrin.h> |
100 | #endif |
101 | |
102 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
103 | defined(__AVX512F__) |
104 | #include <avx512fintrin.h> |
105 | #endif |
106 | |
107 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
108 | defined(__AVX512VL__) |
109 | #include <avx512vlintrin.h> |
110 | #endif |
111 | |
112 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
113 | defined(__AVX512BW__) |
114 | #include <avx512bwintrin.h> |
115 | #endif |
116 | |
117 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
118 | defined(__AVX512BITALG__) |
119 | #include <avx512bitalgintrin.h> |
120 | #endif |
121 | |
122 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
123 | defined(__AVX512CD__) |
124 | #include <avx512cdintrin.h> |
125 | #endif |
126 | |
127 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
128 | defined(__AVX512VPOPCNTDQ__) |
129 | #include <avx512vpopcntdqintrin.h> |
130 | #endif |
131 | |
132 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
133 | (defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__)) |
134 | #include <avx512vpopcntdqvlintrin.h> |
135 | #endif |
136 | |
137 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
138 | defined(__AVX512VNNI__) |
139 | #include <avx512vnniintrin.h> |
140 | #endif |
141 | |
142 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
143 | (defined(__AVX512VL__) && defined(__AVX512VNNI__)) |
144 | #include <avx512vlvnniintrin.h> |
145 | #endif |
146 | |
147 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
148 | defined(__AVXVNNI__) |
149 | #include <avxvnniintrin.h> |
150 | #endif |
151 | |
152 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
153 | defined(__AVX512DQ__) |
154 | #include <avx512dqintrin.h> |
155 | #endif |
156 | |
157 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
158 | (defined(__AVX512VL__) && defined(__AVX512BITALG__)) |
159 | #include <avx512vlbitalgintrin.h> |
160 | #endif |
161 | |
162 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
163 | (defined(__AVX512VL__) && defined(__AVX512BW__)) |
164 | #include <avx512vlbwintrin.h> |
165 | #endif |
166 | |
167 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
168 | (defined(__AVX512VL__) && defined(__AVX512CD__)) |
169 | #include <avx512vlcdintrin.h> |
170 | #endif |
171 | |
172 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
173 | (defined(__AVX512VL__) && defined(__AVX512DQ__)) |
174 | #include <avx512vldqintrin.h> |
175 | #endif |
176 | |
177 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
178 | defined(__AVX512ER__) |
179 | #include <avx512erintrin.h> |
180 | #endif |
181 | |
182 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
183 | defined(__AVX512IFMA__) |
184 | #include <avx512ifmaintrin.h> |
185 | #endif |
186 | |
187 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
188 | (defined(__AVX512IFMA__) && defined(__AVX512VL__)) |
189 | #include <avx512ifmavlintrin.h> |
190 | #endif |
191 | |
192 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
193 | defined(__AVXIFMA__) |
194 | #include <avxifmaintrin.h> |
195 | #endif |
196 | |
197 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
198 | defined(__AVX512VBMI__) |
199 | #include <avx512vbmiintrin.h> |
200 | #endif |
201 | |
202 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
203 | (defined(__AVX512VBMI__) && defined(__AVX512VL__)) |
204 | #include <avx512vbmivlintrin.h> |
205 | #endif |
206 | |
207 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
208 | defined(__AVX512VBMI2__) |
209 | #include <avx512vbmi2intrin.h> |
210 | #endif |
211 | |
212 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
213 | (defined(__AVX512VBMI2__) && defined(__AVX512VL__)) |
214 | #include <avx512vlvbmi2intrin.h> |
215 | #endif |
216 | |
217 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
218 | defined(__AVX512PF__) |
219 | #include <avx512pfintrin.h> |
220 | #endif |
221 | |
222 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
223 | defined(__AVX512FP16__) |
224 | #include <avx512fp16intrin.h> |
225 | #endif |
226 | |
227 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
228 | (defined(__AVX512VL__) && defined(__AVX512FP16__)) |
229 | #include <avx512vlfp16intrin.h> |
230 | #endif |
231 | |
232 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
233 | defined(__AVX512BF16__) |
234 | #include <avx512bf16intrin.h> |
235 | #endif |
236 | |
237 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
238 | (defined(__AVX512VL__) && defined(__AVX512BF16__)) |
239 | #include <avx512vlbf16intrin.h> |
240 | #endif |
241 | |
242 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
243 | defined(__PKU__) |
244 | #include <pkuintrin.h> |
245 | #endif |
246 | |
247 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
248 | defined(__VPCLMULQDQ__) |
249 | #include <vpclmulqdqintrin.h> |
250 | #endif |
251 | |
252 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
253 | defined(__VAES__) |
254 | #include <vaesintrin.h> |
255 | #endif |
256 | |
257 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
258 | defined(__GFNI__) |
259 | #include <gfniintrin.h> |
260 | #endif |
261 | |
262 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
263 | defined(__AVXVNNIINT8__) |
264 | #include <avxvnniint8intrin.h> |
265 | #endif |
266 | |
267 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
268 | defined(__AVXNECONVERT__) |
269 | #include <avxneconvertintrin.h> |
270 | #endif |
271 | |
272 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
273 | defined(__SHA512__) |
274 | #include <sha512intrin.h> |
275 | #endif |
276 | |
277 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
278 | defined(__SM3__) |
279 | #include <sm3intrin.h> |
280 | #endif |
281 | |
282 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
283 | defined(__SM4__) |
284 | #include <sm4intrin.h> |
285 | #endif |
286 | |
287 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
288 | defined(__AVXVNNIINT16__) |
289 | #include <avxvnniint16intrin.h> |
290 | #endif |
291 | |
292 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
293 | defined(__RDPID__) |
/// Reads the IA32_TSC_AUX model-specific register (MSR 0xc0000103).
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDPID </c> instruction.
///
/// \returns The contents of the MSR as an unsigned 32-bit integer.
static __inline__ unsigned int
__attribute__((__always_inline__, __nodebug__, __target__("rdpid")))
_rdpid_u32(void)
{
  unsigned int __tsc_aux = __builtin_ia32_rdpid();
  return __tsc_aux;
}
305 | #endif // __RDPID__ |
306 | |
307 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
308 | defined(__RDRND__) |
/// Requests a 16-bit random value from the hardware generator.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDRAND </c> instruction.
///
/// \param __p
///    Pointer to the 16-bit memory location that receives the random value.
/// \returns 1 when a value was generated successfully, 0 otherwise.
static __inline__ int
__attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand16_step(unsigned short *__p) {
  int __ok = (int)__builtin_ia32_rdrand16_step(__p);
  return __ok;
}
323 | |
/// Requests a 32-bit random value from the hardware generator.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDRAND </c> instruction.
///
/// \param __p
///    Pointer to the 32-bit memory location that receives the random value.
/// \returns 1 when a value was generated successfully, 0 otherwise.
static __inline__ int
__attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand32_step(unsigned int *__p) {
  int __ok = (int)__builtin_ia32_rdrand32_step(__p);
  return __ok;
}
338 | |
/// Requests a 64-bit random value from the hardware generator.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDRAND </c> instruction.
///
/// When not compiling for x86-64, the result is assembled from two 32-bit
/// RDRAND requests (low half first, then high half). If either request
/// fails, zero is stored through \a __p and 0 is returned.
///
/// \param __p
///    Pointer to the 64-bit memory location that receives the random value.
/// \returns 1 when a value was generated successfully, 0 otherwise.
static __inline__ int
__attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand64_step(unsigned long long *__p) {
#ifdef __x86_64__
  return (int)__builtin_ia32_rdrand64_step(__p);
#else
  // No 64-bit RDRAND form is used here; issue two 32-bit requests and
  // stitch the halves together.
  unsigned int __half_lo, __half_hi;
  unsigned int __ok_lo = __builtin_ia32_rdrand32_step(&__half_lo);
  unsigned int __ok_hi = __builtin_ia32_rdrand32_step(&__half_hi);
  if (!(__ok_lo && __ok_hi)) {
    // At least one half failed: report failure with a zeroed result.
    *__p = 0;
    return 0;
  }
  *__p = ((unsigned long long)__half_hi << 32) | (unsigned long long)__half_lo;
  return 1;
#endif
}
368 | #endif /* __RDRND__ */ |
369 | |
370 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
371 | defined(__FSGSBASE__) |
372 | #ifdef __x86_64__ |
/// Reads the FS base register (32-bit form).
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDFSBASE </c> instruction.
///
/// \returns The lower 32 bits of the FS base register.
static __inline__ unsigned int
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readfsbase_u32(void) {
  unsigned int __fs_lo = __builtin_ia32_rdfsbase32();
  return __fs_lo;
}
385 | |
/// Reads the FS base register (64-bit form).
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDFSBASE </c> instruction.
///
/// \returns The contents of the FS base register.
static __inline__ unsigned long long
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readfsbase_u64(void) {
  unsigned long long __fs_base = __builtin_ia32_rdfsbase64();
  return __fs_base;
}
398 | |
/// Reads the GS base register (32-bit form).
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDGSBASE </c> instruction.
///
/// \returns The lower 32 bits of the GS base register.
static __inline__ unsigned int
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readgsbase_u32(void) {
  unsigned int __gs_lo = __builtin_ia32_rdgsbase32();
  return __gs_lo;
}
411 | |
/// Reads the GS base register (64-bit form).
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDGSBASE </c> instruction.
///
/// \returns The contents of the GS base register.
static __inline__ unsigned long long
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readgsbase_u64(void) {
  unsigned long long __gs_base = __builtin_ia32_rdgsbase64();
  return __gs_base;
}
424 | |
/// Writes the FS base register (32-bit form).
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> WRFSBASE </c> instruction.
///
/// \param __V
///    Value to use for the lower 32 bits of the FS base register.
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writefsbase_u32(unsigned int __V) {
  __builtin_ia32_wrfsbase32(__V);
}
438 | |
/// Writes the FS base register (64-bit form).
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> WRFSBASE </c> instruction.
///
/// \param __V
///    Value to use for the FS base register.
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writefsbase_u64(unsigned long long __V) {
  __builtin_ia32_wrfsbase64(__V);
}
452 | |
/// Writes the GS base register (32-bit form).
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> WRGSBASE </c> instruction.
///
/// \param __V
///    Value to use for the lower 32 bits of the GS base register.
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writegsbase_u32(unsigned int __V) {
  __builtin_ia32_wrgsbase32(__V);
}
466 | |
/// Modifies the GS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> WRGSBASE </c> instruction.
///
/// \param __V
///    Value to use for the GS base register.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writegsbase_u64(unsigned long long __V)
{
  __builtin_ia32_wrgsbase64(__V);
}
480 | |
481 | #endif |
482 | #endif /* __FSGSBASE__ */ |
483 | |
484 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
485 | defined(__MOVBE__) |
486 | |
487 | /* The structs used below are to force the load/store to be unaligned. This |
488 | * is accomplished with the __packed__ attribute. The __may_alias__ prevents |
489 | * tbaa metadata from being generated based on the struct and the type of the |
490 | * field inside of it. |
491 | */ |
492 | |
/// Reads a 16-bit value from memory and returns it with its bytes reversed.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the MOVBE instruction.
///
/// \param __P
///    A pointer to the 16-bit value to load.
/// \returns The byte-swapped value.
static __inline__ short
__attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i16(void const *__P)
{
  /* Packed + may_alias view: permits an unaligned, alias-safe access. */
  struct __be16_src {
    unsigned short __bits;
  } __attribute__((__packed__, __may_alias__));
  const struct __be16_src *__src = (const struct __be16_src *)__P;
  return (short)__builtin_bswap16(__src->__bits);
}
509 | |
/// Reverses the bytes of a 16-bit value and writes the result to memory.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the MOVBE instruction.
///
/// \param __P
///    A pointer to the memory for storing the swapped value.
/// \param __D
///    The 16-bit value to be byte-swapped.
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i16(void *__P, short __D)
{
  /* Packed + may_alias view: permits an unaligned, alias-safe access. */
  struct __be16_dst {
    unsigned short __bits;
  } __attribute__((__packed__, __may_alias__));
  struct __be16_dst *__dst = (struct __be16_dst *)__P;
  __dst->__bits = __builtin_bswap16((unsigned short)__D);
}
527 | |
/// Reads a 32-bit value from memory and returns it with its bytes reversed.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the MOVBE instruction.
///
/// \param __P
///    A pointer to the 32-bit value to load.
/// \returns The byte-swapped value.
static __inline__ int
__attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i32(void const *__P)
{
  /* Packed + may_alias view: permits an unaligned, alias-safe access. */
  struct __be32_src {
    unsigned int __bits;
  } __attribute__((__packed__, __may_alias__));
  const struct __be32_src *__src = (const struct __be32_src *)__P;
  return (int)__builtin_bswap32(__src->__bits);
}
544 | |
/// Reverses the bytes of a 32-bit value and writes the result to memory.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the MOVBE instruction.
///
/// \param __P
///    A pointer to the memory for storing the swapped value.
/// \param __D
///    The 32-bit value to be byte-swapped.
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i32(void *__P, int __D)
{
  /* Packed + may_alias view: permits an unaligned, alias-safe access. */
  struct __be32_dst {
    unsigned int __bits;
  } __attribute__((__packed__, __may_alias__));
  struct __be32_dst *__dst = (struct __be32_dst *)__P;
  __dst->__bits = __builtin_bswap32((unsigned int)__D);
}
562 | |
563 | #ifdef __x86_64__ |
/// Reads a 64-bit value from memory and returns it with its bytes reversed.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the MOVBE instruction.
///
/// \param __P
///    A pointer to the 64-bit value to load.
/// \returns The byte-swapped value.
static __inline__ long long
__attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i64(void const *__P)
{
  /* Packed + may_alias view: permits an unaligned, alias-safe access. */
  struct __be64_src {
    unsigned long long __bits;
  } __attribute__((__packed__, __may_alias__));
  const struct __be64_src *__src = (const struct __be64_src *)__P;
  return (long long)__builtin_bswap64(__src->__bits);
}
580 | |
/// Reverses the bytes of a 64-bit value and writes the result to memory.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the MOVBE instruction.
///
/// \param __P
///    A pointer to the memory for storing the swapped value.
/// \param __D
///    The 64-bit value to be byte-swapped.
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i64(void *__P, long long __D)
{
  /* Packed + may_alias view: permits an unaligned, alias-safe access. */
  struct __be64_dst {
    unsigned long long __bits;
  } __attribute__((__packed__, __may_alias__));
  struct __be64_dst *__dst = (struct __be64_dst *)__P;
  __dst->__bits = __builtin_bswap64((unsigned long long)__D);
}
598 | #endif |
599 | #endif /* __MOVBE__ */ |
600 | |
601 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
602 | defined(__RTM__) |
603 | #include <rtmintrin.h> |
604 | #include <xtestintrin.h> |
605 | #endif |
606 | |
607 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
608 | defined(__SHA__) |
609 | #include <shaintrin.h> |
610 | #endif |
611 | |
612 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
613 | defined(__FXSR__) |
614 | #include <fxsrintrin.h> |
615 | #endif |
616 | |
617 | /* No feature check desired due to internal MSC_VER checks */ |
618 | #include <xsaveintrin.h> |
619 | |
620 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
621 | defined(__XSAVEOPT__) |
622 | #include <xsaveoptintrin.h> |
623 | #endif |
624 | |
625 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
626 | defined(__XSAVEC__) |
627 | #include <xsavecintrin.h> |
628 | #endif |
629 | |
630 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
631 | defined(__XSAVES__) |
632 | #include <xsavesintrin.h> |
633 | #endif |
634 | |
635 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
636 | defined(__SHSTK__) |
637 | #include <cetintrin.h> |
638 | #endif |
639 | |
640 | /* Intrinsics inside adcintrin.h are available at all times. */ |
641 | #include <adcintrin.h> |
642 | |
643 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
644 | defined(__ADX__) |
645 | #include <adxintrin.h> |
646 | #endif |
647 | |
648 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
649 | defined(__RDSEED__) |
650 | #include <rdseedintrin.h> |
651 | #endif |
652 | |
653 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
654 | defined(__WBNOINVD__) |
655 | #include <wbnoinvdintrin.h> |
656 | #endif |
657 | |
658 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
659 | defined(__CLDEMOTE__) |
660 | #include <cldemoteintrin.h> |
661 | #endif |
662 | |
663 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
664 | defined(__WAITPKG__) |
665 | #include <waitpkgintrin.h> |
666 | #endif |
667 | |
668 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
669 | defined(__MOVDIRI__) || defined(__MOVDIR64B__) |
670 | #include <movdirintrin.h> |
671 | #endif |
672 | |
673 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
674 | defined(__PCONFIG__) |
675 | #include <pconfigintrin.h> |
676 | #endif |
677 | |
678 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
679 | defined(__SGX__) |
680 | #include <sgxintrin.h> |
681 | #endif |
682 | |
683 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
684 | defined(__PTWRITE__) |
685 | #include <ptwriteintrin.h> |
686 | #endif |
687 | |
688 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
689 | defined(__INVPCID__) |
690 | #include <invpcidintrin.h> |
691 | #endif |
692 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
693 | defined(__AMX_FP16__) |
694 | #include <amxfp16intrin.h> |
695 | #endif |
696 | |
697 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
698 | defined(__KL__) || defined(__WIDEKL__) |
699 | #include <keylockerintrin.h> |
700 | #endif |
701 | |
702 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
703 | defined(__AMX_TILE__) || defined(__AMX_INT8__) || defined(__AMX_BF16__) |
704 | #include <amxintrin.h> |
705 | #endif |
706 | |
707 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
708 | defined(__AMX_COMPLEX__) |
709 | #include <amxcomplexintrin.h> |
710 | #endif |
711 | |
712 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
713 | defined(__AVX512VP2INTERSECT__) |
714 | #include <avx512vp2intersectintrin.h> |
715 | #endif |
716 | |
717 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
718 | (defined(__AVX512VL__) && defined(__AVX512VP2INTERSECT__)) |
719 | #include <avx512vlvp2intersectintrin.h> |
720 | #endif |
721 | |
722 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
723 | defined(__ENQCMD__) |
724 | #include <enqcmdintrin.h> |
725 | #endif |
726 | |
727 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
728 | defined(__SERIALIZE__) |
729 | #include <serializeintrin.h> |
730 | #endif |
731 | |
732 | #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ |
733 | defined(__TSXLDTRK__) |
734 | #include <tsxldtrkintrin.h> |
735 | #endif |
736 | |
737 | #if defined(_MSC_VER) && __has_extension(gnu_asm) |
738 | /* Define the default attributes for these intrinsics */ |
739 | #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) |
740 | #ifdef __cplusplus |
741 | extern "C" { |
742 | #endif |
743 | /*----------------------------------------------------------------------------*\ |
744 | |* Interlocked Exchange HLE |
745 | \*----------------------------------------------------------------------------*/ |
746 | #if defined(__i386__) || defined(__x86_64__) |
747 | static __inline__ long __DEFAULT_FN_ATTRS |
748 | _InterlockedExchange_HLEAcquire(long volatile *_Target, long _Value) { |
749 | __asm__ __volatile__(".byte 0xf2 ; lock ; xchg {%0, %1|%1, %0}" |
750 | : "+r" (_Value), "+m" (*_Target) :: "memory"); |
751 | return _Value; |
752 | } |
753 | static __inline__ long __DEFAULT_FN_ATTRS |
754 | _InterlockedExchange_HLERelease(long volatile *_Target, long _Value) { |
755 | __asm__ __volatile__(".byte 0xf3 ; lock ; xchg {%0, %1|%1, %0}" |
756 | : "+r" (_Value), "+m" (*_Target) :: "memory"); |
757 | return _Value; |
758 | } |
759 | #endif |
760 | #if defined(__x86_64__) |
761 | static __inline__ __int64 __DEFAULT_FN_ATTRS |
762 | _InterlockedExchange64_HLEAcquire(__int64 volatile *_Target, __int64 _Value) { |
763 | __asm__ __volatile__(".byte 0xf2 ; lock ; xchg {%0, %1|%1, %0}" |
764 | : "+r" (_Value), "+m" (*_Target) :: "memory"); |
765 | return _Value; |
766 | } |
767 | static __inline__ __int64 __DEFAULT_FN_ATTRS |
768 | _InterlockedExchange64_HLERelease(__int64 volatile *_Target, __int64 _Value) { |
769 | __asm__ __volatile__(".byte 0xf3 ; lock ; xchg {%0, %1|%1, %0}" |
770 | : "+r" (_Value), "+m" (*_Target) :: "memory"); |
771 | return _Value; |
772 | } |
773 | #endif |
774 | /*----------------------------------------------------------------------------*\ |
775 | |* Interlocked Compare Exchange HLE |
776 | \*----------------------------------------------------------------------------*/ |
777 | #if defined(__i386__) || defined(__x86_64__) |
778 | static __inline__ long __DEFAULT_FN_ATTRS |
779 | _InterlockedCompareExchange_HLEAcquire(long volatile *_Destination, |
780 | long _Exchange, long _Comparand) { |
781 | __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg {%2, %1|%1, %2}" |
782 | : "+a" (_Comparand), "+m" (*_Destination) |
783 | : "r" (_Exchange) : "memory"); |
784 | return _Comparand; |
785 | } |
786 | static __inline__ long __DEFAULT_FN_ATTRS |
787 | _InterlockedCompareExchange_HLERelease(long volatile *_Destination, |
788 | long _Exchange, long _Comparand) { |
789 | __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg {%2, %1|%1, %2}" |
790 | : "+a" (_Comparand), "+m" (*_Destination) |
791 | : "r" (_Exchange) : "memory"); |
792 | return _Comparand; |
793 | } |
794 | #endif |
795 | #if defined(__x86_64__) |
796 | static __inline__ __int64 __DEFAULT_FN_ATTRS |
797 | _InterlockedCompareExchange64_HLEAcquire(__int64 volatile *_Destination, |
798 | __int64 _Exchange, __int64 _Comparand) { |
799 | __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg {%2, %1|%1, %2}" |
800 | : "+a" (_Comparand), "+m" (*_Destination) |
801 | : "r" (_Exchange) : "memory"); |
802 | return _Comparand; |
803 | } |
804 | static __inline__ __int64 __DEFAULT_FN_ATTRS |
805 | _InterlockedCompareExchange64_HLERelease(__int64 volatile *_Destination, |
806 | __int64 _Exchange, __int64 _Comparand) { |
807 | __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg {%2, %1|%1, %2}" |
808 | : "+a" (_Comparand), "+m" (*_Destination) |
809 | : "r" (_Exchange) : "memory"); |
810 | return _Comparand; |
811 | } |
812 | #endif |
813 | #ifdef __cplusplus |
814 | } |
815 | #endif |
816 | |
817 | #undef __DEFAULT_FN_ATTRS |
818 | |
819 | #endif /* defined(_MSC_VER) && __has_extension(gnu_asm) */ |
820 | |
821 | #endif /* __IMMINTRIN_H */ |
822 |
Warning: This file is not a C or C++ file. It does not have highlighting.