Warning: This file is not a C or C++ file. It does not have highlighting.
1 | /*===----------------- avxifmaintrin.h - IFMA intrinsics -------------------=== |
---|---|
2 | * |
3 | * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | * See https://llvm.org/LICENSE.txt for license information. |
5 | * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | * |
7 | *===-----------------------------------------------------------------------=== |
8 | */ |
9 | |
10 | #ifndef __IMMINTRIN_H |
11 | #error "Never use <avxifmaintrin.h> directly; include <immintrin.h> instead." |
12 | #endif |
13 | |
14 | #ifndef __AVXIFMAINTRIN_H |
15 | #define __AVXIFMAINTRIN_H |
16 | |
17 | /* Attribute sets applied to every intrinsic defined in this file: force inlining (__always_inline__), apply __nodebug__, enable the "avxifma" target feature for the function, and declare the minimum vector width it needs (128 bits for the __m128i forms, 256 bits for the __m256i forms). Both macros are #undef'd again at the end of this header. */ |
18 | #define __DEFAULT_FN_ATTRS128 \ |
19 | __attribute__((__always_inline__, __nodebug__, __target__("avxifma"), \ |
20 | __min_vector_width__(128))) |
21 | #define __DEFAULT_FN_ATTRS256 \ |
22 | __attribute__((__always_inline__, __nodebug__, __target__("avxifma"), \ |
23 | __min_vector_width__(256))) |
24 | |
25 | // The intrinsics below must use the VEX-encoded forms of the instructions. |
26 | |
27 | /// Multiply packed unsigned 52-bit integers in each 64-bit element of \a __Y |
28 | /// and \a __Z to form a 104-bit intermediate result. Add the high 52-bit |
29 | /// unsigned integer from the intermediate result with the corresponding |
30 | /// unsigned 64-bit integer in \a __X, and store the results in \a dst. |
31 | /// |
32 | /// \headerfile <immintrin.h> |
33 | /// |
34 | /// \code |
35 | /// __m128i |
36 | /// _mm_madd52hi_avx_epu64 (__m128i __X, __m128i __Y, __m128i __Z) |
37 | /// \endcode |
38 | /// |
39 | /// This intrinsic corresponds to the \c VPMADD52HUQ instruction. |
40 | /// |
41 | /// \return |
42 | /// return __m128i dst. |
43 | /// \param __X |
44 | /// A 128-bit vector of [2 x i64] |
45 | /// \param __Y |
46 | /// A 128-bit vector of [2 x i64] |
47 | /// \param __Z |
48 | /// A 128-bit vector of [2 x i64] |
49 | /// |
50 | /// \code{.operation} |
51 | /// FOR j := 0 to 1 |
52 | /// i := j*64 |
53 | /// tmp[127:0] := ZeroExtend64(__Y[i+51:i]) * ZeroExtend64(__Z[i+51:i]) |
54 | /// dst[i+63:i] := __X[i+63:i] + ZeroExtend64(tmp[103:52]) |
55 | /// ENDFOR |
56 | /// dst[MAX:128] := 0 |
57 | /// \endcode |
58 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
59 | _mm_madd52hi_avx_epu64(__m128i __X, __m128i __Y, __m128i __Z) { |
60 | return (__m128i)__builtin_ia32_vpmadd52huq128((__v2di)__X, (__v2di)__Y, |
61 | (__v2di)__Z); |
62 | } |
63 | |
64 | /// Multiply packed unsigned 52-bit integers in each 64-bit element of \a __Y |
65 | /// and \a __Z to form a 104-bit intermediate result. Add the high 52-bit |
66 | /// unsigned integer from the intermediate result with the corresponding |
67 | /// unsigned 64-bit integer in \a __X, and store the results in \a dst. |
68 | /// |
69 | /// \headerfile <immintrin.h> |
70 | /// |
71 | /// \code |
72 | /// __m256i |
73 | /// _mm256_madd52hi_avx_epu64 (__m256i __X, __m256i __Y, __m256i __Z) |
74 | /// \endcode |
75 | /// |
76 | /// This intrinsic corresponds to the \c VPMADD52HUQ instruction. |
77 | /// |
78 | /// \return |
79 | /// return __m256i dst. |
80 | /// \param __X |
81 | /// A 256-bit vector of [4 x i64] |
82 | /// \param __Y |
83 | /// A 256-bit vector of [4 x i64] |
84 | /// \param __Z |
85 | /// A 256-bit vector of [4 x i64] |
86 | /// |
87 | /// \code{.operation} |
88 | /// FOR j := 0 to 3 |
89 | /// i := j*64 |
90 | /// tmp[127:0] := ZeroExtend64(__Y[i+51:i]) * ZeroExtend64(__Z[i+51:i]) |
91 | /// dst[i+63:i] := __X[i+63:i] + ZeroExtend64(tmp[103:52]) |
92 | /// ENDFOR |
93 | /// dst[MAX:256] := 0 |
94 | /// \endcode |
95 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
96 | _mm256_madd52hi_avx_epu64(__m256i __X, __m256i __Y, __m256i __Z) { |
97 | return (__m256i)__builtin_ia32_vpmadd52huq256((__v4di)__X, (__v4di)__Y, |
98 | (__v4di)__Z); |
99 | } |
100 | |
101 | /// Multiply packed unsigned 52-bit integers in each 64-bit element of \a __Y |
102 | /// and \a __Z to form a 104-bit intermediate result. Add the low 52-bit |
103 | /// unsigned integer from the intermediate result with the corresponding |
104 | /// unsigned 64-bit integer in \a __X, and store the results in \a dst. |
105 | /// |
106 | /// \headerfile <immintrin.h> |
107 | /// |
108 | /// \code |
109 | /// __m128i |
110 | /// _mm_madd52lo_avx_epu64 (__m128i __X, __m128i __Y, __m128i __Z) |
111 | /// \endcode |
112 | /// |
113 | /// This intrinsic corresponds to the \c VPMADD52LUQ instruction. |
114 | /// |
115 | /// \return |
116 | /// return __m128i dst. |
117 | /// \param __X |
118 | /// A 128-bit vector of [2 x i64] |
119 | /// \param __Y |
120 | /// A 128-bit vector of [2 x i64] |
121 | /// \param __Z |
122 | /// A 128-bit vector of [2 x i64] |
123 | /// |
124 | /// \code{.operation} |
125 | /// FOR j := 0 to 1 |
126 | /// i := j*64 |
127 | /// tmp[127:0] := ZeroExtend64(__Y[i+51:i]) * ZeroExtend64(__Z[i+51:i]) |
128 | /// dst[i+63:i] := __X[i+63:i] + ZeroExtend64(tmp[51:0]) |
129 | /// ENDFOR |
130 | /// dst[MAX:128] := 0 |
131 | /// \endcode |
132 | static __inline__ __m128i __DEFAULT_FN_ATTRS128 |
133 | _mm_madd52lo_avx_epu64(__m128i __X, __m128i __Y, __m128i __Z) { |
134 | return (__m128i)__builtin_ia32_vpmadd52luq128((__v2di)__X, (__v2di)__Y, |
135 | (__v2di)__Z); |
136 | } |
137 | |
138 | /// Multiply packed unsigned 52-bit integers in each 64-bit element of \a __Y |
139 | /// and \a __Z to form a 104-bit intermediate result. Add the low 52-bit |
140 | /// unsigned integer from the intermediate result with the corresponding |
141 | /// unsigned 64-bit integer in \a __X, and store the results in \a dst. |
142 | /// |
143 | /// \headerfile <immintrin.h> |
144 | /// |
145 | /// \code |
146 | /// __m256i |
147 | /// _mm256_madd52lo_avx_epu64 (__m256i __X, __m256i __Y, __m256i __Z) |
148 | /// \endcode |
149 | /// |
150 | /// This intrinsic corresponds to the \c VPMADD52LUQ instruction. |
151 | /// |
152 | /// \return |
153 | /// return __m256i dst. |
154 | /// \param __X |
155 | /// A 256-bit vector of [4 x i64] |
156 | /// \param __Y |
157 | /// A 256-bit vector of [4 x i64] |
158 | /// \param __Z |
159 | /// A 256-bit vector of [4 x i64] |
160 | /// |
161 | /// \code{.operation} |
162 | /// FOR j := 0 to 3 |
163 | /// i := j*64 |
164 | /// tmp[127:0] := ZeroExtend64(__Y[i+51:i]) * ZeroExtend64(__Z[i+51:i]) |
165 | /// dst[i+63:i] := __X[i+63:i] + ZeroExtend64(tmp[51:0]) |
166 | /// ENDFOR |
167 | /// dst[MAX:256] := 0 |
168 | /// \endcode |
169 | static __inline__ __m256i __DEFAULT_FN_ATTRS256 |
170 | _mm256_madd52lo_avx_epu64(__m256i __X, __m256i __Y, __m256i __Z) { |
171 | return (__m256i)__builtin_ia32_vpmadd52luq256((__v4di)__X, (__v4di)__Y, |
172 | (__v4di)__Z); |
173 | } |
174 | #undef __DEFAULT_FN_ATTRS128 |
175 | #undef __DEFAULT_FN_ATTRS256 |
176 | |
177 | #endif // __AVXIFMAINTRIN_H |
178 |
Warning: This file is not a C or C++ file. It does not have highlighting.