Warning: This file is not a C or C++ file. It does not have highlighting.
1 | /*===--------------- sm4intrin.h - SM4 intrinsics -----------------=== |
---|---|
2 | * |
3 | * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | * See https://llvm.org/LICENSE.txt for license information. |
5 | * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | * |
7 | *===-----------------------------------------------------------------------=== |
8 | */ |
9 | |
10 | #ifndef __IMMINTRIN_H |
11 | #error "Never use <sm4intrin.h> directly; include <immintrin.h> instead." |
12 | #endif // __IMMINTRIN_H |
13 | |
14 | #ifndef __SM4INTRIN_H |
15 | #define __SM4INTRIN_H |
16 | |
17 | /// This intrinsic performs four rounds of SM4 key expansion. The intrinsic |
18 | /// operates on independent 128-bit lanes. The calculated results are |
19 | /// stored in \a dst. |
20 | /// \headerfile <immintrin.h> |
21 | /// |
22 | /// \code |
23 | /// __m128i _mm_sm4key4_epi32(__m128i __A, __m128i __B) |
24 | /// \endcode |
25 | /// |
26 | /// This intrinsic corresponds to the \c VSM4KEY4 instruction. |
27 | /// |
28 | /// \param __A |
29 | /// A 128-bit vector of [4 x int]. |
30 | /// \param __B |
31 | /// A 128-bit vector of [4 x int]. |
32 | /// \returns |
33 | /// A 128-bit vector of [4 x int]. |
34 | /// |
35 | /// \code{.operation} |
36 | /// DEFINE ROL32(dword, n) { |
37 | /// count := n % 32 |
38 | /// dest := (dword << count) | (dword >> (32-count)) |
39 | /// RETURN dest |
40 | /// } |
41 | /// DEFINE SBOX_BYTE(dword, i) { |
42 | /// RETURN sbox[dword.byte[i]] |
43 | /// } |
44 | /// DEFINE lower_t(dword) { |
45 | /// tmp.byte[0] := SBOX_BYTE(dword, 0) |
46 | /// tmp.byte[1] := SBOX_BYTE(dword, 1) |
47 | /// tmp.byte[2] := SBOX_BYTE(dword, 2) |
48 | /// tmp.byte[3] := SBOX_BYTE(dword, 3) |
49 | /// RETURN tmp |
50 | /// } |
51 | /// DEFINE L_KEY(dword) { |
52 | /// RETURN dword ^ ROL32(dword, 13) ^ ROL32(dword, 23) |
53 | /// } |
54 | /// DEFINE T_KEY(dword) { |
55 | /// RETURN L_KEY(lower_t(dword)) |
56 | /// } |
57 | /// DEFINE F_KEY(X0, X1, X2, X3, round_key) { |
58 | /// RETURN X0 ^ T_KEY(X1 ^ X2 ^ X3 ^ round_key) |
59 | /// } |
60 | /// FOR i:= 0 to 0 |
61 | /// P[0] := __B.xmm[i].dword[0] |
62 | /// P[1] := __B.xmm[i].dword[1] |
63 | /// P[2] := __B.xmm[i].dword[2] |
64 | /// P[3] := __B.xmm[i].dword[3] |
65 | /// C[0] := F_KEY(P[0], P[1], P[2], P[3], __A.xmm[i].dword[0]) |
66 | /// C[1] := F_KEY(P[1], P[2], P[3], C[0], __A.xmm[i].dword[1]) |
67 | /// C[2] := F_KEY(P[2], P[3], C[0], C[1], __A.xmm[i].dword[2]) |
68 | /// C[3] := F_KEY(P[3], C[0], C[1], C[2], __A.xmm[i].dword[3]) |
69 | /// DEST.xmm[i].dword[0] := C[0] |
70 | /// DEST.xmm[i].dword[1] := C[1] |
71 | /// DEST.xmm[i].dword[2] := C[2] |
72 | /// DEST.xmm[i].dword[3] := C[3] |
73 | /// ENDFOR |
74 | /// DEST[MAX:128] := 0 |
75 | /// \endcode |
// Wraps __builtin_ia32_vsm4key4128: both operands are reinterpreted as
// __v4su and the builtin's result is cast back to __m128i. Each argument and
// the whole expansion are parenthesized so that a compound argument
// expression (e.g. `a ^ b`) is cast as a unit and the macro can be embedded
// in a larger expression like an ordinary function call.
#define _mm_sm4key4_epi32(A, B)                                                \
  ((__m128i)__builtin_ia32_vsm4key4128((__v4su)(A), (__v4su)(B)))
78 | |
79 | /// This intrinsic performs four rounds of SM4 key expansion. The intrinsic |
80 | /// operates on independent 128-bit lanes. The calculated results are |
81 | /// stored in \a dst. |
82 | /// \headerfile <immintrin.h> |
83 | /// |
84 | /// \code |
85 | /// __m256i _mm256_sm4key4_epi32(__m256i __A, __m256i __B) |
86 | /// \endcode |
87 | /// |
88 | /// This intrinsic corresponds to the \c VSM4KEY4 instruction. |
89 | /// |
90 | /// \param __A |
91 | /// A 256-bit vector of [8 x int]. |
92 | /// \param __B |
93 | /// A 256-bit vector of [8 x int]. |
94 | /// \returns |
95 | /// A 256-bit vector of [8 x int]. |
96 | /// |
97 | /// \code{.operation} |
98 | /// DEFINE ROL32(dword, n) { |
99 | /// count := n % 32 |
100 | /// dest := (dword << count) | (dword >> (32-count)) |
101 | /// RETURN dest |
102 | /// } |
103 | /// DEFINE SBOX_BYTE(dword, i) { |
104 | /// RETURN sbox[dword.byte[i]] |
105 | /// } |
106 | /// DEFINE lower_t(dword) { |
107 | /// tmp.byte[0] := SBOX_BYTE(dword, 0) |
108 | /// tmp.byte[1] := SBOX_BYTE(dword, 1) |
109 | /// tmp.byte[2] := SBOX_BYTE(dword, 2) |
110 | /// tmp.byte[3] := SBOX_BYTE(dword, 3) |
111 | /// RETURN tmp |
112 | /// } |
113 | /// DEFINE L_KEY(dword) { |
114 | /// RETURN dword ^ ROL32(dword, 13) ^ ROL32(dword, 23) |
115 | /// } |
116 | /// DEFINE T_KEY(dword) { |
117 | /// RETURN L_KEY(lower_t(dword)) |
118 | /// } |
119 | /// DEFINE F_KEY(X0, X1, X2, X3, round_key) { |
120 | /// RETURN X0 ^ T_KEY(X1 ^ X2 ^ X3 ^ round_key) |
121 | /// } |
122 | /// FOR i:= 0 to 1 |
123 | /// P[0] := __B.xmm[i].dword[0] |
124 | /// P[1] := __B.xmm[i].dword[1] |
125 | /// P[2] := __B.xmm[i].dword[2] |
126 | /// P[3] := __B.xmm[i].dword[3] |
127 | /// C[0] := F_KEY(P[0], P[1], P[2], P[3], __A.xmm[i].dword[0]) |
128 | /// C[1] := F_KEY(P[1], P[2], P[3], C[0], __A.xmm[i].dword[1]) |
129 | /// C[2] := F_KEY(P[2], P[3], C[0], C[1], __A.xmm[i].dword[2]) |
130 | /// C[3] := F_KEY(P[3], C[0], C[1], C[2], __A.xmm[i].dword[3]) |
131 | /// DEST.xmm[i].dword[0] := C[0] |
132 | /// DEST.xmm[i].dword[1] := C[1] |
133 | /// DEST.xmm[i].dword[2] := C[2] |
134 | /// DEST.xmm[i].dword[3] := C[3] |
135 | /// ENDFOR |
136 | /// DEST[MAX:256] := 0 |
137 | /// \endcode |
// Wraps __builtin_ia32_vsm4key4256: both operands are reinterpreted as
// __v8su and the builtin's result is cast back to __m256i. Each argument and
// the whole expansion are parenthesized so that a compound argument
// expression (e.g. `a ^ b`) is cast as a unit and the macro can be embedded
// in a larger expression like an ordinary function call.
#define _mm256_sm4key4_epi32(A, B)                                             \
  ((__m256i)__builtin_ia32_vsm4key4256((__v8su)(A), (__v8su)(B)))
140 | |
141 | /// This intrinsic performs four rounds of SM4 encryption. The intrinsic |
142 | /// operates on independent 128-bit lanes. The calculated results are |
143 | /// stored in \a dst. |
144 | /// \headerfile <immintrin.h> |
145 | /// |
146 | /// \code |
147 | /// __m128i _mm_sm4rnds4_epi32(__m128i __A, __m128i __B) |
148 | /// \endcode |
149 | /// |
150 | /// This intrinsic corresponds to the \c VSM4RNDS4 instruction. |
151 | /// |
152 | /// \param __A |
153 | /// A 128-bit vector of [4 x int]. |
154 | /// \param __B |
155 | /// A 128-bit vector of [4 x int]. |
156 | /// \returns |
157 | /// A 128-bit vector of [4 x int]. |
158 | /// |
159 | /// \code{.operation} |
160 | /// DEFINE ROL32(dword, n) { |
161 | /// count := n % 32 |
162 | /// dest := (dword << count) | (dword >> (32-count)) |
163 | /// RETURN dest |
164 | /// } |
165 | /// DEFINE lower_t(dword) { |
166 | /// tmp.byte[0] := SBOX_BYTE(dword, 0) |
167 | /// tmp.byte[1] := SBOX_BYTE(dword, 1) |
168 | /// tmp.byte[2] := SBOX_BYTE(dword, 2) |
169 | /// tmp.byte[3] := SBOX_BYTE(dword, 3) |
170 | /// RETURN tmp |
171 | /// } |
172 | /// DEFINE L_RND(dword) { |
173 | /// tmp := dword |
174 | /// tmp := tmp ^ ROL32(dword, 2) |
175 | /// tmp := tmp ^ ROL32(dword, 10) |
176 | /// tmp := tmp ^ ROL32(dword, 18) |
177 | /// tmp := tmp ^ ROL32(dword, 24) |
178 | /// RETURN tmp |
179 | /// } |
180 | /// DEFINE T_RND(dword) { |
181 | /// RETURN L_RND(lower_t(dword)) |
182 | /// } |
183 | /// DEFINE F_RND(X0, X1, X2, X3, round_key) { |
184 | /// RETURN X0 ^ T_RND(X1 ^ X2 ^ X3 ^ round_key) |
185 | /// } |
186 | /// FOR i:= 0 to 0 |
187 | /// P[0] := __B.xmm[i].dword[0] |
188 | /// P[1] := __B.xmm[i].dword[1] |
189 | /// P[2] := __B.xmm[i].dword[2] |
190 | /// P[3] := __B.xmm[i].dword[3] |
191 | /// C[0] := F_RND(P[0], P[1], P[2], P[3], __A.xmm[i].dword[0]) |
192 | /// C[1] := F_RND(P[1], P[2], P[3], C[0], __A.xmm[i].dword[1]) |
193 | /// C[2] := F_RND(P[2], P[3], C[0], C[1], __A.xmm[i].dword[2]) |
194 | /// C[3] := F_RND(P[3], C[0], C[1], C[2], __A.xmm[i].dword[3]) |
195 | /// DEST.xmm[i].dword[0] := C[0] |
196 | /// DEST.xmm[i].dword[1] := C[1] |
197 | /// DEST.xmm[i].dword[2] := C[2] |
198 | /// DEST.xmm[i].dword[3] := C[3] |
199 | /// ENDFOR |
200 | /// DEST[MAX:128] := 0 |
201 | /// \endcode |
// Wraps __builtin_ia32_vsm4rnds4128: both operands are reinterpreted as
// __v4su and the builtin's result is cast back to __m128i. Each argument and
// the whole expansion are parenthesized so that a compound argument
// expression (e.g. `a ^ b`) is cast as a unit and the macro can be embedded
// in a larger expression like an ordinary function call.
#define _mm_sm4rnds4_epi32(A, B)                                               \
  ((__m128i)__builtin_ia32_vsm4rnds4128((__v4su)(A), (__v4su)(B)))
204 | |
205 | /// This intrinsic performs four rounds of SM4 encryption. The intrinsic |
206 | /// operates on independent 128-bit lanes. The calculated results are |
207 | /// stored in \a dst. |
208 | /// \headerfile <immintrin.h> |
209 | /// |
210 | /// \code |
211 | /// __m256i _mm256_sm4rnds4_epi32(__m256i __A, __m256i __B) |
212 | /// \endcode |
213 | /// |
214 | /// This intrinsic corresponds to the \c VSM4RNDS4 instruction. |
215 | /// |
216 | /// \param __A |
217 | /// A 256-bit vector of [8 x int]. |
218 | /// \param __B |
219 | /// A 256-bit vector of [8 x int]. |
220 | /// \returns |
221 | /// A 256-bit vector of [8 x int]. |
222 | /// |
223 | /// \code{.operation} |
224 | /// DEFINE ROL32(dword, n) { |
225 | /// count := n % 32 |
226 | /// dest := (dword << count) | (dword >> (32-count)) |
227 | /// RETURN dest |
228 | /// } |
229 | /// DEFINE lower_t(dword) { |
230 | /// tmp.byte[0] := SBOX_BYTE(dword, 0) |
231 | /// tmp.byte[1] := SBOX_BYTE(dword, 1) |
232 | /// tmp.byte[2] := SBOX_BYTE(dword, 2) |
233 | /// tmp.byte[3] := SBOX_BYTE(dword, 3) |
234 | /// RETURN tmp |
235 | /// } |
236 | /// DEFINE L_RND(dword) { |
237 | /// tmp := dword |
238 | /// tmp := tmp ^ ROL32(dword, 2) |
239 | /// tmp := tmp ^ ROL32(dword, 10) |
240 | /// tmp := tmp ^ ROL32(dword, 18) |
241 | /// tmp := tmp ^ ROL32(dword, 24) |
242 | /// RETURN tmp |
243 | /// } |
244 | /// DEFINE T_RND(dword) { |
245 | /// RETURN L_RND(lower_t(dword)) |
246 | /// } |
247 | /// DEFINE F_RND(X0, X1, X2, X3, round_key) { |
248 | /// RETURN X0 ^ T_RND(X1 ^ X2 ^ X3 ^ round_key) |
249 | /// } |
250 | /// FOR i:= 0 to 1 |
251 | /// P[0] := __B.xmm[i].dword[0] |
252 | /// P[1] := __B.xmm[i].dword[1] |
253 | /// P[2] := __B.xmm[i].dword[2] |
254 | /// P[3] := __B.xmm[i].dword[3] |
255 | /// C[0] := F_RND(P[0], P[1], P[2], P[3], __A.xmm[i].dword[0]) |
256 | /// C[1] := F_RND(P[1], P[2], P[3], C[0], __A.xmm[i].dword[1]) |
257 | /// C[2] := F_RND(P[2], P[3], C[0], C[1], __A.xmm[i].dword[2]) |
258 | /// C[3] := F_RND(P[3], C[0], C[1], C[2], __A.xmm[i].dword[3]) |
259 | /// DEST.xmm[i].dword[0] := C[0] |
260 | /// DEST.xmm[i].dword[1] := C[1] |
261 | /// DEST.xmm[i].dword[2] := C[2] |
262 | /// DEST.xmm[i].dword[3] := C[3] |
263 | /// ENDFOR |
264 | /// DEST[MAX:256] := 0 |
265 | /// \endcode |
// Wraps __builtin_ia32_vsm4rnds4256: both operands are reinterpreted as
// __v8su and the builtin's result is cast back to __m256i. Each argument and
// the whole expansion are parenthesized so that a compound argument
// expression (e.g. `a ^ b`) is cast as a unit and the macro can be embedded
// in a larger expression like an ordinary function call.
#define _mm256_sm4rnds4_epi32(A, B)                                            \
  ((__m256i)__builtin_ia32_vsm4rnds4256((__v8su)(A), (__v8su)(B)))
268 | |
269 | #endif // __SM4INTRIN_H |
270 |
Warning: This file is not a C or C++ file. It does not have highlighting.