1/* SPDX-License-Identifier: GPL-2.0-or-later */
2/*
3 * SM4 helper macros for Crypto Extensions
4 * Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
5 */
6
/*
 * SM4_PREPARE(ptr) - fill v24..v31 with 128 bytes read from [ptr].
 *
 * These eight vectors are the second (round-key) operand of every sm4e
 * issued by the SM4_CRYPT_BLK* macros, so this must run before any of them.
 * Presumably ptr addresses the expanded SM4 round-key array - confirm
 * against the caller.
 *
 * Side effects: v24-v31 are overwritten; ptr is post-incremented by 64 by
 * the first load and is left pointing at the second half of the data.
 */
#define SM4_PREPARE(ptr)								\
	ld1		{v24.16b, v25.16b, v26.16b, v27.16b}, [ptr], #64;		\
	ld1		{v28.16b, v29.16b, v30.16b, v31.16b}, [ptr];
10
/*
 * SM4_CRYPT_BLK_BE(b0) - run one 128-bit block through SM4, in place.
 *
 * b0 is a vector register name (e.g. v0); it is both input and output.
 * Unlike SM4_CRYPT_BLK, no byte swap is applied to the input, so the four
 * 32-bit words of b0 must already be in the form sm4e expects.
 *
 * Requires v24-v31 to hold the round keys (see SM4_PREPARE).  Each sm4e
 * performs four SM4 rounds, so the eight of them cover all 32 rounds.
 */
#define SM4_CRYPT_BLK_BE(b0)					\
	/* 32 rounds: one sm4e per round-key vector */		\
	sm4e		b0.4s, v24.4s;				\
	sm4e		b0.4s, v25.4s;				\
	sm4e		b0.4s, v26.4s;				\
	sm4e		b0.4s, v27.4s;				\
	sm4e		b0.4s, v28.4s;				\
	sm4e		b0.4s, v29.4s;				\
	sm4e		b0.4s, v30.4s;				\
	sm4e		b0.4s, v31.4s;				\
	/* rev64 + ext #8: reverse the order of the 4 words */	\
	rev64		b0.4s, b0.4s;				\
	ext		b0.16b, b0.16b, b0.16b, #8;		\
	/* byte-swap each 32-bit word of the result */		\
	rev32		b0.16b, b0.16b;
23
/*
 * SM4_CRYPT_BLK(b0) - SM4-process one block held in vector register b0,
 * in place.
 *
 * Byte-swaps every 32-bit word of b0 (rev32) and then expands
 * SM4_CRYPT_BLK_BE(b0), which also byte-swaps the result on the way out.
 * Requires the round keys in v24-v31.
 */
#define SM4_CRYPT_BLK(b0)					\
	rev32		b0.16b, b0.16b;				\
	SM4_CRYPT_BLK_BE(b0);
27
/*
 * SM4_CRYPT_BLK2_BE(b0, b1) - run two independent 128-bit blocks through
 * SM4, in place.
 *
 * b0 and b1 are distinct vector register names; each is both input and
 * output.  No byte swap is applied to the inputs (compare SM4_CRYPT_BLK2).
 * Requires v24-v31 to hold the round keys (see SM4_PREPARE).
 *
 * The two blocks are interleaved per round-key vector so that back-to-back
 * sm4e instructions never depend on each other (presumably to hide sm4e
 * latency).
 */
#define SM4_CRYPT_BLK2_BE(b0, b1)				\
	/* rounds 0-3 (v24) */					\
	sm4e		b0.4s, v24.4s;				\
	sm4e		b1.4s, v24.4s;				\
	/* rounds 4-7 (v25) */					\
	sm4e		b0.4s, v25.4s;				\
	sm4e		b1.4s, v25.4s;				\
	/* rounds 8-11 (v26) */					\
	sm4e		b0.4s, v26.4s;				\
	sm4e		b1.4s, v26.4s;				\
	/* rounds 12-15 (v27) */				\
	sm4e		b0.4s, v27.4s;				\
	sm4e		b1.4s, v27.4s;				\
	/* rounds 16-19 (v28) */				\
	sm4e		b0.4s, v28.4s;				\
	sm4e		b1.4s, v28.4s;				\
	/* rounds 20-23 (v29) */				\
	sm4e		b0.4s, v29.4s;				\
	sm4e		b1.4s, v29.4s;				\
	/* rounds 24-27 (v30) */				\
	sm4e		b0.4s, v30.4s;				\
	sm4e		b1.4s, v30.4s;				\
	/* rounds 28-31 (v31) */				\
	sm4e		b0.4s, v31.4s;				\
	sm4e		b1.4s, v31.4s;				\
	/* rev64 + ext #8: reverse the order of the 4 words */	\
	rev64		b0.4s, b0.4s;				\
	rev64		b1.4s, b1.4s;				\
	ext		b0.16b, b0.16b, b0.16b, #8;		\
	ext		b1.16b, b1.16b, b1.16b, #8;		\
	/* byte-swap each 32-bit word of the results */		\
	rev32		b0.16b, b0.16b;				\
	rev32		b1.16b, b1.16b;
51
/*
 * SM4_CRYPT_BLK2(b0, b1) - SM4-process two blocks held in vector registers
 * b0 and b1, in place.
 *
 * Byte-swaps every 32-bit word of both inputs (rev32) and then expands
 * SM4_CRYPT_BLK2_BE(b0, b1).  Requires the round keys in v24-v31.
 */
#define SM4_CRYPT_BLK2(b0, b1)					\
	rev32		b0.16b, b0.16b;				\
	rev32		b1.16b, b1.16b;				\
	SM4_CRYPT_BLK2_BE(b0, b1);
56
/*
 * SM4_CRYPT_BLK4_BE(b0, b1, b2, b3) - run four independent 128-bit blocks
 * through SM4, in place.
 *
 * b0..b3 are distinct vector register names; each is both input and
 * output.  No byte swap is applied to the inputs (compare SM4_CRYPT_BLK4).
 * Requires v24-v31 to hold the round keys (see SM4_PREPARE).
 *
 * The blocks are interleaved per round-key vector so that back-to-back
 * sm4e instructions never depend on each other (presumably to hide sm4e
 * latency).
 */
#define SM4_CRYPT_BLK4_BE(b0, b1, b2, b3)			\
	/* rounds 0-3 (v24) */					\
	sm4e		b0.4s, v24.4s;				\
	sm4e		b1.4s, v24.4s;				\
	sm4e		b2.4s, v24.4s;				\
	sm4e		b3.4s, v24.4s;				\
	/* rounds 4-7 (v25) */					\
	sm4e		b0.4s, v25.4s;				\
	sm4e		b1.4s, v25.4s;				\
	sm4e		b2.4s, v25.4s;				\
	sm4e		b3.4s, v25.4s;				\
	/* rounds 8-11 (v26) */					\
	sm4e		b0.4s, v26.4s;				\
	sm4e		b1.4s, v26.4s;				\
	sm4e		b2.4s, v26.4s;				\
	sm4e		b3.4s, v26.4s;				\
	/* rounds 12-15 (v27) */				\
	sm4e		b0.4s, v27.4s;				\
	sm4e		b1.4s, v27.4s;				\
	sm4e		b2.4s, v27.4s;				\
	sm4e		b3.4s, v27.4s;				\
	/* rounds 16-19 (v28) */				\
	sm4e		b0.4s, v28.4s;				\
	sm4e		b1.4s, v28.4s;				\
	sm4e		b2.4s, v28.4s;				\
	sm4e		b3.4s, v28.4s;				\
	/* rounds 20-23 (v29) */				\
	sm4e		b0.4s, v29.4s;				\
	sm4e		b1.4s, v29.4s;				\
	sm4e		b2.4s, v29.4s;				\
	sm4e		b3.4s, v29.4s;				\
	/* rounds 24-27 (v30) */				\
	sm4e		b0.4s, v30.4s;				\
	sm4e		b1.4s, v30.4s;				\
	sm4e		b2.4s, v30.4s;				\
	sm4e		b3.4s, v30.4s;				\
	/* rounds 28-31 (v31) */				\
	sm4e		b0.4s, v31.4s;				\
	sm4e		b1.4s, v31.4s;				\
	sm4e		b2.4s, v31.4s;				\
	sm4e		b3.4s, v31.4s;				\
	/* rev64 + ext #8: reverse the order of the 4 words */	\
	rev64		b0.4s, b0.4s;				\
	rev64		b1.4s, b1.4s;				\
	rev64		b2.4s, b2.4s;				\
	rev64		b3.4s, b3.4s;				\
	ext		b0.16b, b0.16b, b0.16b, #8;		\
	ext		b1.16b, b1.16b, b1.16b, #8;		\
	ext		b2.16b, b2.16b, b2.16b, #8;		\
	ext		b3.16b, b3.16b, b3.16b, #8;		\
	/* byte-swap each 32-bit word of the results */		\
	rev32		b0.16b, b0.16b;				\
	rev32		b1.16b, b1.16b;				\
	rev32		b2.16b, b2.16b;				\
	rev32		b3.16b, b3.16b;
102
/*
 * SM4_CRYPT_BLK4(b0, b1, b2, b3) - SM4-process four blocks held in vector
 * registers b0..b3, in place.
 *
 * Byte-swaps every 32-bit word of all four inputs (rev32) and then expands
 * SM4_CRYPT_BLK4_BE(b0, b1, b2, b3).  Requires the round keys in v24-v31.
 */
#define SM4_CRYPT_BLK4(b0, b1, b2, b3)				\
	rev32		b0.16b, b0.16b;				\
	rev32		b1.16b, b1.16b;				\
	rev32		b2.16b, b2.16b;				\
	rev32		b3.16b, b3.16b;				\
	SM4_CRYPT_BLK4_BE(b0, b1, b2, b3);
109
/*
 * SM4_CRYPT_BLK8_BE(b0, ..., b7) - run eight independent 128-bit blocks
 * through SM4, in place.
 *
 * b0..b7 are distinct vector register names; each is both input and
 * output.  No byte swap is applied to the inputs (compare SM4_CRYPT_BLK8).
 * Requires v24-v31 to hold the round keys (see SM4_PREPARE).
 *
 * The blocks are interleaved per round-key vector so that back-to-back
 * sm4e instructions never depend on each other (presumably to hide sm4e
 * latency).
 */
#define SM4_CRYPT_BLK8_BE(b0, b1, b2, b3, b4, b5, b6, b7)	\
	/* rounds 0-3 (v24) */					\
	sm4e		b0.4s, v24.4s;				\
	sm4e		b1.4s, v24.4s;				\
	sm4e		b2.4s, v24.4s;				\
	sm4e		b3.4s, v24.4s;				\
	sm4e		b4.4s, v24.4s;				\
	sm4e		b5.4s, v24.4s;				\
	sm4e		b6.4s, v24.4s;				\
	sm4e		b7.4s, v24.4s;				\
	/* rounds 4-7 (v25) */					\
	sm4e		b0.4s, v25.4s;				\
	sm4e		b1.4s, v25.4s;				\
	sm4e		b2.4s, v25.4s;				\
	sm4e		b3.4s, v25.4s;				\
	sm4e		b4.4s, v25.4s;				\
	sm4e		b5.4s, v25.4s;				\
	sm4e		b6.4s, v25.4s;				\
	sm4e		b7.4s, v25.4s;				\
	/* rounds 8-11 (v26) */					\
	sm4e		b0.4s, v26.4s;				\
	sm4e		b1.4s, v26.4s;				\
	sm4e		b2.4s, v26.4s;				\
	sm4e		b3.4s, v26.4s;				\
	sm4e		b4.4s, v26.4s;				\
	sm4e		b5.4s, v26.4s;				\
	sm4e		b6.4s, v26.4s;				\
	sm4e		b7.4s, v26.4s;				\
	/* rounds 12-15 (v27) */				\
	sm4e		b0.4s, v27.4s;				\
	sm4e		b1.4s, v27.4s;				\
	sm4e		b2.4s, v27.4s;				\
	sm4e		b3.4s, v27.4s;				\
	sm4e		b4.4s, v27.4s;				\
	sm4e		b5.4s, v27.4s;				\
	sm4e		b6.4s, v27.4s;				\
	sm4e		b7.4s, v27.4s;				\
	/* rounds 16-19 (v28) */				\
	sm4e		b0.4s, v28.4s;				\
	sm4e		b1.4s, v28.4s;				\
	sm4e		b2.4s, v28.4s;				\
	sm4e		b3.4s, v28.4s;				\
	sm4e		b4.4s, v28.4s;				\
	sm4e		b5.4s, v28.4s;				\
	sm4e		b6.4s, v28.4s;				\
	sm4e		b7.4s, v28.4s;				\
	/* rounds 20-23 (v29) */				\
	sm4e		b0.4s, v29.4s;				\
	sm4e		b1.4s, v29.4s;				\
	sm4e		b2.4s, v29.4s;				\
	sm4e		b3.4s, v29.4s;				\
	sm4e		b4.4s, v29.4s;				\
	sm4e		b5.4s, v29.4s;				\
	sm4e		b6.4s, v29.4s;				\
	sm4e		b7.4s, v29.4s;				\
	/* rounds 24-27 (v30) */				\
	sm4e		b0.4s, v30.4s;				\
	sm4e		b1.4s, v30.4s;				\
	sm4e		b2.4s, v30.4s;				\
	sm4e		b3.4s, v30.4s;				\
	sm4e		b4.4s, v30.4s;				\
	sm4e		b5.4s, v30.4s;				\
	sm4e		b6.4s, v30.4s;				\
	sm4e		b7.4s, v30.4s;				\
	/* rounds 28-31 (v31) */				\
	sm4e		b0.4s, v31.4s;				\
	sm4e		b1.4s, v31.4s;				\
	sm4e		b2.4s, v31.4s;				\
	sm4e		b3.4s, v31.4s;				\
	sm4e		b4.4s, v31.4s;				\
	sm4e		b5.4s, v31.4s;				\
	sm4e		b6.4s, v31.4s;				\
	sm4e		b7.4s, v31.4s;				\
	/* rev64 + ext #8: reverse the order of the 4 words */	\
	rev64		b0.4s, b0.4s;				\
	rev64		b1.4s, b1.4s;				\
	rev64		b2.4s, b2.4s;				\
	rev64		b3.4s, b3.4s;				\
	rev64		b4.4s, b4.4s;				\
	rev64		b5.4s, b5.4s;				\
	rev64		b6.4s, b6.4s;				\
	rev64		b7.4s, b7.4s;				\
	ext		b0.16b, b0.16b, b0.16b, #8;		\
	ext		b1.16b, b1.16b, b1.16b, #8;		\
	ext		b2.16b, b2.16b, b2.16b, #8;		\
	ext		b3.16b, b3.16b, b3.16b, #8;		\
	ext		b4.16b, b4.16b, b4.16b, #8;		\
	ext		b5.16b, b5.16b, b5.16b, #8;		\
	ext		b6.16b, b6.16b, b6.16b, #8;		\
	ext		b7.16b, b7.16b, b7.16b, #8;		\
	/* byte-swap each 32-bit word of the results */		\
	rev32		b0.16b, b0.16b;				\
	rev32		b1.16b, b1.16b;				\
	rev32		b2.16b, b2.16b;				\
	rev32		b3.16b, b3.16b;				\
	rev32		b4.16b, b4.16b;				\
	rev32		b5.16b, b5.16b;				\
	rev32		b6.16b, b6.16b;				\
	rev32		b7.16b, b7.16b;
199
/*
 * SM4_CRYPT_BLK8(b0, ..., b7) - SM4-process eight blocks held in vector
 * registers b0..b7, in place.
 *
 * Byte-swaps every 32-bit word of all eight inputs (rev32) and then
 * expands SM4_CRYPT_BLK8_BE(b0, ..., b7).  Requires the round keys in
 * v24-v31.
 */
#define SM4_CRYPT_BLK8(b0, b1, b2, b3, b4, b5, b6, b7)		\
	rev32		b0.16b, b0.16b;				\
	rev32		b1.16b, b1.16b;				\
	rev32		b2.16b, b2.16b;				\
	rev32		b3.16b, b3.16b;				\
	rev32		b4.16b, b4.16b;				\
	rev32		b5.16b, b5.16b;				\
	rev32		b6.16b, b6.16b;				\
	rev32		b7.16b, b7.16b;				\
	SM4_CRYPT_BLK8_BE(b0, b1, b2, b3, b4, b5, b6, b7);
210

/* Source: linux/arch/arm64/crypto/sm4-ce-asm.h */