/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * SM4 Cipher Algorithm, AES-NI/AVX optimized,
 * as specified in
 * https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html
 *
 * Copyright (c) 2021, Alibaba Group.
 * Copyright (c) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
 */

#include <linux/module.h>
#include <linux/crypto.h>
#include <linux/kernel.h>
#include <asm/simd.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
#include <crypto/sm4.h>
#include "sm4-avx.h"

#define SM4_CRYPT8_BLOCK_SIZE	(SM4_BLOCK_SIZE * 8)

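/*
 * Bulk SM4 primitives implemented in assembly using AES-NI and AVX:
 * 4-way and 8-way block encryption with the given round keys, 8-way
 * CTR keystream generation, and 8-way CBC decryption.
 */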
asmlinkage void sm4_aesni_avx_crypt4(const u32 *rk, u8 *dst,
				     const u8 *src, int nblocks);
asmlinkage void sm4_aesni_avx_crypt8(const u32 *rk, u8 *dst,
				     const u8 *src, int nblocks);
asmlinkage void sm4_aesni_avx_ctr_enc_blk8(const u32 *rk, u8 *dst,
					   const u8 *src, u8 *iv);
asmlinkage void sm4_aesni_avx_cbc_dec_blk8(const u32 *rk, u8 *dst,
					   const u8 *src, u8 *iv);

static int sm4_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key,
			       unsigned int key_len)
{
	struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);

	return sm4_expandkey(ctx, key, key_len);
}

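/*
 * ECB helper: process all full blocks of the request with the given round
 * keys, eight blocks per call while possible and up to four at a time for
 * the remainder, inside a kernel_fpu_begin()/kernel_fpu_end() section.
 */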
static int ecb_do_crypt(struct skcipher_request *req, const u32 *rkey)
{
	struct skcipher_walk walk;
	unsigned int nbytes;
	int err;

	err = skcipher_walk_virt(&walk, req, false);

	while ((nbytes = walk.nbytes) > 0) {
		const u8 *src = walk.src.virt.addr;
		u8 *dst = walk.dst.virt.addr;

		kernel_fpu_begin();
		while (nbytes >= SM4_CRYPT8_BLOCK_SIZE) {
			sm4_aesni_avx_crypt8(rkey, dst, src, 8);
			dst += SM4_CRYPT8_BLOCK_SIZE;
			src += SM4_CRYPT8_BLOCK_SIZE;
			nbytes -= SM4_CRYPT8_BLOCK_SIZE;
		}
		while (nbytes >= SM4_BLOCK_SIZE) {
			unsigned int nblocks = min(nbytes >> 4, 4u);
			sm4_aesni_avx_crypt4(rkey, dst, src, nblocks);
			dst += nblocks * SM4_BLOCK_SIZE;
			src += nblocks * SM4_BLOCK_SIZE;
			nbytes -= nblocks * SM4_BLOCK_SIZE;
		}
		kernel_fpu_end();

		err = skcipher_walk_done(&walk, nbytes);
	}

	return err;
}

int sm4_avx_ecb_encrypt(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);

	return ecb_do_crypt(req, ctx->rkey_enc);
}
EXPORT_SYMBOL_GPL(sm4_avx_ecb_encrypt);

int sm4_avx_ecb_decrypt(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);

	return ecb_do_crypt(req, ctx->rkey_dec);
}
EXPORT_SYMBOL_GPL(sm4_avx_ecb_decrypt);

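/*
 * CBC encryption is inherently serial (each block depends on the previous
 * ciphertext block), so it uses the generic sm4_crypt_block() without SIMD.
 */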
int sm4_cbc_encrypt(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct skcipher_walk walk;
	unsigned int nbytes;
	int err;

	err = skcipher_walk_virt(&walk, req, false);

	while ((nbytes = walk.nbytes) > 0) {
		const u8 *iv = walk.iv;
		const u8 *src = walk.src.virt.addr;
		u8 *dst = walk.dst.virt.addr;

		while (nbytes >= SM4_BLOCK_SIZE) {
			crypto_xor_cpy(dst, src, iv, SM4_BLOCK_SIZE);
			sm4_crypt_block(ctx->rkey_enc, dst, dst);
			iv = dst;
			src += SM4_BLOCK_SIZE;
			dst += SM4_BLOCK_SIZE;
			nbytes -= SM4_BLOCK_SIZE;
		}
		if (iv != walk.iv)
			memcpy(walk.iv, iv, SM4_BLOCK_SIZE);

		err = skcipher_walk_done(&walk, nbytes);
	}

	return err;
}
EXPORT_SYMBOL_GPL(sm4_cbc_encrypt);

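/*
 * CBC decryption: full bsize chunks are handled by the supplied SIMD
 * helper; any remaining blocks are decrypted into a temporary buffer and
 * XORed with the preceding ciphertext blocks in reverse order, so that
 * in-place requests (dst == src) still see the original ciphertext.
 */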
int sm4_avx_cbc_decrypt(struct skcipher_request *req,
			unsigned int bsize, sm4_crypt_func func)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct skcipher_walk walk;
	unsigned int nbytes;
	int err;

	err = skcipher_walk_virt(&walk, req, false);

	while ((nbytes = walk.nbytes) > 0) {
		const u8 *src = walk.src.virt.addr;
		u8 *dst = walk.dst.virt.addr;

		kernel_fpu_begin();

		while (nbytes >= bsize) {
			func(ctx->rkey_dec, dst, src, walk.iv);
			dst += bsize;
			src += bsize;
			nbytes -= bsize;
		}

		while (nbytes >= SM4_BLOCK_SIZE) {
			u8 keystream[SM4_BLOCK_SIZE * 8];
			u8 iv[SM4_BLOCK_SIZE];
			unsigned int nblocks = min(nbytes >> 4, 8u);
			int i;

			sm4_aesni_avx_crypt8(ctx->rkey_dec, keystream,
					     src, nblocks);

			/*
			 * Save the last ciphertext block to become the next
			 * IV before an in-place operation can overwrite it.
			 */
			src += ((int)nblocks - 2) * SM4_BLOCK_SIZE;
			dst += (nblocks - 1) * SM4_BLOCK_SIZE;
			memcpy(iv, src + SM4_BLOCK_SIZE, SM4_BLOCK_SIZE);

			for (i = nblocks - 1; i > 0; i--) {
				crypto_xor_cpy(dst, src,
					       &keystream[i * SM4_BLOCK_SIZE],
					       SM4_BLOCK_SIZE);
				src -= SM4_BLOCK_SIZE;
				dst -= SM4_BLOCK_SIZE;
			}
			crypto_xor_cpy(dst, walk.iv, keystream, SM4_BLOCK_SIZE);
			memcpy(walk.iv, iv, SM4_BLOCK_SIZE);
			dst += nblocks * SM4_BLOCK_SIZE;
			src += (nblocks + 1) * SM4_BLOCK_SIZE;
			nbytes -= nblocks * SM4_BLOCK_SIZE;
		}

		kernel_fpu_end();
		err = skcipher_walk_done(&walk, nbytes);
	}

	return err;
}
EXPORT_SYMBOL_GPL(sm4_avx_cbc_decrypt);

static int cbc_decrypt(struct skcipher_request *req)
{
	return sm4_avx_cbc_decrypt(req, SM4_CRYPT8_BLOCK_SIZE,
				   sm4_aesni_avx_cbc_dec_blk8);
}

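/*
 * CTR mode: full bsize chunks are handled by the supplied SIMD helper;
 * remaining full blocks are processed by expanding the counter into a
 * keystream buffer, encrypting it in one pass, and XORing it with the
 * source. A final partial block is handled with a single block encryption.
 */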
int sm4_avx_ctr_crypt(struct skcipher_request *req,
		      unsigned int bsize, sm4_crypt_func func)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct skcipher_walk walk;
	unsigned int nbytes;
	int err;

	err = skcipher_walk_virt(&walk, req, false);

	while ((nbytes = walk.nbytes) > 0) {
		const u8 *src = walk.src.virt.addr;
		u8 *dst = walk.dst.virt.addr;

		kernel_fpu_begin();

		while (nbytes >= bsize) {
			func(ctx->rkey_enc, dst, src, walk.iv);
			dst += bsize;
			src += bsize;
			nbytes -= bsize;
		}

		while (nbytes >= SM4_BLOCK_SIZE) {
			u8 keystream[SM4_BLOCK_SIZE * 8];
			unsigned int nblocks = min(nbytes >> 4, 8u);
			int i;

			for (i = 0; i < nblocks; i++) {
				memcpy(&keystream[i * SM4_BLOCK_SIZE],
				       walk.iv, SM4_BLOCK_SIZE);
				crypto_inc(walk.iv, SM4_BLOCK_SIZE);
			}
			sm4_aesni_avx_crypt8(ctx->rkey_enc, keystream,
					     keystream, nblocks);

			crypto_xor_cpy(dst, src, keystream,
				       nblocks * SM4_BLOCK_SIZE);
			dst += nblocks * SM4_BLOCK_SIZE;
			src += nblocks * SM4_BLOCK_SIZE;
			nbytes -= nblocks * SM4_BLOCK_SIZE;
		}

		kernel_fpu_end();

		/* tail */
		if (walk.nbytes == walk.total && nbytes > 0) {
			u8 keystream[SM4_BLOCK_SIZE];

			memcpy(keystream, walk.iv, SM4_BLOCK_SIZE);
			crypto_inc(walk.iv, SM4_BLOCK_SIZE);

			sm4_crypt_block(ctx->rkey_enc, keystream, keystream);

			crypto_xor_cpy(dst, src, keystream, nbytes);
			dst += nbytes;
			src += nbytes;
			nbytes = 0;
		}

		err = skcipher_walk_done(&walk, nbytes);
	}

	return err;
}
EXPORT_SYMBOL_GPL(sm4_avx_ctr_crypt);

static int ctr_crypt(struct skcipher_request *req)
{
	return sm4_avx_ctr_crypt(req, SM4_CRYPT8_BLOCK_SIZE,
				 sm4_aesni_avx_ctr_enc_blk8);
}

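/*
 * The algorithms are registered as internal (CRYPTO_ALG_INTERNAL) and are
 * exposed to users through the simd helper wrappers registered below.
 */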
static struct skcipher_alg sm4_aesni_avx_skciphers[] = {
	{
		.base = {
			.cra_name		= "__ecb(sm4)",
			.cra_driver_name	= "__ecb-sm4-aesni-avx",
			.cra_priority		= 400,
			.cra_flags		= CRYPTO_ALG_INTERNAL,
			.cra_blocksize		= SM4_BLOCK_SIZE,
			.cra_ctxsize		= sizeof(struct sm4_ctx),
			.cra_module		= THIS_MODULE,
		},
		.min_keysize	= SM4_KEY_SIZE,
		.max_keysize	= SM4_KEY_SIZE,
		.walksize	= 8 * SM4_BLOCK_SIZE,
		.setkey		= sm4_skcipher_setkey,
		.encrypt	= sm4_avx_ecb_encrypt,
		.decrypt	= sm4_avx_ecb_decrypt,
	}, {
		.base = {
			.cra_name		= "__cbc(sm4)",
			.cra_driver_name	= "__cbc-sm4-aesni-avx",
			.cra_priority		= 400,
			.cra_flags		= CRYPTO_ALG_INTERNAL,
			.cra_blocksize		= SM4_BLOCK_SIZE,
			.cra_ctxsize		= sizeof(struct sm4_ctx),
			.cra_module		= THIS_MODULE,
		},
		.min_keysize	= SM4_KEY_SIZE,
		.max_keysize	= SM4_KEY_SIZE,
		.ivsize		= SM4_BLOCK_SIZE,
		.walksize	= 8 * SM4_BLOCK_SIZE,
		.setkey		= sm4_skcipher_setkey,
		.encrypt	= sm4_cbc_encrypt,
		.decrypt	= cbc_decrypt,
	}, {
		.base = {
			.cra_name		= "__ctr(sm4)",
			.cra_driver_name	= "__ctr-sm4-aesni-avx",
			.cra_priority		= 400,
			.cra_flags		= CRYPTO_ALG_INTERNAL,
			.cra_blocksize		= 1,
			.cra_ctxsize		= sizeof(struct sm4_ctx),
			.cra_module		= THIS_MODULE,
		},
		.min_keysize	= SM4_KEY_SIZE,
		.max_keysize	= SM4_KEY_SIZE,
		.ivsize		= SM4_BLOCK_SIZE,
		.chunksize	= SM4_BLOCK_SIZE,
		.walksize	= 8 * SM4_BLOCK_SIZE,
		.setkey		= sm4_skcipher_setkey,
		.encrypt	= ctr_crypt,
		.decrypt	= ctr_crypt,
	}
};

static struct simd_skcipher_alg *
simd_sm4_aesni_avx_skciphers[ARRAY_SIZE(sm4_aesni_avx_skciphers)];

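/* Register only if the CPU provides AES-NI, AVX and OS-managed YMM state. */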
static int __init sm4_init(void)
{
	const char *feature_name;

	if (!boot_cpu_has(X86_FEATURE_AVX) ||
	    !boot_cpu_has(X86_FEATURE_AES) ||
	    !boot_cpu_has(X86_FEATURE_OSXSAVE)) {
		pr_info("AVX or AES-NI instructions are not detected.\n");
		return -ENODEV;
	}

	if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
			       &feature_name)) {
		pr_info("CPU feature '%s' is not supported.\n", feature_name);
		return -ENODEV;
	}

	return simd_register_skciphers_compat(sm4_aesni_avx_skciphers,
					      ARRAY_SIZE(sm4_aesni_avx_skciphers),
					      simd_sm4_aesni_avx_skciphers);
}

static void __exit sm4_exit(void)
{
	simd_unregister_skciphers(sm4_aesni_avx_skciphers,
				  ARRAY_SIZE(sm4_aesni_avx_skciphers),
				  simd_sm4_aesni_avx_skciphers);
}

module_init(sm4_init);
module_exit(sm4_exit);

MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Tianjia Zhang <tianjia.zhang@linux.alibaba.com>");
MODULE_DESCRIPTION("SM4 Cipher Algorithm, AES-NI/AVX optimized");
MODULE_ALIAS_CRYPTO("sm4");
MODULE_ALIAS_CRYPTO("sm4-aesni-avx");
358 | |