1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* |
3 | * PowerPC P10 (ppc64le) accelerated ChaCha and XChaCha stream ciphers, |
4 | * including ChaCha20 (RFC7539) |
5 | * |
6 | * Copyright 2023- IBM Corp. All rights reserved. |
7 | */ |
8 | |
9 | #include <crypto/algapi.h> |
10 | #include <crypto/internal/chacha.h> |
11 | #include <crypto/internal/simd.h> |
12 | #include <crypto/internal/skcipher.h> |
13 | #include <linux/kernel.h> |
14 | #include <linux/module.h> |
15 | #include <linux/cpufeature.h> |
16 | #include <linux/sizes.h> |
17 | #include <asm/simd.h> |
18 | #include <asm/switch_to.h> |
19 | |
20 | asmlinkage void chacha_p10le_8x(u32 *state, u8 *dst, const u8 *src, |
21 | unsigned int len, int nrounds); |
22 | |
23 | static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_p10); |
24 | |
/*
 * Open a region in which this file may run VSX code: preemption is switched
 * off first, then VSX is enabled for kernel use.  Each call in this file is
 * matched by a vsx_end().
 */
static void vsx_begin(void)
{
	preempt_disable();
	enable_kernel_vsx();
}
30 | |
/*
 * Close a region opened by vsx_begin(), undoing its two steps in reverse
 * order: kernel VSX use is disabled, then preemption is re-enabled.
 */
static void vsx_end(void)
{
	disable_kernel_vsx();
	preempt_enable();
}
36 | |
37 | static void chacha_p10_do_8x(u32 *state, u8 *dst, const u8 *src, |
38 | unsigned int bytes, int nrounds) |
39 | { |
40 | unsigned int l = bytes & ~0x0FF; |
41 | |
42 | if (l > 0) { |
43 | chacha_p10le_8x(state, dst, src, len: l, nrounds); |
44 | bytes -= l; |
45 | src += l; |
46 | dst += l; |
47 | state[12] += l / CHACHA_BLOCK_SIZE; |
48 | } |
49 | |
50 | if (bytes > 0) |
51 | chacha_crypt_generic(state, dst, src, bytes, nrounds); |
52 | } |
53 | |
54 | void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds) |
55 | { |
56 | hchacha_block_generic(state, out: stream, nrounds); |
57 | } |
58 | EXPORT_SYMBOL(hchacha_block_arch); |
59 | |
60 | void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv) |
61 | { |
62 | chacha_init_generic(state, key, iv); |
63 | } |
64 | EXPORT_SYMBOL(chacha_init_arch); |
65 | |
66 | void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes, |
67 | int nrounds) |
68 | { |
69 | if (!static_branch_likely(&have_p10) || bytes <= CHACHA_BLOCK_SIZE || |
70 | !crypto_simd_usable()) |
71 | return chacha_crypt_generic(state, dst, src, bytes, nrounds); |
72 | |
73 | do { |
74 | unsigned int todo = min_t(unsigned int, bytes, SZ_4K); |
75 | |
76 | vsx_begin(); |
77 | chacha_p10_do_8x(state, dst, src, bytes: todo, nrounds); |
78 | vsx_end(); |
79 | |
80 | bytes -= todo; |
81 | src += todo; |
82 | dst += todo; |
83 | } while (bytes); |
84 | } |
85 | EXPORT_SYMBOL(chacha_crypt_arch); |
86 | |
87 | static int chacha_p10_stream_xor(struct skcipher_request *req, |
88 | const struct chacha_ctx *ctx, const u8 *iv) |
89 | { |
90 | struct skcipher_walk walk; |
91 | u32 state[16]; |
92 | int err; |
93 | |
94 | err = skcipher_walk_virt(walk: &walk, req, atomic: false); |
95 | if (err) |
96 | return err; |
97 | |
98 | chacha_init_generic(state, key: ctx->key, iv); |
99 | |
100 | while (walk.nbytes > 0) { |
101 | unsigned int nbytes = walk.nbytes; |
102 | |
103 | if (nbytes < walk.total) |
104 | nbytes = rounddown(nbytes, walk.stride); |
105 | |
106 | if (!crypto_simd_usable()) { |
107 | chacha_crypt_generic(state, dst: walk.dst.virt.addr, |
108 | src: walk.src.virt.addr, bytes: nbytes, |
109 | nrounds: ctx->nrounds); |
110 | } else { |
111 | vsx_begin(); |
112 | chacha_p10_do_8x(state, dst: walk.dst.virt.addr, |
113 | src: walk.src.virt.addr, bytes: nbytes, nrounds: ctx->nrounds); |
114 | vsx_end(); |
115 | } |
116 | err = skcipher_walk_done(walk: &walk, err: walk.nbytes - nbytes); |
117 | if (err) |
118 | break; |
119 | } |
120 | |
121 | return err; |
122 | } |
123 | |
124 | static int chacha_p10(struct skcipher_request *req) |
125 | { |
126 | struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); |
127 | struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); |
128 | |
129 | return chacha_p10_stream_xor(req, ctx, iv: req->iv); |
130 | } |
131 | |
132 | static int xchacha_p10(struct skcipher_request *req) |
133 | { |
134 | struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); |
135 | struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); |
136 | struct chacha_ctx subctx; |
137 | u32 state[16]; |
138 | u8 real_iv[16]; |
139 | |
140 | chacha_init_generic(state, key: ctx->key, iv: req->iv); |
141 | hchacha_block_arch(state, subctx.key, ctx->nrounds); |
142 | subctx.nrounds = ctx->nrounds; |
143 | |
144 | memcpy(&real_iv[0], req->iv + 24, 8); |
145 | memcpy(&real_iv[8], req->iv + 16, 8); |
146 | return chacha_p10_stream_xor(req, ctx: &subctx, iv: real_iv); |
147 | } |
148 | |
149 | static struct skcipher_alg algs[] = { |
150 | { |
151 | .base.cra_name = "chacha20" , |
152 | .base.cra_driver_name = "chacha20-p10" , |
153 | .base.cra_priority = 300, |
154 | .base.cra_blocksize = 1, |
155 | .base.cra_ctxsize = sizeof(struct chacha_ctx), |
156 | .base.cra_module = THIS_MODULE, |
157 | |
158 | .min_keysize = CHACHA_KEY_SIZE, |
159 | .max_keysize = CHACHA_KEY_SIZE, |
160 | .ivsize = CHACHA_IV_SIZE, |
161 | .chunksize = CHACHA_BLOCK_SIZE, |
162 | .setkey = chacha20_setkey, |
163 | .encrypt = chacha_p10, |
164 | .decrypt = chacha_p10, |
165 | }, { |
166 | .base.cra_name = "xchacha20" , |
167 | .base.cra_driver_name = "xchacha20-p10" , |
168 | .base.cra_priority = 300, |
169 | .base.cra_blocksize = 1, |
170 | .base.cra_ctxsize = sizeof(struct chacha_ctx), |
171 | .base.cra_module = THIS_MODULE, |
172 | |
173 | .min_keysize = CHACHA_KEY_SIZE, |
174 | .max_keysize = CHACHA_KEY_SIZE, |
175 | .ivsize = XCHACHA_IV_SIZE, |
176 | .chunksize = CHACHA_BLOCK_SIZE, |
177 | .setkey = chacha20_setkey, |
178 | .encrypt = xchacha_p10, |
179 | .decrypt = xchacha_p10, |
180 | }, { |
181 | .base.cra_name = "xchacha12" , |
182 | .base.cra_driver_name = "xchacha12-p10" , |
183 | .base.cra_priority = 300, |
184 | .base.cra_blocksize = 1, |
185 | .base.cra_ctxsize = sizeof(struct chacha_ctx), |
186 | .base.cra_module = THIS_MODULE, |
187 | |
188 | .min_keysize = CHACHA_KEY_SIZE, |
189 | .max_keysize = CHACHA_KEY_SIZE, |
190 | .ivsize = XCHACHA_IV_SIZE, |
191 | .chunksize = CHACHA_BLOCK_SIZE, |
192 | .setkey = chacha12_setkey, |
193 | .encrypt = xchacha_p10, |
194 | .decrypt = xchacha_p10, |
195 | } |
196 | }; |
197 | |
198 | static int __init chacha_p10_init(void) |
199 | { |
200 | if (!cpu_has_feature(CPU_FTR_ARCH_31)) |
201 | return 0; |
202 | |
203 | static_branch_enable(&have_p10); |
204 | |
205 | return crypto_register_skciphers(algs, ARRAY_SIZE(algs)); |
206 | } |
207 | |
208 | static void __exit chacha_p10_exit(void) |
209 | { |
210 | if (!static_branch_likely(&have_p10)) |
211 | return; |
212 | |
213 | crypto_unregister_skciphers(algs, ARRAY_SIZE(algs)); |
214 | } |
215 | |
216 | module_init(chacha_p10_init); |
217 | module_exit(chacha_p10_exit); |
218 | |
219 | MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (P10 accelerated)" ); |
220 | MODULE_AUTHOR("Danny Tsen <dtsen@linux.ibm.com>" ); |
221 | MODULE_LICENSE("GPL v2" ); |
222 | MODULE_ALIAS_CRYPTO("chacha20" ); |
223 | MODULE_ALIAS_CRYPTO("chacha20-p10" ); |
224 | MODULE_ALIAS_CRYPTO("xchacha20" ); |
225 | MODULE_ALIAS_CRYPTO("xchacha20-p10" ); |
226 | MODULE_ALIAS_CRYPTO("xchacha12" ); |
227 | MODULE_ALIAS_CRYPTO("xchacha12-p10" ); |
228 | |